1 // Copyright (c) Facebook, Inc. and its affiliates. 2 // All rights reserved. 3 // 4 // Copyright 2019 Google LLC 5 // 6 // This source code is licensed under the BSD-style license found in the 7 // LICENSE file in the root directory of this source tree. 8 // 9 // Auto-generated file. Do not edit! 10 // Specification: test/qs8-gemm-minmax-rndnu.yaml 11 // Generator: tools/generate-gemm-test.py 12 13 14 #include <gtest/gtest.h> 15 16 #include <xnnpack/allocator.h> 17 #include <xnnpack/common.h> 18 #include <xnnpack/isa-checks.h> 19 #include <xnnpack/microparams-init.h> 20 21 #include <xnnpack/gemm.h> 22 #include <xnnpack/igemm.h> 23 #include <xnnpack/ppmm.h> 24 #include "gemm-microkernel-tester.h" 25 26 27 #if XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY TEST(QS8_GEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7,k_eq_8)28 TEST(QS8_GEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, k_eq_8) { 29 TEST_REQUIRES_ARM_NEON; 30 GemmMicrokernelTester() 31 .mr(1) 32 .nr(8) 33 .kr(1) 34 .sr(1) 35 .m(1) 36 .n(8) 37 .k(8) 38 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 39 } 40 TEST(QS8_GEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7,strided_cn)41 TEST(QS8_GEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, strided_cn) { 42 TEST_REQUIRES_ARM_NEON; 43 GemmMicrokernelTester() 44 .mr(1) 45 .nr(8) 46 .kr(1) 47 .sr(1) 48 .m(1) 49 .n(8) 50 .k(8) 51 .cn_stride(11) 52 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 53 } 54 TEST(QS8_GEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7,k_eq_8_strided_a)55 TEST(QS8_GEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, k_eq_8_strided_a) { 56 TEST_REQUIRES_ARM_NEON; 57 GemmMicrokernelTester() 58 .mr(1) 59 .nr(8) 60 .kr(1) 61 .sr(1) 62 .m(1) 63 .n(8) 64 .k(8) 65 .a_stride(11) 66 
.Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 67 } 68 TEST(QS8_GEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7,k_eq_8_subtile)69 TEST(QS8_GEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, k_eq_8_subtile) { 70 TEST_REQUIRES_ARM_NEON; 71 for (uint32_t n = 1; n <= 8; n++) { 72 for (uint32_t m = 1; m <= 1; m++) { 73 GemmMicrokernelTester() 74 .mr(1) 75 .nr(8) 76 .kr(1) 77 .sr(1) 78 .m(m) 79 .n(n) 80 .k(8) 81 .iterations(1) 82 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 83 } 84 } 85 } 86 TEST(QS8_GEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7,k_eq_8_subtile_m)87 TEST(QS8_GEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, k_eq_8_subtile_m) { 88 TEST_REQUIRES_ARM_NEON; 89 for (uint32_t m = 1; m <= 1; m++) { 90 GemmMicrokernelTester() 91 .mr(1) 92 .nr(8) 93 .kr(1) 94 .sr(1) 95 .m(m) 96 .n(8) 97 .k(8) 98 .iterations(1) 99 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 100 } 101 } 102 TEST(QS8_GEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7,k_eq_8_subtile_n)103 TEST(QS8_GEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, k_eq_8_subtile_n) { 104 TEST_REQUIRES_ARM_NEON; 105 for (uint32_t n = 1; n <= 8; n++) { 106 GemmMicrokernelTester() 107 .mr(1) 108 .nr(8) 109 .kr(1) 110 .sr(1) 111 .m(1) 112 .n(n) 113 .k(8) 114 .iterations(1) 115 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 116 } 117 } 118 TEST(QS8_GEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7,k_lt_8)119 TEST(QS8_GEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, k_lt_8) { 120 TEST_REQUIRES_ARM_NEON; 121 for (size_t k = 1; k < 8; k++) { 122 
GemmMicrokernelTester() 123 .mr(1) 124 .nr(8) 125 .kr(1) 126 .sr(1) 127 .m(1) 128 .n(8) 129 .k(k) 130 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 131 } 132 } 133 TEST(QS8_GEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7,k_lt_8_strided_a)134 TEST(QS8_GEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, k_lt_8_strided_a) { 135 TEST_REQUIRES_ARM_NEON; 136 for (size_t k = 1; k < 8; k++) { 137 GemmMicrokernelTester() 138 .mr(1) 139 .nr(8) 140 .kr(1) 141 .sr(1) 142 .m(1) 143 .n(8) 144 .k(k) 145 .a_stride(11) 146 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 147 } 148 } 149 TEST(QS8_GEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7,k_lt_8_subtile)150 TEST(QS8_GEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, k_lt_8_subtile) { 151 TEST_REQUIRES_ARM_NEON; 152 for (size_t k = 1; k < 8; k++) { 153 for (uint32_t n = 1; n <= 8; n++) { 154 for (uint32_t m = 1; m <= 1; m++) { 155 GemmMicrokernelTester() 156 .mr(1) 157 .nr(8) 158 .kr(1) 159 .sr(1) 160 .m(m) 161 .n(n) 162 .k(k) 163 .iterations(1) 164 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 165 } 166 } 167 } 168 } 169 TEST(QS8_GEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7,k_gt_8)170 TEST(QS8_GEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, k_gt_8) { 171 TEST_REQUIRES_ARM_NEON; 172 for (size_t k = 9; k < 16; k++) { 173 GemmMicrokernelTester() 174 .mr(1) 175 .nr(8) 176 .kr(1) 177 .sr(1) 178 .m(1) 179 .n(8) 180 .k(k) 181 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 182 } 183 } 184 TEST(QS8_GEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7,k_gt_8_strided_a)185 
// Full 1x8 tile with k in [9, 15] and a_stride(19).
TEST(QS8_GEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, k_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(1)
      .n(8)
      .k(k)
      .a_stride(19)
      .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// k in [9, 15] for every n in [1, 8] and m in [1, 1]; one iteration per shape.
TEST(QS8_GEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// Full 1x8 tile with k = 16, 24, ..., 80.
TEST(QS8_GEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(1)
      .n(8)
      .k(k)
      .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// Full 1x8 tile with k = 16, 24, ..., 80 and a_stride(83).
TEST(QS8_GEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, k_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(1)
      .n(8)
      .k(k)
      .a_stride(83)
      .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// k = 16, 24, ..., 80 for every n in [1, 8] and m in [1, 1]; one iteration per shape.
TEST(QS8_GEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// m = 1 with n in [9, 15] and k = 1, 10, ..., 37.
TEST(QS8_GEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// m = 1 with n in [9, 15], k = 1, 10, ..., 37 and cn_stride(11).
TEST(QS8_GEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

TEST(QS8_GEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, n_gt_8_strided_a) { 308 TEST_REQUIRES_ARM_NEON; 309 for (uint32_t n = 9; n < 16; n++) { 310 for (size_t k = 1; k <= 40; k += 9) { 311 GemmMicrokernelTester() 312 .mr(1) 313 .nr(8) 314 .kr(1) 315 .sr(1) 316 .m(1) 317 .n(n) 318 .k(k) 319 .a_stride(43) 320 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 321 } 322 } 323 } 324 TEST(QS8_GEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7,n_gt_8_subtile)325 TEST(QS8_GEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, n_gt_8_subtile) { 326 TEST_REQUIRES_ARM_NEON; 327 for (uint32_t n = 9; n < 16; n++) { 328 for (size_t k = 1; k <= 40; k += 9) { 329 for (uint32_t m = 1; m <= 1; m++) { 330 GemmMicrokernelTester() 331 .mr(1) 332 .nr(8) 333 .kr(1) 334 .sr(1) 335 .m(m) 336 .n(n) 337 .k(k) 338 .iterations(1) 339 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 340 } 341 } 342 } 343 } 344 TEST(QS8_GEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7,n_div_8)345 TEST(QS8_GEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, n_div_8) { 346 TEST_REQUIRES_ARM_NEON; 347 for (uint32_t n = 16; n <= 24; n += 8) { 348 for (size_t k = 1; k <= 40; k += 9) { 349 GemmMicrokernelTester() 350 .mr(1) 351 .nr(8) 352 .kr(1) 353 .sr(1) 354 .m(1) 355 .n(n) 356 .k(k) 357 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 358 } 359 } 360 } 361 TEST(QS8_GEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7,n_div_8_strided_cn)362 TEST(QS8_GEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, n_div_8_strided_cn) { 363 TEST_REQUIRES_ARM_NEON; 364 for (uint32_t n = 16; n <= 24; n += 8) { 365 for (size_t k = 1; k <= 40; k += 9) { 366 GemmMicrokernelTester() 367 .mr(1) 368 
.nr(8) 369 .kr(1) 370 .sr(1) 371 .m(1) 372 .n(n) 373 .k(k) 374 .cn_stride(11) 375 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 376 } 377 } 378 } 379 TEST(QS8_GEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7,n_div_8_strided_a)380 TEST(QS8_GEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, n_div_8_strided_a) { 381 TEST_REQUIRES_ARM_NEON; 382 for (uint32_t n = 16; n <= 24; n += 8) { 383 for (size_t k = 1; k <= 40; k += 9) { 384 GemmMicrokernelTester() 385 .mr(1) 386 .nr(8) 387 .kr(1) 388 .sr(1) 389 .m(1) 390 .n(n) 391 .k(k) 392 .a_stride(43) 393 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 394 } 395 } 396 } 397 TEST(QS8_GEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7,n_div_8_subtile)398 TEST(QS8_GEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, n_div_8_subtile) { 399 TEST_REQUIRES_ARM_NEON; 400 for (uint32_t n = 16; n <= 24; n += 8) { 401 for (size_t k = 1; k <= 40; k += 9) { 402 for (uint32_t m = 1; m <= 1; m++) { 403 GemmMicrokernelTester() 404 .mr(1) 405 .nr(8) 406 .kr(1) 407 .sr(1) 408 .m(m) 409 .n(n) 410 .k(k) 411 .iterations(1) 412 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 413 } 414 } 415 } 416 } 417 TEST(QS8_GEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7,strided_cm_subtile)418 TEST(QS8_GEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, strided_cm_subtile) { 419 TEST_REQUIRES_ARM_NEON; 420 for (size_t k = 1; k <= 40; k += 9) { 421 for (uint32_t n = 1; n <= 8; n++) { 422 for (uint32_t m = 1; m <= 1; m++) { 423 GemmMicrokernelTester() 424 .mr(1) 425 .nr(8) 426 .kr(1) 427 .sr(1) 428 .m(m) 429 .n(n) 430 .k(k) 431 .cm_stride(11) 432 .iterations(1) 433 
.Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 434 } 435 } 436 } 437 } 438 TEST(QS8_GEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7,qmin)439 TEST(QS8_GEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, qmin) { 440 TEST_REQUIRES_ARM_NEON; 441 GemmMicrokernelTester() 442 .mr(1) 443 .nr(8) 444 .kr(1) 445 .sr(1) 446 .m(1) 447 .n(8) 448 .k(8) 449 .qmin(128) 450 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 451 } 452 TEST(QS8_GEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7,qmax)453 TEST(QS8_GEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, qmax) { 454 TEST_REQUIRES_ARM_NEON; 455 GemmMicrokernelTester() 456 .mr(1) 457 .nr(8) 458 .kr(1) 459 .sr(1) 460 .m(1) 461 .n(8) 462 .k(8) 463 .qmax(128) 464 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 465 } 466 TEST(QS8_GEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7,strided_cm)467 TEST(QS8_GEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, strided_cm) { 468 TEST_REQUIRES_ARM_NEON; 469 GemmMicrokernelTester() 470 .mr(1) 471 .nr(8) 472 .kr(1) 473 .sr(1) 474 .m(1) 475 .n(8) 476 .k(8) 477 .cm_stride(11) 478 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 479 } 480 #endif // XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY 481 482 483 #if XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64,k_eq_8)484 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, k_eq_8) { 485 TEST_REQUIRES_ARM_NEON; 486 GemmMicrokernelTester() 487 .mr(4) 488 .nr(8) 489 .kr(1) 490 .sr(1) 491 .m(4) 492 .n(8) 493 .k(8) 494 
.Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 495 } 496 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64,strided_cn)497 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, strided_cn) { 498 TEST_REQUIRES_ARM_NEON; 499 GemmMicrokernelTester() 500 .mr(4) 501 .nr(8) 502 .kr(1) 503 .sr(1) 504 .m(4) 505 .n(8) 506 .k(8) 507 .cn_stride(11) 508 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 509 } 510 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64,k_eq_8_strided_a)511 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, k_eq_8_strided_a) { 512 TEST_REQUIRES_ARM_NEON; 513 GemmMicrokernelTester() 514 .mr(4) 515 .nr(8) 516 .kr(1) 517 .sr(1) 518 .m(4) 519 .n(8) 520 .k(8) 521 .a_stride(11) 522 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 523 } 524 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64,k_eq_8_subtile)525 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, k_eq_8_subtile) { 526 TEST_REQUIRES_ARM_NEON; 527 for (uint32_t n = 1; n <= 8; n++) { 528 for (uint32_t m = 1; m <= 4; m++) { 529 GemmMicrokernelTester() 530 .mr(4) 531 .nr(8) 532 .kr(1) 533 .sr(1) 534 .m(m) 535 .n(n) 536 .k(8) 537 .iterations(1) 538 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 539 } 540 } 541 } 542 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64,k_eq_8_subtile_m)543 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, k_eq_8_subtile_m) { 544 TEST_REQUIRES_ARM_NEON; 545 for (uint32_t m = 1; m <= 4; m++) { 546 GemmMicrokernelTester() 547 .mr(4) 548 .nr(8) 549 .kr(1) 550 
.sr(1) 551 .m(m) 552 .n(8) 553 .k(8) 554 .iterations(1) 555 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 556 } 557 } 558 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64,k_eq_8_subtile_n)559 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, k_eq_8_subtile_n) { 560 TEST_REQUIRES_ARM_NEON; 561 for (uint32_t n = 1; n <= 8; n++) { 562 GemmMicrokernelTester() 563 .mr(4) 564 .nr(8) 565 .kr(1) 566 .sr(1) 567 .m(4) 568 .n(n) 569 .k(8) 570 .iterations(1) 571 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 572 } 573 } 574 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64,k_lt_8)575 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, k_lt_8) { 576 TEST_REQUIRES_ARM_NEON; 577 for (size_t k = 1; k < 8; k++) { 578 GemmMicrokernelTester() 579 .mr(4) 580 .nr(8) 581 .kr(1) 582 .sr(1) 583 .m(4) 584 .n(8) 585 .k(k) 586 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 587 } 588 } 589 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64,k_lt_8_strided_a)590 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, k_lt_8_strided_a) { 591 TEST_REQUIRES_ARM_NEON; 592 for (size_t k = 1; k < 8; k++) { 593 GemmMicrokernelTester() 594 .mr(4) 595 .nr(8) 596 .kr(1) 597 .sr(1) 598 .m(4) 599 .n(8) 600 .k(k) 601 .a_stride(11) 602 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 603 } 604 } 605 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64,k_lt_8_subtile)606 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, k_lt_8_subtile) { 607 TEST_REQUIRES_ARM_NEON; 608 for (size_t k = 1; k < 8; 
k++) { 609 for (uint32_t n = 1; n <= 8; n++) { 610 for (uint32_t m = 1; m <= 4; m++) { 611 GemmMicrokernelTester() 612 .mr(4) 613 .nr(8) 614 .kr(1) 615 .sr(1) 616 .m(m) 617 .n(n) 618 .k(k) 619 .iterations(1) 620 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 621 } 622 } 623 } 624 } 625 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64,k_gt_8)626 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, k_gt_8) { 627 TEST_REQUIRES_ARM_NEON; 628 for (size_t k = 9; k < 16; k++) { 629 GemmMicrokernelTester() 630 .mr(4) 631 .nr(8) 632 .kr(1) 633 .sr(1) 634 .m(4) 635 .n(8) 636 .k(k) 637 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 638 } 639 } 640 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64,k_gt_8_strided_a)641 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, k_gt_8_strided_a) { 642 TEST_REQUIRES_ARM_NEON; 643 for (size_t k = 9; k < 16; k++) { 644 GemmMicrokernelTester() 645 .mr(4) 646 .nr(8) 647 .kr(1) 648 .sr(1) 649 .m(4) 650 .n(8) 651 .k(k) 652 .a_stride(19) 653 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 654 } 655 } 656 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64,k_gt_8_subtile)657 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, k_gt_8_subtile) { 658 TEST_REQUIRES_ARM_NEON; 659 for (size_t k = 9; k < 16; k++) { 660 for (uint32_t n = 1; n <= 8; n++) { 661 for (uint32_t m = 1; m <= 4; m++) { 662 GemmMicrokernelTester() 663 .mr(4) 664 .nr(8) 665 .kr(1) 666 .sr(1) 667 .m(m) 668 .n(n) 669 .k(k) 670 .iterations(1) 671 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 
672 } 673 } 674 } 675 } 676 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64,k_div_8)677 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, k_div_8) { 678 TEST_REQUIRES_ARM_NEON; 679 for (size_t k = 16; k <= 80; k += 8) { 680 GemmMicrokernelTester() 681 .mr(4) 682 .nr(8) 683 .kr(1) 684 .sr(1) 685 .m(4) 686 .n(8) 687 .k(k) 688 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 689 } 690 } 691 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64,k_div_8_strided_a)692 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, k_div_8_strided_a) { 693 TEST_REQUIRES_ARM_NEON; 694 for (size_t k = 16; k <= 80; k += 8) { 695 GemmMicrokernelTester() 696 .mr(4) 697 .nr(8) 698 .kr(1) 699 .sr(1) 700 .m(4) 701 .n(8) 702 .k(k) 703 .a_stride(83) 704 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 705 } 706 } 707 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64,k_div_8_subtile)708 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, k_div_8_subtile) { 709 TEST_REQUIRES_ARM_NEON; 710 for (size_t k = 16; k <= 80; k += 8) { 711 for (uint32_t n = 1; n <= 8; n++) { 712 for (uint32_t m = 1; m <= 4; m++) { 713 GemmMicrokernelTester() 714 .mr(4) 715 .nr(8) 716 .kr(1) 717 .sr(1) 718 .m(m) 719 .n(n) 720 .k(k) 721 .iterations(1) 722 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 723 } 724 } 725 } 726 } 727 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64,n_gt_8)728 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, n_gt_8) { 729 TEST_REQUIRES_ARM_NEON; 730 for (uint32_t n = 9; n < 16; n++) { 731 for (size_t k = 1; k <= 40; k += 9) { 732 GemmMicrokernelTester() 733 .mr(4) 734 .nr(8) 735 
.kr(1) 736 .sr(1) 737 .m(4) 738 .n(n) 739 .k(k) 740 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 741 } 742 } 743 } 744 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64,n_gt_8_strided_cn)745 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, n_gt_8_strided_cn) { 746 TEST_REQUIRES_ARM_NEON; 747 for (uint32_t n = 9; n < 16; n++) { 748 for (size_t k = 1; k <= 40; k += 9) { 749 GemmMicrokernelTester() 750 .mr(4) 751 .nr(8) 752 .kr(1) 753 .sr(1) 754 .m(4) 755 .n(n) 756 .k(k) 757 .cn_stride(11) 758 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 759 } 760 } 761 } 762 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64,n_gt_8_strided_a)763 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, n_gt_8_strided_a) { 764 TEST_REQUIRES_ARM_NEON; 765 for (uint32_t n = 9; n < 16; n++) { 766 for (size_t k = 1; k <= 40; k += 9) { 767 GemmMicrokernelTester() 768 .mr(4) 769 .nr(8) 770 .kr(1) 771 .sr(1) 772 .m(4) 773 .n(n) 774 .k(k) 775 .a_stride(43) 776 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 777 } 778 } 779 } 780 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64,n_gt_8_subtile)781 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, n_gt_8_subtile) { 782 TEST_REQUIRES_ARM_NEON; 783 for (uint32_t n = 9; n < 16; n++) { 784 for (size_t k = 1; k <= 40; k += 9) { 785 for (uint32_t m = 1; m <= 4; m++) { 786 GemmMicrokernelTester() 787 .mr(4) 788 .nr(8) 789 .kr(1) 790 .sr(1) 791 .m(m) 792 .n(n) 793 .k(k) 794 .iterations(1) 795 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 796 } 797 } 798 } 799 } 
800 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64,n_div_8)801 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, n_div_8) { 802 TEST_REQUIRES_ARM_NEON; 803 for (uint32_t n = 16; n <= 24; n += 8) { 804 for (size_t k = 1; k <= 40; k += 9) { 805 GemmMicrokernelTester() 806 .mr(4) 807 .nr(8) 808 .kr(1) 809 .sr(1) 810 .m(4) 811 .n(n) 812 .k(k) 813 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 814 } 815 } 816 } 817 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64,n_div_8_strided_cn)818 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, n_div_8_strided_cn) { 819 TEST_REQUIRES_ARM_NEON; 820 for (uint32_t n = 16; n <= 24; n += 8) { 821 for (size_t k = 1; k <= 40; k += 9) { 822 GemmMicrokernelTester() 823 .mr(4) 824 .nr(8) 825 .kr(1) 826 .sr(1) 827 .m(4) 828 .n(n) 829 .k(k) 830 .cn_stride(11) 831 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 832 } 833 } 834 } 835 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64,n_div_8_strided_a)836 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, n_div_8_strided_a) { 837 TEST_REQUIRES_ARM_NEON; 838 for (uint32_t n = 16; n <= 24; n += 8) { 839 for (size_t k = 1; k <= 40; k += 9) { 840 GemmMicrokernelTester() 841 .mr(4) 842 .nr(8) 843 .kr(1) 844 .sr(1) 845 .m(4) 846 .n(n) 847 .k(k) 848 .a_stride(43) 849 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 850 } 851 } 852 } 853 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64,n_div_8_subtile)854 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, n_div_8_subtile) { 855 TEST_REQUIRES_ARM_NEON; 856 for (uint32_t n = 16; n <= 24; n += 8) { 857 for (size_t k = 1; k <= 40; 
k += 9) { 858 for (uint32_t m = 1; m <= 4; m++) { 859 GemmMicrokernelTester() 860 .mr(4) 861 .nr(8) 862 .kr(1) 863 .sr(1) 864 .m(m) 865 .n(n) 866 .k(k) 867 .iterations(1) 868 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 869 } 870 } 871 } 872 } 873 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64,strided_cm_subtile)874 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, strided_cm_subtile) { 875 TEST_REQUIRES_ARM_NEON; 876 for (size_t k = 1; k <= 40; k += 9) { 877 for (uint32_t n = 1; n <= 8; n++) { 878 for (uint32_t m = 1; m <= 4; m++) { 879 GemmMicrokernelTester() 880 .mr(4) 881 .nr(8) 882 .kr(1) 883 .sr(1) 884 .m(m) 885 .n(n) 886 .k(k) 887 .cm_stride(11) 888 .iterations(1) 889 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 890 } 891 } 892 } 893 } 894 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64,qmin)895 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, qmin) { 896 TEST_REQUIRES_ARM_NEON; 897 GemmMicrokernelTester() 898 .mr(4) 899 .nr(8) 900 .kr(1) 901 .sr(1) 902 .m(4) 903 .n(8) 904 .k(8) 905 .qmin(128) 906 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 907 } 908 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64,qmax)909 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, qmax) { 910 TEST_REQUIRES_ARM_NEON; 911 GemmMicrokernelTester() 912 .mr(4) 913 .nr(8) 914 .kr(1) 915 .sr(1) 916 .m(4) 917 .n(8) 918 .k(8) 919 .qmax(128) 920 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 921 } 922 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64,strided_cm)923 
TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, strided_cm) { 924 TEST_REQUIRES_ARM_NEON; 925 GemmMicrokernelTester() 926 .mr(4) 927 .nr(8) 928 .kr(1) 929 .sr(1) 930 .m(4) 931 .n(8) 932 .k(8) 933 .cm_stride(11) 934 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 935 } 936 #endif // XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY 937 938 939 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R,k_eq_8)940 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R, k_eq_8) { 941 TEST_REQUIRES_ARM_NEON; 942 GemmMicrokernelTester() 943 .mr(1) 944 .nr(8) 945 .kr(2) 946 .sr(1) 947 .m(1) 948 .n(8) 949 .k(8) 950 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 951 } 952 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R,strided_cn)953 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R, strided_cn) { 954 TEST_REQUIRES_ARM_NEON; 955 GemmMicrokernelTester() 956 .mr(1) 957 .nr(8) 958 .kr(2) 959 .sr(1) 960 .m(1) 961 .n(8) 962 .k(8) 963 .cn_stride(11) 964 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 965 } 966 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R,k_eq_8_strided_a)967 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R, k_eq_8_strided_a) { 968 TEST_REQUIRES_ARM_NEON; 969 GemmMicrokernelTester() 970 .mr(1) 971 .nr(8) 972 .kr(2) 973 .sr(1) 974 .m(1) 975 .n(8) 976 .k(8) 977 .a_stride(11) 978 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 979 } 980 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R,k_eq_8_subtile)981 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R, k_eq_8_subtile) { 982 TEST_REQUIRES_ARM_NEON; 983 for (uint32_t n = 1; n <= 8; n++) { 984 for (uint32_t m = 1; m <= 1; m++) { 985 
GemmMicrokernelTester() 986 .mr(1) 987 .nr(8) 988 .kr(2) 989 .sr(1) 990 .m(m) 991 .n(n) 992 .k(8) 993 .iterations(1) 994 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 995 } 996 } 997 } 998 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R,k_eq_8_subtile_m)999 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R, k_eq_8_subtile_m) { 1000 TEST_REQUIRES_ARM_NEON; 1001 for (uint32_t m = 1; m <= 1; m++) { 1002 GemmMicrokernelTester() 1003 .mr(1) 1004 .nr(8) 1005 .kr(2) 1006 .sr(1) 1007 .m(m) 1008 .n(8) 1009 .k(8) 1010 .iterations(1) 1011 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1012 } 1013 } 1014 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R,k_eq_8_subtile_n)1015 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R, k_eq_8_subtile_n) { 1016 TEST_REQUIRES_ARM_NEON; 1017 for (uint32_t n = 1; n <= 8; n++) { 1018 GemmMicrokernelTester() 1019 .mr(1) 1020 .nr(8) 1021 .kr(2) 1022 .sr(1) 1023 .m(1) 1024 .n(n) 1025 .k(8) 1026 .iterations(1) 1027 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1028 } 1029 } 1030 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R,k_lt_8)1031 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R, k_lt_8) { 1032 TEST_REQUIRES_ARM_NEON; 1033 for (size_t k = 1; k < 8; k++) { 1034 GemmMicrokernelTester() 1035 .mr(1) 1036 .nr(8) 1037 .kr(2) 1038 .sr(1) 1039 .m(1) 1040 .n(8) 1041 .k(k) 1042 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1043 } 1044 } 1045 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R,k_lt_8_strided_a)1046 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R, k_lt_8_strided_a) { 1047 TEST_REQUIRES_ARM_NEON; 1048 for (size_t k = 1; k < 8; k++) { 1049 GemmMicrokernelTester() 1050 .mr(1) 1051 .nr(8) 1052 
.kr(2) 1053 .sr(1) 1054 .m(1) 1055 .n(8) 1056 .k(k) 1057 .a_stride(11) 1058 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1059 } 1060 } 1061 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R,k_lt_8_subtile)1062 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R, k_lt_8_subtile) { 1063 TEST_REQUIRES_ARM_NEON; 1064 for (size_t k = 1; k < 8; k++) { 1065 for (uint32_t n = 1; n <= 8; n++) { 1066 for (uint32_t m = 1; m <= 1; m++) { 1067 GemmMicrokernelTester() 1068 .mr(1) 1069 .nr(8) 1070 .kr(2) 1071 .sr(1) 1072 .m(m) 1073 .n(n) 1074 .k(k) 1075 .iterations(1) 1076 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1077 } 1078 } 1079 } 1080 } 1081 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R,k_gt_8)1082 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R, k_gt_8) { 1083 TEST_REQUIRES_ARM_NEON; 1084 for (size_t k = 9; k < 16; k++) { 1085 GemmMicrokernelTester() 1086 .mr(1) 1087 .nr(8) 1088 .kr(2) 1089 .sr(1) 1090 .m(1) 1091 .n(8) 1092 .k(k) 1093 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1094 } 1095 } 1096 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R,k_gt_8_strided_a)1097 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R, k_gt_8_strided_a) { 1098 TEST_REQUIRES_ARM_NEON; 1099 for (size_t k = 9; k < 16; k++) { 1100 GemmMicrokernelTester() 1101 .mr(1) 1102 .nr(8) 1103 .kr(2) 1104 .sr(1) 1105 .m(1) 1106 .n(8) 1107 .k(k) 1108 .a_stride(19) 1109 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1110 } 1111 } 1112 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R,k_gt_8_subtile)1113 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R, k_gt_8_subtile) { 1114 TEST_REQUIRES_ARM_NEON; 1115 for (size_t k = 9; k < 16; k++) { 1116 for (uint32_t n = 
1; n <= 8; n++) { 1117 for (uint32_t m = 1; m <= 1; m++) { 1118 GemmMicrokernelTester() 1119 .mr(1) 1120 .nr(8) 1121 .kr(2) 1122 .sr(1) 1123 .m(m) 1124 .n(n) 1125 .k(k) 1126 .iterations(1) 1127 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1128 } 1129 } 1130 } 1131 } 1132 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R,k_div_8)1133 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R, k_div_8) { 1134 TEST_REQUIRES_ARM_NEON; 1135 for (size_t k = 16; k <= 80; k += 8) { 1136 GemmMicrokernelTester() 1137 .mr(1) 1138 .nr(8) 1139 .kr(2) 1140 .sr(1) 1141 .m(1) 1142 .n(8) 1143 .k(k) 1144 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1145 } 1146 } 1147 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R,k_div_8_strided_a)1148 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R, k_div_8_strided_a) { 1149 TEST_REQUIRES_ARM_NEON; 1150 for (size_t k = 16; k <= 80; k += 8) { 1151 GemmMicrokernelTester() 1152 .mr(1) 1153 .nr(8) 1154 .kr(2) 1155 .sr(1) 1156 .m(1) 1157 .n(8) 1158 .k(k) 1159 .a_stride(83) 1160 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1161 } 1162 } 1163 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R,k_div_8_subtile)1164 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R, k_div_8_subtile) { 1165 TEST_REQUIRES_ARM_NEON; 1166 for (size_t k = 16; k <= 80; k += 8) { 1167 for (uint32_t n = 1; n <= 8; n++) { 1168 for (uint32_t m = 1; m <= 1; m++) { 1169 GemmMicrokernelTester() 1170 .mr(1) 1171 .nr(8) 1172 .kr(2) 1173 .sr(1) 1174 .m(m) 1175 .n(n) 1176 .k(k) 1177 .iterations(1) 1178 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1179 } 1180 } 1181 } 1182 } 1183 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R,n_gt_8)1184 
TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R, n_gt_8) { 1185 TEST_REQUIRES_ARM_NEON; 1186 for (uint32_t n = 9; n < 16; n++) { 1187 for (size_t k = 1; k <= 40; k += 9) { 1188 GemmMicrokernelTester() 1189 .mr(1) 1190 .nr(8) 1191 .kr(2) 1192 .sr(1) 1193 .m(1) 1194 .n(n) 1195 .k(k) 1196 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1197 } 1198 } 1199 } 1200 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R,n_gt_8_strided_cn)1201 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R, n_gt_8_strided_cn) { 1202 TEST_REQUIRES_ARM_NEON; 1203 for (uint32_t n = 9; n < 16; n++) { 1204 for (size_t k = 1; k <= 40; k += 9) { 1205 GemmMicrokernelTester() 1206 .mr(1) 1207 .nr(8) 1208 .kr(2) 1209 .sr(1) 1210 .m(1) 1211 .n(n) 1212 .k(k) 1213 .cn_stride(11) 1214 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1215 } 1216 } 1217 } 1218 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R,n_gt_8_strided_a)1219 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R, n_gt_8_strided_a) { 1220 TEST_REQUIRES_ARM_NEON; 1221 for (uint32_t n = 9; n < 16; n++) { 1222 for (size_t k = 1; k <= 40; k += 9) { 1223 GemmMicrokernelTester() 1224 .mr(1) 1225 .nr(8) 1226 .kr(2) 1227 .sr(1) 1228 .m(1) 1229 .n(n) 1230 .k(k) 1231 .a_stride(43) 1232 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1233 } 1234 } 1235 } 1236 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R,n_gt_8_subtile)1237 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R, n_gt_8_subtile) { 1238 TEST_REQUIRES_ARM_NEON; 1239 for (uint32_t n = 9; n < 16; n++) { 1240 for (size_t k = 1; k <= 40; k += 9) { 1241 for (uint32_t m = 1; m <= 1; m++) { 1242 GemmMicrokernelTester() 1243 .mr(1) 1244 .nr(8) 1245 .kr(2) 1246 .sr(1) 1247 .m(m) 1248 .n(n) 1249 .k(k) 1250 .iterations(1) 1251 
.Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1252 } 1253 } 1254 } 1255 } 1256 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R,n_div_8)1257 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R, n_div_8) { 1258 TEST_REQUIRES_ARM_NEON; 1259 for (uint32_t n = 16; n <= 24; n += 8) { 1260 for (size_t k = 1; k <= 40; k += 9) { 1261 GemmMicrokernelTester() 1262 .mr(1) 1263 .nr(8) 1264 .kr(2) 1265 .sr(1) 1266 .m(1) 1267 .n(n) 1268 .k(k) 1269 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1270 } 1271 } 1272 } 1273 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R,n_div_8_strided_cn)1274 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R, n_div_8_strided_cn) { 1275 TEST_REQUIRES_ARM_NEON; 1276 for (uint32_t n = 16; n <= 24; n += 8) { 1277 for (size_t k = 1; k <= 40; k += 9) { 1278 GemmMicrokernelTester() 1279 .mr(1) 1280 .nr(8) 1281 .kr(2) 1282 .sr(1) 1283 .m(1) 1284 .n(n) 1285 .k(k) 1286 .cn_stride(11) 1287 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1288 } 1289 } 1290 } 1291 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R,n_div_8_strided_a)1292 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R, n_div_8_strided_a) { 1293 TEST_REQUIRES_ARM_NEON; 1294 for (uint32_t n = 16; n <= 24; n += 8) { 1295 for (size_t k = 1; k <= 40; k += 9) { 1296 GemmMicrokernelTester() 1297 .mr(1) 1298 .nr(8) 1299 .kr(2) 1300 .sr(1) 1301 .m(1) 1302 .n(n) 1303 .k(k) 1304 .a_stride(43) 1305 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1306 } 1307 } 1308 } 1309 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R,n_div_8_subtile)1310 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R, n_div_8_subtile) { 1311 TEST_REQUIRES_ARM_NEON; 1312 for (uint32_t n = 16; n <= 24; 
n += 8) { 1313 for (size_t k = 1; k <= 40; k += 9) { 1314 for (uint32_t m = 1; m <= 1; m++) { 1315 GemmMicrokernelTester() 1316 .mr(1) 1317 .nr(8) 1318 .kr(2) 1319 .sr(1) 1320 .m(m) 1321 .n(n) 1322 .k(k) 1323 .iterations(1) 1324 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1325 } 1326 } 1327 } 1328 } 1329 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R,strided_cm_subtile)1330 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R, strided_cm_subtile) { 1331 TEST_REQUIRES_ARM_NEON; 1332 for (size_t k = 1; k <= 40; k += 9) { 1333 for (uint32_t n = 1; n <= 8; n++) { 1334 for (uint32_t m = 1; m <= 1; m++) { 1335 GemmMicrokernelTester() 1336 .mr(1) 1337 .nr(8) 1338 .kr(2) 1339 .sr(1) 1340 .m(m) 1341 .n(n) 1342 .k(k) 1343 .cm_stride(11) 1344 .iterations(1) 1345 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1346 } 1347 } 1348 } 1349 } 1350 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R,qmin)1351 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R, qmin) { 1352 TEST_REQUIRES_ARM_NEON; 1353 GemmMicrokernelTester() 1354 .mr(1) 1355 .nr(8) 1356 .kr(2) 1357 .sr(1) 1358 .m(1) 1359 .n(8) 1360 .k(8) 1361 .qmin(128) 1362 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1363 } 1364 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R,qmax)1365 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R, qmax) { 1366 TEST_REQUIRES_ARM_NEON; 1367 GemmMicrokernelTester() 1368 .mr(1) 1369 .nr(8) 1370 .kr(2) 1371 .sr(1) 1372 .m(1) 1373 .n(8) 1374 .k(8) 1375 .qmax(128) 1376 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1377 } 1378 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R,strided_cm)1379 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R, strided_cm) { 1380 
TEST_REQUIRES_ARM_NEON; 1381 GemmMicrokernelTester() 1382 .mr(1) 1383 .nr(8) 1384 .kr(2) 1385 .sr(1) 1386 .m(1) 1387 .n(8) 1388 .k(8) 1389 .cm_stride(11) 1390 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1391 } 1392 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 1393 1394 1395 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL,k_eq_16)1396 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL, k_eq_16) { 1397 TEST_REQUIRES_ARM_NEON; 1398 GemmMicrokernelTester() 1399 .mr(1) 1400 .nr(8) 1401 .kr(2) 1402 .sr(4) 1403 .m(1) 1404 .n(8) 1405 .k(16) 1406 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1407 } 1408 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL,strided_cn)1409 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL, strided_cn) { 1410 TEST_REQUIRES_ARM_NEON; 1411 GemmMicrokernelTester() 1412 .mr(1) 1413 .nr(8) 1414 .kr(2) 1415 .sr(4) 1416 .m(1) 1417 .n(8) 1418 .k(16) 1419 .cn_stride(11) 1420 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1421 } 1422 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL,k_eq_16_strided_a)1423 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL, k_eq_16_strided_a) { 1424 TEST_REQUIRES_ARM_NEON; 1425 GemmMicrokernelTester() 1426 .mr(1) 1427 .nr(8) 1428 .kr(2) 1429 .sr(4) 1430 .m(1) 1431 .n(8) 1432 .k(16) 1433 .a_stride(19) 1434 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1435 } 1436 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL,k_eq_16_subtile)1437 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL, k_eq_16_subtile) { 1438 TEST_REQUIRES_ARM_NEON; 1439 for (uint32_t n = 1; n <= 8; n++) { 1440 for (uint32_t m = 1; m <= 1; m++) { 1441 GemmMicrokernelTester() 1442 .mr(1) 1443 .nr(8) 1444 .kr(2) 1445 .sr(4) 
1446 .m(m) 1447 .n(n) 1448 .k(16) 1449 .iterations(1) 1450 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1451 } 1452 } 1453 } 1454 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL,k_eq_16_subtile_m)1455 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL, k_eq_16_subtile_m) { 1456 TEST_REQUIRES_ARM_NEON; 1457 for (uint32_t m = 1; m <= 1; m++) { 1458 GemmMicrokernelTester() 1459 .mr(1) 1460 .nr(8) 1461 .kr(2) 1462 .sr(4) 1463 .m(m) 1464 .n(8) 1465 .k(16) 1466 .iterations(1) 1467 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1468 } 1469 } 1470 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL,k_eq_16_subtile_n)1471 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL, k_eq_16_subtile_n) { 1472 TEST_REQUIRES_ARM_NEON; 1473 for (uint32_t n = 1; n <= 8; n++) { 1474 GemmMicrokernelTester() 1475 .mr(1) 1476 .nr(8) 1477 .kr(2) 1478 .sr(4) 1479 .m(1) 1480 .n(n) 1481 .k(16) 1482 .iterations(1) 1483 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1484 } 1485 } 1486 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL,k_lt_16)1487 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL, k_lt_16) { 1488 TEST_REQUIRES_ARM_NEON; 1489 for (size_t k = 1; k < 16; k++) { 1490 GemmMicrokernelTester() 1491 .mr(1) 1492 .nr(8) 1493 .kr(2) 1494 .sr(4) 1495 .m(1) 1496 .n(8) 1497 .k(k) 1498 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1499 } 1500 } 1501 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL,k_lt_16_strided_a)1502 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL, k_lt_16_strided_a) { 1503 TEST_REQUIRES_ARM_NEON; 1504 for (size_t k = 1; k < 16; k++) { 1505 GemmMicrokernelTester() 1506 .mr(1) 1507 .nr(8) 1508 .kr(2) 1509 .sr(4) 1510 .m(1) 1511 .n(8) 1512 .k(k) 1513 .a_stride(19) 1514 
.Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1515 } 1516 } 1517 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL,k_lt_16_subtile)1518 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL, k_lt_16_subtile) { 1519 TEST_REQUIRES_ARM_NEON; 1520 for (size_t k = 1; k < 16; k++) { 1521 for (uint32_t n = 1; n <= 8; n++) { 1522 for (uint32_t m = 1; m <= 1; m++) { 1523 GemmMicrokernelTester() 1524 .mr(1) 1525 .nr(8) 1526 .kr(2) 1527 .sr(4) 1528 .m(m) 1529 .n(n) 1530 .k(k) 1531 .iterations(1) 1532 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1533 } 1534 } 1535 } 1536 } 1537 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL,k_gt_16)1538 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL, k_gt_16) { 1539 TEST_REQUIRES_ARM_NEON; 1540 for (size_t k = 17; k < 32; k++) { 1541 GemmMicrokernelTester() 1542 .mr(1) 1543 .nr(8) 1544 .kr(2) 1545 .sr(4) 1546 .m(1) 1547 .n(8) 1548 .k(k) 1549 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1550 } 1551 } 1552 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL,k_gt_16_strided_a)1553 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL, k_gt_16_strided_a) { 1554 TEST_REQUIRES_ARM_NEON; 1555 for (size_t k = 17; k < 32; k++) { 1556 GemmMicrokernelTester() 1557 .mr(1) 1558 .nr(8) 1559 .kr(2) 1560 .sr(4) 1561 .m(1) 1562 .n(8) 1563 .k(k) 1564 .a_stride(37) 1565 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1566 } 1567 } 1568 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL,k_gt_16_subtile)1569 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL, k_gt_16_subtile) { 1570 TEST_REQUIRES_ARM_NEON; 1571 for (size_t k = 17; k < 32; k++) { 1572 for (uint32_t n = 1; n <= 8; n++) { 1573 for (uint32_t m = 1; m <= 1; m++) { 1574 GemmMicrokernelTester() 1575 .mr(1) 
1576 .nr(8) 1577 .kr(2) 1578 .sr(4) 1579 .m(m) 1580 .n(n) 1581 .k(k) 1582 .iterations(1) 1583 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1584 } 1585 } 1586 } 1587 } 1588 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL,k_div_16)1589 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL, k_div_16) { 1590 TEST_REQUIRES_ARM_NEON; 1591 for (size_t k = 32; k <= 160; k += 16) { 1592 GemmMicrokernelTester() 1593 .mr(1) 1594 .nr(8) 1595 .kr(2) 1596 .sr(4) 1597 .m(1) 1598 .n(8) 1599 .k(k) 1600 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1601 } 1602 } 1603 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL,k_div_16_strided_a)1604 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL, k_div_16_strided_a) { 1605 TEST_REQUIRES_ARM_NEON; 1606 for (size_t k = 32; k <= 160; k += 16) { 1607 GemmMicrokernelTester() 1608 .mr(1) 1609 .nr(8) 1610 .kr(2) 1611 .sr(4) 1612 .m(1) 1613 .n(8) 1614 .k(k) 1615 .a_stride(163) 1616 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1617 } 1618 } 1619 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL,k_div_16_subtile)1620 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL, k_div_16_subtile) { 1621 TEST_REQUIRES_ARM_NEON; 1622 for (size_t k = 32; k <= 160; k += 16) { 1623 for (uint32_t n = 1; n <= 8; n++) { 1624 for (uint32_t m = 1; m <= 1; m++) { 1625 GemmMicrokernelTester() 1626 .mr(1) 1627 .nr(8) 1628 .kr(2) 1629 .sr(4) 1630 .m(m) 1631 .n(n) 1632 .k(k) 1633 .iterations(1) 1634 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1635 } 1636 } 1637 } 1638 } 1639 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL,n_gt_8)1640 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL, n_gt_8) { 1641 TEST_REQUIRES_ARM_NEON; 1642 for (uint32_t n = 9; n < 16; n++) { 1643 
for (size_t k = 1; k <= 80; k += 17) { 1644 GemmMicrokernelTester() 1645 .mr(1) 1646 .nr(8) 1647 .kr(2) 1648 .sr(4) 1649 .m(1) 1650 .n(n) 1651 .k(k) 1652 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1653 } 1654 } 1655 } 1656 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL,n_gt_8_strided_cn)1657 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL, n_gt_8_strided_cn) { 1658 TEST_REQUIRES_ARM_NEON; 1659 for (uint32_t n = 9; n < 16; n++) { 1660 for (size_t k = 1; k <= 80; k += 17) { 1661 GemmMicrokernelTester() 1662 .mr(1) 1663 .nr(8) 1664 .kr(2) 1665 .sr(4) 1666 .m(1) 1667 .n(n) 1668 .k(k) 1669 .cn_stride(11) 1670 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1671 } 1672 } 1673 } 1674 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL,n_gt_8_strided_a)1675 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL, n_gt_8_strided_a) { 1676 TEST_REQUIRES_ARM_NEON; 1677 for (uint32_t n = 9; n < 16; n++) { 1678 for (size_t k = 1; k <= 80; k += 17) { 1679 GemmMicrokernelTester() 1680 .mr(1) 1681 .nr(8) 1682 .kr(2) 1683 .sr(4) 1684 .m(1) 1685 .n(n) 1686 .k(k) 1687 .a_stride(83) 1688 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1689 } 1690 } 1691 } 1692 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL,n_gt_8_subtile)1693 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL, n_gt_8_subtile) { 1694 TEST_REQUIRES_ARM_NEON; 1695 for (uint32_t n = 9; n < 16; n++) { 1696 for (size_t k = 1; k <= 80; k += 17) { 1697 for (uint32_t m = 1; m <= 1; m++) { 1698 GemmMicrokernelTester() 1699 .mr(1) 1700 .nr(8) 1701 .kr(2) 1702 .sr(4) 1703 .m(m) 1704 .n(n) 1705 .k(k) 1706 .iterations(1) 1707 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1708 } 1709 } 1710 } 1711 } 1712 
TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL,n_div_8)1713 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL, n_div_8) { 1714 TEST_REQUIRES_ARM_NEON; 1715 for (uint32_t n = 16; n <= 24; n += 8) { 1716 for (size_t k = 1; k <= 80; k += 17) { 1717 GemmMicrokernelTester() 1718 .mr(1) 1719 .nr(8) 1720 .kr(2) 1721 .sr(4) 1722 .m(1) 1723 .n(n) 1724 .k(k) 1725 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1726 } 1727 } 1728 } 1729 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL,n_div_8_strided_cn)1730 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL, n_div_8_strided_cn) { 1731 TEST_REQUIRES_ARM_NEON; 1732 for (uint32_t n = 16; n <= 24; n += 8) { 1733 for (size_t k = 1; k <= 80; k += 17) { 1734 GemmMicrokernelTester() 1735 .mr(1) 1736 .nr(8) 1737 .kr(2) 1738 .sr(4) 1739 .m(1) 1740 .n(n) 1741 .k(k) 1742 .cn_stride(11) 1743 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1744 } 1745 } 1746 } 1747 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL,n_div_8_strided_a)1748 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL, n_div_8_strided_a) { 1749 TEST_REQUIRES_ARM_NEON; 1750 for (uint32_t n = 16; n <= 24; n += 8) { 1751 for (size_t k = 1; k <= 80; k += 17) { 1752 GemmMicrokernelTester() 1753 .mr(1) 1754 .nr(8) 1755 .kr(2) 1756 .sr(4) 1757 .m(1) 1758 .n(n) 1759 .k(k) 1760 .a_stride(83) 1761 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1762 } 1763 } 1764 } 1765 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL,n_div_8_subtile)1766 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL, n_div_8_subtile) { 1767 TEST_REQUIRES_ARM_NEON; 1768 for (uint32_t n = 16; n <= 24; n += 8) { 1769 for (size_t k = 1; k <= 80; k += 17) { 1770 for (uint32_t m = 1; m <= 1; m++) { 1771 GemmMicrokernelTester() 1772 .mr(1) 1773 .nr(8) 1774 .kr(2) 1775 .sr(4) 1776 .m(m) 1777 .n(n) 
1778 .k(k) 1779 .iterations(1) 1780 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1781 } 1782 } 1783 } 1784 } 1785 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL,strided_cm_subtile)1786 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL, strided_cm_subtile) { 1787 TEST_REQUIRES_ARM_NEON; 1788 for (size_t k = 1; k <= 80; k += 17) { 1789 for (uint32_t n = 1; n <= 8; n++) { 1790 for (uint32_t m = 1; m <= 1; m++) { 1791 GemmMicrokernelTester() 1792 .mr(1) 1793 .nr(8) 1794 .kr(2) 1795 .sr(4) 1796 .m(m) 1797 .n(n) 1798 .k(k) 1799 .cm_stride(11) 1800 .iterations(1) 1801 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1802 } 1803 } 1804 } 1805 } 1806 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL,qmin)1807 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL, qmin) { 1808 TEST_REQUIRES_ARM_NEON; 1809 GemmMicrokernelTester() 1810 .mr(1) 1811 .nr(8) 1812 .kr(2) 1813 .sr(4) 1814 .m(1) 1815 .n(8) 1816 .k(16) 1817 .qmin(128) 1818 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1819 } 1820 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL,qmax)1821 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL, qmax) { 1822 TEST_REQUIRES_ARM_NEON; 1823 GemmMicrokernelTester() 1824 .mr(1) 1825 .nr(8) 1826 .kr(2) 1827 .sr(4) 1828 .m(1) 1829 .n(8) 1830 .k(16) 1831 .qmax(128) 1832 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1833 } 1834 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL,strided_cm)1835 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2S4__NEON_MLAL, strided_cm) { 1836 TEST_REQUIRES_ARM_NEON; 1837 GemmMicrokernelTester() 1838 .mr(1) 1839 .nr(8) 1840 .kr(2) 1841 .sr(4) 1842 .m(1) 1843 .n(8) 1844 .k(16) 1845 .cm_stride(11) 1846 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2s4__neon_mlal, 
xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1847 } 1848 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 1849 1850 1851 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R,k_eq_16)1852 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, k_eq_16) { 1853 TEST_REQUIRES_ARM_NEON; 1854 GemmMicrokernelTester() 1855 .mr(1) 1856 .nr(8) 1857 .kr(4) 1858 .sr(1) 1859 .m(1) 1860 .n(8) 1861 .k(16) 1862 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1863 } 1864 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R,strided_cn)1865 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, strided_cn) { 1866 TEST_REQUIRES_ARM_NEON; 1867 GemmMicrokernelTester() 1868 .mr(1) 1869 .nr(8) 1870 .kr(4) 1871 .sr(1) 1872 .m(1) 1873 .n(8) 1874 .k(16) 1875 .cn_stride(11) 1876 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1877 } 1878 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R,k_eq_16_strided_a)1879 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, k_eq_16_strided_a) { 1880 TEST_REQUIRES_ARM_NEON; 1881 GemmMicrokernelTester() 1882 .mr(1) 1883 .nr(8) 1884 .kr(4) 1885 .sr(1) 1886 .m(1) 1887 .n(8) 1888 .k(16) 1889 .a_stride(19) 1890 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1891 } 1892 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R,k_eq_16_subtile)1893 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, k_eq_16_subtile) { 1894 TEST_REQUIRES_ARM_NEON; 1895 for (uint32_t n = 1; n <= 8; n++) { 1896 for (uint32_t m = 1; m <= 1; m++) { 1897 GemmMicrokernelTester() 1898 .mr(1) 1899 .nr(8) 1900 .kr(4) 1901 .sr(1) 1902 .m(m) 1903 .n(n) 1904 .k(16) 1905 .iterations(1) 1906 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, 
xnn_qs8_requantize_rndnu); 1907 } 1908 } 1909 } 1910 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R,k_eq_16_subtile_m)1911 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, k_eq_16_subtile_m) { 1912 TEST_REQUIRES_ARM_NEON; 1913 for (uint32_t m = 1; m <= 1; m++) { 1914 GemmMicrokernelTester() 1915 .mr(1) 1916 .nr(8) 1917 .kr(4) 1918 .sr(1) 1919 .m(m) 1920 .n(8) 1921 .k(16) 1922 .iterations(1) 1923 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1924 } 1925 } 1926 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R,k_eq_16_subtile_n)1927 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, k_eq_16_subtile_n) { 1928 TEST_REQUIRES_ARM_NEON; 1929 for (uint32_t n = 1; n <= 8; n++) { 1930 GemmMicrokernelTester() 1931 .mr(1) 1932 .nr(8) 1933 .kr(4) 1934 .sr(1) 1935 .m(1) 1936 .n(n) 1937 .k(16) 1938 .iterations(1) 1939 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1940 } 1941 } 1942 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R,k_lt_16)1943 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, k_lt_16) { 1944 TEST_REQUIRES_ARM_NEON; 1945 for (size_t k = 1; k < 16; k++) { 1946 GemmMicrokernelTester() 1947 .mr(1) 1948 .nr(8) 1949 .kr(4) 1950 .sr(1) 1951 .m(1) 1952 .n(8) 1953 .k(k) 1954 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1955 } 1956 } 1957 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R,k_lt_16_strided_a)1958 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, k_lt_16_strided_a) { 1959 TEST_REQUIRES_ARM_NEON; 1960 for (size_t k = 1; k < 16; k++) { 1961 GemmMicrokernelTester() 1962 .mr(1) 1963 .nr(8) 1964 .kr(4) 1965 .sr(1) 1966 .m(1) 1967 .n(8) 1968 .k(k) 1969 .a_stride(19) 1970 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1971 
} 1972 } 1973 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R,k_lt_16_subtile)1974 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, k_lt_16_subtile) { 1975 TEST_REQUIRES_ARM_NEON; 1976 for (size_t k = 1; k < 16; k++) { 1977 for (uint32_t n = 1; n <= 8; n++) { 1978 for (uint32_t m = 1; m <= 1; m++) { 1979 GemmMicrokernelTester() 1980 .mr(1) 1981 .nr(8) 1982 .kr(4) 1983 .sr(1) 1984 .m(m) 1985 .n(n) 1986 .k(k) 1987 .iterations(1) 1988 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1989 } 1990 } 1991 } 1992 } 1993 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R,k_gt_16)1994 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, k_gt_16) { 1995 TEST_REQUIRES_ARM_NEON; 1996 for (size_t k = 17; k < 32; k++) { 1997 GemmMicrokernelTester() 1998 .mr(1) 1999 .nr(8) 2000 .kr(4) 2001 .sr(1) 2002 .m(1) 2003 .n(8) 2004 .k(k) 2005 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2006 } 2007 } 2008 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R,k_gt_16_strided_a)2009 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, k_gt_16_strided_a) { 2010 TEST_REQUIRES_ARM_NEON; 2011 for (size_t k = 17; k < 32; k++) { 2012 GemmMicrokernelTester() 2013 .mr(1) 2014 .nr(8) 2015 .kr(4) 2016 .sr(1) 2017 .m(1) 2018 .n(8) 2019 .k(k) 2020 .a_stride(37) 2021 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2022 } 2023 } 2024 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R,k_gt_16_subtile)2025 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, k_gt_16_subtile) { 2026 TEST_REQUIRES_ARM_NEON; 2027 for (size_t k = 17; k < 32; k++) { 2028 for (uint32_t n = 1; n <= 8; n++) { 2029 for (uint32_t m = 1; m <= 1; m++) { 2030 GemmMicrokernelTester() 2031 .mr(1) 2032 .nr(8) 2033 .kr(4) 2034 .sr(1) 2035 .m(m) 2036 .n(n) 2037 .k(k) 2038 .iterations(1) 2039 
.Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2040 } 2041 } 2042 } 2043 } 2044 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R,k_div_16)2045 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, k_div_16) { 2046 TEST_REQUIRES_ARM_NEON; 2047 for (size_t k = 32; k <= 160; k += 16) { 2048 GemmMicrokernelTester() 2049 .mr(1) 2050 .nr(8) 2051 .kr(4) 2052 .sr(1) 2053 .m(1) 2054 .n(8) 2055 .k(k) 2056 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2057 } 2058 } 2059 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R,k_div_16_strided_a)2060 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, k_div_16_strided_a) { 2061 TEST_REQUIRES_ARM_NEON; 2062 for (size_t k = 32; k <= 160; k += 16) { 2063 GemmMicrokernelTester() 2064 .mr(1) 2065 .nr(8) 2066 .kr(4) 2067 .sr(1) 2068 .m(1) 2069 .n(8) 2070 .k(k) 2071 .a_stride(163) 2072 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2073 } 2074 } 2075 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R,k_div_16_subtile)2076 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, k_div_16_subtile) { 2077 TEST_REQUIRES_ARM_NEON; 2078 for (size_t k = 32; k <= 160; k += 16) { 2079 for (uint32_t n = 1; n <= 8; n++) { 2080 for (uint32_t m = 1; m <= 1; m++) { 2081 GemmMicrokernelTester() 2082 .mr(1) 2083 .nr(8) 2084 .kr(4) 2085 .sr(1) 2086 .m(m) 2087 .n(n) 2088 .k(k) 2089 .iterations(1) 2090 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2091 } 2092 } 2093 } 2094 } 2095 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R,n_gt_8)2096 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, n_gt_8) { 2097 TEST_REQUIRES_ARM_NEON; 2098 for (uint32_t n = 9; n < 16; n++) { 2099 for (size_t k = 1; k <= 80; k += 17) { 2100 
GemmMicrokernelTester() 2101 .mr(1) 2102 .nr(8) 2103 .kr(4) 2104 .sr(1) 2105 .m(1) 2106 .n(n) 2107 .k(k) 2108 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2109 } 2110 } 2111 } 2112 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R,n_gt_8_strided_cn)2113 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, n_gt_8_strided_cn) { 2114 TEST_REQUIRES_ARM_NEON; 2115 for (uint32_t n = 9; n < 16; n++) { 2116 for (size_t k = 1; k <= 80; k += 17) { 2117 GemmMicrokernelTester() 2118 .mr(1) 2119 .nr(8) 2120 .kr(4) 2121 .sr(1) 2122 .m(1) 2123 .n(n) 2124 .k(k) 2125 .cn_stride(11) 2126 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2127 } 2128 } 2129 } 2130 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R,n_gt_8_strided_a)2131 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, n_gt_8_strided_a) { 2132 TEST_REQUIRES_ARM_NEON; 2133 for (uint32_t n = 9; n < 16; n++) { 2134 for (size_t k = 1; k <= 80; k += 17) { 2135 GemmMicrokernelTester() 2136 .mr(1) 2137 .nr(8) 2138 .kr(4) 2139 .sr(1) 2140 .m(1) 2141 .n(n) 2142 .k(k) 2143 .a_stride(83) 2144 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2145 } 2146 } 2147 } 2148 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R,n_gt_8_subtile)2149 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, n_gt_8_subtile) { 2150 TEST_REQUIRES_ARM_NEON; 2151 for (uint32_t n = 9; n < 16; n++) { 2152 for (size_t k = 1; k <= 80; k += 17) { 2153 for (uint32_t m = 1; m <= 1; m++) { 2154 GemmMicrokernelTester() 2155 .mr(1) 2156 .nr(8) 2157 .kr(4) 2158 .sr(1) 2159 .m(m) 2160 .n(n) 2161 .k(k) 2162 .iterations(1) 2163 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2164 } 2165 } 2166 } 2167 } 2168 
TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R,n_div_8)2169 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, n_div_8) { 2170 TEST_REQUIRES_ARM_NEON; 2171 for (uint32_t n = 16; n <= 24; n += 8) { 2172 for (size_t k = 1; k <= 80; k += 17) { 2173 GemmMicrokernelTester() 2174 .mr(1) 2175 .nr(8) 2176 .kr(4) 2177 .sr(1) 2178 .m(1) 2179 .n(n) 2180 .k(k) 2181 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2182 } 2183 } 2184 } 2185 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R,n_div_8_strided_cn)2186 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, n_div_8_strided_cn) { 2187 TEST_REQUIRES_ARM_NEON; 2188 for (uint32_t n = 16; n <= 24; n += 8) { 2189 for (size_t k = 1; k <= 80; k += 17) { 2190 GemmMicrokernelTester() 2191 .mr(1) 2192 .nr(8) 2193 .kr(4) 2194 .sr(1) 2195 .m(1) 2196 .n(n) 2197 .k(k) 2198 .cn_stride(11) 2199 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2200 } 2201 } 2202 } 2203 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R,n_div_8_strided_a)2204 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, n_div_8_strided_a) { 2205 TEST_REQUIRES_ARM_NEON; 2206 for (uint32_t n = 16; n <= 24; n += 8) { 2207 for (size_t k = 1; k <= 80; k += 17) { 2208 GemmMicrokernelTester() 2209 .mr(1) 2210 .nr(8) 2211 .kr(4) 2212 .sr(1) 2213 .m(1) 2214 .n(n) 2215 .k(k) 2216 .a_stride(83) 2217 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2218 } 2219 } 2220 } 2221 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R,n_div_8_subtile)2222 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, n_div_8_subtile) { 2223 TEST_REQUIRES_ARM_NEON; 2224 for (uint32_t n = 16; n <= 24; n += 8) { 2225 for (size_t k = 1; k <= 80; k += 17) { 2226 for (uint32_t m = 1; m <= 1; m++) { 2227 GemmMicrokernelTester() 2228 .mr(1) 2229 .nr(8) 2230 .kr(4) 
2231 .sr(1) 2232 .m(m) 2233 .n(n) 2234 .k(k) 2235 .iterations(1) 2236 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2237 } 2238 } 2239 } 2240 } 2241 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R,strided_cm_subtile)2242 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, strided_cm_subtile) { 2243 TEST_REQUIRES_ARM_NEON; 2244 for (size_t k = 1; k <= 80; k += 17) { 2245 for (uint32_t n = 1; n <= 8; n++) { 2246 for (uint32_t m = 1; m <= 1; m++) { 2247 GemmMicrokernelTester() 2248 .mr(1) 2249 .nr(8) 2250 .kr(4) 2251 .sr(1) 2252 .m(m) 2253 .n(n) 2254 .k(k) 2255 .cm_stride(11) 2256 .iterations(1) 2257 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2258 } 2259 } 2260 } 2261 } 2262 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R,qmin)2263 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, qmin) { 2264 TEST_REQUIRES_ARM_NEON; 2265 GemmMicrokernelTester() 2266 .mr(1) 2267 .nr(8) 2268 .kr(4) 2269 .sr(1) 2270 .m(1) 2271 .n(8) 2272 .k(16) 2273 .qmin(128) 2274 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2275 } 2276 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R,qmax)2277 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, qmax) { 2278 TEST_REQUIRES_ARM_NEON; 2279 GemmMicrokernelTester() 2280 .mr(1) 2281 .nr(8) 2282 .kr(4) 2283 .sr(1) 2284 .m(1) 2285 .n(8) 2286 .k(16) 2287 .qmax(128) 2288 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2289 } 2290 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R,strided_cm)2291 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, strided_cm) { 2292 TEST_REQUIRES_ARM_NEON; 2293 GemmMicrokernelTester() 2294 .mr(1) 2295 .nr(8) 2296 .kr(4) 2297 .sr(1) 2298 .m(1) 2299 .n(8) 2300 .k(16) 2301 .cm_stride(11) 
2302 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2303 } 2304 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 2305 2306 2307 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R,k_eq_8)2308 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, k_eq_8) { 2309 TEST_REQUIRES_ARM_NEON; 2310 GemmMicrokernelTester() 2311 .mr(1) 2312 .nr(8) 2313 .kr(4) 2314 .sr(1) 2315 .m(1) 2316 .n(8) 2317 .k(8) 2318 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2319 } 2320 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R,strided_cn)2321 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, strided_cn) { 2322 TEST_REQUIRES_ARM_NEON; 2323 GemmMicrokernelTester() 2324 .mr(1) 2325 .nr(8) 2326 .kr(4) 2327 .sr(1) 2328 .m(1) 2329 .n(8) 2330 .k(8) 2331 .cn_stride(11) 2332 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2333 } 2334 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R,k_eq_8_strided_a)2335 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, k_eq_8_strided_a) { 2336 TEST_REQUIRES_ARM_NEON; 2337 GemmMicrokernelTester() 2338 .mr(1) 2339 .nr(8) 2340 .kr(4) 2341 .sr(1) 2342 .m(1) 2343 .n(8) 2344 .k(8) 2345 .a_stride(11) 2346 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2347 } 2348 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R,k_eq_8_subtile)2349 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, k_eq_8_subtile) { 2350 TEST_REQUIRES_ARM_NEON; 2351 for (uint32_t n = 1; n <= 8; n++) { 2352 for (uint32_t m = 1; m <= 1; m++) { 2353 GemmMicrokernelTester() 2354 .mr(1) 2355 .nr(8) 2356 .kr(4) 2357 .sr(1) 2358 .m(m) 2359 .n(n) 2360 .k(8) 2361 .iterations(1) 2362 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r, 
xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2363 } 2364 } 2365 } 2366 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R,k_eq_8_subtile_m)2367 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, k_eq_8_subtile_m) { 2368 TEST_REQUIRES_ARM_NEON; 2369 for (uint32_t m = 1; m <= 1; m++) { 2370 GemmMicrokernelTester() 2371 .mr(1) 2372 .nr(8) 2373 .kr(4) 2374 .sr(1) 2375 .m(m) 2376 .n(8) 2377 .k(8) 2378 .iterations(1) 2379 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2380 } 2381 } 2382 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R,k_eq_8_subtile_n)2383 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, k_eq_8_subtile_n) { 2384 TEST_REQUIRES_ARM_NEON; 2385 for (uint32_t n = 1; n <= 8; n++) { 2386 GemmMicrokernelTester() 2387 .mr(1) 2388 .nr(8) 2389 .kr(4) 2390 .sr(1) 2391 .m(1) 2392 .n(n) 2393 .k(8) 2394 .iterations(1) 2395 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2396 } 2397 } 2398 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R,k_lt_8)2399 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, k_lt_8) { 2400 TEST_REQUIRES_ARM_NEON; 2401 for (size_t k = 1; k < 8; k++) { 2402 GemmMicrokernelTester() 2403 .mr(1) 2404 .nr(8) 2405 .kr(4) 2406 .sr(1) 2407 .m(1) 2408 .n(8) 2409 .k(k) 2410 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2411 } 2412 } 2413 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R,k_lt_8_strided_a)2414 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, k_lt_8_strided_a) { 2415 TEST_REQUIRES_ARM_NEON; 2416 for (size_t k = 1; k < 8; k++) { 2417 GemmMicrokernelTester() 2418 .mr(1) 2419 .nr(8) 2420 .kr(4) 2421 .sr(1) 2422 .m(1) 2423 .n(8) 2424 .k(k) 2425 .a_stride(11) 2426 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, 
xnn_qs8_requantize_rndnu); 2427 } 2428 } 2429 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R,k_lt_8_subtile)2430 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, k_lt_8_subtile) { 2431 TEST_REQUIRES_ARM_NEON; 2432 for (size_t k = 1; k < 8; k++) { 2433 for (uint32_t n = 1; n <= 8; n++) { 2434 for (uint32_t m = 1; m <= 1; m++) { 2435 GemmMicrokernelTester() 2436 .mr(1) 2437 .nr(8) 2438 .kr(4) 2439 .sr(1) 2440 .m(m) 2441 .n(n) 2442 .k(k) 2443 .iterations(1) 2444 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2445 } 2446 } 2447 } 2448 } 2449 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R,k_gt_8)2450 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, k_gt_8) { 2451 TEST_REQUIRES_ARM_NEON; 2452 for (size_t k = 9; k < 16; k++) { 2453 GemmMicrokernelTester() 2454 .mr(1) 2455 .nr(8) 2456 .kr(4) 2457 .sr(1) 2458 .m(1) 2459 .n(8) 2460 .k(k) 2461 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2462 } 2463 } 2464 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R,k_gt_8_strided_a)2465 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, k_gt_8_strided_a) { 2466 TEST_REQUIRES_ARM_NEON; 2467 for (size_t k = 9; k < 16; k++) { 2468 GemmMicrokernelTester() 2469 .mr(1) 2470 .nr(8) 2471 .kr(4) 2472 .sr(1) 2473 .m(1) 2474 .n(8) 2475 .k(k) 2476 .a_stride(19) 2477 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2478 } 2479 } 2480 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R,k_gt_8_subtile)2481 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, k_gt_8_subtile) { 2482 TEST_REQUIRES_ARM_NEON; 2483 for (size_t k = 9; k < 16; k++) { 2484 for (uint32_t n = 1; n <= 8; n++) { 2485 for (uint32_t m = 1; m <= 1; m++) { 2486 GemmMicrokernelTester() 2487 .mr(1) 2488 .nr(8) 2489 .kr(4) 2490 .sr(1) 2491 .m(m) 2492 .n(n) 2493 .k(k) 2494 
.iterations(1) 2495 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2496 } 2497 } 2498 } 2499 } 2500 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R,k_div_8)2501 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, k_div_8) { 2502 TEST_REQUIRES_ARM_NEON; 2503 for (size_t k = 16; k <= 80; k += 8) { 2504 GemmMicrokernelTester() 2505 .mr(1) 2506 .nr(8) 2507 .kr(4) 2508 .sr(1) 2509 .m(1) 2510 .n(8) 2511 .k(k) 2512 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2513 } 2514 } 2515 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R,k_div_8_strided_a)2516 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, k_div_8_strided_a) { 2517 TEST_REQUIRES_ARM_NEON; 2518 for (size_t k = 16; k <= 80; k += 8) { 2519 GemmMicrokernelTester() 2520 .mr(1) 2521 .nr(8) 2522 .kr(4) 2523 .sr(1) 2524 .m(1) 2525 .n(8) 2526 .k(k) 2527 .a_stride(83) 2528 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2529 } 2530 } 2531 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R,k_div_8_subtile)2532 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, k_div_8_subtile) { 2533 TEST_REQUIRES_ARM_NEON; 2534 for (size_t k = 16; k <= 80; k += 8) { 2535 for (uint32_t n = 1; n <= 8; n++) { 2536 for (uint32_t m = 1; m <= 1; m++) { 2537 GemmMicrokernelTester() 2538 .mr(1) 2539 .nr(8) 2540 .kr(4) 2541 .sr(1) 2542 .m(m) 2543 .n(n) 2544 .k(k) 2545 .iterations(1) 2546 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2547 } 2548 } 2549 } 2550 } 2551 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R,n_gt_8)2552 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, n_gt_8) { 2553 TEST_REQUIRES_ARM_NEON; 2554 for (uint32_t n = 9; n < 16; n++) { 2555 for (size_t k = 1; k <= 40; k += 9) { 2556 
GemmMicrokernelTester() 2557 .mr(1) 2558 .nr(8) 2559 .kr(4) 2560 .sr(1) 2561 .m(1) 2562 .n(n) 2563 .k(k) 2564 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2565 } 2566 } 2567 } 2568 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R,n_gt_8_strided_cn)2569 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, n_gt_8_strided_cn) { 2570 TEST_REQUIRES_ARM_NEON; 2571 for (uint32_t n = 9; n < 16; n++) { 2572 for (size_t k = 1; k <= 40; k += 9) { 2573 GemmMicrokernelTester() 2574 .mr(1) 2575 .nr(8) 2576 .kr(4) 2577 .sr(1) 2578 .m(1) 2579 .n(n) 2580 .k(k) 2581 .cn_stride(11) 2582 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2583 } 2584 } 2585 } 2586 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R,n_gt_8_strided_a)2587 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, n_gt_8_strided_a) { 2588 TEST_REQUIRES_ARM_NEON; 2589 for (uint32_t n = 9; n < 16; n++) { 2590 for (size_t k = 1; k <= 40; k += 9) { 2591 GemmMicrokernelTester() 2592 .mr(1) 2593 .nr(8) 2594 .kr(4) 2595 .sr(1) 2596 .m(1) 2597 .n(n) 2598 .k(k) 2599 .a_stride(43) 2600 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2601 } 2602 } 2603 } 2604 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R,n_gt_8_subtile)2605 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, n_gt_8_subtile) { 2606 TEST_REQUIRES_ARM_NEON; 2607 for (uint32_t n = 9; n < 16; n++) { 2608 for (size_t k = 1; k <= 40; k += 9) { 2609 for (uint32_t m = 1; m <= 1; m++) { 2610 GemmMicrokernelTester() 2611 .mr(1) 2612 .nr(8) 2613 .kr(4) 2614 .sr(1) 2615 .m(m) 2616 .n(n) 2617 .k(k) 2618 .iterations(1) 2619 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2620 } 2621 } 2622 } 2623 } 2624 
TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R,n_div_8)2625 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, n_div_8) { 2626 TEST_REQUIRES_ARM_NEON; 2627 for (uint32_t n = 16; n <= 24; n += 8) { 2628 for (size_t k = 1; k <= 40; k += 9) { 2629 GemmMicrokernelTester() 2630 .mr(1) 2631 .nr(8) 2632 .kr(4) 2633 .sr(1) 2634 .m(1) 2635 .n(n) 2636 .k(k) 2637 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2638 } 2639 } 2640 } 2641 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R,n_div_8_strided_cn)2642 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, n_div_8_strided_cn) { 2643 TEST_REQUIRES_ARM_NEON; 2644 for (uint32_t n = 16; n <= 24; n += 8) { 2645 for (size_t k = 1; k <= 40; k += 9) { 2646 GemmMicrokernelTester() 2647 .mr(1) 2648 .nr(8) 2649 .kr(4) 2650 .sr(1) 2651 .m(1) 2652 .n(n) 2653 .k(k) 2654 .cn_stride(11) 2655 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2656 } 2657 } 2658 } 2659 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R,n_div_8_strided_a)2660 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, n_div_8_strided_a) { 2661 TEST_REQUIRES_ARM_NEON; 2662 for (uint32_t n = 16; n <= 24; n += 8) { 2663 for (size_t k = 1; k <= 40; k += 9) { 2664 GemmMicrokernelTester() 2665 .mr(1) 2666 .nr(8) 2667 .kr(4) 2668 .sr(1) 2669 .m(1) 2670 .n(n) 2671 .k(k) 2672 .a_stride(43) 2673 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2674 } 2675 } 2676 } 2677 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R,n_div_8_subtile)2678 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, n_div_8_subtile) { 2679 TEST_REQUIRES_ARM_NEON; 2680 for (uint32_t n = 16; n <= 24; n += 8) { 2681 for (size_t k = 1; k <= 40; k += 9) { 2682 for (uint32_t m = 1; m <= 1; m++) { 2683 GemmMicrokernelTester() 2684 .mr(1) 2685 .nr(8) 2686 .kr(4) 2687 
.sr(1) 2688 .m(m) 2689 .n(n) 2690 .k(k) 2691 .iterations(1) 2692 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2693 } 2694 } 2695 } 2696 } 2697 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R,strided_cm_subtile)2698 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, strided_cm_subtile) { 2699 TEST_REQUIRES_ARM_NEON; 2700 for (size_t k = 1; k <= 40; k += 9) { 2701 for (uint32_t n = 1; n <= 8; n++) { 2702 for (uint32_t m = 1; m <= 1; m++) { 2703 GemmMicrokernelTester() 2704 .mr(1) 2705 .nr(8) 2706 .kr(4) 2707 .sr(1) 2708 .m(m) 2709 .n(n) 2710 .k(k) 2711 .cm_stride(11) 2712 .iterations(1) 2713 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2714 } 2715 } 2716 } 2717 } 2718 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R,qmin)2719 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, qmin) { 2720 TEST_REQUIRES_ARM_NEON; 2721 GemmMicrokernelTester() 2722 .mr(1) 2723 .nr(8) 2724 .kr(4) 2725 .sr(1) 2726 .m(1) 2727 .n(8) 2728 .k(8) 2729 .qmin(128) 2730 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2731 } 2732 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R,qmax)2733 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, qmax) { 2734 TEST_REQUIRES_ARM_NEON; 2735 GemmMicrokernelTester() 2736 .mr(1) 2737 .nr(8) 2738 .kr(4) 2739 .sr(1) 2740 .m(1) 2741 .n(8) 2742 .k(8) 2743 .qmax(128) 2744 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2745 } 2746 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R,strided_cm)2747 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, strided_cm) { 2748 TEST_REQUIRES_ARM_NEON; 2749 GemmMicrokernelTester() 2750 .mr(1) 2751 .nr(8) 2752 .kr(4) 2753 .sr(1) 2754 .m(1) 2755 .n(8) 2756 .k(8) 2757 .cm_stride(11) 2758 
.Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2759 } 2760 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 2761 2762 2763 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD2R,k_eq_16)2764 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD2R, k_eq_16) { 2765 TEST_REQUIRES_ARM_NEON; 2766 GemmMicrokernelTester() 2767 .mr(1) 2768 .nr(16) 2769 .kr(2) 2770 .sr(1) 2771 .m(1) 2772 .n(16) 2773 .k(16) 2774 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2775 } 2776 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD2R,strided_cn)2777 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD2R, strided_cn) { 2778 TEST_REQUIRES_ARM_NEON; 2779 GemmMicrokernelTester() 2780 .mr(1) 2781 .nr(16) 2782 .kr(2) 2783 .sr(1) 2784 .m(1) 2785 .n(16) 2786 .k(16) 2787 .cn_stride(19) 2788 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2789 } 2790 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD2R,k_eq_16_strided_a)2791 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD2R, k_eq_16_strided_a) { 2792 TEST_REQUIRES_ARM_NEON; 2793 GemmMicrokernelTester() 2794 .mr(1) 2795 .nr(16) 2796 .kr(2) 2797 .sr(1) 2798 .m(1) 2799 .n(16) 2800 .k(16) 2801 .a_stride(19) 2802 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2803 } 2804 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD2R,k_eq_16_subtile)2805 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD2R, k_eq_16_subtile) { 2806 TEST_REQUIRES_ARM_NEON; 2807 for (uint32_t n = 1; n <= 16; n++) { 2808 for (uint32_t m = 1; m <= 1; m++) { 2809 GemmMicrokernelTester() 2810 .mr(1) 2811 .nr(16) 2812 .kr(2) 2813 .sr(1) 2814 .m(m) 2815 .n(n) 2816 .k(16) 2817 .iterations(1) 2818 
.Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2819 } 2820 } 2821 } 2822 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD2R,k_eq_16_subtile_m)2823 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD2R, k_eq_16_subtile_m) { 2824 TEST_REQUIRES_ARM_NEON; 2825 for (uint32_t m = 1; m <= 1; m++) { 2826 GemmMicrokernelTester() 2827 .mr(1) 2828 .nr(16) 2829 .kr(2) 2830 .sr(1) 2831 .m(m) 2832 .n(16) 2833 .k(16) 2834 .iterations(1) 2835 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2836 } 2837 } 2838 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD2R,k_eq_16_subtile_n)2839 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD2R, k_eq_16_subtile_n) { 2840 TEST_REQUIRES_ARM_NEON; 2841 for (uint32_t n = 1; n <= 16; n++) { 2842 GemmMicrokernelTester() 2843 .mr(1) 2844 .nr(16) 2845 .kr(2) 2846 .sr(1) 2847 .m(1) 2848 .n(n) 2849 .k(16) 2850 .iterations(1) 2851 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2852 } 2853 } 2854 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD2R,k_lt_16)2855 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD2R, k_lt_16) { 2856 TEST_REQUIRES_ARM_NEON; 2857 for (size_t k = 1; k < 16; k++) { 2858 GemmMicrokernelTester() 2859 .mr(1) 2860 .nr(16) 2861 .kr(2) 2862 .sr(1) 2863 .m(1) 2864 .n(16) 2865 .k(k) 2866 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2867 } 2868 } 2869 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD2R,k_lt_16_strided_a)2870 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD2R, k_lt_16_strided_a) { 2871 TEST_REQUIRES_ARM_NEON; 2872 for (size_t k = 1; k < 16; k++) { 2873 GemmMicrokernelTester() 2874 .mr(1) 2875 .nr(16) 2876 .kr(2) 2877 .sr(1) 2878 .m(1) 2879 .n(16) 2880 .k(k) 2881 .a_stride(19) 2882 
.Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2883 } 2884 } 2885 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD2R,k_lt_16_subtile)2886 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD2R, k_lt_16_subtile) { 2887 TEST_REQUIRES_ARM_NEON; 2888 for (size_t k = 1; k < 16; k++) { 2889 for (uint32_t n = 1; n <= 16; n++) { 2890 for (uint32_t m = 1; m <= 1; m++) { 2891 GemmMicrokernelTester() 2892 .mr(1) 2893 .nr(16) 2894 .kr(2) 2895 .sr(1) 2896 .m(m) 2897 .n(n) 2898 .k(k) 2899 .iterations(1) 2900 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2901 } 2902 } 2903 } 2904 } 2905 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD2R,k_gt_16)2906 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD2R, k_gt_16) { 2907 TEST_REQUIRES_ARM_NEON; 2908 for (size_t k = 17; k < 32; k++) { 2909 GemmMicrokernelTester() 2910 .mr(1) 2911 .nr(16) 2912 .kr(2) 2913 .sr(1) 2914 .m(1) 2915 .n(16) 2916 .k(k) 2917 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2918 } 2919 } 2920 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD2R,k_gt_16_strided_a)2921 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD2R, k_gt_16_strided_a) { 2922 TEST_REQUIRES_ARM_NEON; 2923 for (size_t k = 17; k < 32; k++) { 2924 GemmMicrokernelTester() 2925 .mr(1) 2926 .nr(16) 2927 .kr(2) 2928 .sr(1) 2929 .m(1) 2930 .n(16) 2931 .k(k) 2932 .a_stride(37) 2933 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2934 } 2935 } 2936 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD2R,k_gt_16_subtile)2937 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD2R, k_gt_16_subtile) { 2938 TEST_REQUIRES_ARM_NEON; 2939 for (size_t k = 17; k < 32; k++) { 2940 for (uint32_t n = 1; n <= 16; n++) { 2941 for (uint32_t m = 1; m 
<= 1; m++) { 2942 GemmMicrokernelTester() 2943 .mr(1) 2944 .nr(16) 2945 .kr(2) 2946 .sr(1) 2947 .m(m) 2948 .n(n) 2949 .k(k) 2950 .iterations(1) 2951 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2952 } 2953 } 2954 } 2955 } 2956 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD2R,k_div_16)2957 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD2R, k_div_16) { 2958 TEST_REQUIRES_ARM_NEON; 2959 for (size_t k = 32; k <= 160; k += 16) { 2960 GemmMicrokernelTester() 2961 .mr(1) 2962 .nr(16) 2963 .kr(2) 2964 .sr(1) 2965 .m(1) 2966 .n(16) 2967 .k(k) 2968 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2969 } 2970 } 2971 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD2R,k_div_16_strided_a)2972 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD2R, k_div_16_strided_a) { 2973 TEST_REQUIRES_ARM_NEON; 2974 for (size_t k = 32; k <= 160; k += 16) { 2975 GemmMicrokernelTester() 2976 .mr(1) 2977 .nr(16) 2978 .kr(2) 2979 .sr(1) 2980 .m(1) 2981 .n(16) 2982 .k(k) 2983 .a_stride(163) 2984 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2985 } 2986 } 2987 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD2R,k_div_16_subtile)2988 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD2R, k_div_16_subtile) { 2989 TEST_REQUIRES_ARM_NEON; 2990 for (size_t k = 32; k <= 160; k += 16) { 2991 for (uint32_t n = 1; n <= 16; n++) { 2992 for (uint32_t m = 1; m <= 1; m++) { 2993 GemmMicrokernelTester() 2994 .mr(1) 2995 .nr(16) 2996 .kr(2) 2997 .sr(1) 2998 .m(m) 2999 .n(n) 3000 .k(k) 3001 .iterations(1) 3002 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3003 } 3004 } 3005 } 3006 } 3007 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD2R,n_gt_16)3008 
// N-dimension, strided-output and qmin/qmax cases for
// xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld2r
// (mr = 1, nr = 16, kr = 2, sr = 1).

// Every n in [17, 31], k = 1..80 in steps of 17.
TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD2R, n_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(16)
        .kr(2)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// Every n in [17, 31], k = 1..80 in steps of 17, with cn_stride = 19.
TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD2R, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(16)
        .kr(2)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .cn_stride(19)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// Every n in [17, 31], k = 1..80 in steps of 17, with a_stride = 83.
TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD2R, n_gt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(16)
        .kr(2)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .a_stride(83)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// Every n in [17, 31], k = 1..80 in steps of 17, m in [1, 1], one iteration each.
TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD2R, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(16)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// n = 32 and 48, k = 1..80 in steps of 17.
TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD2R, n_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(16)
        .kr(2)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// n = 32 and 48, k = 1..80 in steps of 17, with cn_stride = 19.
TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD2R, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(16)
        .kr(2)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .cn_stride(19)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// n = 32 and 48, k = 1..80 in steps of 17, with a_stride = 83.
TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD2R, n_div_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(16)
        .kr(2)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .a_stride(83)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// n = 32 and 48, k = 1..80 in steps of 17, m in [1, 1], one iteration each.
TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD2R, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 80; k += 17) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(16)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// k = 1..80 in steps of 17, n in [1, 16], m in [1, 1], with cm_stride = 19.
TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD2R, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 80; k += 17) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(16)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(19)
          .iterations(1)
          .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// Full 1x16 tile, k = 16, with qmin set to 128.
TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD2R, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1)
    .nr(16)
    .kr(2)
    .sr(1)
    .m(1)
    .n(16)
    .k(16)
    .qmin(128)
    .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}

// Full 1x16 tile, k = 16, with qmax set to 128.
TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD2R, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1)
    .nr(16)
    .kr(2)
    .sr(1)
    .m(1)
    .n(16)
    .k(16)
    .qmax(128)
    .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD2R, strided_cm) { 3204 TEST_REQUIRES_ARM_NEON; 3205 GemmMicrokernelTester() 3206 .mr(1) 3207 .nr(16) 3208 .kr(2) 3209 .sr(1) 3210 .m(1) 3211 .n(16) 3212 .k(16) 3213 .cm_stride(19) 3214 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3215 } 3216 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 3217 3218 3219 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R,k_eq_16)3220 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, k_eq_16) { 3221 TEST_REQUIRES_ARM_NEON; 3222 GemmMicrokernelTester() 3223 .mr(1) 3224 .nr(16) 3225 .kr(2) 3226 .sr(1) 3227 .m(1) 3228 .n(16) 3229 .k(16) 3230 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3231 } 3232 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R,strided_cn)3233 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, strided_cn) { 3234 TEST_REQUIRES_ARM_NEON; 3235 GemmMicrokernelTester() 3236 .mr(1) 3237 .nr(16) 3238 .kr(2) 3239 .sr(1) 3240 .m(1) 3241 .n(16) 3242 .k(16) 3243 .cn_stride(19) 3244 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3245 } 3246 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R,k_eq_16_strided_a)3247 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, k_eq_16_strided_a) { 3248 TEST_REQUIRES_ARM_NEON; 3249 GemmMicrokernelTester() 3250 .mr(1) 3251 .nr(16) 3252 .kr(2) 3253 .sr(1) 3254 .m(1) 3255 .n(16) 3256 .k(16) 3257 .a_stride(19) 3258 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3259 } 3260 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R,k_eq_16_subtile)3261 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, k_eq_16_subtile) { 3262 TEST_REQUIRES_ARM_NEON; 3263 for (uint32_t n = 1; n <= 16; 
n++) { 3264 for (uint32_t m = 1; m <= 1; m++) { 3265 GemmMicrokernelTester() 3266 .mr(1) 3267 .nr(16) 3268 .kr(2) 3269 .sr(1) 3270 .m(m) 3271 .n(n) 3272 .k(16) 3273 .iterations(1) 3274 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3275 } 3276 } 3277 } 3278 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R,k_eq_16_subtile_m)3279 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, k_eq_16_subtile_m) { 3280 TEST_REQUIRES_ARM_NEON; 3281 for (uint32_t m = 1; m <= 1; m++) { 3282 GemmMicrokernelTester() 3283 .mr(1) 3284 .nr(16) 3285 .kr(2) 3286 .sr(1) 3287 .m(m) 3288 .n(16) 3289 .k(16) 3290 .iterations(1) 3291 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3292 } 3293 } 3294 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R,k_eq_16_subtile_n)3295 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, k_eq_16_subtile_n) { 3296 TEST_REQUIRES_ARM_NEON; 3297 for (uint32_t n = 1; n <= 16; n++) { 3298 GemmMicrokernelTester() 3299 .mr(1) 3300 .nr(16) 3301 .kr(2) 3302 .sr(1) 3303 .m(1) 3304 .n(n) 3305 .k(16) 3306 .iterations(1) 3307 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3308 } 3309 } 3310 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R,k_lt_16)3311 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, k_lt_16) { 3312 TEST_REQUIRES_ARM_NEON; 3313 for (size_t k = 1; k < 16; k++) { 3314 GemmMicrokernelTester() 3315 .mr(1) 3316 .nr(16) 3317 .kr(2) 3318 .sr(1) 3319 .m(1) 3320 .n(16) 3321 .k(k) 3322 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3323 } 3324 } 3325 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R,k_lt_16_strided_a)3326 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, k_lt_16_strided_a) { 3327 TEST_REQUIRES_ARM_NEON; 
3328 for (size_t k = 1; k < 16; k++) { 3329 GemmMicrokernelTester() 3330 .mr(1) 3331 .nr(16) 3332 .kr(2) 3333 .sr(1) 3334 .m(1) 3335 .n(16) 3336 .k(k) 3337 .a_stride(19) 3338 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3339 } 3340 } 3341 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R,k_lt_16_subtile)3342 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, k_lt_16_subtile) { 3343 TEST_REQUIRES_ARM_NEON; 3344 for (size_t k = 1; k < 16; k++) { 3345 for (uint32_t n = 1; n <= 16; n++) { 3346 for (uint32_t m = 1; m <= 1; m++) { 3347 GemmMicrokernelTester() 3348 .mr(1) 3349 .nr(16) 3350 .kr(2) 3351 .sr(1) 3352 .m(m) 3353 .n(n) 3354 .k(k) 3355 .iterations(1) 3356 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3357 } 3358 } 3359 } 3360 } 3361 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R,k_gt_16)3362 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, k_gt_16) { 3363 TEST_REQUIRES_ARM_NEON; 3364 for (size_t k = 17; k < 32; k++) { 3365 GemmMicrokernelTester() 3366 .mr(1) 3367 .nr(16) 3368 .kr(2) 3369 .sr(1) 3370 .m(1) 3371 .n(16) 3372 .k(k) 3373 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3374 } 3375 } 3376 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R,k_gt_16_strided_a)3377 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, k_gt_16_strided_a) { 3378 TEST_REQUIRES_ARM_NEON; 3379 for (size_t k = 17; k < 32; k++) { 3380 GemmMicrokernelTester() 3381 .mr(1) 3382 .nr(16) 3383 .kr(2) 3384 .sr(1) 3385 .m(1) 3386 .n(16) 3387 .k(k) 3388 .a_stride(37) 3389 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3390 } 3391 } 3392 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R,k_gt_16_subtile)3393 
TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, k_gt_16_subtile) { 3394 TEST_REQUIRES_ARM_NEON; 3395 for (size_t k = 17; k < 32; k++) { 3396 for (uint32_t n = 1; n <= 16; n++) { 3397 for (uint32_t m = 1; m <= 1; m++) { 3398 GemmMicrokernelTester() 3399 .mr(1) 3400 .nr(16) 3401 .kr(2) 3402 .sr(1) 3403 .m(m) 3404 .n(n) 3405 .k(k) 3406 .iterations(1) 3407 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3408 } 3409 } 3410 } 3411 } 3412 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R,k_div_16)3413 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, k_div_16) { 3414 TEST_REQUIRES_ARM_NEON; 3415 for (size_t k = 32; k <= 160; k += 16) { 3416 GemmMicrokernelTester() 3417 .mr(1) 3418 .nr(16) 3419 .kr(2) 3420 .sr(1) 3421 .m(1) 3422 .n(16) 3423 .k(k) 3424 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3425 } 3426 } 3427 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R,k_div_16_strided_a)3428 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, k_div_16_strided_a) { 3429 TEST_REQUIRES_ARM_NEON; 3430 for (size_t k = 32; k <= 160; k += 16) { 3431 GemmMicrokernelTester() 3432 .mr(1) 3433 .nr(16) 3434 .kr(2) 3435 .sr(1) 3436 .m(1) 3437 .n(16) 3438 .k(k) 3439 .a_stride(163) 3440 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3441 } 3442 } 3443 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R,k_div_16_subtile)3444 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, k_div_16_subtile) { 3445 TEST_REQUIRES_ARM_NEON; 3446 for (size_t k = 32; k <= 160; k += 16) { 3447 for (uint32_t n = 1; n <= 16; n++) { 3448 for (uint32_t m = 1; m <= 1; m++) { 3449 GemmMicrokernelTester() 3450 .mr(1) 3451 .nr(16) 3452 .kr(2) 3453 .sr(1) 3454 .m(m) 3455 .n(n) 3456 .k(k) 3457 .iterations(1) 3458 
.Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3459 } 3460 } 3461 } 3462 } 3463 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R,n_gt_16)3464 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, n_gt_16) { 3465 TEST_REQUIRES_ARM_NEON; 3466 for (uint32_t n = 17; n < 32; n++) { 3467 for (size_t k = 1; k <= 80; k += 17) { 3468 GemmMicrokernelTester() 3469 .mr(1) 3470 .nr(16) 3471 .kr(2) 3472 .sr(1) 3473 .m(1) 3474 .n(n) 3475 .k(k) 3476 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3477 } 3478 } 3479 } 3480 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R,n_gt_16_strided_cn)3481 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, n_gt_16_strided_cn) { 3482 TEST_REQUIRES_ARM_NEON; 3483 for (uint32_t n = 17; n < 32; n++) { 3484 for (size_t k = 1; k <= 80; k += 17) { 3485 GemmMicrokernelTester() 3486 .mr(1) 3487 .nr(16) 3488 .kr(2) 3489 .sr(1) 3490 .m(1) 3491 .n(n) 3492 .k(k) 3493 .cn_stride(19) 3494 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3495 } 3496 } 3497 } 3498 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R,n_gt_16_strided_a)3499 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, n_gt_16_strided_a) { 3500 TEST_REQUIRES_ARM_NEON; 3501 for (uint32_t n = 17; n < 32; n++) { 3502 for (size_t k = 1; k <= 80; k += 17) { 3503 GemmMicrokernelTester() 3504 .mr(1) 3505 .nr(16) 3506 .kr(2) 3507 .sr(1) 3508 .m(1) 3509 .n(n) 3510 .k(k) 3511 .a_stride(83) 3512 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3513 } 3514 } 3515 } 3516 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R,n_gt_16_subtile)3517 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, n_gt_16_subtile) { 3518 TEST_REQUIRES_ARM_NEON; 3519 for (uint32_t n = 17; n 
< 32; n++) { 3520 for (size_t k = 1; k <= 80; k += 17) { 3521 for (uint32_t m = 1; m <= 1; m++) { 3522 GemmMicrokernelTester() 3523 .mr(1) 3524 .nr(16) 3525 .kr(2) 3526 .sr(1) 3527 .m(m) 3528 .n(n) 3529 .k(k) 3530 .iterations(1) 3531 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3532 } 3533 } 3534 } 3535 } 3536 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R,n_div_16)3537 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, n_div_16) { 3538 TEST_REQUIRES_ARM_NEON; 3539 for (uint32_t n = 32; n <= 48; n += 16) { 3540 for (size_t k = 1; k <= 80; k += 17) { 3541 GemmMicrokernelTester() 3542 .mr(1) 3543 .nr(16) 3544 .kr(2) 3545 .sr(1) 3546 .m(1) 3547 .n(n) 3548 .k(k) 3549 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3550 } 3551 } 3552 } 3553 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R,n_div_16_strided_cn)3554 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, n_div_16_strided_cn) { 3555 TEST_REQUIRES_ARM_NEON; 3556 for (uint32_t n = 32; n <= 48; n += 16) { 3557 for (size_t k = 1; k <= 80; k += 17) { 3558 GemmMicrokernelTester() 3559 .mr(1) 3560 .nr(16) 3561 .kr(2) 3562 .sr(1) 3563 .m(1) 3564 .n(n) 3565 .k(k) 3566 .cn_stride(19) 3567 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3568 } 3569 } 3570 } 3571 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R,n_div_16_strided_a)3572 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, n_div_16_strided_a) { 3573 TEST_REQUIRES_ARM_NEON; 3574 for (uint32_t n = 32; n <= 48; n += 16) { 3575 for (size_t k = 1; k <= 80; k += 17) { 3576 GemmMicrokernelTester() 3577 .mr(1) 3578 .nr(16) 3579 .kr(2) 3580 .sr(1) 3581 .m(1) 3582 .n(n) 3583 .k(k) 3584 .a_stride(83) 3585 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r, 
xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3586 } 3587 } 3588 } 3589 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R,n_div_16_subtile)3590 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, n_div_16_subtile) { 3591 TEST_REQUIRES_ARM_NEON; 3592 for (uint32_t n = 32; n <= 48; n += 16) { 3593 for (size_t k = 1; k <= 80; k += 17) { 3594 for (uint32_t m = 1; m <= 1; m++) { 3595 GemmMicrokernelTester() 3596 .mr(1) 3597 .nr(16) 3598 .kr(2) 3599 .sr(1) 3600 .m(m) 3601 .n(n) 3602 .k(k) 3603 .iterations(1) 3604 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3605 } 3606 } 3607 } 3608 } 3609 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R,strided_cm_subtile)3610 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, strided_cm_subtile) { 3611 TEST_REQUIRES_ARM_NEON; 3612 for (size_t k = 1; k <= 80; k += 17) { 3613 for (uint32_t n = 1; n <= 16; n++) { 3614 for (uint32_t m = 1; m <= 1; m++) { 3615 GemmMicrokernelTester() 3616 .mr(1) 3617 .nr(16) 3618 .kr(2) 3619 .sr(1) 3620 .m(m) 3621 .n(n) 3622 .k(k) 3623 .cm_stride(19) 3624 .iterations(1) 3625 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3626 } 3627 } 3628 } 3629 } 3630 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R,qmin)3631 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, qmin) { 3632 TEST_REQUIRES_ARM_NEON; 3633 GemmMicrokernelTester() 3634 .mr(1) 3635 .nr(16) 3636 .kr(2) 3637 .sr(1) 3638 .m(1) 3639 .n(16) 3640 .k(16) 3641 .qmin(128) 3642 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3643 } 3644 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R,qmax)3645 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, qmax) { 3646 TEST_REQUIRES_ARM_NEON; 3647 GemmMicrokernelTester() 3648 .mr(1) 3649 .nr(16) 3650 .kr(2) 3651 .sr(1) 3652 .m(1) 
3653 .n(16) 3654 .k(16) 3655 .qmax(128) 3656 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3657 } 3658 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R,strided_cm)3659 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, strided_cm) { 3660 TEST_REQUIRES_ARM_NEON; 3661 GemmMicrokernelTester() 3662 .mr(1) 3663 .nr(16) 3664 .kr(2) 3665 .sr(1) 3666 .m(1) 3667 .n(16) 3668 .k(16) 3669 .cm_stride(19) 3670 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3671 } 3672 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 3673 3674 3675 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL,k_eq_16)3676 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL, k_eq_16) { 3677 TEST_REQUIRES_ARM_NEON; 3678 GemmMicrokernelTester() 3679 .mr(1) 3680 .nr(16) 3681 .kr(2) 3682 .sr(4) 3683 .m(1) 3684 .n(16) 3685 .k(16) 3686 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3687 } 3688 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL,strided_cn)3689 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL, strided_cn) { 3690 TEST_REQUIRES_ARM_NEON; 3691 GemmMicrokernelTester() 3692 .mr(1) 3693 .nr(16) 3694 .kr(2) 3695 .sr(4) 3696 .m(1) 3697 .n(16) 3698 .k(16) 3699 .cn_stride(19) 3700 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3701 } 3702 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL,k_eq_16_strided_a)3703 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL, k_eq_16_strided_a) { 3704 TEST_REQUIRES_ARM_NEON; 3705 GemmMicrokernelTester() 3706 .mr(1) 3707 .nr(16) 3708 .kr(2) 3709 .sr(4) 3710 .m(1) 3711 .n(16) 3712 .k(16) 3713 .a_stride(19) 3714 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, 
xnn_qs8_requantize_rndnu); 3715 } 3716 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL,k_eq_16_subtile)3717 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL, k_eq_16_subtile) { 3718 TEST_REQUIRES_ARM_NEON; 3719 for (uint32_t n = 1; n <= 16; n++) { 3720 for (uint32_t m = 1; m <= 1; m++) { 3721 GemmMicrokernelTester() 3722 .mr(1) 3723 .nr(16) 3724 .kr(2) 3725 .sr(4) 3726 .m(m) 3727 .n(n) 3728 .k(16) 3729 .iterations(1) 3730 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3731 } 3732 } 3733 } 3734 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL,k_eq_16_subtile_m)3735 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL, k_eq_16_subtile_m) { 3736 TEST_REQUIRES_ARM_NEON; 3737 for (uint32_t m = 1; m <= 1; m++) { 3738 GemmMicrokernelTester() 3739 .mr(1) 3740 .nr(16) 3741 .kr(2) 3742 .sr(4) 3743 .m(m) 3744 .n(16) 3745 .k(16) 3746 .iterations(1) 3747 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3748 } 3749 } 3750 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL,k_eq_16_subtile_n)3751 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL, k_eq_16_subtile_n) { 3752 TEST_REQUIRES_ARM_NEON; 3753 for (uint32_t n = 1; n <= 16; n++) { 3754 GemmMicrokernelTester() 3755 .mr(1) 3756 .nr(16) 3757 .kr(2) 3758 .sr(4) 3759 .m(1) 3760 .n(n) 3761 .k(16) 3762 .iterations(1) 3763 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3764 } 3765 } 3766 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL,k_lt_16)3767 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL, k_lt_16) { 3768 TEST_REQUIRES_ARM_NEON; 3769 for (size_t k = 1; k < 16; k++) { 3770 GemmMicrokernelTester() 3771 .mr(1) 3772 .nr(16) 3773 .kr(2) 3774 .sr(4) 3775 .m(1) 3776 .n(16) 3777 .k(k) 3778 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, 
xnn_qs8_requantize_rndnu); 3779 } 3780 } 3781 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL,k_lt_16_strided_a)3782 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL, k_lt_16_strided_a) { 3783 TEST_REQUIRES_ARM_NEON; 3784 for (size_t k = 1; k < 16; k++) { 3785 GemmMicrokernelTester() 3786 .mr(1) 3787 .nr(16) 3788 .kr(2) 3789 .sr(4) 3790 .m(1) 3791 .n(16) 3792 .k(k) 3793 .a_stride(19) 3794 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3795 } 3796 } 3797 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL,k_lt_16_subtile)3798 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL, k_lt_16_subtile) { 3799 TEST_REQUIRES_ARM_NEON; 3800 for (size_t k = 1; k < 16; k++) { 3801 for (uint32_t n = 1; n <= 16; n++) { 3802 for (uint32_t m = 1; m <= 1; m++) { 3803 GemmMicrokernelTester() 3804 .mr(1) 3805 .nr(16) 3806 .kr(2) 3807 .sr(4) 3808 .m(m) 3809 .n(n) 3810 .k(k) 3811 .iterations(1) 3812 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3813 } 3814 } 3815 } 3816 } 3817 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL,k_gt_16)3818 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL, k_gt_16) { 3819 TEST_REQUIRES_ARM_NEON; 3820 for (size_t k = 17; k < 32; k++) { 3821 GemmMicrokernelTester() 3822 .mr(1) 3823 .nr(16) 3824 .kr(2) 3825 .sr(4) 3826 .m(1) 3827 .n(16) 3828 .k(k) 3829 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3830 } 3831 } 3832 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL,k_gt_16_strided_a)3833 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL, k_gt_16_strided_a) { 3834 TEST_REQUIRES_ARM_NEON; 3835 for (size_t k = 17; k < 32; k++) { 3836 GemmMicrokernelTester() 3837 .mr(1) 3838 .nr(16) 3839 .kr(2) 3840 .sr(4) 3841 .m(1) 3842 .n(16) 3843 .k(k) 3844 .a_stride(37) 3845 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal, 
xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3846 } 3847 } 3848 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL,k_gt_16_subtile)3849 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL, k_gt_16_subtile) { 3850 TEST_REQUIRES_ARM_NEON; 3851 for (size_t k = 17; k < 32; k++) { 3852 for (uint32_t n = 1; n <= 16; n++) { 3853 for (uint32_t m = 1; m <= 1; m++) { 3854 GemmMicrokernelTester() 3855 .mr(1) 3856 .nr(16) 3857 .kr(2) 3858 .sr(4) 3859 .m(m) 3860 .n(n) 3861 .k(k) 3862 .iterations(1) 3863 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3864 } 3865 } 3866 } 3867 } 3868 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL,k_div_16)3869 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL, k_div_16) { 3870 TEST_REQUIRES_ARM_NEON; 3871 for (size_t k = 32; k <= 160; k += 16) { 3872 GemmMicrokernelTester() 3873 .mr(1) 3874 .nr(16) 3875 .kr(2) 3876 .sr(4) 3877 .m(1) 3878 .n(16) 3879 .k(k) 3880 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3881 } 3882 } 3883 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL,k_div_16_strided_a)3884 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL, k_div_16_strided_a) { 3885 TEST_REQUIRES_ARM_NEON; 3886 for (size_t k = 32; k <= 160; k += 16) { 3887 GemmMicrokernelTester() 3888 .mr(1) 3889 .nr(16) 3890 .kr(2) 3891 .sr(4) 3892 .m(1) 3893 .n(16) 3894 .k(k) 3895 .a_stride(163) 3896 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3897 } 3898 } 3899 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL,k_div_16_subtile)3900 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL, k_div_16_subtile) { 3901 TEST_REQUIRES_ARM_NEON; 3902 for (size_t k = 32; k <= 160; k += 16) { 3903 for (uint32_t n = 1; n <= 16; n++) { 3904 for (uint32_t m = 1; m <= 1; m++) { 3905 GemmMicrokernelTester() 3906 .mr(1) 3907 .nr(16) 3908 
.kr(2) 3909 .sr(4) 3910 .m(m) 3911 .n(n) 3912 .k(k) 3913 .iterations(1) 3914 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3915 } 3916 } 3917 } 3918 } 3919 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL,n_gt_16)3920 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL, n_gt_16) { 3921 TEST_REQUIRES_ARM_NEON; 3922 for (uint32_t n = 17; n < 32; n++) { 3923 for (size_t k = 1; k <= 80; k += 17) { 3924 GemmMicrokernelTester() 3925 .mr(1) 3926 .nr(16) 3927 .kr(2) 3928 .sr(4) 3929 .m(1) 3930 .n(n) 3931 .k(k) 3932 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3933 } 3934 } 3935 } 3936 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL,n_gt_16_strided_cn)3937 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL, n_gt_16_strided_cn) { 3938 TEST_REQUIRES_ARM_NEON; 3939 for (uint32_t n = 17; n < 32; n++) { 3940 for (size_t k = 1; k <= 80; k += 17) { 3941 GemmMicrokernelTester() 3942 .mr(1) 3943 .nr(16) 3944 .kr(2) 3945 .sr(4) 3946 .m(1) 3947 .n(n) 3948 .k(k) 3949 .cn_stride(19) 3950 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3951 } 3952 } 3953 } 3954 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL,n_gt_16_strided_a)3955 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL, n_gt_16_strided_a) { 3956 TEST_REQUIRES_ARM_NEON; 3957 for (uint32_t n = 17; n < 32; n++) { 3958 for (size_t k = 1; k <= 80; k += 17) { 3959 GemmMicrokernelTester() 3960 .mr(1) 3961 .nr(16) 3962 .kr(2) 3963 .sr(4) 3964 .m(1) 3965 .n(n) 3966 .k(k) 3967 .a_stride(83) 3968 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3969 } 3970 } 3971 } 3972 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL,n_gt_16_subtile)3973 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL, n_gt_16_subtile) { 3974 
TEST_REQUIRES_ARM_NEON; 3975 for (uint32_t n = 17; n < 32; n++) { 3976 for (size_t k = 1; k <= 80; k += 17) { 3977 for (uint32_t m = 1; m <= 1; m++) { 3978 GemmMicrokernelTester() 3979 .mr(1) 3980 .nr(16) 3981 .kr(2) 3982 .sr(4) 3983 .m(m) 3984 .n(n) 3985 .k(k) 3986 .iterations(1) 3987 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3988 } 3989 } 3990 } 3991 } 3992 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL,n_div_16)3993 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL, n_div_16) { 3994 TEST_REQUIRES_ARM_NEON; 3995 for (uint32_t n = 32; n <= 48; n += 16) { 3996 for (size_t k = 1; k <= 80; k += 17) { 3997 GemmMicrokernelTester() 3998 .mr(1) 3999 .nr(16) 4000 .kr(2) 4001 .sr(4) 4002 .m(1) 4003 .n(n) 4004 .k(k) 4005 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 4006 } 4007 } 4008 } 4009 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL,n_div_16_strided_cn)4010 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL, n_div_16_strided_cn) { 4011 TEST_REQUIRES_ARM_NEON; 4012 for (uint32_t n = 32; n <= 48; n += 16) { 4013 for (size_t k = 1; k <= 80; k += 17) { 4014 GemmMicrokernelTester() 4015 .mr(1) 4016 .nr(16) 4017 .kr(2) 4018 .sr(4) 4019 .m(1) 4020 .n(n) 4021 .k(k) 4022 .cn_stride(19) 4023 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 4024 } 4025 } 4026 } 4027 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL,n_div_16_strided_a)4028 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL, n_div_16_strided_a) { 4029 TEST_REQUIRES_ARM_NEON; 4030 for (uint32_t n = 32; n <= 48; n += 16) { 4031 for (size_t k = 1; k <= 80; k += 17) { 4032 GemmMicrokernelTester() 4033 .mr(1) 4034 .nr(16) 4035 .kr(2) 4036 .sr(4) 4037 .m(1) 4038 .n(n) 4039 .k(k) 4040 .a_stride(83) 4041 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal, 
xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 4042 } 4043 } 4044 } 4045 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL,n_div_16_subtile)4046 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL, n_div_16_subtile) { 4047 TEST_REQUIRES_ARM_NEON; 4048 for (uint32_t n = 32; n <= 48; n += 16) { 4049 for (size_t k = 1; k <= 80; k += 17) { 4050 for (uint32_t m = 1; m <= 1; m++) { 4051 GemmMicrokernelTester() 4052 .mr(1) 4053 .nr(16) 4054 .kr(2) 4055 .sr(4) 4056 .m(m) 4057 .n(n) 4058 .k(k) 4059 .iterations(1) 4060 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 4061 } 4062 } 4063 } 4064 } 4065 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL,strided_cm_subtile)4066 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL, strided_cm_subtile) { 4067 TEST_REQUIRES_ARM_NEON; 4068 for (size_t k = 1; k <= 80; k += 17) { 4069 for (uint32_t n = 1; n <= 16; n++) { 4070 for (uint32_t m = 1; m <= 1; m++) { 4071 GemmMicrokernelTester() 4072 .mr(1) 4073 .nr(16) 4074 .kr(2) 4075 .sr(4) 4076 .m(m) 4077 .n(n) 4078 .k(k) 4079 .cm_stride(19) 4080 .iterations(1) 4081 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 4082 } 4083 } 4084 } 4085 } 4086 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL,qmin)4087 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL, qmin) { 4088 TEST_REQUIRES_ARM_NEON; 4089 GemmMicrokernelTester() 4090 .mr(1) 4091 .nr(16) 4092 .kr(2) 4093 .sr(4) 4094 .m(1) 4095 .n(16) 4096 .k(16) 4097 .qmin(128) 4098 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 4099 } 4100 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL,qmax)4101 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL, qmax) { 4102 TEST_REQUIRES_ARM_NEON; 4103 GemmMicrokernelTester() 4104 .mr(1) 4105 .nr(16) 4106 .kr(2) 4107 .sr(4) 4108 .m(1) 4109 .n(16) 4110 .k(16) 4111 
.qmax(128) 4112 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 4113 } 4114 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL,strided_cm)4115 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2S4__NEON_MLAL, strided_cm) { 4116 TEST_REQUIRES_ARM_NEON; 4117 GemmMicrokernelTester() 4118 .mr(1) 4119 .nr(16) 4120 .kr(2) 4121 .sr(4) 4122 .m(1) 4123 .n(16) 4124 .k(16) 4125 .cm_stride(19) 4126 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 4127 } 4128 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 4129 4130 4131 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_DUP,k_eq_16)4132 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_DUP, k_eq_16) { 4133 TEST_REQUIRES_ARM_NEON; 4134 GemmMicrokernelTester() 4135 .mr(1) 4136 .nr(16) 4137 .kr(4) 4138 .sr(1) 4139 .m(1) 4140 .n(16) 4141 .k(16) 4142 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 4143 } 4144 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_DUP,strided_cn)4145 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_DUP, strided_cn) { 4146 TEST_REQUIRES_ARM_NEON; 4147 GemmMicrokernelTester() 4148 .mr(1) 4149 .nr(16) 4150 .kr(4) 4151 .sr(1) 4152 .m(1) 4153 .n(16) 4154 .k(16) 4155 .cn_stride(19) 4156 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 4157 } 4158 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_DUP,k_eq_16_strided_a)4159 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_DUP, k_eq_16_strided_a) { 4160 TEST_REQUIRES_ARM_NEON; 4161 GemmMicrokernelTester() 4162 .mr(1) 4163 .nr(16) 4164 .kr(4) 4165 .sr(1) 4166 .m(1) 4167 .n(16) 4168 .k(16) 4169 .a_stride(19) 4170 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 4171 } 4172 
// k=16; sweeps n over [1, 16] and m over [1, 1], one iteration per case.
TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_DUP, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
      GemmMicrokernelTester()
          .mr(1).nr(16).kr(4).sr(1)
          .m(m_size).n(n_size).k(16)
          .iterations(1)
          .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_dup,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
    }
  }
}

// n=16, k=16; sweeps m over [1, 1], one iteration per case.
TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_DUP, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
    GemmMicrokernelTester()
        .mr(1).nr(16).kr(4).sr(1)
        .m(m_size).n(16).k(16)
        .iterations(1)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_dup,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
  }
}

// m=1, k=16; sweeps n over [1, 16], one iteration per case.
TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_DUP, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
    GemmMicrokernelTester()
        .mr(1).nr(16).kr(4).sr(1)
        .m(1).n(n_size).k(16)
        .iterations(1)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_dup,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
  }
}

// m=1, n=16; sweeps k over [1, 16).
TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_DUP, k_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size < 16; ++k_size) {
    GemmMicrokernelTester()
        .mr(1).nr(16).kr(4).sr(1)
        .m(1).n(16).k(k_size)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_dup,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
  }
}

// m=1, n=16; sweeps k over [1, 16) with a_stride(19).
TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_DUP, k_lt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size < 16; ++k_size) {
    GemmMicrokernelTester()
        .mr(1).nr(16).kr(4).sr(1)
        .m(1).n(16).k(k_size)
        .a_stride(19)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_dup,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
  }
}

// Sweeps k over [1, 16), n over [1, 16] and m over [1, 1], one iteration per case.
TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_DUP, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
            .mr(1).nr(16).kr(4).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_dup,
                  xnn_init_qs8_conv_minmax_rndnu_neon_params,
                  xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// m=1, n=16; sweeps k over [17, 32).
TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_DUP, k_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 17; k_size < 32; ++k_size) {
    GemmMicrokernelTester()
        .mr(1).nr(16).kr(4).sr(1)
        .m(1).n(16).k(k_size)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_dup,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
  }
}

// m=1, n=16; sweeps k over [17, 32) with a_stride(37).
TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_DUP, k_gt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 17; k_size < 32; ++k_size) {
    GemmMicrokernelTester()
        .mr(1).nr(16).kr(4).sr(1)
        .m(1).n(16).k(k_size)
        .a_stride(37)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_dup,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
  }
}

// Sweeps k over [17, 32), n over [1, 16] and m over [1, 1], one iteration per case.
TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_DUP, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 17; k_size < 32; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
            .mr(1).nr(16).kr(4).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_dup,
                  xnn_init_qs8_conv_minmax_rndnu_neon_params,
                  xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// m=1, n=16; sweeps k over 32..160 in steps of 16.
TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_DUP, k_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 32; k_size <= 160; k_size += 16) {
    GemmMicrokernelTester()
        .mr(1).nr(16).kr(4).sr(1)
        .m(1).n(16).k(k_size)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_dup,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
  }
}

// m=1, n=16; sweeps k over 32..160 in steps of 16 with a_stride(163).
TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_DUP, k_div_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 32; k_size <= 160; k_size += 16) {
    GemmMicrokernelTester()
        .mr(1).nr(16).kr(4).sr(1)
        .m(1).n(16).k(k_size)
        .a_stride(163)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_dup,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
  }
}

// Sweeps k over 32..160 (step 16), n over [1, 16] and m over [1, 1], one iteration per case.
TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_DUP, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 32; k_size <= 160; k_size += 16) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
            .mr(1).nr(16).kr(4).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_dup,
                  xnn_init_qs8_conv_minmax_rndnu_neon_params,
                  xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// m=1; sweeps n over [17, 32) and k over 1..80 in steps of 17.
TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_DUP, n_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 17; n_size < 32; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
          .mr(1).nr(16).kr(4).sr(1)
          .m(1).n(n_size).k(k_size)
          .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_dup,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
    }
  }
}

// m=1; sweeps n over [17, 32) and k over 1..80 (step 17) with cn_stride(19).
TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_DUP, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 17; n_size < 32; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
          .mr(1).nr(16).kr(4).sr(1)
          .m(1).n(n_size).k(k_size)
          .cn_stride(19)
          .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_dup,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
    }
  }
}

// m=1; sweeps n over [17, 32) and k over 1..80 (step 17) with a_stride(83).
TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_DUP, n_gt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 17; n_size < 32; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
          .mr(1).nr(16).kr(4).sr(1)
          .m(1).n(n_size).k(k_size)
          .a_stride(83)
          .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_dup,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
    }
  }
}
TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_DUP, n_gt_16_subtile) { 4430 TEST_REQUIRES_ARM_NEON; 4431 for (uint32_t n = 17; n < 32; n++) { 4432 for (size_t k = 1; k <= 80; k += 17) { 4433 for (uint32_t m = 1; m <= 1; m++) { 4434 GemmMicrokernelTester() 4435 .mr(1) 4436 .nr(16) 4437 .kr(4) 4438 .sr(1) 4439 .m(m) 4440 .n(n) 4441 .k(k) 4442 .iterations(1) 4443 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 4444 } 4445 } 4446 } 4447 } 4448 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_DUP,n_div_16)4449 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_DUP, n_div_16) { 4450 TEST_REQUIRES_ARM_NEON; 4451 for (uint32_t n = 32; n <= 48; n += 16) { 4452 for (size_t k = 1; k <= 80; k += 17) { 4453 GemmMicrokernelTester() 4454 .mr(1) 4455 .nr(16) 4456 .kr(4) 4457 .sr(1) 4458 .m(1) 4459 .n(n) 4460 .k(k) 4461 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 4462 } 4463 } 4464 } 4465 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_DUP,n_div_16_strided_cn)4466 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_DUP, n_div_16_strided_cn) { 4467 TEST_REQUIRES_ARM_NEON; 4468 for (uint32_t n = 32; n <= 48; n += 16) { 4469 for (size_t k = 1; k <= 80; k += 17) { 4470 GemmMicrokernelTester() 4471 .mr(1) 4472 .nr(16) 4473 .kr(4) 4474 .sr(1) 4475 .m(1) 4476 .n(n) 4477 .k(k) 4478 .cn_stride(19) 4479 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 4480 } 4481 } 4482 } 4483 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_DUP,n_div_16_strided_a)4484 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_DUP, n_div_16_strided_a) { 4485 TEST_REQUIRES_ARM_NEON; 4486 for (uint32_t n = 32; n <= 48; n += 16) { 4487 for (size_t k = 1; k <= 80; k += 17) { 4488 GemmMicrokernelTester() 4489 .mr(1) 4490 .nr(16) 4491 .kr(4) 4492 .sr(1) 4493 .m(1) 4494 .n(n) 4495 .k(k) 4496 
.a_stride(83) 4497 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 4498 } 4499 } 4500 } 4501 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_DUP,n_div_16_subtile)4502 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_DUP, n_div_16_subtile) { 4503 TEST_REQUIRES_ARM_NEON; 4504 for (uint32_t n = 32; n <= 48; n += 16) { 4505 for (size_t k = 1; k <= 80; k += 17) { 4506 for (uint32_t m = 1; m <= 1; m++) { 4507 GemmMicrokernelTester() 4508 .mr(1) 4509 .nr(16) 4510 .kr(4) 4511 .sr(1) 4512 .m(m) 4513 .n(n) 4514 .k(k) 4515 .iterations(1) 4516 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 4517 } 4518 } 4519 } 4520 } 4521 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_DUP,strided_cm_subtile)4522 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_DUP, strided_cm_subtile) { 4523 TEST_REQUIRES_ARM_NEON; 4524 for (size_t k = 1; k <= 80; k += 17) { 4525 for (uint32_t n = 1; n <= 16; n++) { 4526 for (uint32_t m = 1; m <= 1; m++) { 4527 GemmMicrokernelTester() 4528 .mr(1) 4529 .nr(16) 4530 .kr(4) 4531 .sr(1) 4532 .m(m) 4533 .n(n) 4534 .k(k) 4535 .cm_stride(19) 4536 .iterations(1) 4537 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 4538 } 4539 } 4540 } 4541 } 4542 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_DUP,qmin)4543 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_DUP, qmin) { 4544 TEST_REQUIRES_ARM_NEON; 4545 GemmMicrokernelTester() 4546 .mr(1) 4547 .nr(16) 4548 .kr(4) 4549 .sr(1) 4550 .m(1) 4551 .n(16) 4552 .k(16) 4553 .qmin(128) 4554 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 4555 } 4556 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_DUP,qmax)4557 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_DUP, qmax) { 4558 TEST_REQUIRES_ARM_NEON; 4559 
GemmMicrokernelTester() 4560 .mr(1) 4561 .nr(16) 4562 .kr(4) 4563 .sr(1) 4564 .m(1) 4565 .n(16) 4566 .k(16) 4567 .qmax(128) 4568 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 4569 } 4570 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_DUP,strided_cm)4571 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_DUP, strided_cm) { 4572 TEST_REQUIRES_ARM_NEON; 4573 GemmMicrokernelTester() 4574 .mr(1) 4575 .nr(16) 4576 .kr(4) 4577 .sr(1) 4578 .m(1) 4579 .n(16) 4580 .k(16) 4581 .cm_stride(19) 4582 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 4583 } 4584 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 4585 4586 4587 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD1R,k_eq_16)4588 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD1R, k_eq_16) { 4589 TEST_REQUIRES_ARM_NEON; 4590 GemmMicrokernelTester() 4591 .mr(1) 4592 .nr(16) 4593 .kr(4) 4594 .sr(1) 4595 .m(1) 4596 .n(16) 4597 .k(16) 4598 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 4599 } 4600 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD1R,strided_cn)4601 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD1R, strided_cn) { 4602 TEST_REQUIRES_ARM_NEON; 4603 GemmMicrokernelTester() 4604 .mr(1) 4605 .nr(16) 4606 .kr(4) 4607 .sr(1) 4608 .m(1) 4609 .n(16) 4610 .k(16) 4611 .cn_stride(19) 4612 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 4613 } 4614 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD1R,k_eq_16_strided_a)4615 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD1R, k_eq_16_strided_a) { 4616 TEST_REQUIRES_ARM_NEON; 4617 GemmMicrokernelTester() 4618 .mr(1) 4619 .nr(16) 4620 .kr(4) 4621 .sr(1) 4622 .m(1) 4623 .n(16) 4624 .k(16) 4625 .a_stride(19) 4626 
.Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 4627 } 4628 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD1R,k_eq_16_subtile)4629 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD1R, k_eq_16_subtile) { 4630 TEST_REQUIRES_ARM_NEON; 4631 for (uint32_t n = 1; n <= 16; n++) { 4632 for (uint32_t m = 1; m <= 1; m++) { 4633 GemmMicrokernelTester() 4634 .mr(1) 4635 .nr(16) 4636 .kr(4) 4637 .sr(1) 4638 .m(m) 4639 .n(n) 4640 .k(16) 4641 .iterations(1) 4642 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 4643 } 4644 } 4645 } 4646 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD1R,k_eq_16_subtile_m)4647 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD1R, k_eq_16_subtile_m) { 4648 TEST_REQUIRES_ARM_NEON; 4649 for (uint32_t m = 1; m <= 1; m++) { 4650 GemmMicrokernelTester() 4651 .mr(1) 4652 .nr(16) 4653 .kr(4) 4654 .sr(1) 4655 .m(m) 4656 .n(16) 4657 .k(16) 4658 .iterations(1) 4659 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 4660 } 4661 } 4662 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD1R,k_eq_16_subtile_n)4663 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD1R, k_eq_16_subtile_n) { 4664 TEST_REQUIRES_ARM_NEON; 4665 for (uint32_t n = 1; n <= 16; n++) { 4666 GemmMicrokernelTester() 4667 .mr(1) 4668 .nr(16) 4669 .kr(4) 4670 .sr(1) 4671 .m(1) 4672 .n(n) 4673 .k(16) 4674 .iterations(1) 4675 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 4676 } 4677 } 4678 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD1R,k_lt_16)4679 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD1R, k_lt_16) { 4680 TEST_REQUIRES_ARM_NEON; 4681 for (size_t k = 1; k < 16; k++) { 4682 GemmMicrokernelTester() 4683 .mr(1) 4684 .nr(16) 4685 .kr(4) 4686 .sr(1) 4687 .m(1) 4688 
.n(16) 4689 .k(k) 4690 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 4691 } 4692 } 4693 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD1R,k_lt_16_strided_a)4694 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD1R, k_lt_16_strided_a) { 4695 TEST_REQUIRES_ARM_NEON; 4696 for (size_t k = 1; k < 16; k++) { 4697 GemmMicrokernelTester() 4698 .mr(1) 4699 .nr(16) 4700 .kr(4) 4701 .sr(1) 4702 .m(1) 4703 .n(16) 4704 .k(k) 4705 .a_stride(19) 4706 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 4707 } 4708 } 4709 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD1R,k_lt_16_subtile)4710 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD1R, k_lt_16_subtile) { 4711 TEST_REQUIRES_ARM_NEON; 4712 for (size_t k = 1; k < 16; k++) { 4713 for (uint32_t n = 1; n <= 16; n++) { 4714 for (uint32_t m = 1; m <= 1; m++) { 4715 GemmMicrokernelTester() 4716 .mr(1) 4717 .nr(16) 4718 .kr(4) 4719 .sr(1) 4720 .m(m) 4721 .n(n) 4722 .k(k) 4723 .iterations(1) 4724 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 4725 } 4726 } 4727 } 4728 } 4729 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD1R,k_gt_16)4730 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD1R, k_gt_16) { 4731 TEST_REQUIRES_ARM_NEON; 4732 for (size_t k = 17; k < 32; k++) { 4733 GemmMicrokernelTester() 4734 .mr(1) 4735 .nr(16) 4736 .kr(4) 4737 .sr(1) 4738 .m(1) 4739 .n(16) 4740 .k(k) 4741 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 4742 } 4743 } 4744 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD1R,k_gt_16_strided_a)4745 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD1R, k_gt_16_strided_a) { 4746 TEST_REQUIRES_ARM_NEON; 4747 for (size_t k = 17; k < 32; k++) { 4748 GemmMicrokernelTester() 4749 .mr(1) 
4750 .nr(16) 4751 .kr(4) 4752 .sr(1) 4753 .m(1) 4754 .n(16) 4755 .k(k) 4756 .a_stride(37) 4757 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 4758 } 4759 } 4760 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD1R,k_gt_16_subtile)4761 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD1R, k_gt_16_subtile) { 4762 TEST_REQUIRES_ARM_NEON; 4763 for (size_t k = 17; k < 32; k++) { 4764 for (uint32_t n = 1; n <= 16; n++) { 4765 for (uint32_t m = 1; m <= 1; m++) { 4766 GemmMicrokernelTester() 4767 .mr(1) 4768 .nr(16) 4769 .kr(4) 4770 .sr(1) 4771 .m(m) 4772 .n(n) 4773 .k(k) 4774 .iterations(1) 4775 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 4776 } 4777 } 4778 } 4779 } 4780 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD1R,k_div_16)4781 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD1R, k_div_16) { 4782 TEST_REQUIRES_ARM_NEON; 4783 for (size_t k = 32; k <= 160; k += 16) { 4784 GemmMicrokernelTester() 4785 .mr(1) 4786 .nr(16) 4787 .kr(4) 4788 .sr(1) 4789 .m(1) 4790 .n(16) 4791 .k(k) 4792 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 4793 } 4794 } 4795 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD1R,k_div_16_strided_a)4796 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD1R, k_div_16_strided_a) { 4797 TEST_REQUIRES_ARM_NEON; 4798 for (size_t k = 32; k <= 160; k += 16) { 4799 GemmMicrokernelTester() 4800 .mr(1) 4801 .nr(16) 4802 .kr(4) 4803 .sr(1) 4804 .m(1) 4805 .n(16) 4806 .k(k) 4807 .a_stride(163) 4808 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 4809 } 4810 } 4811 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD1R,k_div_16_subtile)4812 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD1R, k_div_16_subtile) { 4813 
TEST_REQUIRES_ARM_NEON; 4814 for (size_t k = 32; k <= 160; k += 16) { 4815 for (uint32_t n = 1; n <= 16; n++) { 4816 for (uint32_t m = 1; m <= 1; m++) { 4817 GemmMicrokernelTester() 4818 .mr(1) 4819 .nr(16) 4820 .kr(4) 4821 .sr(1) 4822 .m(m) 4823 .n(n) 4824 .k(k) 4825 .iterations(1) 4826 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 4827 } 4828 } 4829 } 4830 } 4831 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD1R,n_gt_16)4832 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD1R, n_gt_16) { 4833 TEST_REQUIRES_ARM_NEON; 4834 for (uint32_t n = 17; n < 32; n++) { 4835 for (size_t k = 1; k <= 80; k += 17) { 4836 GemmMicrokernelTester() 4837 .mr(1) 4838 .nr(16) 4839 .kr(4) 4840 .sr(1) 4841 .m(1) 4842 .n(n) 4843 .k(k) 4844 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 4845 } 4846 } 4847 } 4848 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD1R,n_gt_16_strided_cn)4849 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD1R, n_gt_16_strided_cn) { 4850 TEST_REQUIRES_ARM_NEON; 4851 for (uint32_t n = 17; n < 32; n++) { 4852 for (size_t k = 1; k <= 80; k += 17) { 4853 GemmMicrokernelTester() 4854 .mr(1) 4855 .nr(16) 4856 .kr(4) 4857 .sr(1) 4858 .m(1) 4859 .n(n) 4860 .k(k) 4861 .cn_stride(19) 4862 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 4863 } 4864 } 4865 } 4866 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD1R,n_gt_16_strided_a)4867 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD1R, n_gt_16_strided_a) { 4868 TEST_REQUIRES_ARM_NEON; 4869 for (uint32_t n = 17; n < 32; n++) { 4870 for (size_t k = 1; k <= 80; k += 17) { 4871 GemmMicrokernelTester() 4872 .mr(1) 4873 .nr(16) 4874 .kr(4) 4875 .sr(1) 4876 .m(1) 4877 .n(n) 4878 .k(k) 4879 .a_stride(83) 4880 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld1r, 
xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 4881 } 4882 } 4883 } 4884 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD1R,n_gt_16_subtile)4885 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD1R, n_gt_16_subtile) { 4886 TEST_REQUIRES_ARM_NEON; 4887 for (uint32_t n = 17; n < 32; n++) { 4888 for (size_t k = 1; k <= 80; k += 17) { 4889 for (uint32_t m = 1; m <= 1; m++) { 4890 GemmMicrokernelTester() 4891 .mr(1) 4892 .nr(16) 4893 .kr(4) 4894 .sr(1) 4895 .m(m) 4896 .n(n) 4897 .k(k) 4898 .iterations(1) 4899 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 4900 } 4901 } 4902 } 4903 } 4904 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD1R,n_div_16)4905 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD1R, n_div_16) { 4906 TEST_REQUIRES_ARM_NEON; 4907 for (uint32_t n = 32; n <= 48; n += 16) { 4908 for (size_t k = 1; k <= 80; k += 17) { 4909 GemmMicrokernelTester() 4910 .mr(1) 4911 .nr(16) 4912 .kr(4) 4913 .sr(1) 4914 .m(1) 4915 .n(n) 4916 .k(k) 4917 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 4918 } 4919 } 4920 } 4921 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD1R,n_div_16_strided_cn)4922 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD1R, n_div_16_strided_cn) { 4923 TEST_REQUIRES_ARM_NEON; 4924 for (uint32_t n = 32; n <= 48; n += 16) { 4925 for (size_t k = 1; k <= 80; k += 17) { 4926 GemmMicrokernelTester() 4927 .mr(1) 4928 .nr(16) 4929 .kr(4) 4930 .sr(1) 4931 .m(1) 4932 .n(n) 4933 .k(k) 4934 .cn_stride(19) 4935 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 4936 } 4937 } 4938 } 4939 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD1R,n_div_16_strided_a)4940 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD1R, n_div_16_strided_a) { 4941 TEST_REQUIRES_ARM_NEON; 4942 for (uint32_t n = 32; n <= 48; 
n += 16) { 4943 for (size_t k = 1; k <= 80; k += 17) { 4944 GemmMicrokernelTester() 4945 .mr(1) 4946 .nr(16) 4947 .kr(4) 4948 .sr(1) 4949 .m(1) 4950 .n(n) 4951 .k(k) 4952 .a_stride(83) 4953 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 4954 } 4955 } 4956 } 4957 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD1R,n_div_16_subtile)4958 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD1R, n_div_16_subtile) { 4959 TEST_REQUIRES_ARM_NEON; 4960 for (uint32_t n = 32; n <= 48; n += 16) { 4961 for (size_t k = 1; k <= 80; k += 17) { 4962 for (uint32_t m = 1; m <= 1; m++) { 4963 GemmMicrokernelTester() 4964 .mr(1) 4965 .nr(16) 4966 .kr(4) 4967 .sr(1) 4968 .m(m) 4969 .n(n) 4970 .k(k) 4971 .iterations(1) 4972 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 4973 } 4974 } 4975 } 4976 } 4977 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD1R,strided_cm_subtile)4978 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD1R, strided_cm_subtile) { 4979 TEST_REQUIRES_ARM_NEON; 4980 for (size_t k = 1; k <= 80; k += 17) { 4981 for (uint32_t n = 1; n <= 16; n++) { 4982 for (uint32_t m = 1; m <= 1; m++) { 4983 GemmMicrokernelTester() 4984 .mr(1) 4985 .nr(16) 4986 .kr(4) 4987 .sr(1) 4988 .m(m) 4989 .n(n) 4990 .k(k) 4991 .cm_stride(19) 4992 .iterations(1) 4993 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 4994 } 4995 } 4996 } 4997 } 4998 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD1R,qmin)4999 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD1R, qmin) { 5000 TEST_REQUIRES_ARM_NEON; 5001 GemmMicrokernelTester() 5002 .mr(1) 5003 .nr(16) 5004 .kr(4) 5005 .sr(1) 5006 .m(1) 5007 .n(16) 5008 .k(16) 5009 .qmin(128) 5010 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, 
xnn_qs8_requantize_rndnu); 5011 } 5012 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD1R,qmax)5013 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD1R, qmax) { 5014 TEST_REQUIRES_ARM_NEON; 5015 GemmMicrokernelTester() 5016 .mr(1) 5017 .nr(16) 5018 .kr(4) 5019 .sr(1) 5020 .m(1) 5021 .n(16) 5022 .k(16) 5023 .qmax(128) 5024 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5025 } 5026 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD1R,strided_cm)5027 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD1R, strided_cm) { 5028 TEST_REQUIRES_ARM_NEON; 5029 GemmMicrokernelTester() 5030 .mr(1) 5031 .nr(16) 5032 .kr(4) 5033 .sr(1) 5034 .m(1) 5035 .n(16) 5036 .k(16) 5037 .cm_stride(19) 5038 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5039 } 5040 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 5041 5042 5043 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL,k_eq_16)5044 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL, k_eq_16) { 5045 TEST_REQUIRES_ARM_NEON; 5046 GemmMicrokernelTester() 5047 .mr(1) 5048 .nr(16) 5049 .kr(4) 5050 .sr(2) 5051 .m(1) 5052 .n(16) 5053 .k(16) 5054 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5055 } 5056 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL,strided_cn)5057 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL, strided_cn) { 5058 TEST_REQUIRES_ARM_NEON; 5059 GemmMicrokernelTester() 5060 .mr(1) 5061 .nr(16) 5062 .kr(4) 5063 .sr(2) 5064 .m(1) 5065 .n(16) 5066 .k(16) 5067 .cn_stride(19) 5068 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5069 } 5070 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL,k_eq_16_strided_a)5071 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL, k_eq_16_strided_a) { 5072 
TEST_REQUIRES_ARM_NEON; 5073 GemmMicrokernelTester() 5074 .mr(1) 5075 .nr(16) 5076 .kr(4) 5077 .sr(2) 5078 .m(1) 5079 .n(16) 5080 .k(16) 5081 .a_stride(19) 5082 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5083 } 5084 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL,k_eq_16_subtile)5085 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL, k_eq_16_subtile) { 5086 TEST_REQUIRES_ARM_NEON; 5087 for (uint32_t n = 1; n <= 16; n++) { 5088 for (uint32_t m = 1; m <= 1; m++) { 5089 GemmMicrokernelTester() 5090 .mr(1) 5091 .nr(16) 5092 .kr(4) 5093 .sr(2) 5094 .m(m) 5095 .n(n) 5096 .k(16) 5097 .iterations(1) 5098 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5099 } 5100 } 5101 } 5102 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL,k_eq_16_subtile_m)5103 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL, k_eq_16_subtile_m) { 5104 TEST_REQUIRES_ARM_NEON; 5105 for (uint32_t m = 1; m <= 1; m++) { 5106 GemmMicrokernelTester() 5107 .mr(1) 5108 .nr(16) 5109 .kr(4) 5110 .sr(2) 5111 .m(m) 5112 .n(16) 5113 .k(16) 5114 .iterations(1) 5115 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5116 } 5117 } 5118 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL,k_eq_16_subtile_n)5119 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL, k_eq_16_subtile_n) { 5120 TEST_REQUIRES_ARM_NEON; 5121 for (uint32_t n = 1; n <= 16; n++) { 5122 GemmMicrokernelTester() 5123 .mr(1) 5124 .nr(16) 5125 .kr(4) 5126 .sr(2) 5127 .m(1) 5128 .n(n) 5129 .k(16) 5130 .iterations(1) 5131 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5132 } 5133 } 5134 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL,k_lt_16)5135 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL, k_lt_16) { 5136 TEST_REQUIRES_ARM_NEON; 5137 for 
(size_t k = 1; k < 16; k++) { 5138 GemmMicrokernelTester() 5139 .mr(1) 5140 .nr(16) 5141 .kr(4) 5142 .sr(2) 5143 .m(1) 5144 .n(16) 5145 .k(k) 5146 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5147 } 5148 } 5149 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL,k_lt_16_strided_a)5150 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL, k_lt_16_strided_a) { 5151 TEST_REQUIRES_ARM_NEON; 5152 for (size_t k = 1; k < 16; k++) { 5153 GemmMicrokernelTester() 5154 .mr(1) 5155 .nr(16) 5156 .kr(4) 5157 .sr(2) 5158 .m(1) 5159 .n(16) 5160 .k(k) 5161 .a_stride(19) 5162 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5163 } 5164 } 5165 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL,k_lt_16_subtile)5166 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL, k_lt_16_subtile) { 5167 TEST_REQUIRES_ARM_NEON; 5168 for (size_t k = 1; k < 16; k++) { 5169 for (uint32_t n = 1; n <= 16; n++) { 5170 for (uint32_t m = 1; m <= 1; m++) { 5171 GemmMicrokernelTester() 5172 .mr(1) 5173 .nr(16) 5174 .kr(4) 5175 .sr(2) 5176 .m(m) 5177 .n(n) 5178 .k(k) 5179 .iterations(1) 5180 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5181 } 5182 } 5183 } 5184 } 5185 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL,k_gt_16)5186 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL, k_gt_16) { 5187 TEST_REQUIRES_ARM_NEON; 5188 for (size_t k = 17; k < 32; k++) { 5189 GemmMicrokernelTester() 5190 .mr(1) 5191 .nr(16) 5192 .kr(4) 5193 .sr(2) 5194 .m(1) 5195 .n(16) 5196 .k(k) 5197 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5198 } 5199 } 5200 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL,k_gt_16_strided_a)5201 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL, k_gt_16_strided_a) { 5202 
TEST_REQUIRES_ARM_NEON; 5203 for (size_t k = 17; k < 32; k++) { 5204 GemmMicrokernelTester() 5205 .mr(1) 5206 .nr(16) 5207 .kr(4) 5208 .sr(2) 5209 .m(1) 5210 .n(16) 5211 .k(k) 5212 .a_stride(37) 5213 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5214 } 5215 } 5216 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL,k_gt_16_subtile)5217 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL, k_gt_16_subtile) { 5218 TEST_REQUIRES_ARM_NEON; 5219 for (size_t k = 17; k < 32; k++) { 5220 for (uint32_t n = 1; n <= 16; n++) { 5221 for (uint32_t m = 1; m <= 1; m++) { 5222 GemmMicrokernelTester() 5223 .mr(1) 5224 .nr(16) 5225 .kr(4) 5226 .sr(2) 5227 .m(m) 5228 .n(n) 5229 .k(k) 5230 .iterations(1) 5231 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5232 } 5233 } 5234 } 5235 } 5236 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL,k_div_16)5237 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL, k_div_16) { 5238 TEST_REQUIRES_ARM_NEON; 5239 for (size_t k = 32; k <= 160; k += 16) { 5240 GemmMicrokernelTester() 5241 .mr(1) 5242 .nr(16) 5243 .kr(4) 5244 .sr(2) 5245 .m(1) 5246 .n(16) 5247 .k(k) 5248 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5249 } 5250 } 5251 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL,k_div_16_strided_a)5252 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL, k_div_16_strided_a) { 5253 TEST_REQUIRES_ARM_NEON; 5254 for (size_t k = 32; k <= 160; k += 16) { 5255 GemmMicrokernelTester() 5256 .mr(1) 5257 .nr(16) 5258 .kr(4) 5259 .sr(2) 5260 .m(1) 5261 .n(16) 5262 .k(k) 5263 .a_stride(163) 5264 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5265 } 5266 } 5267 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL,k_div_16_subtile)5268 
TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL, k_div_16_subtile) { 5269 TEST_REQUIRES_ARM_NEON; 5270 for (size_t k = 32; k <= 160; k += 16) { 5271 for (uint32_t n = 1; n <= 16; n++) { 5272 for (uint32_t m = 1; m <= 1; m++) { 5273 GemmMicrokernelTester() 5274 .mr(1) 5275 .nr(16) 5276 .kr(4) 5277 .sr(2) 5278 .m(m) 5279 .n(n) 5280 .k(k) 5281 .iterations(1) 5282 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5283 } 5284 } 5285 } 5286 } 5287 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL,n_gt_16)5288 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL, n_gt_16) { 5289 TEST_REQUIRES_ARM_NEON; 5290 for (uint32_t n = 17; n < 32; n++) { 5291 for (size_t k = 1; k <= 80; k += 17) { 5292 GemmMicrokernelTester() 5293 .mr(1) 5294 .nr(16) 5295 .kr(4) 5296 .sr(2) 5297 .m(1) 5298 .n(n) 5299 .k(k) 5300 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5301 } 5302 } 5303 } 5304 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL,n_gt_16_strided_cn)5305 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL, n_gt_16_strided_cn) { 5306 TEST_REQUIRES_ARM_NEON; 5307 for (uint32_t n = 17; n < 32; n++) { 5308 for (size_t k = 1; k <= 80; k += 17) { 5309 GemmMicrokernelTester() 5310 .mr(1) 5311 .nr(16) 5312 .kr(4) 5313 .sr(2) 5314 .m(1) 5315 .n(n) 5316 .k(k) 5317 .cn_stride(19) 5318 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5319 } 5320 } 5321 } 5322 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL,n_gt_16_strided_a)5323 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL, n_gt_16_strided_a) { 5324 TEST_REQUIRES_ARM_NEON; 5325 for (uint32_t n = 17; n < 32; n++) { 5326 for (size_t k = 1; k <= 80; k += 17) { 5327 GemmMicrokernelTester() 5328 .mr(1) 5329 .nr(16) 5330 .kr(4) 5331 .sr(2) 5332 .m(1) 5333 .n(n) 5334 .k(k) 5335 .a_stride(83) 5336 
.Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5337 } 5338 } 5339 } 5340 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL,n_gt_16_subtile)5341 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL, n_gt_16_subtile) { 5342 TEST_REQUIRES_ARM_NEON; 5343 for (uint32_t n = 17; n < 32; n++) { 5344 for (size_t k = 1; k <= 80; k += 17) { 5345 for (uint32_t m = 1; m <= 1; m++) { 5346 GemmMicrokernelTester() 5347 .mr(1) 5348 .nr(16) 5349 .kr(4) 5350 .sr(2) 5351 .m(m) 5352 .n(n) 5353 .k(k) 5354 .iterations(1) 5355 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5356 } 5357 } 5358 } 5359 } 5360 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL,n_div_16)5361 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL, n_div_16) { 5362 TEST_REQUIRES_ARM_NEON; 5363 for (uint32_t n = 32; n <= 48; n += 16) { 5364 for (size_t k = 1; k <= 80; k += 17) { 5365 GemmMicrokernelTester() 5366 .mr(1) 5367 .nr(16) 5368 .kr(4) 5369 .sr(2) 5370 .m(1) 5371 .n(n) 5372 .k(k) 5373 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5374 } 5375 } 5376 } 5377 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL,n_div_16_strided_cn)5378 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL, n_div_16_strided_cn) { 5379 TEST_REQUIRES_ARM_NEON; 5380 for (uint32_t n = 32; n <= 48; n += 16) { 5381 for (size_t k = 1; k <= 80; k += 17) { 5382 GemmMicrokernelTester() 5383 .mr(1) 5384 .nr(16) 5385 .kr(4) 5386 .sr(2) 5387 .m(1) 5388 .n(n) 5389 .k(k) 5390 .cn_stride(19) 5391 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5392 } 5393 } 5394 } 5395 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL,n_div_16_strided_a)5396 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL, n_div_16_strided_a) { 5397 TEST_REQUIRES_ARM_NEON; 5398 
for (uint32_t n = 32; n <= 48; n += 16) { 5399 for (size_t k = 1; k <= 80; k += 17) { 5400 GemmMicrokernelTester() 5401 .mr(1) 5402 .nr(16) 5403 .kr(4) 5404 .sr(2) 5405 .m(1) 5406 .n(n) 5407 .k(k) 5408 .a_stride(83) 5409 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5410 } 5411 } 5412 } 5413 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL,n_div_16_subtile)5414 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL, n_div_16_subtile) { 5415 TEST_REQUIRES_ARM_NEON; 5416 for (uint32_t n = 32; n <= 48; n += 16) { 5417 for (size_t k = 1; k <= 80; k += 17) { 5418 for (uint32_t m = 1; m <= 1; m++) { 5419 GemmMicrokernelTester() 5420 .mr(1) 5421 .nr(16) 5422 .kr(4) 5423 .sr(2) 5424 .m(m) 5425 .n(n) 5426 .k(k) 5427 .iterations(1) 5428 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5429 } 5430 } 5431 } 5432 } 5433 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL,strided_cm_subtile)5434 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL, strided_cm_subtile) { 5435 TEST_REQUIRES_ARM_NEON; 5436 for (size_t k = 1; k <= 80; k += 17) { 5437 for (uint32_t n = 1; n <= 16; n++) { 5438 for (uint32_t m = 1; m <= 1; m++) { 5439 GemmMicrokernelTester() 5440 .mr(1) 5441 .nr(16) 5442 .kr(4) 5443 .sr(2) 5444 .m(m) 5445 .n(n) 5446 .k(k) 5447 .cm_stride(19) 5448 .iterations(1) 5449 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5450 } 5451 } 5452 } 5453 } 5454 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL,qmin)5455 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL, qmin) { 5456 TEST_REQUIRES_ARM_NEON; 5457 GemmMicrokernelTester() 5458 .mr(1) 5459 .nr(16) 5460 .kr(4) 5461 .sr(2) 5462 .m(1) 5463 .n(16) 5464 .k(16) 5465 .qmin(128) 5466 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, 
xnn_qs8_requantize_rndnu); 5467 } 5468 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL,qmax)5469 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL, qmax) { 5470 TEST_REQUIRES_ARM_NEON; 5471 GemmMicrokernelTester() 5472 .mr(1) 5473 .nr(16) 5474 .kr(4) 5475 .sr(2) 5476 .m(1) 5477 .n(16) 5478 .k(16) 5479 .qmax(128) 5480 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5481 } 5482 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL,strided_cm)5483 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MLAL, strided_cm) { 5484 TEST_REQUIRES_ARM_NEON; 5485 GemmMicrokernelTester() 5486 .mr(1) 5487 .nr(16) 5488 .kr(4) 5489 .sr(2) 5490 .m(1) 5491 .n(16) 5492 .k(16) 5493 .cm_stride(19) 5494 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5495 } 5496 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 5497 5498 5499 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MULL,k_eq_8)5500 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MULL, k_eq_8) { 5501 TEST_REQUIRES_ARM_NEON; 5502 GemmMicrokernelTester() 5503 .mr(1) 5504 .nr(16) 5505 .kr(4) 5506 .sr(2) 5507 .m(1) 5508 .n(16) 5509 .k(8) 5510 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5511 } 5512 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MULL,strided_cn)5513 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MULL, strided_cn) { 5514 TEST_REQUIRES_ARM_NEON; 5515 GemmMicrokernelTester() 5516 .mr(1) 5517 .nr(16) 5518 .kr(4) 5519 .sr(2) 5520 .m(1) 5521 .n(16) 5522 .k(8) 5523 .cn_stride(19) 5524 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5525 } 5526 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MULL,k_eq_8_strided_a)5527 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MULL, k_eq_8_strided_a) { 5528 TEST_REQUIRES_ARM_NEON; 5529 
GemmMicrokernelTester() 5530 .mr(1) 5531 .nr(16) 5532 .kr(4) 5533 .sr(2) 5534 .m(1) 5535 .n(16) 5536 .k(8) 5537 .a_stride(11) 5538 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5539 } 5540 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MULL,k_eq_8_subtile)5541 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MULL, k_eq_8_subtile) { 5542 TEST_REQUIRES_ARM_NEON; 5543 for (uint32_t n = 1; n <= 16; n++) { 5544 for (uint32_t m = 1; m <= 1; m++) { 5545 GemmMicrokernelTester() 5546 .mr(1) 5547 .nr(16) 5548 .kr(4) 5549 .sr(2) 5550 .m(m) 5551 .n(n) 5552 .k(8) 5553 .iterations(1) 5554 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5555 } 5556 } 5557 } 5558 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MULL,k_eq_8_subtile_m)5559 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MULL, k_eq_8_subtile_m) { 5560 TEST_REQUIRES_ARM_NEON; 5561 for (uint32_t m = 1; m <= 1; m++) { 5562 GemmMicrokernelTester() 5563 .mr(1) 5564 .nr(16) 5565 .kr(4) 5566 .sr(2) 5567 .m(m) 5568 .n(16) 5569 .k(8) 5570 .iterations(1) 5571 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5572 } 5573 } 5574 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MULL,k_eq_8_subtile_n)5575 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MULL, k_eq_8_subtile_n) { 5576 TEST_REQUIRES_ARM_NEON; 5577 for (uint32_t n = 1; n <= 16; n++) { 5578 GemmMicrokernelTester() 5579 .mr(1) 5580 .nr(16) 5581 .kr(4) 5582 .sr(2) 5583 .m(1) 5584 .n(n) 5585 .k(8) 5586 .iterations(1) 5587 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5588 } 5589 } 5590 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MULL,k_lt_8)5591 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MULL, k_lt_8) { 5592 TEST_REQUIRES_ARM_NEON; 5593 for (size_t k = 1; k < 8; k++) { 5594 
GemmMicrokernelTester() 5595 .mr(1) 5596 .nr(16) 5597 .kr(4) 5598 .sr(2) 5599 .m(1) 5600 .n(16) 5601 .k(k) 5602 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5603 } 5604 } 5605 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MULL,k_lt_8_strided_a)5606 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MULL, k_lt_8_strided_a) { 5607 TEST_REQUIRES_ARM_NEON; 5608 for (size_t k = 1; k < 8; k++) { 5609 GemmMicrokernelTester() 5610 .mr(1) 5611 .nr(16) 5612 .kr(4) 5613 .sr(2) 5614 .m(1) 5615 .n(16) 5616 .k(k) 5617 .a_stride(11) 5618 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5619 } 5620 } 5621 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MULL,k_lt_8_subtile)5622 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MULL, k_lt_8_subtile) { 5623 TEST_REQUIRES_ARM_NEON; 5624 for (size_t k = 1; k < 8; k++) { 5625 for (uint32_t n = 1; n <= 16; n++) { 5626 for (uint32_t m = 1; m <= 1; m++) { 5627 GemmMicrokernelTester() 5628 .mr(1) 5629 .nr(16) 5630 .kr(4) 5631 .sr(2) 5632 .m(m) 5633 .n(n) 5634 .k(k) 5635 .iterations(1) 5636 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5637 } 5638 } 5639 } 5640 } 5641 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MULL,k_gt_8)5642 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MULL, k_gt_8) { 5643 TEST_REQUIRES_ARM_NEON; 5644 for (size_t k = 9; k < 16; k++) { 5645 GemmMicrokernelTester() 5646 .mr(1) 5647 .nr(16) 5648 .kr(4) 5649 .sr(2) 5650 .m(1) 5651 .n(16) 5652 .k(k) 5653 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5654 } 5655 } 5656 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MULL,k_gt_8_strided_a)5657 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MULL, k_gt_8_strided_a) { 5658 TEST_REQUIRES_ARM_NEON; 5659 for (size_t k = 9; k < 16; k++) { 
5660 GemmMicrokernelTester() 5661 .mr(1) 5662 .nr(16) 5663 .kr(4) 5664 .sr(2) 5665 .m(1) 5666 .n(16) 5667 .k(k) 5668 .a_stride(19) 5669 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5670 } 5671 } 5672 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MULL,k_gt_8_subtile)5673 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MULL, k_gt_8_subtile) { 5674 TEST_REQUIRES_ARM_NEON; 5675 for (size_t k = 9; k < 16; k++) { 5676 for (uint32_t n = 1; n <= 16; n++) { 5677 for (uint32_t m = 1; m <= 1; m++) { 5678 GemmMicrokernelTester() 5679 .mr(1) 5680 .nr(16) 5681 .kr(4) 5682 .sr(2) 5683 .m(m) 5684 .n(n) 5685 .k(k) 5686 .iterations(1) 5687 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5688 } 5689 } 5690 } 5691 } 5692 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MULL,k_div_8)5693 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MULL, k_div_8) { 5694 TEST_REQUIRES_ARM_NEON; 5695 for (size_t k = 16; k <= 80; k += 8) { 5696 GemmMicrokernelTester() 5697 .mr(1) 5698 .nr(16) 5699 .kr(4) 5700 .sr(2) 5701 .m(1) 5702 .n(16) 5703 .k(k) 5704 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5705 } 5706 } 5707 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MULL,k_div_8_strided_a)5708 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MULL, k_div_8_strided_a) { 5709 TEST_REQUIRES_ARM_NEON; 5710 for (size_t k = 16; k <= 80; k += 8) { 5711 GemmMicrokernelTester() 5712 .mr(1) 5713 .nr(16) 5714 .kr(4) 5715 .sr(2) 5716 .m(1) 5717 .n(16) 5718 .k(k) 5719 .a_stride(83) 5720 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5721 } 5722 } 5723 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MULL,k_div_8_subtile)5724 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MULL, k_div_8_subtile) { 5725 TEST_REQUIRES_ARM_NEON; 5726 
for (size_t k = 16; k <= 80; k += 8) { 5727 for (uint32_t n = 1; n <= 16; n++) { 5728 for (uint32_t m = 1; m <= 1; m++) { 5729 GemmMicrokernelTester() 5730 .mr(1) 5731 .nr(16) 5732 .kr(4) 5733 .sr(2) 5734 .m(m) 5735 .n(n) 5736 .k(k) 5737 .iterations(1) 5738 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5739 } 5740 } 5741 } 5742 } 5743 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MULL,n_gt_16)5744 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MULL, n_gt_16) { 5745 TEST_REQUIRES_ARM_NEON; 5746 for (uint32_t n = 17; n < 32; n++) { 5747 for (size_t k = 1; k <= 40; k += 9) { 5748 GemmMicrokernelTester() 5749 .mr(1) 5750 .nr(16) 5751 .kr(4) 5752 .sr(2) 5753 .m(1) 5754 .n(n) 5755 .k(k) 5756 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5757 } 5758 } 5759 } 5760 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MULL,n_gt_16_strided_cn)5761 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MULL, n_gt_16_strided_cn) { 5762 TEST_REQUIRES_ARM_NEON; 5763 for (uint32_t n = 17; n < 32; n++) { 5764 for (size_t k = 1; k <= 40; k += 9) { 5765 GemmMicrokernelTester() 5766 .mr(1) 5767 .nr(16) 5768 .kr(4) 5769 .sr(2) 5770 .m(1) 5771 .n(n) 5772 .k(k) 5773 .cn_stride(19) 5774 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5775 } 5776 } 5777 } 5778 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MULL,n_gt_16_strided_a)5779 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MULL, n_gt_16_strided_a) { 5780 TEST_REQUIRES_ARM_NEON; 5781 for (uint32_t n = 17; n < 32; n++) { 5782 for (size_t k = 1; k <= 40; k += 9) { 5783 GemmMicrokernelTester() 5784 .mr(1) 5785 .nr(16) 5786 .kr(4) 5787 .sr(2) 5788 .m(1) 5789 .n(n) 5790 .k(k) 5791 .a_stride(43) 5792 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 
5793 } 5794 } 5795 } 5796 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MULL,n_gt_16_subtile)5797 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MULL, n_gt_16_subtile) { 5798 TEST_REQUIRES_ARM_NEON; 5799 for (uint32_t n = 17; n < 32; n++) { 5800 for (size_t k = 1; k <= 40; k += 9) { 5801 for (uint32_t m = 1; m <= 1; m++) { 5802 GemmMicrokernelTester() 5803 .mr(1) 5804 .nr(16) 5805 .kr(4) 5806 .sr(2) 5807 .m(m) 5808 .n(n) 5809 .k(k) 5810 .iterations(1) 5811 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5812 } 5813 } 5814 } 5815 } 5816 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MULL,n_div_16)5817 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MULL, n_div_16) { 5818 TEST_REQUIRES_ARM_NEON; 5819 for (uint32_t n = 32; n <= 48; n += 16) { 5820 for (size_t k = 1; k <= 40; k += 9) { 5821 GemmMicrokernelTester() 5822 .mr(1) 5823 .nr(16) 5824 .kr(4) 5825 .sr(2) 5826 .m(1) 5827 .n(n) 5828 .k(k) 5829 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5830 } 5831 } 5832 } 5833 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MULL,n_div_16_strided_cn)5834 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MULL, n_div_16_strided_cn) { 5835 TEST_REQUIRES_ARM_NEON; 5836 for (uint32_t n = 32; n <= 48; n += 16) { 5837 for (size_t k = 1; k <= 40; k += 9) { 5838 GemmMicrokernelTester() 5839 .mr(1) 5840 .nr(16) 5841 .kr(4) 5842 .sr(2) 5843 .m(1) 5844 .n(n) 5845 .k(k) 5846 .cn_stride(19) 5847 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5848 } 5849 } 5850 } 5851 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MULL,n_div_16_strided_a)5852 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MULL, n_div_16_strided_a) { 5853 TEST_REQUIRES_ARM_NEON; 5854 for (uint32_t n = 32; n <= 48; n += 16) { 5855 for (size_t k = 1; k <= 40; k += 9) { 5856 GemmMicrokernelTester() 5857 .mr(1) 5858 
.nr(16) 5859 .kr(4) 5860 .sr(2) 5861 .m(1) 5862 .n(n) 5863 .k(k) 5864 .a_stride(43) 5865 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5866 } 5867 } 5868 } 5869 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MULL,n_div_16_subtile)5870 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MULL, n_div_16_subtile) { 5871 TEST_REQUIRES_ARM_NEON; 5872 for (uint32_t n = 32; n <= 48; n += 16) { 5873 for (size_t k = 1; k <= 40; k += 9) { 5874 for (uint32_t m = 1; m <= 1; m++) { 5875 GemmMicrokernelTester() 5876 .mr(1) 5877 .nr(16) 5878 .kr(4) 5879 .sr(2) 5880 .m(m) 5881 .n(n) 5882 .k(k) 5883 .iterations(1) 5884 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5885 } 5886 } 5887 } 5888 } 5889 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MULL,strided_cm_subtile)5890 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MULL, strided_cm_subtile) { 5891 TEST_REQUIRES_ARM_NEON; 5892 for (size_t k = 1; k <= 40; k += 9) { 5893 for (uint32_t n = 1; n <= 16; n++) { 5894 for (uint32_t m = 1; m <= 1; m++) { 5895 GemmMicrokernelTester() 5896 .mr(1) 5897 .nr(16) 5898 .kr(4) 5899 .sr(2) 5900 .m(m) 5901 .n(n) 5902 .k(k) 5903 .cm_stride(19) 5904 .iterations(1) 5905 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5906 } 5907 } 5908 } 5909 } 5910 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MULL,qmin)5911 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MULL, qmin) { 5912 TEST_REQUIRES_ARM_NEON; 5913 GemmMicrokernelTester() 5914 .mr(1) 5915 .nr(16) 5916 .kr(4) 5917 .sr(2) 5918 .m(1) 5919 .n(16) 5920 .k(8) 5921 .qmin(128) 5922 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5923 } 5924 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MULL,qmax)5925 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MULL, qmax) { 5926 
TEST_REQUIRES_ARM_NEON; 5927 GemmMicrokernelTester() 5928 .mr(1) 5929 .nr(16) 5930 .kr(4) 5931 .sr(2) 5932 .m(1) 5933 .n(16) 5934 .k(8) 5935 .qmax(128) 5936 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5937 } 5938 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MULL,strided_cm)5939 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4S2__NEON_MULL, strided_cm) { 5940 TEST_REQUIRES_ARM_NEON; 5941 GemmMicrokernelTester() 5942 .mr(1) 5943 .nr(16) 5944 .kr(4) 5945 .sr(2) 5946 .m(1) 5947 .n(16) 5948 .k(8) 5949 .cm_stride(19) 5950 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5951 } 5952 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 5953 5954 5955 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R,k_eq_8)5956 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, k_eq_8) { 5957 TEST_REQUIRES_ARM_NEON; 5958 GemmMicrokernelTester() 5959 .mr(2) 5960 .nr(8) 5961 .kr(2) 5962 .sr(1) 5963 .m(2) 5964 .n(8) 5965 .k(8) 5966 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5967 } 5968 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R,strided_cn)5969 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, strided_cn) { 5970 TEST_REQUIRES_ARM_NEON; 5971 GemmMicrokernelTester() 5972 .mr(2) 5973 .nr(8) 5974 .kr(2) 5975 .sr(1) 5976 .m(2) 5977 .n(8) 5978 .k(8) 5979 .cn_stride(11) 5980 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5981 } 5982 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R,k_eq_8_strided_a)5983 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, k_eq_8_strided_a) { 5984 TEST_REQUIRES_ARM_NEON; 5985 GemmMicrokernelTester() 5986 .mr(2) 5987 .nr(8) 5988 .kr(2) 5989 .sr(1) 5990 .m(2) 5991 .n(8) 5992 .k(8) 5993 .a_stride(11) 5994 
.Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5995 } 5996 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R,k_eq_8_subtile)5997 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, k_eq_8_subtile) { 5998 TEST_REQUIRES_ARM_NEON; 5999 for (uint32_t n = 1; n <= 8; n++) { 6000 for (uint32_t m = 1; m <= 2; m++) { 6001 GemmMicrokernelTester() 6002 .mr(2) 6003 .nr(8) 6004 .kr(2) 6005 .sr(1) 6006 .m(m) 6007 .n(n) 6008 .k(8) 6009 .iterations(1) 6010 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6011 } 6012 } 6013 } 6014 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R,k_eq_8_subtile_m)6015 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, k_eq_8_subtile_m) { 6016 TEST_REQUIRES_ARM_NEON; 6017 for (uint32_t m = 1; m <= 2; m++) { 6018 GemmMicrokernelTester() 6019 .mr(2) 6020 .nr(8) 6021 .kr(2) 6022 .sr(1) 6023 .m(m) 6024 .n(8) 6025 .k(8) 6026 .iterations(1) 6027 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6028 } 6029 } 6030 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R,k_eq_8_subtile_n)6031 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, k_eq_8_subtile_n) { 6032 TEST_REQUIRES_ARM_NEON; 6033 for (uint32_t n = 1; n <= 8; n++) { 6034 GemmMicrokernelTester() 6035 .mr(2) 6036 .nr(8) 6037 .kr(2) 6038 .sr(1) 6039 .m(2) 6040 .n(n) 6041 .k(8) 6042 .iterations(1) 6043 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6044 } 6045 } 6046 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R,k_lt_8)6047 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, k_lt_8) { 6048 TEST_REQUIRES_ARM_NEON; 6049 for (size_t k = 1; k < 8; k++) { 6050 GemmMicrokernelTester() 6051 .mr(2) 6052 .nr(8) 6053 .kr(2) 6054 .sr(1) 6055 .m(2) 6056 .n(8) 6057 .k(k) 6058 
.Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6059 } 6060 } 6061 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R,k_lt_8_strided_a)6062 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, k_lt_8_strided_a) { 6063 TEST_REQUIRES_ARM_NEON; 6064 for (size_t k = 1; k < 8; k++) { 6065 GemmMicrokernelTester() 6066 .mr(2) 6067 .nr(8) 6068 .kr(2) 6069 .sr(1) 6070 .m(2) 6071 .n(8) 6072 .k(k) 6073 .a_stride(11) 6074 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6075 } 6076 } 6077 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R,k_lt_8_subtile)6078 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, k_lt_8_subtile) { 6079 TEST_REQUIRES_ARM_NEON; 6080 for (size_t k = 1; k < 8; k++) { 6081 for (uint32_t n = 1; n <= 8; n++) { 6082 for (uint32_t m = 1; m <= 2; m++) { 6083 GemmMicrokernelTester() 6084 .mr(2) 6085 .nr(8) 6086 .kr(2) 6087 .sr(1) 6088 .m(m) 6089 .n(n) 6090 .k(k) 6091 .iterations(1) 6092 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6093 } 6094 } 6095 } 6096 } 6097 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R,k_gt_8)6098 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, k_gt_8) { 6099 TEST_REQUIRES_ARM_NEON; 6100 for (size_t k = 9; k < 16; k++) { 6101 GemmMicrokernelTester() 6102 .mr(2) 6103 .nr(8) 6104 .kr(2) 6105 .sr(1) 6106 .m(2) 6107 .n(8) 6108 .k(k) 6109 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6110 } 6111 } 6112 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R,k_gt_8_strided_a)6113 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, k_gt_8_strided_a) { 6114 TEST_REQUIRES_ARM_NEON; 6115 for (size_t k = 9; k < 16; k++) { 6116 GemmMicrokernelTester() 6117 .mr(2) 6118 .nr(8) 6119 .kr(2) 6120 .sr(1) 6121 .m(2) 6122 
.n(8) 6123 .k(k) 6124 .a_stride(19) 6125 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6126 } 6127 } 6128 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R,k_gt_8_subtile)6129 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, k_gt_8_subtile) { 6130 TEST_REQUIRES_ARM_NEON; 6131 for (size_t k = 9; k < 16; k++) { 6132 for (uint32_t n = 1; n <= 8; n++) { 6133 for (uint32_t m = 1; m <= 2; m++) { 6134 GemmMicrokernelTester() 6135 .mr(2) 6136 .nr(8) 6137 .kr(2) 6138 .sr(1) 6139 .m(m) 6140 .n(n) 6141 .k(k) 6142 .iterations(1) 6143 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6144 } 6145 } 6146 } 6147 } 6148 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R,k_div_8)6149 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, k_div_8) { 6150 TEST_REQUIRES_ARM_NEON; 6151 for (size_t k = 16; k <= 80; k += 8) { 6152 GemmMicrokernelTester() 6153 .mr(2) 6154 .nr(8) 6155 .kr(2) 6156 .sr(1) 6157 .m(2) 6158 .n(8) 6159 .k(k) 6160 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6161 } 6162 } 6163 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R,k_div_8_strided_a)6164 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, k_div_8_strided_a) { 6165 TEST_REQUIRES_ARM_NEON; 6166 for (size_t k = 16; k <= 80; k += 8) { 6167 GemmMicrokernelTester() 6168 .mr(2) 6169 .nr(8) 6170 .kr(2) 6171 .sr(1) 6172 .m(2) 6173 .n(8) 6174 .k(k) 6175 .a_stride(83) 6176 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6177 } 6178 } 6179 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R,k_div_8_subtile)6180 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, k_div_8_subtile) { 6181 TEST_REQUIRES_ARM_NEON; 6182 for (size_t k = 16; k <= 80; k += 8) { 6183 for (uint32_t n = 1; n <= 8; 
n++) { 6184 for (uint32_t m = 1; m <= 2; m++) { 6185 GemmMicrokernelTester() 6186 .mr(2) 6187 .nr(8) 6188 .kr(2) 6189 .sr(1) 6190 .m(m) 6191 .n(n) 6192 .k(k) 6193 .iterations(1) 6194 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6195 } 6196 } 6197 } 6198 } 6199 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R,n_gt_8)6200 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, n_gt_8) { 6201 TEST_REQUIRES_ARM_NEON; 6202 for (uint32_t n = 9; n < 16; n++) { 6203 for (size_t k = 1; k <= 40; k += 9) { 6204 GemmMicrokernelTester() 6205 .mr(2) 6206 .nr(8) 6207 .kr(2) 6208 .sr(1) 6209 .m(2) 6210 .n(n) 6211 .k(k) 6212 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6213 } 6214 } 6215 } 6216 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R,n_gt_8_strided_cn)6217 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, n_gt_8_strided_cn) { 6218 TEST_REQUIRES_ARM_NEON; 6219 for (uint32_t n = 9; n < 16; n++) { 6220 for (size_t k = 1; k <= 40; k += 9) { 6221 GemmMicrokernelTester() 6222 .mr(2) 6223 .nr(8) 6224 .kr(2) 6225 .sr(1) 6226 .m(2) 6227 .n(n) 6228 .k(k) 6229 .cn_stride(11) 6230 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6231 } 6232 } 6233 } 6234 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R,n_gt_8_strided_a)6235 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, n_gt_8_strided_a) { 6236 TEST_REQUIRES_ARM_NEON; 6237 for (uint32_t n = 9; n < 16; n++) { 6238 for (size_t k = 1; k <= 40; k += 9) { 6239 GemmMicrokernelTester() 6240 .mr(2) 6241 .nr(8) 6242 .kr(2) 6243 .sr(1) 6244 .m(2) 6245 .n(n) 6246 .k(k) 6247 .a_stride(43) 6248 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6249 } 6250 } 6251 } 6252 
TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R,n_gt_8_subtile)6253 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, n_gt_8_subtile) { 6254 TEST_REQUIRES_ARM_NEON; 6255 for (uint32_t n = 9; n < 16; n++) { 6256 for (size_t k = 1; k <= 40; k += 9) { 6257 for (uint32_t m = 1; m <= 2; m++) { 6258 GemmMicrokernelTester() 6259 .mr(2) 6260 .nr(8) 6261 .kr(2) 6262 .sr(1) 6263 .m(m) 6264 .n(n) 6265 .k(k) 6266 .iterations(1) 6267 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6268 } 6269 } 6270 } 6271 } 6272 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R,n_div_8)6273 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, n_div_8) { 6274 TEST_REQUIRES_ARM_NEON; 6275 for (uint32_t n = 16; n <= 24; n += 8) { 6276 for (size_t k = 1; k <= 40; k += 9) { 6277 GemmMicrokernelTester() 6278 .mr(2) 6279 .nr(8) 6280 .kr(2) 6281 .sr(1) 6282 .m(2) 6283 .n(n) 6284 .k(k) 6285 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6286 } 6287 } 6288 } 6289 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R,n_div_8_strided_cn)6290 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, n_div_8_strided_cn) { 6291 TEST_REQUIRES_ARM_NEON; 6292 for (uint32_t n = 16; n <= 24; n += 8) { 6293 for (size_t k = 1; k <= 40; k += 9) { 6294 GemmMicrokernelTester() 6295 .mr(2) 6296 .nr(8) 6297 .kr(2) 6298 .sr(1) 6299 .m(2) 6300 .n(n) 6301 .k(k) 6302 .cn_stride(11) 6303 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6304 } 6305 } 6306 } 6307 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R,n_div_8_strided_a)6308 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, n_div_8_strided_a) { 6309 TEST_REQUIRES_ARM_NEON; 6310 for (uint32_t n = 16; n <= 24; n += 8) { 6311 for (size_t k = 1; k <= 40; k += 9) { 6312 GemmMicrokernelTester() 6313 .mr(2) 6314 .nr(8) 6315 .kr(2) 6316 
.sr(1) 6317 .m(2) 6318 .n(n) 6319 .k(k) 6320 .a_stride(43) 6321 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6322 } 6323 } 6324 } 6325 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R,n_div_8_subtile)6326 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, n_div_8_subtile) { 6327 TEST_REQUIRES_ARM_NEON; 6328 for (uint32_t n = 16; n <= 24; n += 8) { 6329 for (size_t k = 1; k <= 40; k += 9) { 6330 for (uint32_t m = 1; m <= 2; m++) { 6331 GemmMicrokernelTester() 6332 .mr(2) 6333 .nr(8) 6334 .kr(2) 6335 .sr(1) 6336 .m(m) 6337 .n(n) 6338 .k(k) 6339 .iterations(1) 6340 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6341 } 6342 } 6343 } 6344 } 6345 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R,strided_cm_subtile)6346 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, strided_cm_subtile) { 6347 TEST_REQUIRES_ARM_NEON; 6348 for (size_t k = 1; k <= 40; k += 9) { 6349 for (uint32_t n = 1; n <= 8; n++) { 6350 for (uint32_t m = 1; m <= 2; m++) { 6351 GemmMicrokernelTester() 6352 .mr(2) 6353 .nr(8) 6354 .kr(2) 6355 .sr(1) 6356 .m(m) 6357 .n(n) 6358 .k(k) 6359 .cm_stride(11) 6360 .iterations(1) 6361 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6362 } 6363 } 6364 } 6365 } 6366 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R,qmin)6367 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, qmin) { 6368 TEST_REQUIRES_ARM_NEON; 6369 GemmMicrokernelTester() 6370 .mr(2) 6371 .nr(8) 6372 .kr(2) 6373 .sr(1) 6374 .m(2) 6375 .n(8) 6376 .k(8) 6377 .qmin(128) 6378 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6379 } 6380 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R,qmax)6381 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, qmax) { 6382 
TEST_REQUIRES_ARM_NEON; 6383 GemmMicrokernelTester() 6384 .mr(2) 6385 .nr(8) 6386 .kr(2) 6387 .sr(1) 6388 .m(2) 6389 .n(8) 6390 .k(8) 6391 .qmax(128) 6392 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6393 } 6394 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R,strided_cm)6395 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, strided_cm) { 6396 TEST_REQUIRES_ARM_NEON; 6397 GemmMicrokernelTester() 6398 .mr(2) 6399 .nr(8) 6400 .kr(2) 6401 .sr(1) 6402 .m(2) 6403 .n(8) 6404 .k(8) 6405 .cm_stride(11) 6406 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6407 } 6408 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 6409 6410 6411 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP,k_eq_8)6412 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP, k_eq_8) { 6413 TEST_REQUIRES_ARM_NEON; 6414 GemmMicrokernelTester() 6415 .mr(2) 6416 .nr(8) 6417 .kr(4) 6418 .sr(1) 6419 .m(2) 6420 .n(8) 6421 .k(8) 6422 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6423 } 6424 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP,strided_cn)6425 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP, strided_cn) { 6426 TEST_REQUIRES_ARM_NEON; 6427 GemmMicrokernelTester() 6428 .mr(2) 6429 .nr(8) 6430 .kr(4) 6431 .sr(1) 6432 .m(2) 6433 .n(8) 6434 .k(8) 6435 .cn_stride(11) 6436 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6437 } 6438 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP,k_eq_8_strided_a)6439 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP, k_eq_8_strided_a) { 6440 TEST_REQUIRES_ARM_NEON; 6441 GemmMicrokernelTester() 6442 .mr(2) 6443 .nr(8) 6444 .kr(4) 6445 .sr(1) 6446 .m(2) 6447 .n(8) 6448 .k(8) 6449 .a_stride(11) 6450 
.Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6451 } 6452 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP,k_eq_8_subtile)6453 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP, k_eq_8_subtile) { 6454 TEST_REQUIRES_ARM_NEON; 6455 for (uint32_t n = 1; n <= 8; n++) { 6456 for (uint32_t m = 1; m <= 2; m++) { 6457 GemmMicrokernelTester() 6458 .mr(2) 6459 .nr(8) 6460 .kr(4) 6461 .sr(1) 6462 .m(m) 6463 .n(n) 6464 .k(8) 6465 .iterations(1) 6466 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6467 } 6468 } 6469 } 6470 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP,k_eq_8_subtile_m)6471 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP, k_eq_8_subtile_m) { 6472 TEST_REQUIRES_ARM_NEON; 6473 for (uint32_t m = 1; m <= 2; m++) { 6474 GemmMicrokernelTester() 6475 .mr(2) 6476 .nr(8) 6477 .kr(4) 6478 .sr(1) 6479 .m(m) 6480 .n(8) 6481 .k(8) 6482 .iterations(1) 6483 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6484 } 6485 } 6486 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP,k_eq_8_subtile_n)6487 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP, k_eq_8_subtile_n) { 6488 TEST_REQUIRES_ARM_NEON; 6489 for (uint32_t n = 1; n <= 8; n++) { 6490 GemmMicrokernelTester() 6491 .mr(2) 6492 .nr(8) 6493 .kr(4) 6494 .sr(1) 6495 .m(2) 6496 .n(n) 6497 .k(8) 6498 .iterations(1) 6499 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6500 } 6501 } 6502 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP,k_lt_8)6503 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP, k_lt_8) { 6504 TEST_REQUIRES_ARM_NEON; 6505 for (size_t k = 1; k < 8; k++) { 6506 GemmMicrokernelTester() 6507 .mr(2) 6508 .nr(8) 6509 .kr(4) 6510 .sr(1) 6511 .m(2) 6512 .n(8) 6513 .k(k) 6514 
.Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6515 } 6516 } 6517 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP,k_lt_8_strided_a)6518 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP, k_lt_8_strided_a) { 6519 TEST_REQUIRES_ARM_NEON; 6520 for (size_t k = 1; k < 8; k++) { 6521 GemmMicrokernelTester() 6522 .mr(2) 6523 .nr(8) 6524 .kr(4) 6525 .sr(1) 6526 .m(2) 6527 .n(8) 6528 .k(k) 6529 .a_stride(11) 6530 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6531 } 6532 } 6533 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP,k_lt_8_subtile)6534 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP, k_lt_8_subtile) { 6535 TEST_REQUIRES_ARM_NEON; 6536 for (size_t k = 1; k < 8; k++) { 6537 for (uint32_t n = 1; n <= 8; n++) { 6538 for (uint32_t m = 1; m <= 2; m++) { 6539 GemmMicrokernelTester() 6540 .mr(2) 6541 .nr(8) 6542 .kr(4) 6543 .sr(1) 6544 .m(m) 6545 .n(n) 6546 .k(k) 6547 .iterations(1) 6548 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6549 } 6550 } 6551 } 6552 } 6553 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP,k_gt_8)6554 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP, k_gt_8) { 6555 TEST_REQUIRES_ARM_NEON; 6556 for (size_t k = 9; k < 16; k++) { 6557 GemmMicrokernelTester() 6558 .mr(2) 6559 .nr(8) 6560 .kr(4) 6561 .sr(1) 6562 .m(2) 6563 .n(8) 6564 .k(k) 6565 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6566 } 6567 } 6568 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP,k_gt_8_strided_a)6569 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP, k_gt_8_strided_a) { 6570 TEST_REQUIRES_ARM_NEON; 6571 for (size_t k = 9; k < 16; k++) { 6572 GemmMicrokernelTester() 6573 .mr(2) 6574 .nr(8) 6575 .kr(4) 6576 .sr(1) 6577 .m(2) 6578 .n(8) 6579 .k(k) 
6580 .a_stride(19) 6581 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6582 } 6583 } 6584 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP,k_gt_8_subtile)6585 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP, k_gt_8_subtile) { 6586 TEST_REQUIRES_ARM_NEON; 6587 for (size_t k = 9; k < 16; k++) { 6588 for (uint32_t n = 1; n <= 8; n++) { 6589 for (uint32_t m = 1; m <= 2; m++) { 6590 GemmMicrokernelTester() 6591 .mr(2) 6592 .nr(8) 6593 .kr(4) 6594 .sr(1) 6595 .m(m) 6596 .n(n) 6597 .k(k) 6598 .iterations(1) 6599 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6600 } 6601 } 6602 } 6603 } 6604 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP,k_div_8)6605 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP, k_div_8) { 6606 TEST_REQUIRES_ARM_NEON; 6607 for (size_t k = 16; k <= 80; k += 8) { 6608 GemmMicrokernelTester() 6609 .mr(2) 6610 .nr(8) 6611 .kr(4) 6612 .sr(1) 6613 .m(2) 6614 .n(8) 6615 .k(k) 6616 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6617 } 6618 } 6619 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP,k_div_8_strided_a)6620 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP, k_div_8_strided_a) { 6621 TEST_REQUIRES_ARM_NEON; 6622 for (size_t k = 16; k <= 80; k += 8) { 6623 GemmMicrokernelTester() 6624 .mr(2) 6625 .nr(8) 6626 .kr(4) 6627 .sr(1) 6628 .m(2) 6629 .n(8) 6630 .k(k) 6631 .a_stride(83) 6632 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6633 } 6634 } 6635 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP,k_div_8_subtile)6636 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP, k_div_8_subtile) { 6637 TEST_REQUIRES_ARM_NEON; 6638 for (size_t k = 16; k <= 80; k += 8) { 6639 for (uint32_t n = 1; n <= 8; n++) { 6640 for (uint32_t m = 1; 
m <= 2; m++) { 6641 GemmMicrokernelTester() 6642 .mr(2) 6643 .nr(8) 6644 .kr(4) 6645 .sr(1) 6646 .m(m) 6647 .n(n) 6648 .k(k) 6649 .iterations(1) 6650 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6651 } 6652 } 6653 } 6654 } 6655 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP,n_gt_8)6656 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP, n_gt_8) { 6657 TEST_REQUIRES_ARM_NEON; 6658 for (uint32_t n = 9; n < 16; n++) { 6659 for (size_t k = 1; k <= 40; k += 9) { 6660 GemmMicrokernelTester() 6661 .mr(2) 6662 .nr(8) 6663 .kr(4) 6664 .sr(1) 6665 .m(2) 6666 .n(n) 6667 .k(k) 6668 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6669 } 6670 } 6671 } 6672 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP,n_gt_8_strided_cn)6673 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP, n_gt_8_strided_cn) { 6674 TEST_REQUIRES_ARM_NEON; 6675 for (uint32_t n = 9; n < 16; n++) { 6676 for (size_t k = 1; k <= 40; k += 9) { 6677 GemmMicrokernelTester() 6678 .mr(2) 6679 .nr(8) 6680 .kr(4) 6681 .sr(1) 6682 .m(2) 6683 .n(n) 6684 .k(k) 6685 .cn_stride(11) 6686 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6687 } 6688 } 6689 } 6690 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP,n_gt_8_strided_a)6691 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP, n_gt_8_strided_a) { 6692 TEST_REQUIRES_ARM_NEON; 6693 for (uint32_t n = 9; n < 16; n++) { 6694 for (size_t k = 1; k <= 40; k += 9) { 6695 GemmMicrokernelTester() 6696 .mr(2) 6697 .nr(8) 6698 .kr(4) 6699 .sr(1) 6700 .m(2) 6701 .n(n) 6702 .k(k) 6703 .a_stride(43) 6704 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6705 } 6706 } 6707 } 6708 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP,n_gt_8_subtile)6709 
TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP, n_gt_8_subtile) { 6710 TEST_REQUIRES_ARM_NEON; 6711 for (uint32_t n = 9; n < 16; n++) { 6712 for (size_t k = 1; k <= 40; k += 9) { 6713 for (uint32_t m = 1; m <= 2; m++) { 6714 GemmMicrokernelTester() 6715 .mr(2) 6716 .nr(8) 6717 .kr(4) 6718 .sr(1) 6719 .m(m) 6720 .n(n) 6721 .k(k) 6722 .iterations(1) 6723 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6724 } 6725 } 6726 } 6727 } 6728 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP,n_div_8)6729 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP, n_div_8) { 6730 TEST_REQUIRES_ARM_NEON; 6731 for (uint32_t n = 16; n <= 24; n += 8) { 6732 for (size_t k = 1; k <= 40; k += 9) { 6733 GemmMicrokernelTester() 6734 .mr(2) 6735 .nr(8) 6736 .kr(4) 6737 .sr(1) 6738 .m(2) 6739 .n(n) 6740 .k(k) 6741 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6742 } 6743 } 6744 } 6745 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP,n_div_8_strided_cn)6746 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP, n_div_8_strided_cn) { 6747 TEST_REQUIRES_ARM_NEON; 6748 for (uint32_t n = 16; n <= 24; n += 8) { 6749 for (size_t k = 1; k <= 40; k += 9) { 6750 GemmMicrokernelTester() 6751 .mr(2) 6752 .nr(8) 6753 .kr(4) 6754 .sr(1) 6755 .m(2) 6756 .n(n) 6757 .k(k) 6758 .cn_stride(11) 6759 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6760 } 6761 } 6762 } 6763 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP,n_div_8_strided_a)6764 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP, n_div_8_strided_a) { 6765 TEST_REQUIRES_ARM_NEON; 6766 for (uint32_t n = 16; n <= 24; n += 8) { 6767 for (size_t k = 1; k <= 40; k += 9) { 6768 GemmMicrokernelTester() 6769 .mr(2) 6770 .nr(8) 6771 .kr(4) 6772 .sr(1) 6773 .m(2) 6774 .n(n) 6775 .k(k) 6776 .a_stride(43) 6777 
.Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6778 } 6779 } 6780 } 6781 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP,n_div_8_subtile)6782 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP, n_div_8_subtile) { 6783 TEST_REQUIRES_ARM_NEON; 6784 for (uint32_t n = 16; n <= 24; n += 8) { 6785 for (size_t k = 1; k <= 40; k += 9) { 6786 for (uint32_t m = 1; m <= 2; m++) { 6787 GemmMicrokernelTester() 6788 .mr(2) 6789 .nr(8) 6790 .kr(4) 6791 .sr(1) 6792 .m(m) 6793 .n(n) 6794 .k(k) 6795 .iterations(1) 6796 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6797 } 6798 } 6799 } 6800 } 6801 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP,strided_cm_subtile)6802 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP, strided_cm_subtile) { 6803 TEST_REQUIRES_ARM_NEON; 6804 for (size_t k = 1; k <= 40; k += 9) { 6805 for (uint32_t n = 1; n <= 8; n++) { 6806 for (uint32_t m = 1; m <= 2; m++) { 6807 GemmMicrokernelTester() 6808 .mr(2) 6809 .nr(8) 6810 .kr(4) 6811 .sr(1) 6812 .m(m) 6813 .n(n) 6814 .k(k) 6815 .cm_stride(11) 6816 .iterations(1) 6817 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6818 } 6819 } 6820 } 6821 } 6822 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP,qmin)6823 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP, qmin) { 6824 TEST_REQUIRES_ARM_NEON; 6825 GemmMicrokernelTester() 6826 .mr(2) 6827 .nr(8) 6828 .kr(4) 6829 .sr(1) 6830 .m(2) 6831 .n(8) 6832 .k(8) 6833 .qmin(128) 6834 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6835 } 6836 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP,qmax)6837 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP, qmax) { 6838 TEST_REQUIRES_ARM_NEON; 6839 GemmMicrokernelTester() 6840 .mr(2) 6841 .nr(8) 6842 
.kr(4) 6843 .sr(1) 6844 .m(2) 6845 .n(8) 6846 .k(8) 6847 .qmax(128) 6848 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6849 } 6850 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP,strided_cm)6851 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP, strided_cm) { 6852 TEST_REQUIRES_ARM_NEON; 6853 GemmMicrokernelTester() 6854 .mr(2) 6855 .nr(8) 6856 .kr(4) 6857 .sr(1) 6858 .m(2) 6859 .n(8) 6860 .k(8) 6861 .cm_stride(11) 6862 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6863 } 6864 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 6865 6866 6867 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4S2__NEON_MULL,k_eq_8)6868 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4S2__NEON_MULL, k_eq_8) { 6869 TEST_REQUIRES_ARM_NEON; 6870 GemmMicrokernelTester() 6871 .mr(2) 6872 .nr(8) 6873 .kr(4) 6874 .sr(2) 6875 .m(2) 6876 .n(8) 6877 .k(8) 6878 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6879 } 6880 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4S2__NEON_MULL,strided_cn)6881 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4S2__NEON_MULL, strided_cn) { 6882 TEST_REQUIRES_ARM_NEON; 6883 GemmMicrokernelTester() 6884 .mr(2) 6885 .nr(8) 6886 .kr(4) 6887 .sr(2) 6888 .m(2) 6889 .n(8) 6890 .k(8) 6891 .cn_stride(11) 6892 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6893 } 6894 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4S2__NEON_MULL,k_eq_8_strided_a)6895 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4S2__NEON_MULL, k_eq_8_strided_a) { 6896 TEST_REQUIRES_ARM_NEON; 6897 GemmMicrokernelTester() 6898 .mr(2) 6899 .nr(8) 6900 .kr(4) 6901 .sr(2) 6902 .m(2) 6903 .n(8) 6904 .k(8) 6905 .a_stride(11) 6906 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, 
xnn_qs8_requantize_rndnu); 6907 } 6908 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4S2__NEON_MULL,k_eq_8_subtile)6909 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4S2__NEON_MULL, k_eq_8_subtile) { 6910 TEST_REQUIRES_ARM_NEON; 6911 for (uint32_t n = 1; n <= 8; n++) { 6912 for (uint32_t m = 1; m <= 2; m++) { 6913 GemmMicrokernelTester() 6914 .mr(2) 6915 .nr(8) 6916 .kr(4) 6917 .sr(2) 6918 .m(m) 6919 .n(n) 6920 .k(8) 6921 .iterations(1) 6922 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6923 } 6924 } 6925 } 6926 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4S2__NEON_MULL,k_eq_8_subtile_m)6927 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4S2__NEON_MULL, k_eq_8_subtile_m) { 6928 TEST_REQUIRES_ARM_NEON; 6929 for (uint32_t m = 1; m <= 2; m++) { 6930 GemmMicrokernelTester() 6931 .mr(2) 6932 .nr(8) 6933 .kr(4) 6934 .sr(2) 6935 .m(m) 6936 .n(8) 6937 .k(8) 6938 .iterations(1) 6939 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6940 } 6941 } 6942 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4S2__NEON_MULL,k_eq_8_subtile_n)6943 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4S2__NEON_MULL, k_eq_8_subtile_n) { 6944 TEST_REQUIRES_ARM_NEON; 6945 for (uint32_t n = 1; n <= 8; n++) { 6946 GemmMicrokernelTester() 6947 .mr(2) 6948 .nr(8) 6949 .kr(4) 6950 .sr(2) 6951 .m(2) 6952 .n(n) 6953 .k(8) 6954 .iterations(1) 6955 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6956 } 6957 } 6958 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4S2__NEON_MULL,k_lt_8)6959 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4S2__NEON_MULL, k_lt_8) { 6960 TEST_REQUIRES_ARM_NEON; 6961 for (size_t k = 1; k < 8; k++) { 6962 GemmMicrokernelTester() 6963 .mr(2) 6964 .nr(8) 6965 .kr(4) 6966 .sr(2) 6967 .m(2) 6968 .n(8) 6969 .k(k) 6970 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6971 } 6972 } 6973 
// k_val in [1, 8) at m=2, n=8, with a_stride(11).
TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4S2__NEON_MULL, k_lt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(2)
      .m(2).n(8).k(k_val)
      .a_stride(11)
      .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// k_val in [1, 8) crossed with every n_val in [1, 8] and m_val in [1, 2]; one iteration each.
TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4S2__NEON_MULL, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val < 8; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(2)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// k_val in [9, 16) at m=2, n=8.
TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4S2__NEON_MULL, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(2)
      .m(2).n(8).k(k_val)
      .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// k_val in [9, 16) at m=2, n=8, with a_stride(19).
TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4S2__NEON_MULL, k_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(2)
      .m(2).n(8).k(k_val)
      .a_stride(19)
      .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}
// k_val in [9, 16) crossed with every n_val in [1, 8] and m_val in [1, 2]; one iteration each.
TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4S2__NEON_MULL, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 9; k_val < 16; ++k_val) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(2)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// k_val = 16, 24, ..., 80 at m=2, n=8.
TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4S2__NEON_MULL, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(2)
      .m(2).n(8).k(k_val)
      .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// k_val = 16, 24, ..., 80 at m=2, n=8, with a_stride(83).
TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4S2__NEON_MULL, k_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(4).sr(2)
      .m(2).n(8).k(k_val)
      .a_stride(83)
      .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// k_val = 16, 24, ..., 80 crossed with every n_val in [1, 8] and m_val in [1, 2]; one iteration each.
TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4S2__NEON_MULL, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 16; k_val <= 80; k_val += 8) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(2)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// n_val in [9, 16) crossed with k_val = 1, 10, ..., 37 at m=2.
TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4S2__NEON_MULL, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(2)
        .m(2).n(n_val).k(k_val)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// Same sweep as n_gt_8, with cn_stride(11).
TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4S2__NEON_MULL, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(2)
        .m(2).n(n_val).k(k_val)
        .cn_stride(11)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// Same sweep as n_gt_8, with a_stride(43).
TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4S2__NEON_MULL, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(2)
        .m(2).n(n_val).k(k_val)
        .a_stride(43)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// n_val in [9, 16) crossed with k_val = 1, 10, ..., 37 and m_val in [1, 2]; one iteration each.
TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4S2__NEON_MULL, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 9; n_val < 16; ++n_val) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(2)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// n_val = 16, 24 crossed with k_val = 1, 10, ..., 37 at m=2.
TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4S2__NEON_MULL, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(2)
        .m(2).n(n_val).k(k_val)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// Same sweep as n_div_8, with cn_stride(11).
TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4S2__NEON_MULL, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(2)
        .m(2).n(n_val).k(k_val)
        .cn_stride(11)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// Same sweep as n_div_8, with a_stride(43).
TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4S2__NEON_MULL, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(4).sr(2)
        .m(2).n(n_val).k(k_val)
        .a_stride(43)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}
// n_val = 16, 24 crossed with k_val = 1, 10, ..., 37 and m_val in [1, 2]; one iteration each.
TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4S2__NEON_MULL, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_val = 16; n_val <= 24; n_val += 8) {
    for (size_t k_val = 1; k_val <= 40; k_val += 9) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(2)
          .m(m_val).n(n_val).k(k_val)
          .iterations(1)
          .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// k_val = 1, 10, ..., 37 crossed with n_val in [1, 8] and m_val in [1, 2], with cm_stride(11); one iteration each.
TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4S2__NEON_MULL, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_val = 1; k_val <= 40; k_val += 9) {
    for (uint32_t n_val = 1; n_val <= 8; ++n_val) {
      for (uint32_t m_val = 1; m_val <= 2; ++m_val) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(4).sr(2)
          .m(m_val).n(n_val).k(k_val)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// m=2, n=8, k=8 with qmin(128).
TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4S2__NEON_MULL, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(4).sr(2)
    .m(2).n(8).k(8)
    .qmin(128)
    .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}

// m=2, n=8, k=8 with qmax(128).
TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4S2__NEON_MULL, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(4).sr(2)
    .m(2).n(8).k(8)
    .qmax(128)
    .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4S2__NEON_MULL,strided_cm)7307 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4S2__NEON_MULL, strided_cm) { 7308 TEST_REQUIRES_ARM_NEON; 7309 GemmMicrokernelTester() 7310 .mr(2) 7311 .nr(8) 7312 .kr(4) 7313 .sr(2) 7314 .m(2) 7315 .n(8) 7316 .k(8) 7317 .cm_stride(11) 7318 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7319 } 7320 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 7321 7322 7323 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD1R,k_eq_8)7324 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD1R, k_eq_8) { 7325 TEST_REQUIRES_ARM_NEON; 7326 GemmMicrokernelTester() 7327 .mr(2) 7328 .nr(16) 7329 .kr(2) 7330 .sr(1) 7331 .m(2) 7332 .n(16) 7333 .k(8) 7334 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7335 } 7336 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD1R,strided_cn)7337 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD1R, strided_cn) { 7338 TEST_REQUIRES_ARM_NEON; 7339 GemmMicrokernelTester() 7340 .mr(2) 7341 .nr(16) 7342 .kr(2) 7343 .sr(1) 7344 .m(2) 7345 .n(16) 7346 .k(8) 7347 .cn_stride(19) 7348 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7349 } 7350 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD1R,k_eq_8_strided_a)7351 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD1R, k_eq_8_strided_a) { 7352 TEST_REQUIRES_ARM_NEON; 7353 GemmMicrokernelTester() 7354 .mr(2) 7355 .nr(16) 7356 .kr(2) 7357 .sr(1) 7358 .m(2) 7359 .n(16) 7360 .k(8) 7361 .a_stride(11) 7362 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7363 } 7364 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD1R,k_eq_8_subtile)7365 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD1R, k_eq_8_subtile) { 7366 
TEST_REQUIRES_ARM_NEON; 7367 for (uint32_t n = 1; n <= 16; n++) { 7368 for (uint32_t m = 1; m <= 2; m++) { 7369 GemmMicrokernelTester() 7370 .mr(2) 7371 .nr(16) 7372 .kr(2) 7373 .sr(1) 7374 .m(m) 7375 .n(n) 7376 .k(8) 7377 .iterations(1) 7378 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7379 } 7380 } 7381 } 7382 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD1R,k_eq_8_subtile_m)7383 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD1R, k_eq_8_subtile_m) { 7384 TEST_REQUIRES_ARM_NEON; 7385 for (uint32_t m = 1; m <= 2; m++) { 7386 GemmMicrokernelTester() 7387 .mr(2) 7388 .nr(16) 7389 .kr(2) 7390 .sr(1) 7391 .m(m) 7392 .n(16) 7393 .k(8) 7394 .iterations(1) 7395 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7396 } 7397 } 7398 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD1R,k_eq_8_subtile_n)7399 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD1R, k_eq_8_subtile_n) { 7400 TEST_REQUIRES_ARM_NEON; 7401 for (uint32_t n = 1; n <= 16; n++) { 7402 GemmMicrokernelTester() 7403 .mr(2) 7404 .nr(16) 7405 .kr(2) 7406 .sr(1) 7407 .m(2) 7408 .n(n) 7409 .k(8) 7410 .iterations(1) 7411 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7412 } 7413 } 7414 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD1R,k_lt_8)7415 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD1R, k_lt_8) { 7416 TEST_REQUIRES_ARM_NEON; 7417 for (size_t k = 1; k < 8; k++) { 7418 GemmMicrokernelTester() 7419 .mr(2) 7420 .nr(16) 7421 .kr(2) 7422 .sr(1) 7423 .m(2) 7424 .n(16) 7425 .k(k) 7426 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7427 } 7428 } 7429 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD1R,k_lt_8_strided_a)7430 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD1R, 
k_lt_8_strided_a) { 7431 TEST_REQUIRES_ARM_NEON; 7432 for (size_t k = 1; k < 8; k++) { 7433 GemmMicrokernelTester() 7434 .mr(2) 7435 .nr(16) 7436 .kr(2) 7437 .sr(1) 7438 .m(2) 7439 .n(16) 7440 .k(k) 7441 .a_stride(11) 7442 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7443 } 7444 } 7445 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD1R,k_lt_8_subtile)7446 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD1R, k_lt_8_subtile) { 7447 TEST_REQUIRES_ARM_NEON; 7448 for (size_t k = 1; k < 8; k++) { 7449 for (uint32_t n = 1; n <= 16; n++) { 7450 for (uint32_t m = 1; m <= 2; m++) { 7451 GemmMicrokernelTester() 7452 .mr(2) 7453 .nr(16) 7454 .kr(2) 7455 .sr(1) 7456 .m(m) 7457 .n(n) 7458 .k(k) 7459 .iterations(1) 7460 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7461 } 7462 } 7463 } 7464 } 7465 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD1R,k_gt_8)7466 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD1R, k_gt_8) { 7467 TEST_REQUIRES_ARM_NEON; 7468 for (size_t k = 9; k < 16; k++) { 7469 GemmMicrokernelTester() 7470 .mr(2) 7471 .nr(16) 7472 .kr(2) 7473 .sr(1) 7474 .m(2) 7475 .n(16) 7476 .k(k) 7477 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7478 } 7479 } 7480 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD1R,k_gt_8_strided_a)7481 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD1R, k_gt_8_strided_a) { 7482 TEST_REQUIRES_ARM_NEON; 7483 for (size_t k = 9; k < 16; k++) { 7484 GemmMicrokernelTester() 7485 .mr(2) 7486 .nr(16) 7487 .kr(2) 7488 .sr(1) 7489 .m(2) 7490 .n(16) 7491 .k(k) 7492 .a_stride(19) 7493 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7494 } 7495 } 7496 
TEST(QS8_GEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD1R,k_gt_8_subtile)7497 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD1R, k_gt_8_subtile) { 7498 TEST_REQUIRES_ARM_NEON; 7499 for (size_t k = 9; k < 16; k++) { 7500 for (uint32_t n = 1; n <= 16; n++) { 7501 for (uint32_t m = 1; m <= 2; m++) { 7502 GemmMicrokernelTester() 7503 .mr(2) 7504 .nr(16) 7505 .kr(2) 7506 .sr(1) 7507 .m(m) 7508 .n(n) 7509 .k(k) 7510 .iterations(1) 7511 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7512 } 7513 } 7514 } 7515 } 7516 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD1R,k_div_8)7517 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD1R, k_div_8) { 7518 TEST_REQUIRES_ARM_NEON; 7519 for (size_t k = 16; k <= 80; k += 8) { 7520 GemmMicrokernelTester() 7521 .mr(2) 7522 .nr(16) 7523 .kr(2) 7524 .sr(1) 7525 .m(2) 7526 .n(16) 7527 .k(k) 7528 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7529 } 7530 } 7531 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD1R,k_div_8_strided_a)7532 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD1R, k_div_8_strided_a) { 7533 TEST_REQUIRES_ARM_NEON; 7534 for (size_t k = 16; k <= 80; k += 8) { 7535 GemmMicrokernelTester() 7536 .mr(2) 7537 .nr(16) 7538 .kr(2) 7539 .sr(1) 7540 .m(2) 7541 .n(16) 7542 .k(k) 7543 .a_stride(83) 7544 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7545 } 7546 } 7547 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD1R,k_div_8_subtile)7548 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD1R, k_div_8_subtile) { 7549 TEST_REQUIRES_ARM_NEON; 7550 for (size_t k = 16; k <= 80; k += 8) { 7551 for (uint32_t n = 1; n <= 16; n++) { 7552 for (uint32_t m = 1; m <= 2; m++) { 7553 GemmMicrokernelTester() 7554 .mr(2) 7555 .nr(16) 7556 .kr(2) 7557 .sr(1) 7558 .m(m) 7559 .n(n) 7560 .k(k) 7561 .iterations(1) 
7562 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7563 } 7564 } 7565 } 7566 } 7567 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD1R,n_gt_16)7568 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD1R, n_gt_16) { 7569 TEST_REQUIRES_ARM_NEON; 7570 for (uint32_t n = 17; n < 32; n++) { 7571 for (size_t k = 1; k <= 40; k += 9) { 7572 GemmMicrokernelTester() 7573 .mr(2) 7574 .nr(16) 7575 .kr(2) 7576 .sr(1) 7577 .m(2) 7578 .n(n) 7579 .k(k) 7580 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7581 } 7582 } 7583 } 7584 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD1R,n_gt_16_strided_cn)7585 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD1R, n_gt_16_strided_cn) { 7586 TEST_REQUIRES_ARM_NEON; 7587 for (uint32_t n = 17; n < 32; n++) { 7588 for (size_t k = 1; k <= 40; k += 9) { 7589 GemmMicrokernelTester() 7590 .mr(2) 7591 .nr(16) 7592 .kr(2) 7593 .sr(1) 7594 .m(2) 7595 .n(n) 7596 .k(k) 7597 .cn_stride(19) 7598 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7599 } 7600 } 7601 } 7602 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD1R,n_gt_16_strided_a)7603 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD1R, n_gt_16_strided_a) { 7604 TEST_REQUIRES_ARM_NEON; 7605 for (uint32_t n = 17; n < 32; n++) { 7606 for (size_t k = 1; k <= 40; k += 9) { 7607 GemmMicrokernelTester() 7608 .mr(2) 7609 .nr(16) 7610 .kr(2) 7611 .sr(1) 7612 .m(2) 7613 .n(n) 7614 .k(k) 7615 .a_stride(43) 7616 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7617 } 7618 } 7619 } 7620 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD1R,n_gt_16_subtile)7621 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD1R, n_gt_16_subtile) { 7622 TEST_REQUIRES_ARM_NEON; 7623 for (uint32_t n = 17; 
n < 32; n++) { 7624 for (size_t k = 1; k <= 40; k += 9) { 7625 for (uint32_t m = 1; m <= 2; m++) { 7626 GemmMicrokernelTester() 7627 .mr(2) 7628 .nr(16) 7629 .kr(2) 7630 .sr(1) 7631 .m(m) 7632 .n(n) 7633 .k(k) 7634 .iterations(1) 7635 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7636 } 7637 } 7638 } 7639 } 7640 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD1R,n_div_16)7641 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD1R, n_div_16) { 7642 TEST_REQUIRES_ARM_NEON; 7643 for (uint32_t n = 32; n <= 48; n += 16) { 7644 for (size_t k = 1; k <= 40; k += 9) { 7645 GemmMicrokernelTester() 7646 .mr(2) 7647 .nr(16) 7648 .kr(2) 7649 .sr(1) 7650 .m(2) 7651 .n(n) 7652 .k(k) 7653 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7654 } 7655 } 7656 } 7657 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD1R,n_div_16_strided_cn)7658 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD1R, n_div_16_strided_cn) { 7659 TEST_REQUIRES_ARM_NEON; 7660 for (uint32_t n = 32; n <= 48; n += 16) { 7661 for (size_t k = 1; k <= 40; k += 9) { 7662 GemmMicrokernelTester() 7663 .mr(2) 7664 .nr(16) 7665 .kr(2) 7666 .sr(1) 7667 .m(2) 7668 .n(n) 7669 .k(k) 7670 .cn_stride(19) 7671 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7672 } 7673 } 7674 } 7675 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD1R,n_div_16_strided_a)7676 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD1R, n_div_16_strided_a) { 7677 TEST_REQUIRES_ARM_NEON; 7678 for (uint32_t n = 32; n <= 48; n += 16) { 7679 for (size_t k = 1; k <= 40; k += 9) { 7680 GemmMicrokernelTester() 7681 .mr(2) 7682 .nr(16) 7683 .kr(2) 7684 .sr(1) 7685 .m(2) 7686 .n(n) 7687 .k(k) 7688 .a_stride(43) 7689 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, 
xnn_qs8_requantize_rndnu); 7690 } 7691 } 7692 } 7693 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD1R,n_div_16_subtile)7694 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD1R, n_div_16_subtile) { 7695 TEST_REQUIRES_ARM_NEON; 7696 for (uint32_t n = 32; n <= 48; n += 16) { 7697 for (size_t k = 1; k <= 40; k += 9) { 7698 for (uint32_t m = 1; m <= 2; m++) { 7699 GemmMicrokernelTester() 7700 .mr(2) 7701 .nr(16) 7702 .kr(2) 7703 .sr(1) 7704 .m(m) 7705 .n(n) 7706 .k(k) 7707 .iterations(1) 7708 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7709 } 7710 } 7711 } 7712 } 7713 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD1R,strided_cm_subtile)7714 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD1R, strided_cm_subtile) { 7715 TEST_REQUIRES_ARM_NEON; 7716 for (size_t k = 1; k <= 40; k += 9) { 7717 for (uint32_t n = 1; n <= 16; n++) { 7718 for (uint32_t m = 1; m <= 2; m++) { 7719 GemmMicrokernelTester() 7720 .mr(2) 7721 .nr(16) 7722 .kr(2) 7723 .sr(1) 7724 .m(m) 7725 .n(n) 7726 .k(k) 7727 .cm_stride(19) 7728 .iterations(1) 7729 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7730 } 7731 } 7732 } 7733 } 7734 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD1R,qmin)7735 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD1R, qmin) { 7736 TEST_REQUIRES_ARM_NEON; 7737 GemmMicrokernelTester() 7738 .mr(2) 7739 .nr(16) 7740 .kr(2) 7741 .sr(1) 7742 .m(2) 7743 .n(16) 7744 .k(8) 7745 .qmin(128) 7746 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7747 } 7748 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD1R,qmax)7749 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD1R, qmax) { 7750 TEST_REQUIRES_ARM_NEON; 7751 GemmMicrokernelTester() 7752 .mr(2) 7753 .nr(16) 7754 .kr(2) 7755 .sr(1) 7756 .m(2) 7757 .n(16) 7758 .k(8) 7759 .qmax(128) 7760 
.Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7761 } 7762 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD1R,strided_cm)7763 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD1R, strided_cm) { 7764 TEST_REQUIRES_ARM_NEON; 7765 GemmMicrokernelTester() 7766 .mr(2) 7767 .nr(16) 7768 .kr(2) 7769 .sr(1) 7770 .m(2) 7771 .n(16) 7772 .k(8) 7773 .cm_stride(19) 7774 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7775 } 7776 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 7777 7778 7779 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R,k_eq_16)7780 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R, k_eq_16) { 7781 TEST_REQUIRES_ARM_NEON; 7782 GemmMicrokernelTester() 7783 .mr(3) 7784 .nr(8) 7785 .kr(2) 7786 .sr(1) 7787 .m(3) 7788 .n(8) 7789 .k(16) 7790 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7791 } 7792 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R,strided_cn)7793 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R, strided_cn) { 7794 TEST_REQUIRES_ARM_NEON; 7795 GemmMicrokernelTester() 7796 .mr(3) 7797 .nr(8) 7798 .kr(2) 7799 .sr(1) 7800 .m(3) 7801 .n(8) 7802 .k(16) 7803 .cn_stride(11) 7804 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7805 } 7806 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R,k_eq_16_strided_a)7807 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R, k_eq_16_strided_a) { 7808 TEST_REQUIRES_ARM_NEON; 7809 GemmMicrokernelTester() 7810 .mr(3) 7811 .nr(8) 7812 .kr(2) 7813 .sr(1) 7814 .m(3) 7815 .n(8) 7816 .k(16) 7817 .a_stride(19) 7818 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7819 } 7820 
TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R,k_eq_16_subtile)7821 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R, k_eq_16_subtile) { 7822 TEST_REQUIRES_ARM_NEON; 7823 for (uint32_t n = 1; n <= 8; n++) { 7824 for (uint32_t m = 1; m <= 3; m++) { 7825 GemmMicrokernelTester() 7826 .mr(3) 7827 .nr(8) 7828 .kr(2) 7829 .sr(1) 7830 .m(m) 7831 .n(n) 7832 .k(16) 7833 .iterations(1) 7834 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7835 } 7836 } 7837 } 7838 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R,k_eq_16_subtile_m)7839 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R, k_eq_16_subtile_m) { 7840 TEST_REQUIRES_ARM_NEON; 7841 for (uint32_t m = 1; m <= 3; m++) { 7842 GemmMicrokernelTester() 7843 .mr(3) 7844 .nr(8) 7845 .kr(2) 7846 .sr(1) 7847 .m(m) 7848 .n(8) 7849 .k(16) 7850 .iterations(1) 7851 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7852 } 7853 } 7854 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R,k_eq_16_subtile_n)7855 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R, k_eq_16_subtile_n) { 7856 TEST_REQUIRES_ARM_NEON; 7857 for (uint32_t n = 1; n <= 8; n++) { 7858 GemmMicrokernelTester() 7859 .mr(3) 7860 .nr(8) 7861 .kr(2) 7862 .sr(1) 7863 .m(3) 7864 .n(n) 7865 .k(16) 7866 .iterations(1) 7867 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7868 } 7869 } 7870 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R,k_lt_16)7871 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R, k_lt_16) { 7872 TEST_REQUIRES_ARM_NEON; 7873 for (size_t k = 1; k < 16; k++) { 7874 GemmMicrokernelTester() 7875 .mr(3) 7876 .nr(8) 7877 .kr(2) 7878 .sr(1) 7879 .m(3) 7880 .n(8) 7881 .k(k) 7882 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7883 } 
7884 } 7885 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R,k_lt_16_strided_a)7886 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R, k_lt_16_strided_a) { 7887 TEST_REQUIRES_ARM_NEON; 7888 for (size_t k = 1; k < 16; k++) { 7889 GemmMicrokernelTester() 7890 .mr(3) 7891 .nr(8) 7892 .kr(2) 7893 .sr(1) 7894 .m(3) 7895 .n(8) 7896 .k(k) 7897 .a_stride(19) 7898 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7899 } 7900 } 7901 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R,k_lt_16_subtile)7902 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R, k_lt_16_subtile) { 7903 TEST_REQUIRES_ARM_NEON; 7904 for (size_t k = 1; k < 16; k++) { 7905 for (uint32_t n = 1; n <= 8; n++) { 7906 for (uint32_t m = 1; m <= 3; m++) { 7907 GemmMicrokernelTester() 7908 .mr(3) 7909 .nr(8) 7910 .kr(2) 7911 .sr(1) 7912 .m(m) 7913 .n(n) 7914 .k(k) 7915 .iterations(1) 7916 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7917 } 7918 } 7919 } 7920 } 7921 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R,k_gt_16)7922 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R, k_gt_16) { 7923 TEST_REQUIRES_ARM_NEON; 7924 for (size_t k = 17; k < 32; k++) { 7925 GemmMicrokernelTester() 7926 .mr(3) 7927 .nr(8) 7928 .kr(2) 7929 .sr(1) 7930 .m(3) 7931 .n(8) 7932 .k(k) 7933 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7934 } 7935 } 7936 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R,k_gt_16_strided_a)7937 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R, k_gt_16_strided_a) { 7938 TEST_REQUIRES_ARM_NEON; 7939 for (size_t k = 17; k < 32; k++) { 7940 GemmMicrokernelTester() 7941 .mr(3) 7942 .nr(8) 7943 .kr(2) 7944 .sr(1) 7945 .m(3) 7946 .n(8) 7947 .k(k) 7948 .a_stride(37) 7949 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld1r, 
xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7950 } 7951 } 7952 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R,k_gt_16_subtile)7953 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R, k_gt_16_subtile) { 7954 TEST_REQUIRES_ARM_NEON; 7955 for (size_t k = 17; k < 32; k++) { 7956 for (uint32_t n = 1; n <= 8; n++) { 7957 for (uint32_t m = 1; m <= 3; m++) { 7958 GemmMicrokernelTester() 7959 .mr(3) 7960 .nr(8) 7961 .kr(2) 7962 .sr(1) 7963 .m(m) 7964 .n(n) 7965 .k(k) 7966 .iterations(1) 7967 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7968 } 7969 } 7970 } 7971 } 7972 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R,k_div_16)7973 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R, k_div_16) { 7974 TEST_REQUIRES_ARM_NEON; 7975 for (size_t k = 32; k <= 160; k += 16) { 7976 GemmMicrokernelTester() 7977 .mr(3) 7978 .nr(8) 7979 .kr(2) 7980 .sr(1) 7981 .m(3) 7982 .n(8) 7983 .k(k) 7984 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7985 } 7986 } 7987 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R,k_div_16_strided_a)7988 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R, k_div_16_strided_a) { 7989 TEST_REQUIRES_ARM_NEON; 7990 for (size_t k = 32; k <= 160; k += 16) { 7991 GemmMicrokernelTester() 7992 .mr(3) 7993 .nr(8) 7994 .kr(2) 7995 .sr(1) 7996 .m(3) 7997 .n(8) 7998 .k(k) 7999 .a_stride(163) 8000 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8001 } 8002 } 8003 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R,k_div_16_subtile)8004 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R, k_div_16_subtile) { 8005 TEST_REQUIRES_ARM_NEON; 8006 for (size_t k = 32; k <= 160; k += 16) { 8007 for (uint32_t n = 1; n <= 8; n++) { 8008 for (uint32_t m = 1; m <= 3; m++) { 8009 GemmMicrokernelTester() 8010 .mr(3) 
8011 .nr(8) 8012 .kr(2) 8013 .sr(1) 8014 .m(m) 8015 .n(n) 8016 .k(k) 8017 .iterations(1) 8018 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8019 } 8020 } 8021 } 8022 } 8023 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R,n_gt_8)8024 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R, n_gt_8) { 8025 TEST_REQUIRES_ARM_NEON; 8026 for (uint32_t n = 9; n < 16; n++) { 8027 for (size_t k = 1; k <= 80; k += 17) { 8028 GemmMicrokernelTester() 8029 .mr(3) 8030 .nr(8) 8031 .kr(2) 8032 .sr(1) 8033 .m(3) 8034 .n(n) 8035 .k(k) 8036 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8037 } 8038 } 8039 } 8040 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R,n_gt_8_strided_cn)8041 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R, n_gt_8_strided_cn) { 8042 TEST_REQUIRES_ARM_NEON; 8043 for (uint32_t n = 9; n < 16; n++) { 8044 for (size_t k = 1; k <= 80; k += 17) { 8045 GemmMicrokernelTester() 8046 .mr(3) 8047 .nr(8) 8048 .kr(2) 8049 .sr(1) 8050 .m(3) 8051 .n(n) 8052 .k(k) 8053 .cn_stride(11) 8054 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8055 } 8056 } 8057 } 8058 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R,n_gt_8_strided_a)8059 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R, n_gt_8_strided_a) { 8060 TEST_REQUIRES_ARM_NEON; 8061 for (uint32_t n = 9; n < 16; n++) { 8062 for (size_t k = 1; k <= 80; k += 17) { 8063 GemmMicrokernelTester() 8064 .mr(3) 8065 .nr(8) 8066 .kr(2) 8067 .sr(1) 8068 .m(3) 8069 .n(n) 8070 .k(k) 8071 .a_stride(83) 8072 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8073 } 8074 } 8075 } 8076 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R,n_gt_8_subtile)8077 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R, 
n_gt_8_subtile) { 8078 TEST_REQUIRES_ARM_NEON; 8079 for (uint32_t n = 9; n < 16; n++) { 8080 for (size_t k = 1; k <= 80; k += 17) { 8081 for (uint32_t m = 1; m <= 3; m++) { 8082 GemmMicrokernelTester() 8083 .mr(3) 8084 .nr(8) 8085 .kr(2) 8086 .sr(1) 8087 .m(m) 8088 .n(n) 8089 .k(k) 8090 .iterations(1) 8091 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8092 } 8093 } 8094 } 8095 } 8096 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R,n_div_8)8097 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R, n_div_8) { 8098 TEST_REQUIRES_ARM_NEON; 8099 for (uint32_t n = 16; n <= 24; n += 8) { 8100 for (size_t k = 1; k <= 80; k += 17) { 8101 GemmMicrokernelTester() 8102 .mr(3) 8103 .nr(8) 8104 .kr(2) 8105 .sr(1) 8106 .m(3) 8107 .n(n) 8108 .k(k) 8109 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8110 } 8111 } 8112 } 8113 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R,n_div_8_strided_cn)8114 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R, n_div_8_strided_cn) { 8115 TEST_REQUIRES_ARM_NEON; 8116 for (uint32_t n = 16; n <= 24; n += 8) { 8117 for (size_t k = 1; k <= 80; k += 17) { 8118 GemmMicrokernelTester() 8119 .mr(3) 8120 .nr(8) 8121 .kr(2) 8122 .sr(1) 8123 .m(3) 8124 .n(n) 8125 .k(k) 8126 .cn_stride(11) 8127 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8128 } 8129 } 8130 } 8131 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R,n_div_8_strided_a)8132 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R, n_div_8_strided_a) { 8133 TEST_REQUIRES_ARM_NEON; 8134 for (uint32_t n = 16; n <= 24; n += 8) { 8135 for (size_t k = 1; k <= 80; k += 17) { 8136 GemmMicrokernelTester() 8137 .mr(3) 8138 .nr(8) 8139 .kr(2) 8140 .sr(1) 8141 .m(3) 8142 .n(n) 8143 .k(k) 8144 .a_stride(83) 8145 
.Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8146 } 8147 } 8148 } 8149 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R,n_div_8_subtile)8150 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R, n_div_8_subtile) { 8151 TEST_REQUIRES_ARM_NEON; 8152 for (uint32_t n = 16; n <= 24; n += 8) { 8153 for (size_t k = 1; k <= 80; k += 17) { 8154 for (uint32_t m = 1; m <= 3; m++) { 8155 GemmMicrokernelTester() 8156 .mr(3) 8157 .nr(8) 8158 .kr(2) 8159 .sr(1) 8160 .m(m) 8161 .n(n) 8162 .k(k) 8163 .iterations(1) 8164 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8165 } 8166 } 8167 } 8168 } 8169 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R,strided_cm_subtile)8170 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R, strided_cm_subtile) { 8171 TEST_REQUIRES_ARM_NEON; 8172 for (size_t k = 1; k <= 80; k += 17) { 8173 for (uint32_t n = 1; n <= 8; n++) { 8174 for (uint32_t m = 1; m <= 3; m++) { 8175 GemmMicrokernelTester() 8176 .mr(3) 8177 .nr(8) 8178 .kr(2) 8179 .sr(1) 8180 .m(m) 8181 .n(n) 8182 .k(k) 8183 .cm_stride(11) 8184 .iterations(1) 8185 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8186 } 8187 } 8188 } 8189 } 8190 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R,qmin)8191 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R, qmin) { 8192 TEST_REQUIRES_ARM_NEON; 8193 GemmMicrokernelTester() 8194 .mr(3) 8195 .nr(8) 8196 .kr(2) 8197 .sr(1) 8198 .m(3) 8199 .n(8) 8200 .k(16) 8201 .qmin(128) 8202 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8203 } 8204 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R,qmax)8205 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R, qmax) { 8206 TEST_REQUIRES_ARM_NEON; 8207 GemmMicrokernelTester() 8208 .mr(3) 8209 
.nr(8) 8210 .kr(2) 8211 .sr(1) 8212 .m(3) 8213 .n(8) 8214 .k(16) 8215 .qmax(128) 8216 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8217 } 8218 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R,strided_cm)8219 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R, strided_cm) { 8220 TEST_REQUIRES_ARM_NEON; 8221 GemmMicrokernelTester() 8222 .mr(3) 8223 .nr(8) 8224 .kr(2) 8225 .sr(1) 8226 .m(3) 8227 .n(8) 8228 .k(16) 8229 .cm_stride(11) 8230 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8231 } 8232 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 8233 8234 8235 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD2R,k_eq_16)8236 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD2R, k_eq_16) { 8237 TEST_REQUIRES_ARM_NEON; 8238 GemmMicrokernelTester() 8239 .mr(3) 8240 .nr(8) 8241 .kr(4) 8242 .sr(1) 8243 .m(3) 8244 .n(8) 8245 .k(16) 8246 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8247 } 8248 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD2R,strided_cn)8249 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD2R, strided_cn) { 8250 TEST_REQUIRES_ARM_NEON; 8251 GemmMicrokernelTester() 8252 .mr(3) 8253 .nr(8) 8254 .kr(4) 8255 .sr(1) 8256 .m(3) 8257 .n(8) 8258 .k(16) 8259 .cn_stride(11) 8260 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8261 } 8262 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD2R,k_eq_16_strided_a)8263 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD2R, k_eq_16_strided_a) { 8264 TEST_REQUIRES_ARM_NEON; 8265 GemmMicrokernelTester() 8266 .mr(3) 8267 .nr(8) 8268 .kr(4) 8269 .sr(1) 8270 .m(3) 8271 .n(8) 8272 .k(16) 8273 .a_stride(19) 8274 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld2r, 
xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8275 } 8276 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD2R,k_eq_16_subtile)8277 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD2R, k_eq_16_subtile) { 8278 TEST_REQUIRES_ARM_NEON; 8279 for (uint32_t n = 1; n <= 8; n++) { 8280 for (uint32_t m = 1; m <= 3; m++) { 8281 GemmMicrokernelTester() 8282 .mr(3) 8283 .nr(8) 8284 .kr(4) 8285 .sr(1) 8286 .m(m) 8287 .n(n) 8288 .k(16) 8289 .iterations(1) 8290 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8291 } 8292 } 8293 } 8294 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD2R,k_eq_16_subtile_m)8295 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD2R, k_eq_16_subtile_m) { 8296 TEST_REQUIRES_ARM_NEON; 8297 for (uint32_t m = 1; m <= 3; m++) { 8298 GemmMicrokernelTester() 8299 .mr(3) 8300 .nr(8) 8301 .kr(4) 8302 .sr(1) 8303 .m(m) 8304 .n(8) 8305 .k(16) 8306 .iterations(1) 8307 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8308 } 8309 } 8310 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD2R,k_eq_16_subtile_n)8311 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD2R, k_eq_16_subtile_n) { 8312 TEST_REQUIRES_ARM_NEON; 8313 for (uint32_t n = 1; n <= 8; n++) { 8314 GemmMicrokernelTester() 8315 .mr(3) 8316 .nr(8) 8317 .kr(4) 8318 .sr(1) 8319 .m(3) 8320 .n(n) 8321 .k(16) 8322 .iterations(1) 8323 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8324 } 8325 } 8326 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD2R,k_lt_16)8327 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD2R, k_lt_16) { 8328 TEST_REQUIRES_ARM_NEON; 8329 for (size_t k = 1; k < 16; k++) { 8330 GemmMicrokernelTester() 8331 .mr(3) 8332 .nr(8) 8333 .kr(4) 8334 .sr(1) 8335 .m(3) 8336 .n(8) 8337 .k(k) 8338 
.Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8339 } 8340 } 8341 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD2R,k_lt_16_strided_a)8342 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD2R, k_lt_16_strided_a) { 8343 TEST_REQUIRES_ARM_NEON; 8344 for (size_t k = 1; k < 16; k++) { 8345 GemmMicrokernelTester() 8346 .mr(3) 8347 .nr(8) 8348 .kr(4) 8349 .sr(1) 8350 .m(3) 8351 .n(8) 8352 .k(k) 8353 .a_stride(19) 8354 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8355 } 8356 } 8357 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD2R,k_lt_16_subtile)8358 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD2R, k_lt_16_subtile) { 8359 TEST_REQUIRES_ARM_NEON; 8360 for (size_t k = 1; k < 16; k++) { 8361 for (uint32_t n = 1; n <= 8; n++) { 8362 for (uint32_t m = 1; m <= 3; m++) { 8363 GemmMicrokernelTester() 8364 .mr(3) 8365 .nr(8) 8366 .kr(4) 8367 .sr(1) 8368 .m(m) 8369 .n(n) 8370 .k(k) 8371 .iterations(1) 8372 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8373 } 8374 } 8375 } 8376 } 8377 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD2R,k_gt_16)8378 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD2R, k_gt_16) { 8379 TEST_REQUIRES_ARM_NEON; 8380 for (size_t k = 17; k < 32; k++) { 8381 GemmMicrokernelTester() 8382 .mr(3) 8383 .nr(8) 8384 .kr(4) 8385 .sr(1) 8386 .m(3) 8387 .n(8) 8388 .k(k) 8389 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8390 } 8391 } 8392 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD2R,k_gt_16_strided_a)8393 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD2R, k_gt_16_strided_a) { 8394 TEST_REQUIRES_ARM_NEON; 8395 for (size_t k = 17; k < 32; k++) { 8396 GemmMicrokernelTester() 8397 .mr(3) 8398 .nr(8) 8399 .kr(4) 8400 .sr(1) 8401 
.m(3) 8402 .n(8) 8403 .k(k) 8404 .a_stride(37) 8405 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8406 } 8407 } 8408 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD2R,k_gt_16_subtile)8409 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD2R, k_gt_16_subtile) { 8410 TEST_REQUIRES_ARM_NEON; 8411 for (size_t k = 17; k < 32; k++) { 8412 for (uint32_t n = 1; n <= 8; n++) { 8413 for (uint32_t m = 1; m <= 3; m++) { 8414 GemmMicrokernelTester() 8415 .mr(3) 8416 .nr(8) 8417 .kr(4) 8418 .sr(1) 8419 .m(m) 8420 .n(n) 8421 .k(k) 8422 .iterations(1) 8423 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8424 } 8425 } 8426 } 8427 } 8428 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD2R,k_div_16)8429 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD2R, k_div_16) { 8430 TEST_REQUIRES_ARM_NEON; 8431 for (size_t k = 32; k <= 160; k += 16) { 8432 GemmMicrokernelTester() 8433 .mr(3) 8434 .nr(8) 8435 .kr(4) 8436 .sr(1) 8437 .m(3) 8438 .n(8) 8439 .k(k) 8440 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8441 } 8442 } 8443 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD2R,k_div_16_strided_a)8444 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD2R, k_div_16_strided_a) { 8445 TEST_REQUIRES_ARM_NEON; 8446 for (size_t k = 32; k <= 160; k += 16) { 8447 GemmMicrokernelTester() 8448 .mr(3) 8449 .nr(8) 8450 .kr(4) 8451 .sr(1) 8452 .m(3) 8453 .n(8) 8454 .k(k) 8455 .a_stride(163) 8456 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8457 } 8458 } 8459 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD2R,k_div_16_subtile)8460 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD2R, k_div_16_subtile) { 8461 TEST_REQUIRES_ARM_NEON; 8462 for (size_t k = 32; k <= 160; k += 16) { 8463 for 
(uint32_t n = 1; n <= 8; n++) { 8464 for (uint32_t m = 1; m <= 3; m++) { 8465 GemmMicrokernelTester() 8466 .mr(3) 8467 .nr(8) 8468 .kr(4) 8469 .sr(1) 8470 .m(m) 8471 .n(n) 8472 .k(k) 8473 .iterations(1) 8474 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8475 } 8476 } 8477 } 8478 } 8479 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD2R,n_gt_8)8480 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD2R, n_gt_8) { 8481 TEST_REQUIRES_ARM_NEON; 8482 for (uint32_t n = 9; n < 16; n++) { 8483 for (size_t k = 1; k <= 80; k += 17) { 8484 GemmMicrokernelTester() 8485 .mr(3) 8486 .nr(8) 8487 .kr(4) 8488 .sr(1) 8489 .m(3) 8490 .n(n) 8491 .k(k) 8492 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8493 } 8494 } 8495 } 8496 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD2R,n_gt_8_strided_cn)8497 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD2R, n_gt_8_strided_cn) { 8498 TEST_REQUIRES_ARM_NEON; 8499 for (uint32_t n = 9; n < 16; n++) { 8500 for (size_t k = 1; k <= 80; k += 17) { 8501 GemmMicrokernelTester() 8502 .mr(3) 8503 .nr(8) 8504 .kr(4) 8505 .sr(1) 8506 .m(3) 8507 .n(n) 8508 .k(k) 8509 .cn_stride(11) 8510 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8511 } 8512 } 8513 } 8514 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD2R,n_gt_8_strided_a)8515 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD2R, n_gt_8_strided_a) { 8516 TEST_REQUIRES_ARM_NEON; 8517 for (uint32_t n = 9; n < 16; n++) { 8518 for (size_t k = 1; k <= 80; k += 17) { 8519 GemmMicrokernelTester() 8520 .mr(3) 8521 .nr(8) 8522 .kr(4) 8523 .sr(1) 8524 .m(3) 8525 .n(n) 8526 .k(k) 8527 .a_stride(83) 8528 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8529 } 8530 } 8531 } 8532 
TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD2R,n_gt_8_subtile)8533 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD2R, n_gt_8_subtile) { 8534 TEST_REQUIRES_ARM_NEON; 8535 for (uint32_t n = 9; n < 16; n++) { 8536 for (size_t k = 1; k <= 80; k += 17) { 8537 for (uint32_t m = 1; m <= 3; m++) { 8538 GemmMicrokernelTester() 8539 .mr(3) 8540 .nr(8) 8541 .kr(4) 8542 .sr(1) 8543 .m(m) 8544 .n(n) 8545 .k(k) 8546 .iterations(1) 8547 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8548 } 8549 } 8550 } 8551 } 8552 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD2R,n_div_8)8553 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD2R, n_div_8) { 8554 TEST_REQUIRES_ARM_NEON; 8555 for (uint32_t n = 16; n <= 24; n += 8) { 8556 for (size_t k = 1; k <= 80; k += 17) { 8557 GemmMicrokernelTester() 8558 .mr(3) 8559 .nr(8) 8560 .kr(4) 8561 .sr(1) 8562 .m(3) 8563 .n(n) 8564 .k(k) 8565 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8566 } 8567 } 8568 } 8569 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD2R,n_div_8_strided_cn)8570 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD2R, n_div_8_strided_cn) { 8571 TEST_REQUIRES_ARM_NEON; 8572 for (uint32_t n = 16; n <= 24; n += 8) { 8573 for (size_t k = 1; k <= 80; k += 17) { 8574 GemmMicrokernelTester() 8575 .mr(3) 8576 .nr(8) 8577 .kr(4) 8578 .sr(1) 8579 .m(3) 8580 .n(n) 8581 .k(k) 8582 .cn_stride(11) 8583 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8584 } 8585 } 8586 } 8587 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD2R,n_div_8_strided_a)8588 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD2R, n_div_8_strided_a) { 8589 TEST_REQUIRES_ARM_NEON; 8590 for (uint32_t n = 16; n <= 24; n += 8) { 8591 for (size_t k = 1; k <= 80; k += 17) { 8592 GemmMicrokernelTester() 8593 .mr(3) 8594 .nr(8) 8595 .kr(4) 
8596 .sr(1) 8597 .m(3) 8598 .n(n) 8599 .k(k) 8600 .a_stride(83) 8601 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8602 } 8603 } 8604 } 8605 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD2R,n_div_8_subtile)8606 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD2R, n_div_8_subtile) { 8607 TEST_REQUIRES_ARM_NEON; 8608 for (uint32_t n = 16; n <= 24; n += 8) { 8609 for (size_t k = 1; k <= 80; k += 17) { 8610 for (uint32_t m = 1; m <= 3; m++) { 8611 GemmMicrokernelTester() 8612 .mr(3) 8613 .nr(8) 8614 .kr(4) 8615 .sr(1) 8616 .m(m) 8617 .n(n) 8618 .k(k) 8619 .iterations(1) 8620 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8621 } 8622 } 8623 } 8624 } 8625 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD2R,strided_cm_subtile)8626 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD2R, strided_cm_subtile) { 8627 TEST_REQUIRES_ARM_NEON; 8628 for (size_t k = 1; k <= 80; k += 17) { 8629 for (uint32_t n = 1; n <= 8; n++) { 8630 for (uint32_t m = 1; m <= 3; m++) { 8631 GemmMicrokernelTester() 8632 .mr(3) 8633 .nr(8) 8634 .kr(4) 8635 .sr(1) 8636 .m(m) 8637 .n(n) 8638 .k(k) 8639 .cm_stride(11) 8640 .iterations(1) 8641 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8642 } 8643 } 8644 } 8645 } 8646 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD2R,qmin)8647 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD2R, qmin) { 8648 TEST_REQUIRES_ARM_NEON; 8649 GemmMicrokernelTester() 8650 .mr(3) 8651 .nr(8) 8652 .kr(4) 8653 .sr(1) 8654 .m(3) 8655 .n(8) 8656 .k(16) 8657 .qmin(128) 8658 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8659 } 8660 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD2R,qmax)8661 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD2R, qmax) { 8662 
TEST_REQUIRES_ARM_NEON; 8663 GemmMicrokernelTester() 8664 .mr(3) 8665 .nr(8) 8666 .kr(4) 8667 .sr(1) 8668 .m(3) 8669 .n(8) 8670 .k(16) 8671 .qmax(128) 8672 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8673 } 8674 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD2R,strided_cm)8675 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD2R, strided_cm) { 8676 TEST_REQUIRES_ARM_NEON; 8677 GemmMicrokernelTester() 8678 .mr(3) 8679 .nr(8) 8680 .kr(4) 8681 .sr(1) 8682 .m(3) 8683 .n(8) 8684 .k(16) 8685 .cm_stride(11) 8686 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8687 } 8688 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 8689 8690 8691 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R,k_eq_16)8692 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R, k_eq_16) { 8693 TEST_REQUIRES_ARM_NEON; 8694 GemmMicrokernelTester() 8695 .mr(3) 8696 .nr(16) 8697 .kr(2) 8698 .sr(1) 8699 .m(3) 8700 .n(16) 8701 .k(16) 8702 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8703 } 8704 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R,strided_cn)8705 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R, strided_cn) { 8706 TEST_REQUIRES_ARM_NEON; 8707 GemmMicrokernelTester() 8708 .mr(3) 8709 .nr(16) 8710 .kr(2) 8711 .sr(1) 8712 .m(3) 8713 .n(16) 8714 .k(16) 8715 .cn_stride(19) 8716 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8717 } 8718 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R,k_eq_16_strided_a)8719 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R, k_eq_16_strided_a) { 8720 TEST_REQUIRES_ARM_NEON; 8721 GemmMicrokernelTester() 8722 .mr(3) 8723 .nr(16) 8724 .kr(2) 8725 .sr(1) 8726 .m(3) 8727 .n(16) 8728 .k(16) 8729 
.a_stride(19) 8730 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8731 } 8732 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R,k_eq_16_subtile)8733 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R, k_eq_16_subtile) { 8734 TEST_REQUIRES_ARM_NEON; 8735 for (uint32_t n = 1; n <= 16; n++) { 8736 for (uint32_t m = 1; m <= 3; m++) { 8737 GemmMicrokernelTester() 8738 .mr(3) 8739 .nr(16) 8740 .kr(2) 8741 .sr(1) 8742 .m(m) 8743 .n(n) 8744 .k(16) 8745 .iterations(1) 8746 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8747 } 8748 } 8749 } 8750 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R,k_eq_16_subtile_m)8751 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R, k_eq_16_subtile_m) { 8752 TEST_REQUIRES_ARM_NEON; 8753 for (uint32_t m = 1; m <= 3; m++) { 8754 GemmMicrokernelTester() 8755 .mr(3) 8756 .nr(16) 8757 .kr(2) 8758 .sr(1) 8759 .m(m) 8760 .n(16) 8761 .k(16) 8762 .iterations(1) 8763 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8764 } 8765 } 8766 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R,k_eq_16_subtile_n)8767 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R, k_eq_16_subtile_n) { 8768 TEST_REQUIRES_ARM_NEON; 8769 for (uint32_t n = 1; n <= 16; n++) { 8770 GemmMicrokernelTester() 8771 .mr(3) 8772 .nr(16) 8773 .kr(2) 8774 .sr(1) 8775 .m(3) 8776 .n(n) 8777 .k(16) 8778 .iterations(1) 8779 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8780 } 8781 } 8782 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R,k_lt_16)8783 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R, k_lt_16) { 8784 TEST_REQUIRES_ARM_NEON; 8785 for (size_t k = 1; k < 16; k++) { 8786 GemmMicrokernelTester() 8787 .mr(3) 8788 .nr(16) 8789 .kr(2) 8790 
.sr(1) 8791 .m(3) 8792 .n(16) 8793 .k(k) 8794 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8795 } 8796 } 8797 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R,k_lt_16_strided_a)8798 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R, k_lt_16_strided_a) { 8799 TEST_REQUIRES_ARM_NEON; 8800 for (size_t k = 1; k < 16; k++) { 8801 GemmMicrokernelTester() 8802 .mr(3) 8803 .nr(16) 8804 .kr(2) 8805 .sr(1) 8806 .m(3) 8807 .n(16) 8808 .k(k) 8809 .a_stride(19) 8810 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8811 } 8812 } 8813 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R,k_lt_16_subtile)8814 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R, k_lt_16_subtile) { 8815 TEST_REQUIRES_ARM_NEON; 8816 for (size_t k = 1; k < 16; k++) { 8817 for (uint32_t n = 1; n <= 16; n++) { 8818 for (uint32_t m = 1; m <= 3; m++) { 8819 GemmMicrokernelTester() 8820 .mr(3) 8821 .nr(16) 8822 .kr(2) 8823 .sr(1) 8824 .m(m) 8825 .n(n) 8826 .k(k) 8827 .iterations(1) 8828 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8829 } 8830 } 8831 } 8832 } 8833 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R,k_gt_16)8834 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R, k_gt_16) { 8835 TEST_REQUIRES_ARM_NEON; 8836 for (size_t k = 17; k < 32; k++) { 8837 GemmMicrokernelTester() 8838 .mr(3) 8839 .nr(16) 8840 .kr(2) 8841 .sr(1) 8842 .m(3) 8843 .n(16) 8844 .k(k) 8845 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8846 } 8847 } 8848 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R,k_gt_16_strided_a)8849 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R, k_gt_16_strided_a) { 8850 TEST_REQUIRES_ARM_NEON; 8851 for (size_t k = 17; k < 32; k++) { 8852 
GemmMicrokernelTester() 8853 .mr(3) 8854 .nr(16) 8855 .kr(2) 8856 .sr(1) 8857 .m(3) 8858 .n(16) 8859 .k(k) 8860 .a_stride(37) 8861 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8862 } 8863 } 8864 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R,k_gt_16_subtile)8865 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R, k_gt_16_subtile) { 8866 TEST_REQUIRES_ARM_NEON; 8867 for (size_t k = 17; k < 32; k++) { 8868 for (uint32_t n = 1; n <= 16; n++) { 8869 for (uint32_t m = 1; m <= 3; m++) { 8870 GemmMicrokernelTester() 8871 .mr(3) 8872 .nr(16) 8873 .kr(2) 8874 .sr(1) 8875 .m(m) 8876 .n(n) 8877 .k(k) 8878 .iterations(1) 8879 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8880 } 8881 } 8882 } 8883 } 8884 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R,k_div_16)8885 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R, k_div_16) { 8886 TEST_REQUIRES_ARM_NEON; 8887 for (size_t k = 32; k <= 160; k += 16) { 8888 GemmMicrokernelTester() 8889 .mr(3) 8890 .nr(16) 8891 .kr(2) 8892 .sr(1) 8893 .m(3) 8894 .n(16) 8895 .k(k) 8896 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8897 } 8898 } 8899 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R,k_div_16_strided_a)8900 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R, k_div_16_strided_a) { 8901 TEST_REQUIRES_ARM_NEON; 8902 for (size_t k = 32; k <= 160; k += 16) { 8903 GemmMicrokernelTester() 8904 .mr(3) 8905 .nr(16) 8906 .kr(2) 8907 .sr(1) 8908 .m(3) 8909 .n(16) 8910 .k(k) 8911 .a_stride(163) 8912 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8913 } 8914 } 8915 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R,k_div_16_subtile)8916 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R, 
k_div_16_subtile) { 8917 TEST_REQUIRES_ARM_NEON; 8918 for (size_t k = 32; k <= 160; k += 16) { 8919 for (uint32_t n = 1; n <= 16; n++) { 8920 for (uint32_t m = 1; m <= 3; m++) { 8921 GemmMicrokernelTester() 8922 .mr(3) 8923 .nr(16) 8924 .kr(2) 8925 .sr(1) 8926 .m(m) 8927 .n(n) 8928 .k(k) 8929 .iterations(1) 8930 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8931 } 8932 } 8933 } 8934 } 8935 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R,n_gt_16)8936 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R, n_gt_16) { 8937 TEST_REQUIRES_ARM_NEON; 8938 for (uint32_t n = 17; n < 32; n++) { 8939 for (size_t k = 1; k <= 80; k += 17) { 8940 GemmMicrokernelTester() 8941 .mr(3) 8942 .nr(16) 8943 .kr(2) 8944 .sr(1) 8945 .m(3) 8946 .n(n) 8947 .k(k) 8948 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8949 } 8950 } 8951 } 8952 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R,n_gt_16_strided_cn)8953 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R, n_gt_16_strided_cn) { 8954 TEST_REQUIRES_ARM_NEON; 8955 for (uint32_t n = 17; n < 32; n++) { 8956 for (size_t k = 1; k <= 80; k += 17) { 8957 GemmMicrokernelTester() 8958 .mr(3) 8959 .nr(16) 8960 .kr(2) 8961 .sr(1) 8962 .m(3) 8963 .n(n) 8964 .k(k) 8965 .cn_stride(19) 8966 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8967 } 8968 } 8969 } 8970 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R,n_gt_16_strided_a)8971 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R, n_gt_16_strided_a) { 8972 TEST_REQUIRES_ARM_NEON; 8973 for (uint32_t n = 17; n < 32; n++) { 8974 for (size_t k = 1; k <= 80; k += 17) { 8975 GemmMicrokernelTester() 8976 .mr(3) 8977 .nr(16) 8978 .kr(2) 8979 .sr(1) 8980 .m(3) 8981 .n(n) 8982 .k(k) 8983 .a_stride(83) 8984 
.Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8985 } 8986 } 8987 } 8988 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R,n_gt_16_subtile)8989 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R, n_gt_16_subtile) { 8990 TEST_REQUIRES_ARM_NEON; 8991 for (uint32_t n = 17; n < 32; n++) { 8992 for (size_t k = 1; k <= 80; k += 17) { 8993 for (uint32_t m = 1; m <= 3; m++) { 8994 GemmMicrokernelTester() 8995 .mr(3) 8996 .nr(16) 8997 .kr(2) 8998 .sr(1) 8999 .m(m) 9000 .n(n) 9001 .k(k) 9002 .iterations(1) 9003 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9004 } 9005 } 9006 } 9007 } 9008 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R,n_div_16)9009 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R, n_div_16) { 9010 TEST_REQUIRES_ARM_NEON; 9011 for (uint32_t n = 32; n <= 48; n += 16) { 9012 for (size_t k = 1; k <= 80; k += 17) { 9013 GemmMicrokernelTester() 9014 .mr(3) 9015 .nr(16) 9016 .kr(2) 9017 .sr(1) 9018 .m(3) 9019 .n(n) 9020 .k(k) 9021 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9022 } 9023 } 9024 } 9025 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R,n_div_16_strided_cn)9026 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R, n_div_16_strided_cn) { 9027 TEST_REQUIRES_ARM_NEON; 9028 for (uint32_t n = 32; n <= 48; n += 16) { 9029 for (size_t k = 1; k <= 80; k += 17) { 9030 GemmMicrokernelTester() 9031 .mr(3) 9032 .nr(16) 9033 .kr(2) 9034 .sr(1) 9035 .m(3) 9036 .n(n) 9037 .k(k) 9038 .cn_stride(19) 9039 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9040 } 9041 } 9042 } 9043 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R,n_div_16_strided_a)9044 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R, n_div_16_strided_a) { 
9045 TEST_REQUIRES_ARM_NEON; 9046 for (uint32_t n = 32; n <= 48; n += 16) { 9047 for (size_t k = 1; k <= 80; k += 17) { 9048 GemmMicrokernelTester() 9049 .mr(3) 9050 .nr(16) 9051 .kr(2) 9052 .sr(1) 9053 .m(3) 9054 .n(n) 9055 .k(k) 9056 .a_stride(83) 9057 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9058 } 9059 } 9060 } 9061 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R,n_div_16_subtile)9062 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R, n_div_16_subtile) { 9063 TEST_REQUIRES_ARM_NEON; 9064 for (uint32_t n = 32; n <= 48; n += 16) { 9065 for (size_t k = 1; k <= 80; k += 17) { 9066 for (uint32_t m = 1; m <= 3; m++) { 9067 GemmMicrokernelTester() 9068 .mr(3) 9069 .nr(16) 9070 .kr(2) 9071 .sr(1) 9072 .m(m) 9073 .n(n) 9074 .k(k) 9075 .iterations(1) 9076 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9077 } 9078 } 9079 } 9080 } 9081 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R,strided_cm_subtile)9082 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R, strided_cm_subtile) { 9083 TEST_REQUIRES_ARM_NEON; 9084 for (size_t k = 1; k <= 80; k += 17) { 9085 for (uint32_t n = 1; n <= 16; n++) { 9086 for (uint32_t m = 1; m <= 3; m++) { 9087 GemmMicrokernelTester() 9088 .mr(3) 9089 .nr(16) 9090 .kr(2) 9091 .sr(1) 9092 .m(m) 9093 .n(n) 9094 .k(k) 9095 .cm_stride(19) 9096 .iterations(1) 9097 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9098 } 9099 } 9100 } 9101 } 9102 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R,qmin)9103 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R, qmin) { 9104 TEST_REQUIRES_ARM_NEON; 9105 GemmMicrokernelTester() 9106 .mr(3) 9107 .nr(16) 9108 .kr(2) 9109 .sr(1) 9110 .m(3) 9111 .n(16) 9112 .k(16) 9113 .qmin(128) 9114 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r, 
xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9115 } 9116 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R,qmax)9117 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R, qmax) { 9118 TEST_REQUIRES_ARM_NEON; 9119 GemmMicrokernelTester() 9120 .mr(3) 9121 .nr(16) 9122 .kr(2) 9123 .sr(1) 9124 .m(3) 9125 .n(16) 9126 .k(16) 9127 .qmax(128) 9128 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9129 } 9130 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R,strided_cm)9131 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R, strided_cm) { 9132 TEST_REQUIRES_ARM_NEON; 9133 GemmMicrokernelTester() 9134 .mr(3) 9135 .nr(16) 9136 .kr(2) 9137 .sr(1) 9138 .m(3) 9139 .n(16) 9140 .k(16) 9141 .cm_stride(19) 9142 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9143 } 9144 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 9145 9146 9147 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD1R,k_eq_8)9148 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD1R, k_eq_8) { 9149 TEST_REQUIRES_ARM_NEON; 9150 GemmMicrokernelTester() 9151 .mr(3) 9152 .nr(16) 9153 .kr(2) 9154 .sr(1) 9155 .m(3) 9156 .n(16) 9157 .k(8) 9158 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9159 } 9160 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD1R,strided_cn)9161 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD1R, strided_cn) { 9162 TEST_REQUIRES_ARM_NEON; 9163 GemmMicrokernelTester() 9164 .mr(3) 9165 .nr(16) 9166 .kr(2) 9167 .sr(1) 9168 .m(3) 9169 .n(16) 9170 .k(8) 9171 .cn_stride(19) 9172 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9173 } 9174 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD1R,k_eq_8_strided_a)9175 
// m=3, n=16, k=8 with a_stride set to 11.
TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD1R, k_eq_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(3).nr(16).kr(2).sr(1)
    .m(3).n(16).k(8)
    .a_stride(11)
    .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld1r,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
}

// k=8; sweeps every (n, m) pair with n in [1, 16] and m in [1, 3], one iteration each.
TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD1R, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nn = 1; nn <= 16; ++nn) {
    for (uint32_t mm = 1; mm <= 3; ++mm) {
      GemmMicrokernelTester()
        .mr(3).nr(16).kr(2).sr(1)
        .m(mm).n(nn).k(8)
        .iterations(1)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld1r,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
    }
  }
}

// k=8, n=16; sweeps m over [1, 3], one iteration each.
TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD1R, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t mm = 1; mm <= 3; ++mm) {
    GemmMicrokernelTester()
      .mr(3).nr(16).kr(2).sr(1)
      .m(mm).n(16).k(8)
      .iterations(1)
      .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld1r,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}

// k=8, m=3; sweeps n over [1, 16], one iteration each.
TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD1R, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nn = 1; nn <= 16; ++nn) {
    GemmMicrokernelTester()
      .mr(3).nr(16).kr(2).sr(1)
      .m(3).n(nn).k(8)
      .iterations(1)
      .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld1r,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}
// m=3, n=16; sweeps k over [1, 8).
TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD1R, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kk = 1; kk < 8; ++kk) {
    GemmMicrokernelTester()
      .mr(3).nr(16).kr(2).sr(1)
      .m(3).n(16).k(kk)
      .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld1r,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}

// m=3, n=16; sweeps k over [1, 8) with a_stride set to 11.
TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD1R, k_lt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kk = 1; kk < 8; ++kk) {
    GemmMicrokernelTester()
      .mr(3).nr(16).kr(2).sr(1)
      .m(3).n(16).k(kk)
      .a_stride(11)
      .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld1r,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}

// Sweeps k over [1, 8), n over [1, 16] and m over [1, 3], one iteration each.
TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD1R, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kk = 1; kk < 8; ++kk) {
    for (uint32_t nn = 1; nn <= 16; ++nn) {
      for (uint32_t mm = 1; mm <= 3; ++mm) {
        GemmMicrokernelTester()
          .mr(3).nr(16).kr(2).sr(1)
          .m(mm).n(nn).k(kk)
          .iterations(1)
          .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld1r,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// m=3, n=16; sweeps k over [9, 16).
TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD1R, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kk = 9; kk < 16; ++kk) {
    GemmMicrokernelTester()
      .mr(3).nr(16).kr(2).sr(1)
      .m(3).n(16).k(kk)
      .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld1r,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}

// m=3, n=16; sweeps k over [9, 16) with a_stride set to 19.
TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD1R, k_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kk = 9; kk < 16; ++kk) {
    GemmMicrokernelTester()
      .mr(3).nr(16).kr(2).sr(1)
      .m(3).n(16).k(kk)
      .a_stride(19)
      .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld1r,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}

// Sweeps k over [9, 16), n over [1, 16] and m over [1, 3], one iteration each.
TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD1R, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kk = 9; kk < 16; ++kk) {
    for (uint32_t nn = 1; nn <= 16; ++nn) {
      for (uint32_t mm = 1; mm <= 3; ++mm) {
        GemmMicrokernelTester()
          .mr(3).nr(16).kr(2).sr(1)
          .m(mm).n(nn).k(kk)
          .iterations(1)
          .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld1r,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// m=3, n=16; sweeps k over 16, 24, ..., 80.
TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD1R, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kk = 16; kk <= 80; kk += 8) {
    GemmMicrokernelTester()
      .mr(3).nr(16).kr(2).sr(1)
      .m(3).n(16).k(kk)
      .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld1r,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}

// m=3, n=16; sweeps k over 16, 24, ..., 80 with a_stride set to 83.
TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD1R, k_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kk = 16; kk <= 80; kk += 8) {
    GemmMicrokernelTester()
      .mr(3).nr(16).kr(2).sr(1)
      .m(3).n(16).k(kk)
      .a_stride(83)
      .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld1r,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}

// Sweeps k over 16, 24, ..., 80, n over [1, 16] and m over [1, 3], one iteration each.
TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD1R, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kk = 16; kk <= 80; kk += 8) {
    for (uint32_t nn = 1; nn <= 16; ++nn) {
      for (uint32_t mm = 1; mm <= 3; ++mm) {
        GemmMicrokernelTester()
          .mr(3).nr(16).kr(2).sr(1)
          .m(mm).n(nn).k(kk)
          .iterations(1)
          .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld1r,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// m=3; sweeps n over [17, 32) and k over 1, 10, ..., 37.
TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD1R, n_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nn = 17; nn < 32; ++nn) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(16).kr(2).sr(1)
        .m(3).n(nn).k(kk)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld1r,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
    }
  }
}

// Same sweep as n_gt_16, with cn_stride set to 19.
TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD1R, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nn = 17; nn < 32; ++nn) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(16).kr(2).sr(1)
        .m(3).n(nn).k(kk)
        .cn_stride(19)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld1r,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
    }
  }
}

// Same sweep as n_gt_16, with a_stride set to 43.
TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD1R, n_gt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nn = 17; nn < 32; ++nn) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(16).kr(2).sr(1)
        .m(3).n(nn).k(kk)
        .a_stride(43)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld1r,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
    }
  }
}

// Sweeps n over [17, 32), k over 1, 10, ..., 37 and m over [1, 3], one iteration each.
TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD1R, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nn = 17; nn < 32; ++nn) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      for (uint32_t mm = 1; mm <= 3; ++mm) {
        GemmMicrokernelTester()
          .mr(3).nr(16).kr(2).sr(1)
          .m(mm).n(nn).k(kk)
          .iterations(1)
          .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld1r,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// m=3; n takes the values 32 and 48, k sweeps 1, 10, ..., 37.
TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD1R, n_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nn = 32; nn <= 48; nn += 16) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(16).kr(2).sr(1)
        .m(3).n(nn).k(kk)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld1r,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
    }
  }
}

// Same sweep as n_div_16, with cn_stride set to 19.
TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD1R, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nn = 32; nn <= 48; nn += 16) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(16).kr(2).sr(1)
        .m(3).n(nn).k(kk)
        .cn_stride(19)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld1r,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
    }
  }
}

// Same sweep as n_div_16, with a_stride set to 43.
TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD1R, n_div_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nn = 32; nn <= 48; nn += 16) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(16).kr(2).sr(1)
        .m(3).n(nn).k(kk)
        .a_stride(43)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld1r,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
    }
  }
}

// n takes the values 32 and 48; sweeps k over 1, 10, ..., 37 and m over [1, 3], one iteration each.
TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD1R, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nn = 32; nn <= 48; nn += 16) {
    for (size_t kk = 1; kk <= 40; kk += 9) {
      for (uint32_t mm = 1; mm <= 3; ++mm) {
        GemmMicrokernelTester()
          .mr(3).nr(16).kr(2).sr(1)
          .m(mm).n(nn).k(kk)
          .iterations(1)
          .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld1r,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// Sweeps k over 1, 10, ..., 37, n over [1, 16] and m over [1, 3] with cm_stride set to 19,
// one iteration each.
TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD1R, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kk = 1; kk <= 40; kk += 9) {
    for (uint32_t nn = 1; nn <= 16; ++nn) {
      for (uint32_t mm = 1; mm <= 3; ++mm) {
        GemmMicrokernelTester()
          .mr(3).nr(16).kr(2).sr(1)
          .m(mm).n(nn).k(kk)
          .cm_stride(19)
          .iterations(1)
          .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld1r,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
      }
    }
  }
}
TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD1R, qmin) { 9560 TEST_REQUIRES_ARM_NEON; 9561 GemmMicrokernelTester() 9562 .mr(3) 9563 .nr(16) 9564 .kr(2) 9565 .sr(1) 9566 .m(3) 9567 .n(16) 9568 .k(8) 9569 .qmin(128) 9570 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9571 } 9572 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD1R,qmax)9573 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD1R, qmax) { 9574 TEST_REQUIRES_ARM_NEON; 9575 GemmMicrokernelTester() 9576 .mr(3) 9577 .nr(16) 9578 .kr(2) 9579 .sr(1) 9580 .m(3) 9581 .n(16) 9582 .k(8) 9583 .qmax(128) 9584 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9585 } 9586 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD1R,strided_cm)9587 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD1R, strided_cm) { 9588 TEST_REQUIRES_ARM_NEON; 9589 GemmMicrokernelTester() 9590 .mr(3) 9591 .nr(16) 9592 .kr(2) 9593 .sr(1) 9594 .m(3) 9595 .n(16) 9596 .k(8) 9597 .cm_stride(19) 9598 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9599 } 9600 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 9601 9602 9603 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD1R,k_eq_16)9604 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD1R, k_eq_16) { 9605 TEST_REQUIRES_ARM_NEON; 9606 GemmMicrokernelTester() 9607 .mr(3) 9608 .nr(16) 9609 .kr(4) 9610 .sr(1) 9611 .m(3) 9612 .n(16) 9613 .k(16) 9614 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9615 } 9616 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD1R,strided_cn)9617 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD1R, strided_cn) { 9618 TEST_REQUIRES_ARM_NEON; 9619 GemmMicrokernelTester() 9620 .mr(3) 9621 .nr(16) 9622 .kr(4) 9623 .sr(1) 9624 .m(3) 
9625 .n(16) 9626 .k(16) 9627 .cn_stride(19) 9628 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9629 } 9630 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD1R,k_eq_16_strided_a)9631 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD1R, k_eq_16_strided_a) { 9632 TEST_REQUIRES_ARM_NEON; 9633 GemmMicrokernelTester() 9634 .mr(3) 9635 .nr(16) 9636 .kr(4) 9637 .sr(1) 9638 .m(3) 9639 .n(16) 9640 .k(16) 9641 .a_stride(19) 9642 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9643 } 9644 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD1R,k_eq_16_subtile)9645 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD1R, k_eq_16_subtile) { 9646 TEST_REQUIRES_ARM_NEON; 9647 for (uint32_t n = 1; n <= 16; n++) { 9648 for (uint32_t m = 1; m <= 3; m++) { 9649 GemmMicrokernelTester() 9650 .mr(3) 9651 .nr(16) 9652 .kr(4) 9653 .sr(1) 9654 .m(m) 9655 .n(n) 9656 .k(16) 9657 .iterations(1) 9658 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9659 } 9660 } 9661 } 9662 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD1R,k_eq_16_subtile_m)9663 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD1R, k_eq_16_subtile_m) { 9664 TEST_REQUIRES_ARM_NEON; 9665 for (uint32_t m = 1; m <= 3; m++) { 9666 GemmMicrokernelTester() 9667 .mr(3) 9668 .nr(16) 9669 .kr(4) 9670 .sr(1) 9671 .m(m) 9672 .n(16) 9673 .k(16) 9674 .iterations(1) 9675 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9676 } 9677 } 9678 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD1R,k_eq_16_subtile_n)9679 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD1R, k_eq_16_subtile_n) { 9680 TEST_REQUIRES_ARM_NEON; 9681 for (uint32_t n = 1; n <= 16; n++) { 9682 GemmMicrokernelTester() 9683 .mr(3) 9684 .nr(16) 9685 .kr(4) 9686 
.sr(1) 9687 .m(3) 9688 .n(n) 9689 .k(16) 9690 .iterations(1) 9691 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9692 } 9693 } 9694 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD1R,k_lt_16)9695 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD1R, k_lt_16) { 9696 TEST_REQUIRES_ARM_NEON; 9697 for (size_t k = 1; k < 16; k++) { 9698 GemmMicrokernelTester() 9699 .mr(3) 9700 .nr(16) 9701 .kr(4) 9702 .sr(1) 9703 .m(3) 9704 .n(16) 9705 .k(k) 9706 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9707 } 9708 } 9709 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD1R,k_lt_16_strided_a)9710 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD1R, k_lt_16_strided_a) { 9711 TEST_REQUIRES_ARM_NEON; 9712 for (size_t k = 1; k < 16; k++) { 9713 GemmMicrokernelTester() 9714 .mr(3) 9715 .nr(16) 9716 .kr(4) 9717 .sr(1) 9718 .m(3) 9719 .n(16) 9720 .k(k) 9721 .a_stride(19) 9722 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9723 } 9724 } 9725 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD1R,k_lt_16_subtile)9726 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD1R, k_lt_16_subtile) { 9727 TEST_REQUIRES_ARM_NEON; 9728 for (size_t k = 1; k < 16; k++) { 9729 for (uint32_t n = 1; n <= 16; n++) { 9730 for (uint32_t m = 1; m <= 3; m++) { 9731 GemmMicrokernelTester() 9732 .mr(3) 9733 .nr(16) 9734 .kr(4) 9735 .sr(1) 9736 .m(m) 9737 .n(n) 9738 .k(k) 9739 .iterations(1) 9740 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9741 } 9742 } 9743 } 9744 } 9745 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD1R,k_gt_16)9746 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD1R, k_gt_16) { 9747 TEST_REQUIRES_ARM_NEON; 9748 for (size_t k = 17; k < 32; k++) { 9749 
GemmMicrokernelTester() 9750 .mr(3) 9751 .nr(16) 9752 .kr(4) 9753 .sr(1) 9754 .m(3) 9755 .n(16) 9756 .k(k) 9757 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9758 } 9759 } 9760 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD1R,k_gt_16_strided_a)9761 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD1R, k_gt_16_strided_a) { 9762 TEST_REQUIRES_ARM_NEON; 9763 for (size_t k = 17; k < 32; k++) { 9764 GemmMicrokernelTester() 9765 .mr(3) 9766 .nr(16) 9767 .kr(4) 9768 .sr(1) 9769 .m(3) 9770 .n(16) 9771 .k(k) 9772 .a_stride(37) 9773 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9774 } 9775 } 9776 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD1R,k_gt_16_subtile)9777 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD1R, k_gt_16_subtile) { 9778 TEST_REQUIRES_ARM_NEON; 9779 for (size_t k = 17; k < 32; k++) { 9780 for (uint32_t n = 1; n <= 16; n++) { 9781 for (uint32_t m = 1; m <= 3; m++) { 9782 GemmMicrokernelTester() 9783 .mr(3) 9784 .nr(16) 9785 .kr(4) 9786 .sr(1) 9787 .m(m) 9788 .n(n) 9789 .k(k) 9790 .iterations(1) 9791 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9792 } 9793 } 9794 } 9795 } 9796 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD1R,k_div_16)9797 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD1R, k_div_16) { 9798 TEST_REQUIRES_ARM_NEON; 9799 for (size_t k = 32; k <= 160; k += 16) { 9800 GemmMicrokernelTester() 9801 .mr(3) 9802 .nr(16) 9803 .kr(4) 9804 .sr(1) 9805 .m(3) 9806 .n(16) 9807 .k(k) 9808 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9809 } 9810 } 9811 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD1R,k_div_16_strided_a)9812 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD1R, k_div_16_strided_a) { 9813 
TEST_REQUIRES_ARM_NEON; 9814 for (size_t k = 32; k <= 160; k += 16) { 9815 GemmMicrokernelTester() 9816 .mr(3) 9817 .nr(16) 9818 .kr(4) 9819 .sr(1) 9820 .m(3) 9821 .n(16) 9822 .k(k) 9823 .a_stride(163) 9824 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9825 } 9826 } 9827 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD1R,k_div_16_subtile)9828 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD1R, k_div_16_subtile) { 9829 TEST_REQUIRES_ARM_NEON; 9830 for (size_t k = 32; k <= 160; k += 16) { 9831 for (uint32_t n = 1; n <= 16; n++) { 9832 for (uint32_t m = 1; m <= 3; m++) { 9833 GemmMicrokernelTester() 9834 .mr(3) 9835 .nr(16) 9836 .kr(4) 9837 .sr(1) 9838 .m(m) 9839 .n(n) 9840 .k(k) 9841 .iterations(1) 9842 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9843 } 9844 } 9845 } 9846 } 9847 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD1R,n_gt_16)9848 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD1R, n_gt_16) { 9849 TEST_REQUIRES_ARM_NEON; 9850 for (uint32_t n = 17; n < 32; n++) { 9851 for (size_t k = 1; k <= 80; k += 17) { 9852 GemmMicrokernelTester() 9853 .mr(3) 9854 .nr(16) 9855 .kr(4) 9856 .sr(1) 9857 .m(3) 9858 .n(n) 9859 .k(k) 9860 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9861 } 9862 } 9863 } 9864 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD1R,n_gt_16_strided_cn)9865 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD1R, n_gt_16_strided_cn) { 9866 TEST_REQUIRES_ARM_NEON; 9867 for (uint32_t n = 17; n < 32; n++) { 9868 for (size_t k = 1; k <= 80; k += 17) { 9869 GemmMicrokernelTester() 9870 .mr(3) 9871 .nr(16) 9872 .kr(4) 9873 .sr(1) 9874 .m(3) 9875 .n(n) 9876 .k(k) 9877 .cn_stride(19) 9878 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, 
xnn_qs8_requantize_rndnu); 9879 } 9880 } 9881 } 9882 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD1R,n_gt_16_strided_a)9883 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD1R, n_gt_16_strided_a) { 9884 TEST_REQUIRES_ARM_NEON; 9885 for (uint32_t n = 17; n < 32; n++) { 9886 for (size_t k = 1; k <= 80; k += 17) { 9887 GemmMicrokernelTester() 9888 .mr(3) 9889 .nr(16) 9890 .kr(4) 9891 .sr(1) 9892 .m(3) 9893 .n(n) 9894 .k(k) 9895 .a_stride(83) 9896 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9897 } 9898 } 9899 } 9900 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD1R,n_gt_16_subtile)9901 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD1R, n_gt_16_subtile) { 9902 TEST_REQUIRES_ARM_NEON; 9903 for (uint32_t n = 17; n < 32; n++) { 9904 for (size_t k = 1; k <= 80; k += 17) { 9905 for (uint32_t m = 1; m <= 3; m++) { 9906 GemmMicrokernelTester() 9907 .mr(3) 9908 .nr(16) 9909 .kr(4) 9910 .sr(1) 9911 .m(m) 9912 .n(n) 9913 .k(k) 9914 .iterations(1) 9915 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9916 } 9917 } 9918 } 9919 } 9920 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD1R,n_div_16)9921 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD1R, n_div_16) { 9922 TEST_REQUIRES_ARM_NEON; 9923 for (uint32_t n = 32; n <= 48; n += 16) { 9924 for (size_t k = 1; k <= 80; k += 17) { 9925 GemmMicrokernelTester() 9926 .mr(3) 9927 .nr(16) 9928 .kr(4) 9929 .sr(1) 9930 .m(3) 9931 .n(n) 9932 .k(k) 9933 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9934 } 9935 } 9936 } 9937 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD1R,n_div_16_strided_cn)9938 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD1R, n_div_16_strided_cn) { 9939 TEST_REQUIRES_ARM_NEON; 9940 for (uint32_t n = 32; n <= 48; n += 16) { 9941 for (size_t k = 1; k <= 80; k += 
17) { 9942 GemmMicrokernelTester() 9943 .mr(3) 9944 .nr(16) 9945 .kr(4) 9946 .sr(1) 9947 .m(3) 9948 .n(n) 9949 .k(k) 9950 .cn_stride(19) 9951 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9952 } 9953 } 9954 } 9955 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD1R,n_div_16_strided_a)9956 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD1R, n_div_16_strided_a) { 9957 TEST_REQUIRES_ARM_NEON; 9958 for (uint32_t n = 32; n <= 48; n += 16) { 9959 for (size_t k = 1; k <= 80; k += 17) { 9960 GemmMicrokernelTester() 9961 .mr(3) 9962 .nr(16) 9963 .kr(4) 9964 .sr(1) 9965 .m(3) 9966 .n(n) 9967 .k(k) 9968 .a_stride(83) 9969 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9970 } 9971 } 9972 } 9973 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD1R,n_div_16_subtile)9974 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD1R, n_div_16_subtile) { 9975 TEST_REQUIRES_ARM_NEON; 9976 for (uint32_t n = 32; n <= 48; n += 16) { 9977 for (size_t k = 1; k <= 80; k += 17) { 9978 for (uint32_t m = 1; m <= 3; m++) { 9979 GemmMicrokernelTester() 9980 .mr(3) 9981 .nr(16) 9982 .kr(4) 9983 .sr(1) 9984 .m(m) 9985 .n(n) 9986 .k(k) 9987 .iterations(1) 9988 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9989 } 9990 } 9991 } 9992 } 9993 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD1R,strided_cm_subtile)9994 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD1R, strided_cm_subtile) { 9995 TEST_REQUIRES_ARM_NEON; 9996 for (size_t k = 1; k <= 80; k += 17) { 9997 for (uint32_t n = 1; n <= 16; n++) { 9998 for (uint32_t m = 1; m <= 3; m++) { 9999 GemmMicrokernelTester() 10000 .mr(3) 10001 .nr(16) 10002 .kr(4) 10003 .sr(1) 10004 .m(m) 10005 .n(n) 10006 .k(k) 10007 .cm_stride(19) 10008 .iterations(1) 10009 
.Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10010 } 10011 } 10012 } 10013 } 10014 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD1R,qmin)10015 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD1R, qmin) { 10016 TEST_REQUIRES_ARM_NEON; 10017 GemmMicrokernelTester() 10018 .mr(3) 10019 .nr(16) 10020 .kr(4) 10021 .sr(1) 10022 .m(3) 10023 .n(16) 10024 .k(16) 10025 .qmin(128) 10026 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10027 } 10028 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD1R,qmax)10029 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD1R, qmax) { 10030 TEST_REQUIRES_ARM_NEON; 10031 GemmMicrokernelTester() 10032 .mr(3) 10033 .nr(16) 10034 .kr(4) 10035 .sr(1) 10036 .m(3) 10037 .n(16) 10038 .k(16) 10039 .qmax(128) 10040 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10041 } 10042 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD1R,strided_cm)10043 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD1R, strided_cm) { 10044 TEST_REQUIRES_ARM_NEON; 10045 GemmMicrokernelTester() 10046 .mr(3) 10047 .nr(16) 10048 .kr(4) 10049 .sr(1) 10050 .m(3) 10051 .n(16) 10052 .k(16) 10053 .cm_stride(19) 10054 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10055 } 10056 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 10057 10058 10059 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP,k_eq_8)10060 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP, k_eq_8) { 10061 TEST_REQUIRES_ARM_NEON; 10062 GemmMicrokernelTester() 10063 .mr(3) 10064 .nr(16) 10065 .kr(4) 10066 .sr(1) 10067 .m(3) 10068 .n(16) 10069 .k(8) 10070 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, 
xnn_qs8_requantize_rndnu); 10071 } 10072 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP,strided_cn)10073 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP, strided_cn) { 10074 TEST_REQUIRES_ARM_NEON; 10075 GemmMicrokernelTester() 10076 .mr(3) 10077 .nr(16) 10078 .kr(4) 10079 .sr(1) 10080 .m(3) 10081 .n(16) 10082 .k(8) 10083 .cn_stride(19) 10084 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10085 } 10086 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP,k_eq_8_strided_a)10087 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP, k_eq_8_strided_a) { 10088 TEST_REQUIRES_ARM_NEON; 10089 GemmMicrokernelTester() 10090 .mr(3) 10091 .nr(16) 10092 .kr(4) 10093 .sr(1) 10094 .m(3) 10095 .n(16) 10096 .k(8) 10097 .a_stride(11) 10098 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10099 } 10100 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP,k_eq_8_subtile)10101 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP, k_eq_8_subtile) { 10102 TEST_REQUIRES_ARM_NEON; 10103 for (uint32_t n = 1; n <= 16; n++) { 10104 for (uint32_t m = 1; m <= 3; m++) { 10105 GemmMicrokernelTester() 10106 .mr(3) 10107 .nr(16) 10108 .kr(4) 10109 .sr(1) 10110 .m(m) 10111 .n(n) 10112 .k(8) 10113 .iterations(1) 10114 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10115 } 10116 } 10117 } 10118 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP,k_eq_8_subtile_m)10119 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP, k_eq_8_subtile_m) { 10120 TEST_REQUIRES_ARM_NEON; 10121 for (uint32_t m = 1; m <= 3; m++) { 10122 GemmMicrokernelTester() 10123 .mr(3) 10124 .nr(16) 10125 .kr(4) 10126 .sr(1) 10127 .m(m) 10128 .n(16) 10129 .k(8) 10130 .iterations(1) 10131 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, 
xnn_qs8_requantize_rndnu); 10132 } 10133 } 10134 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP,k_eq_8_subtile_n)10135 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP, k_eq_8_subtile_n) { 10136 TEST_REQUIRES_ARM_NEON; 10137 for (uint32_t n = 1; n <= 16; n++) { 10138 GemmMicrokernelTester() 10139 .mr(3) 10140 .nr(16) 10141 .kr(4) 10142 .sr(1) 10143 .m(3) 10144 .n(n) 10145 .k(8) 10146 .iterations(1) 10147 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10148 } 10149 } 10150 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP,k_lt_8)10151 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP, k_lt_8) { 10152 TEST_REQUIRES_ARM_NEON; 10153 for (size_t k = 1; k < 8; k++) { 10154 GemmMicrokernelTester() 10155 .mr(3) 10156 .nr(16) 10157 .kr(4) 10158 .sr(1) 10159 .m(3) 10160 .n(16) 10161 .k(k) 10162 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10163 } 10164 } 10165 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP,k_lt_8_strided_a)10166 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP, k_lt_8_strided_a) { 10167 TEST_REQUIRES_ARM_NEON; 10168 for (size_t k = 1; k < 8; k++) { 10169 GemmMicrokernelTester() 10170 .mr(3) 10171 .nr(16) 10172 .kr(4) 10173 .sr(1) 10174 .m(3) 10175 .n(16) 10176 .k(k) 10177 .a_stride(11) 10178 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10179 } 10180 } 10181 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP,k_lt_8_subtile)10182 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP, k_lt_8_subtile) { 10183 TEST_REQUIRES_ARM_NEON; 10184 for (size_t k = 1; k < 8; k++) { 10185 for (uint32_t n = 1; n <= 16; n++) { 10186 for (uint32_t m = 1; m <= 3; m++) { 10187 GemmMicrokernelTester() 10188 .mr(3) 10189 .nr(16) 10190 .kr(4) 10191 .sr(1) 10192 .m(m) 10193 .n(n) 10194 .k(k) 10195 .iterations(1) 10196 
.Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10197 } 10198 } 10199 } 10200 } 10201 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP,k_gt_8)10202 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP, k_gt_8) { 10203 TEST_REQUIRES_ARM_NEON; 10204 for (size_t k = 9; k < 16; k++) { 10205 GemmMicrokernelTester() 10206 .mr(3) 10207 .nr(16) 10208 .kr(4) 10209 .sr(1) 10210 .m(3) 10211 .n(16) 10212 .k(k) 10213 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10214 } 10215 } 10216 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP,k_gt_8_strided_a)10217 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP, k_gt_8_strided_a) { 10218 TEST_REQUIRES_ARM_NEON; 10219 for (size_t k = 9; k < 16; k++) { 10220 GemmMicrokernelTester() 10221 .mr(3) 10222 .nr(16) 10223 .kr(4) 10224 .sr(1) 10225 .m(3) 10226 .n(16) 10227 .k(k) 10228 .a_stride(19) 10229 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10230 } 10231 } 10232 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP,k_gt_8_subtile)10233 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP, k_gt_8_subtile) { 10234 TEST_REQUIRES_ARM_NEON; 10235 for (size_t k = 9; k < 16; k++) { 10236 for (uint32_t n = 1; n <= 16; n++) { 10237 for (uint32_t m = 1; m <= 3; m++) { 10238 GemmMicrokernelTester() 10239 .mr(3) 10240 .nr(16) 10241 .kr(4) 10242 .sr(1) 10243 .m(m) 10244 .n(n) 10245 .k(k) 10246 .iterations(1) 10247 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10248 } 10249 } 10250 } 10251 } 10252 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP,k_div_8)10253 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP, k_div_8) { 10254 TEST_REQUIRES_ARM_NEON; 10255 for (size_t k = 16; k <= 80; k += 8) { 10256 GemmMicrokernelTester() 
10257 .mr(3) 10258 .nr(16) 10259 .kr(4) 10260 .sr(1) 10261 .m(3) 10262 .n(16) 10263 .k(k) 10264 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10265 } 10266 } 10267 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP,k_div_8_strided_a)10268 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP, k_div_8_strided_a) { 10269 TEST_REQUIRES_ARM_NEON; 10270 for (size_t k = 16; k <= 80; k += 8) { 10271 GemmMicrokernelTester() 10272 .mr(3) 10273 .nr(16) 10274 .kr(4) 10275 .sr(1) 10276 .m(3) 10277 .n(16) 10278 .k(k) 10279 .a_stride(83) 10280 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10281 } 10282 } 10283 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP,k_div_8_subtile)10284 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP, k_div_8_subtile) { 10285 TEST_REQUIRES_ARM_NEON; 10286 for (size_t k = 16; k <= 80; k += 8) { 10287 for (uint32_t n = 1; n <= 16; n++) { 10288 for (uint32_t m = 1; m <= 3; m++) { 10289 GemmMicrokernelTester() 10290 .mr(3) 10291 .nr(16) 10292 .kr(4) 10293 .sr(1) 10294 .m(m) 10295 .n(n) 10296 .k(k) 10297 .iterations(1) 10298 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10299 } 10300 } 10301 } 10302 } 10303 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP,n_gt_16)10304 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP, n_gt_16) { 10305 TEST_REQUIRES_ARM_NEON; 10306 for (uint32_t n = 17; n < 32; n++) { 10307 for (size_t k = 1; k <= 40; k += 9) { 10308 GemmMicrokernelTester() 10309 .mr(3) 10310 .nr(16) 10311 .kr(4) 10312 .sr(1) 10313 .m(3) 10314 .n(n) 10315 .k(k) 10316 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10317 } 10318 } 10319 } 10320 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP,n_gt_16_strided_cn)10321 
TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP, n_gt_16_strided_cn) { 10322 TEST_REQUIRES_ARM_NEON; 10323 for (uint32_t n = 17; n < 32; n++) { 10324 for (size_t k = 1; k <= 40; k += 9) { 10325 GemmMicrokernelTester() 10326 .mr(3) 10327 .nr(16) 10328 .kr(4) 10329 .sr(1) 10330 .m(3) 10331 .n(n) 10332 .k(k) 10333 .cn_stride(19) 10334 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10335 } 10336 } 10337 } 10338 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP,n_gt_16_strided_a)10339 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP, n_gt_16_strided_a) { 10340 TEST_REQUIRES_ARM_NEON; 10341 for (uint32_t n = 17; n < 32; n++) { 10342 for (size_t k = 1; k <= 40; k += 9) { 10343 GemmMicrokernelTester() 10344 .mr(3) 10345 .nr(16) 10346 .kr(4) 10347 .sr(1) 10348 .m(3) 10349 .n(n) 10350 .k(k) 10351 .a_stride(43) 10352 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10353 } 10354 } 10355 } 10356 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP,n_gt_16_subtile)10357 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP, n_gt_16_subtile) { 10358 TEST_REQUIRES_ARM_NEON; 10359 for (uint32_t n = 17; n < 32; n++) { 10360 for (size_t k = 1; k <= 40; k += 9) { 10361 for (uint32_t m = 1; m <= 3; m++) { 10362 GemmMicrokernelTester() 10363 .mr(3) 10364 .nr(16) 10365 .kr(4) 10366 .sr(1) 10367 .m(m) 10368 .n(n) 10369 .k(k) 10370 .iterations(1) 10371 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10372 } 10373 } 10374 } 10375 } 10376 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP,n_div_16)10377 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP, n_div_16) { 10378 TEST_REQUIRES_ARM_NEON; 10379 for (uint32_t n = 32; n <= 48; n += 16) { 10380 for (size_t k = 1; k <= 40; k += 9) { 10381 GemmMicrokernelTester() 10382 .mr(3) 10383 .nr(16) 10384 
.kr(4) 10385 .sr(1) 10386 .m(3) 10387 .n(n) 10388 .k(k) 10389 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10390 } 10391 } 10392 } 10393 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP,n_div_16_strided_cn)10394 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP, n_div_16_strided_cn) { 10395 TEST_REQUIRES_ARM_NEON; 10396 for (uint32_t n = 32; n <= 48; n += 16) { 10397 for (size_t k = 1; k <= 40; k += 9) { 10398 GemmMicrokernelTester() 10399 .mr(3) 10400 .nr(16) 10401 .kr(4) 10402 .sr(1) 10403 .m(3) 10404 .n(n) 10405 .k(k) 10406 .cn_stride(19) 10407 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10408 } 10409 } 10410 } 10411 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP,n_div_16_strided_a)10412 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP, n_div_16_strided_a) { 10413 TEST_REQUIRES_ARM_NEON; 10414 for (uint32_t n = 32; n <= 48; n += 16) { 10415 for (size_t k = 1; k <= 40; k += 9) { 10416 GemmMicrokernelTester() 10417 .mr(3) 10418 .nr(16) 10419 .kr(4) 10420 .sr(1) 10421 .m(3) 10422 .n(n) 10423 .k(k) 10424 .a_stride(43) 10425 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10426 } 10427 } 10428 } 10429 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP,n_div_16_subtile)10430 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP, n_div_16_subtile) { 10431 TEST_REQUIRES_ARM_NEON; 10432 for (uint32_t n = 32; n <= 48; n += 16) { 10433 for (size_t k = 1; k <= 40; k += 9) { 10434 for (uint32_t m = 1; m <= 3; m++) { 10435 GemmMicrokernelTester() 10436 .mr(3) 10437 .nr(16) 10438 .kr(4) 10439 .sr(1) 10440 .m(m) 10441 .n(n) 10442 .k(k) 10443 .iterations(1) 10444 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10445 } 10446 } 10447 } 10448 
} 10449 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP,strided_cm_subtile)10450 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP, strided_cm_subtile) { 10451 TEST_REQUIRES_ARM_NEON; 10452 for (size_t k = 1; k <= 40; k += 9) { 10453 for (uint32_t n = 1; n <= 16; n++) { 10454 for (uint32_t m = 1; m <= 3; m++) { 10455 GemmMicrokernelTester() 10456 .mr(3) 10457 .nr(16) 10458 .kr(4) 10459 .sr(1) 10460 .m(m) 10461 .n(n) 10462 .k(k) 10463 .cm_stride(19) 10464 .iterations(1) 10465 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10466 } 10467 } 10468 } 10469 } 10470 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP,qmin)10471 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP, qmin) { 10472 TEST_REQUIRES_ARM_NEON; 10473 GemmMicrokernelTester() 10474 .mr(3) 10475 .nr(16) 10476 .kr(4) 10477 .sr(1) 10478 .m(3) 10479 .n(16) 10480 .k(8) 10481 .qmin(128) 10482 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10483 } 10484 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP,qmax)10485 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP, qmax) { 10486 TEST_REQUIRES_ARM_NEON; 10487 GemmMicrokernelTester() 10488 .mr(3) 10489 .nr(16) 10490 .kr(4) 10491 .sr(1) 10492 .m(3) 10493 .n(16) 10494 .k(8) 10495 .qmax(128) 10496 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10497 } 10498 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP,strided_cm)10499 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_DUP, strided_cm) { 10500 TEST_REQUIRES_ARM_NEON; 10501 GemmMicrokernelTester() 10502 .mr(3) 10503 .nr(16) 10504 .kr(4) 10505 .sr(1) 10506 .m(3) 10507 .n(16) 10508 .k(8) 10509 .cm_stride(19) 10510 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10511 } 10512 #endif // 
XNN_ARCH_ARM || XNN_ARCH_ARM64 10513 10514 10515 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD1R,k_eq_16)10516 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD1R, k_eq_16) { 10517 TEST_REQUIRES_ARM_NEON; 10518 GemmMicrokernelTester() 10519 .mr(4) 10520 .nr(8) 10521 .kr(2) 10522 .sr(1) 10523 .m(4) 10524 .n(8) 10525 .k(16) 10526 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10527 } 10528 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD1R,strided_cn)10529 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD1R, strided_cn) { 10530 TEST_REQUIRES_ARM_NEON; 10531 GemmMicrokernelTester() 10532 .mr(4) 10533 .nr(8) 10534 .kr(2) 10535 .sr(1) 10536 .m(4) 10537 .n(8) 10538 .k(16) 10539 .cn_stride(11) 10540 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10541 } 10542 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD1R,k_eq_16_strided_a)10543 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD1R, k_eq_16_strided_a) { 10544 TEST_REQUIRES_ARM_NEON; 10545 GemmMicrokernelTester() 10546 .mr(4) 10547 .nr(8) 10548 .kr(2) 10549 .sr(1) 10550 .m(4) 10551 .n(8) 10552 .k(16) 10553 .a_stride(19) 10554 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10555 } 10556 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD1R,k_eq_16_subtile)10557 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD1R, k_eq_16_subtile) { 10558 TEST_REQUIRES_ARM_NEON; 10559 for (uint32_t n = 1; n <= 8; n++) { 10560 for (uint32_t m = 1; m <= 4; m++) { 10561 GemmMicrokernelTester() 10562 .mr(4) 10563 .nr(8) 10564 .kr(2) 10565 .sr(1) 10566 .m(m) 10567 .n(n) 10568 .k(16) 10569 .iterations(1) 10570 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10571 } 10572 } 10573 } 10574 
// k = 16, n = 8, every m in 1..4; one iteration per case.
TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD1R, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m = 1; m <= 4; ++m) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(2).sr(1)
      .m(m).n(8).k(16)
      .iterations(1)
      .Test(
        xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}

// k = 16, m = 4, every n in 1..8; one iteration per case.
TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD1R, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 8; ++n) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(2).sr(1)
      .m(4).n(n).k(16)
      .iterations(1)
      .Test(
        xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}

// k = 1..15, m = 4, n = 8.
TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD1R, k_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 16; ++k) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(2).sr(1)
      .m(4).n(8).k(k)
      .Test(
        xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}

// k = 1..15, m = 4, n = 8, with a_stride(19).
TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD1R, k_lt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 16; ++k) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(2).sr(1)
      .m(4).n(8).k(k)
      .a_stride(19)
      .Test(
        xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}

// k = 1..15, every n in 1..8 and m in 1..4; one iteration per case.
TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD1R, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 16; ++k) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(2).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(
            xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// k = 17..31, m = 4, n = 8.
TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD1R, k_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 17; k < 32; ++k) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(2).sr(1)
      .m(4).n(8).k(k)
      .Test(
        xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}

// k = 17..31, m = 4, n = 8, with a_stride(37).
TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD1R, k_gt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 17; k < 32; ++k) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(2).sr(1)
      .m(4).n(8).k(k)
      .a_stride(37)
      .Test(
        xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}

// k = 17..31, every n in 1..8 and m in 1..4; one iteration per case.
TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD1R, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 17; k < 32; ++k) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(2).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(
            xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// k = 32, 48, ..., 160 (step 16), m = 4, n = 8.
TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD1R, k_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 32; k <= 160; k += 16) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(2).sr(1)
      .m(4).n(8).k(k)
      .Test(
        xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}

// k = 32, 48, ..., 160 (step 16), m = 4, n = 8, with a_stride(163).
TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD1R, k_div_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 32; k <= 160; k += 16) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(2).sr(1)
      .m(4).n(8).k(k)
      .a_stride(163)
      .Test(
        xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}

// k = 32, 48, ..., 160 (step 16), every n in 1..8 and m in 1..4;
// one iteration per case.
TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD1R, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 32; k <= 160; k += 16) {
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(2).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(
            xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// n = 9..15, k in {1, 18, 35, 52, 69}, m = 4.
TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD1R, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(2).sr(1)
        .m(4).n(n).k(k)
        .Test(
          xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}

// n = 9..15, k in {1, 18, 35, 52, 69}, m = 4, with cn_stride(11).
TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD1R, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(2).sr(1)
        .m(4).n(n).k(k)
        .cn_stride(11)
        .Test(
          xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}

// n = 9..15, k in {1, 18, 35, 52, 69}, m = 4, with a_stride(83).
TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD1R, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(2).sr(1)
        .m(4).n(n).k(k)
        .a_stride(83)
        .Test(
          xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}

// n = 9..15, k in {1, 18, 35, 52, 69}, every m in 1..4; one iteration per case.
TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD1R, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 80; k += 17) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(2).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(
            xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// n in {16, 24}, k in {1, 18, 35, 52, 69}, m = 4.
TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD1R, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(2).sr(1)
        .m(4).n(n).k(k)
        .Test(
          xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}

// n in {16, 24}, k in {1, 18, 35, 52, 69}, m = 4, with cn_stride(11).
TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD1R, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(2).sr(1)
        .m(4).n(n).k(k)
        .cn_stride(11)
        .Test(
          xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}

// n in {16, 24}, k in {1, 18, 35, 52, 69}, m = 4, with a_stride(83).
TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD1R, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(2).sr(1)
        .m(4).n(n).k(k)
        .a_stride(83)
        .Test(
          xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}

TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD1R, n_div_8_subtile) { 10887 TEST_REQUIRES_ARM_NEON; 10888 for (uint32_t n = 16; n <= 24; n += 8) { 10889 for (size_t k = 1; k <= 80; k += 17) { 10890 for (uint32_t m = 1; m <= 4; m++) { 10891 GemmMicrokernelTester() 10892 .mr(4) 10893 .nr(8) 10894 .kr(2) 10895 .sr(1) 10896 .m(m) 10897 .n(n) 10898 .k(k) 10899 .iterations(1) 10900 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10901 } 10902 } 10903 } 10904 } 10905 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD1R,strided_cm_subtile)10906 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD1R, strided_cm_subtile) { 10907 TEST_REQUIRES_ARM_NEON; 10908 for (size_t k = 1; k <= 80; k += 17) { 10909 for (uint32_t n = 1; n <= 8; n++) { 10910 for (uint32_t m = 1; m <= 4; m++) { 10911 GemmMicrokernelTester() 10912 .mr(4) 10913 .nr(8) 10914 .kr(2) 10915 .sr(1) 10916 .m(m) 10917 .n(n) 10918 .k(k) 10919 .cm_stride(11) 10920 .iterations(1) 10921 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10922 } 10923 } 10924 } 10925 } 10926 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD1R,qmin)10927 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD1R, qmin) { 10928 TEST_REQUIRES_ARM_NEON; 10929 GemmMicrokernelTester() 10930 .mr(4) 10931 .nr(8) 10932 .kr(2) 10933 .sr(1) 10934 .m(4) 10935 .n(8) 10936 .k(16) 10937 .qmin(128) 10938 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10939 } 10940 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD1R,qmax)10941 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD1R, qmax) { 10942 TEST_REQUIRES_ARM_NEON; 10943 GemmMicrokernelTester() 10944 .mr(4) 10945 .nr(8) 10946 .kr(2) 10947 .sr(1) 10948 .m(4) 10949 .n(8) 10950 .k(16) 10951 .qmax(128) 10952 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r, 
xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10953 } 10954 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD1R,strided_cm)10955 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD1R, strided_cm) { 10956 TEST_REQUIRES_ARM_NEON; 10957 GemmMicrokernelTester() 10958 .mr(4) 10959 .nr(8) 10960 .kr(2) 10961 .sr(1) 10962 .m(4) 10963 .n(8) 10964 .k(16) 10965 .cm_stride(11) 10966 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10967 } 10968 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 10969 10970 10971 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL,k_eq_16)10972 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, k_eq_16) { 10973 TEST_REQUIRES_ARM_NEON; 10974 GemmMicrokernelTester() 10975 .mr(4) 10976 .nr(8) 10977 .kr(2) 10978 .sr(4) 10979 .m(4) 10980 .n(8) 10981 .k(16) 10982 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10983 } 10984 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL,strided_cn)10985 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, strided_cn) { 10986 TEST_REQUIRES_ARM_NEON; 10987 GemmMicrokernelTester() 10988 .mr(4) 10989 .nr(8) 10990 .kr(2) 10991 .sr(4) 10992 .m(4) 10993 .n(8) 10994 .k(16) 10995 .cn_stride(11) 10996 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10997 } 10998 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL,k_eq_16_strided_a)10999 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, k_eq_16_strided_a) { 11000 TEST_REQUIRES_ARM_NEON; 11001 GemmMicrokernelTester() 11002 .mr(4) 11003 .nr(8) 11004 .kr(2) 11005 .sr(4) 11006 .m(4) 11007 .n(8) 11008 .k(16) 11009 .a_stride(19) 11010 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 11011 } 11012 
// k = 16 for every (m, n) with m in 1..4 and n in 1..8; one iteration per case.
TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 8; n++) {
    for (uint32_t m = 1; m <= 4; m++) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(2)
        .sr(4)
        .m(m)
        .n(n)
        .k(16)
        .iterations(1)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// k = 16, n = 8, every m in 1..4; one iteration per case.
TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m = 1; m <= 4; m++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(2)
      .sr(4)
      .m(m)
      .n(8)
      .k(16)
      .iterations(1)
      .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// k = 16, m = 4, every n in 1..8; one iteration per case.
TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 8; n++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(2)
      .sr(4)
      .m(4)
      .n(n)
      .k(16)
      .iterations(1)
      .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// Full 4x8 tile for every k in 1..15.
TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, k_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(2)
      .sr(4)
      .m(4)
      .n(8)
      .k(k)
      .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// Full 4x8 tile for every k in 1..15, with a_stride(19) set on the tester.
TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, k_lt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(2)
      .sr(4)
      .m(4)
      .n(8)
      .k(k)
      .a_stride(19)
      .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// Every k in 1..15 crossed with every (m, n), m in 1..4 and n in 1..8; one iteration per case.
TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 16; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// Full 4x8 tile for every k in 17..31.
TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, k_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 17; k < 32; k++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(2)
      .sr(4)
      .m(4)
      .n(8)
      .k(k)
      .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// Full 4x8 tile for every k in 17..31, with a_stride(37) set on the tester.
TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, k_gt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 17; k < 32; k++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(2)
      .sr(4)
      .m(4)
      .n(8)
      .k(k)
      .a_stride(37)
      .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// Every k in 17..31 crossed with every (m, n), m in 1..4 and n in 1..8; one iteration per case.
TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 17; k < 32; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// Full 4x8 tile for k = 32..160 in steps of 16.
TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, k_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 32; k <= 160; k += 16) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(2)
      .sr(4)
      .m(4)
      .n(8)
      .k(k)
      .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// Full 4x8 tile for k = 32..160 in steps of 16, with a_stride(163) set on the tester.
TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, k_div_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 32; k <= 160; k += 16) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(2)
      .sr(4)
      .m(4)
      .n(8)
      .k(k)
      .a_stride(163)
      .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// k = 32..160 in steps of 16 crossed with every (m, n), m in 1..4 and n in 1..8; one iteration per case.
TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 32; k <= 160; k += 16) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// m = 4, every n in 9..15, k = 1..80 step 17.
TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(2)
        .sr(4)
        .m(4)
        .n(n)
        .k(k)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// m = 4, every n in 9..15, k = 1..80 step 17, with cn_stride(11) set on the tester.
TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(2)
        .sr(4)
        .m(4)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// m = 4, every n in 9..15, k = 1..80 step 17, with a_stride(83) set on the tester.
TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(2)
        .sr(4)
        .m(4)
        .n(n)
        .k(k)
        .a_stride(83)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}
// Every n in 9..15, k = 1..80 step 17, every m in 1..4; one iteration per case.
TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// m = 4, n in {16, 24} (multiples of nr = 8), k = 1..80 step 17.
TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(2)
        .sr(4)
        .m(4)
        .n(n)
        .k(k)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// m = 4, n in {16, 24}, k = 1..80 step 17, with cn_stride(11) set on the tester.
TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(2)
        .sr(4)
        .m(4)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// m = 4, n in {16, 24}, k = 1..80 step 17, with a_stride(83) set on the tester.
TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(2)
        .sr(4)
        .m(4)
        .n(n)
        .k(k)
        .a_stride(83)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// n in {16, 24}, k = 1..80 step 17, every m in 1..4; one iteration per case.
TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// cm_stride(11) for every (m, n) with m in 1..4, n in 1..8 and k = 1..80 step 17; one iteration per case.
TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 80; k += 17) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// Full 4x8 tile, k = 16, with qmin(128) set on the tester.
TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4)
    .nr(8)
    .kr(2)
    .sr(4)
    .m(4)
    .n(8)
    .k(16)
    .qmin(128)
    .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL,qmax)11397 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, qmax) { 11398 TEST_REQUIRES_ARM_NEON; 11399 GemmMicrokernelTester() 11400 .mr(4) 11401 .nr(8) 11402 .kr(2) 11403 .sr(4) 11404 .m(4) 11405 .n(8) 11406 .k(16) 11407 .qmax(128) 11408 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 11409 } 11410 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL,strided_cm)11411 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, strided_cm) { 11412 TEST_REQUIRES_ARM_NEON; 11413 GemmMicrokernelTester() 11414 .mr(4) 11415 .nr(8) 11416 .kr(2) 11417 .sr(4) 11418 .m(4) 11419 .n(8) 11420 .k(16) 11421 .cm_stride(11) 11422 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 11423 } 11424 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 11425 11426 11427 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_DUP,k_eq_16)11428 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_DUP, k_eq_16) { 11429 TEST_REQUIRES_ARM_NEON; 11430 GemmMicrokernelTester() 11431 .mr(4) 11432 .nr(8) 11433 .kr(4) 11434 .sr(1) 11435 .m(4) 11436 .n(8) 11437 .k(16) 11438 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 11439 } 11440 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_DUP,strided_cn)11441 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_DUP, strided_cn) { 11442 TEST_REQUIRES_ARM_NEON; 11443 GemmMicrokernelTester() 11444 .mr(4) 11445 .nr(8) 11446 .kr(4) 11447 .sr(1) 11448 .m(4) 11449 .n(8) 11450 .k(16) 11451 .cn_stride(11) 11452 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 11453 } 11454 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_DUP,k_eq_16_strided_a)11455 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_DUP, k_eq_16_strided_a) { 11456 
TEST_REQUIRES_ARM_NEON; 11457 GemmMicrokernelTester() 11458 .mr(4) 11459 .nr(8) 11460 .kr(4) 11461 .sr(1) 11462 .m(4) 11463 .n(8) 11464 .k(16) 11465 .a_stride(19) 11466 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 11467 } 11468 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_DUP,k_eq_16_subtile)11469 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_DUP, k_eq_16_subtile) { 11470 TEST_REQUIRES_ARM_NEON; 11471 for (uint32_t n = 1; n <= 8; n++) { 11472 for (uint32_t m = 1; m <= 4; m++) { 11473 GemmMicrokernelTester() 11474 .mr(4) 11475 .nr(8) 11476 .kr(4) 11477 .sr(1) 11478 .m(m) 11479 .n(n) 11480 .k(16) 11481 .iterations(1) 11482 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 11483 } 11484 } 11485 } 11486 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_DUP,k_eq_16_subtile_m)11487 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_DUP, k_eq_16_subtile_m) { 11488 TEST_REQUIRES_ARM_NEON; 11489 for (uint32_t m = 1; m <= 4; m++) { 11490 GemmMicrokernelTester() 11491 .mr(4) 11492 .nr(8) 11493 .kr(4) 11494 .sr(1) 11495 .m(m) 11496 .n(8) 11497 .k(16) 11498 .iterations(1) 11499 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 11500 } 11501 } 11502 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_DUP,k_eq_16_subtile_n)11503 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_DUP, k_eq_16_subtile_n) { 11504 TEST_REQUIRES_ARM_NEON; 11505 for (uint32_t n = 1; n <= 8; n++) { 11506 GemmMicrokernelTester() 11507 .mr(4) 11508 .nr(8) 11509 .kr(4) 11510 .sr(1) 11511 .m(4) 11512 .n(n) 11513 .k(16) 11514 .iterations(1) 11515 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 11516 } 11517 } 11518 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_DUP,k_lt_16)11519 
// Full 4x8 tile for every k in 1..15.
TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_DUP, k_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(4)
      .sr(1)
      .m(4)
      .n(8)
      .k(k)
      .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// Full 4x8 tile for every k in 1..15, with a_stride(19) set on the tester.
TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_DUP, k_lt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(4)
      .sr(1)
      .m(4)
      .n(8)
      .k(k)
      .a_stride(19)
      .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// Every k in 1..15 crossed with every (m, n), m in 1..4 and n in 1..8; one iteration per case.
TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_DUP, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 16; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// Full 4x8 tile for every k in 17..31.
TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_DUP, k_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 17; k < 32; k++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(4)
      .sr(1)
      .m(4)
      .n(8)
      .k(k)
      .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}
// Full 4x8 tile for every k in 17..31, with a_stride(37) set on the tester.
TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_DUP, k_gt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 17; k < 32; k++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(4)
      .sr(1)
      .m(4)
      .n(8)
      .k(k)
      .a_stride(37)
      .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// Every k in 17..31 crossed with every (m, n), m in 1..4 and n in 1..8; one iteration per case.
TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_DUP, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 17; k < 32; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// Full 4x8 tile for k = 32..160 in steps of 16.
TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_DUP, k_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 32; k <= 160; k += 16) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(4)
      .sr(1)
      .m(4)
      .n(8)
      .k(k)
      .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// Full 4x8 tile for k = 32..160 in steps of 16, with a_stride(163) set on the tester.
TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_DUP, k_div_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 32; k <= 160; k += 16) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(4)
      .sr(1)
      .m(4)
      .n(8)
      .k(k)
      .a_stride(163)
      .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// k = 32..160 in steps of 16 crossed with every (m, n), m in 1..4 and n in 1..8; one iteration per case.
TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_DUP, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 32; k <= 160; k += 16) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// m = 4, every n in 9..15, k = 1..80 step 17.
TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_DUP, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(4)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// m = 4, every n in 9..15, k = 1..80 step 17, with cn_stride(11) set on the tester.
TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_DUP, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(4)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// m = 4, every n in 9..15, k = 1..80 step 17, with a_stride(83) set on the tester.
TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_DUP, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(4)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .a_stride(83)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// Every n in 9..15, k = 1..80 step 17, every m in 1..4; one iteration per case.
TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_DUP, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// m = 4, n in {16, 24} (multiples of nr = 8), k = 1..80 step 17.
TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_DUP, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(4)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// m = 4, n in {16, 24}, k = 1..80 step 17, with cn_stride(11) set on the tester.
TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_DUP, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(4)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// m = 4, n in {16, 24}, k = 1..80 step 17, with a_stride(83) set on the tester.
TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_DUP, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(4)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .a_stride(83)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// n in {16, 24}, k = 1..80 step 17, every m in 1..4; one iteration per case.
TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_DUP, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// cm_stride(11) for every (m, n) with m in 1..4, n in 1..8 and k = 1..80 step 17; one iteration per case.
TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_DUP, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 80; k += 17) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}
TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_DUP,qmin)11839 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_DUP, qmin) { 11840 TEST_REQUIRES_ARM_NEON; 11841 GemmMicrokernelTester() 11842 .mr(4) 11843 .nr(8) 11844 .kr(4) 11845 .sr(1) 11846 .m(4) 11847 .n(8) 11848 .k(16) 11849 .qmin(128) 11850 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 11851 } 11852 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_DUP,qmax)11853 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_DUP, qmax) { 11854 TEST_REQUIRES_ARM_NEON; 11855 GemmMicrokernelTester() 11856 .mr(4) 11857 .nr(8) 11858 .kr(4) 11859 .sr(1) 11860 .m(4) 11861 .n(8) 11862 .k(16) 11863 .qmax(128) 11864 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 11865 } 11866 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_DUP,strided_cm)11867 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_DUP, strided_cm) { 11868 TEST_REQUIRES_ARM_NEON; 11869 GemmMicrokernelTester() 11870 .mr(4) 11871 .nr(8) 11872 .kr(4) 11873 .sr(1) 11874 .m(4) 11875 .n(8) 11876 .k(16) 11877 .cm_stride(11) 11878 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 11879 } 11880 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 11881 11882 11883 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD2R,k_eq_8)11884 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD2R, k_eq_8) { 11885 TEST_REQUIRES_ARM_NEON; 11886 GemmMicrokernelTester() 11887 .mr(4) 11888 .nr(8) 11889 .kr(4) 11890 .sr(1) 11891 .m(4) 11892 .n(8) 11893 .k(8) 11894 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 11895 } 11896 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD2R,strided_cn)11897 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD2R, strided_cn) { 11898 TEST_REQUIRES_ARM_NEON; 
11899 GemmMicrokernelTester() 11900 .mr(4) 11901 .nr(8) 11902 .kr(4) 11903 .sr(1) 11904 .m(4) 11905 .n(8) 11906 .k(8) 11907 .cn_stride(11) 11908 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 11909 } 11910 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD2R,k_eq_8_strided_a)11911 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD2R, k_eq_8_strided_a) { 11912 TEST_REQUIRES_ARM_NEON; 11913 GemmMicrokernelTester() 11914 .mr(4) 11915 .nr(8) 11916 .kr(4) 11917 .sr(1) 11918 .m(4) 11919 .n(8) 11920 .k(8) 11921 .a_stride(11) 11922 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 11923 } 11924 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD2R,k_eq_8_subtile)11925 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD2R, k_eq_8_subtile) { 11926 TEST_REQUIRES_ARM_NEON; 11927 for (uint32_t n = 1; n <= 8; n++) { 11928 for (uint32_t m = 1; m <= 4; m++) { 11929 GemmMicrokernelTester() 11930 .mr(4) 11931 .nr(8) 11932 .kr(4) 11933 .sr(1) 11934 .m(m) 11935 .n(n) 11936 .k(8) 11937 .iterations(1) 11938 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 11939 } 11940 } 11941 } 11942 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD2R,k_eq_8_subtile_m)11943 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD2R, k_eq_8_subtile_m) { 11944 TEST_REQUIRES_ARM_NEON; 11945 for (uint32_t m = 1; m <= 4; m++) { 11946 GemmMicrokernelTester() 11947 .mr(4) 11948 .nr(8) 11949 .kr(4) 11950 .sr(1) 11951 .m(m) 11952 .n(8) 11953 .k(8) 11954 .iterations(1) 11955 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 11956 } 11957 } 11958 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD2R,k_eq_8_subtile_n)11959 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD2R, k_eq_8_subtile_n) { 11960 
TEST_REQUIRES_ARM_NEON; 11961 for (uint32_t n = 1; n <= 8; n++) { 11962 GemmMicrokernelTester() 11963 .mr(4) 11964 .nr(8) 11965 .kr(4) 11966 .sr(1) 11967 .m(4) 11968 .n(n) 11969 .k(8) 11970 .iterations(1) 11971 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 11972 } 11973 } 11974 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD2R,k_lt_8)11975 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD2R, k_lt_8) { 11976 TEST_REQUIRES_ARM_NEON; 11977 for (size_t k = 1; k < 8; k++) { 11978 GemmMicrokernelTester() 11979 .mr(4) 11980 .nr(8) 11981 .kr(4) 11982 .sr(1) 11983 .m(4) 11984 .n(8) 11985 .k(k) 11986 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 11987 } 11988 } 11989 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD2R,k_lt_8_strided_a)11990 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD2R, k_lt_8_strided_a) { 11991 TEST_REQUIRES_ARM_NEON; 11992 for (size_t k = 1; k < 8; k++) { 11993 GemmMicrokernelTester() 11994 .mr(4) 11995 .nr(8) 11996 .kr(4) 11997 .sr(1) 11998 .m(4) 11999 .n(8) 12000 .k(k) 12001 .a_stride(11) 12002 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12003 } 12004 } 12005 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD2R,k_lt_8_subtile)12006 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD2R, k_lt_8_subtile) { 12007 TEST_REQUIRES_ARM_NEON; 12008 for (size_t k = 1; k < 8; k++) { 12009 for (uint32_t n = 1; n <= 8; n++) { 12010 for (uint32_t m = 1; m <= 4; m++) { 12011 GemmMicrokernelTester() 12012 .mr(4) 12013 .nr(8) 12014 .kr(4) 12015 .sr(1) 12016 .m(m) 12017 .n(n) 12018 .k(k) 12019 .iterations(1) 12020 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12021 } 12022 } 12023 } 12024 } 12025 
TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD2R,k_gt_8)12026 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD2R, k_gt_8) { 12027 TEST_REQUIRES_ARM_NEON; 12028 for (size_t k = 9; k < 16; k++) { 12029 GemmMicrokernelTester() 12030 .mr(4) 12031 .nr(8) 12032 .kr(4) 12033 .sr(1) 12034 .m(4) 12035 .n(8) 12036 .k(k) 12037 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12038 } 12039 } 12040 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD2R,k_gt_8_strided_a)12041 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD2R, k_gt_8_strided_a) { 12042 TEST_REQUIRES_ARM_NEON; 12043 for (size_t k = 9; k < 16; k++) { 12044 GemmMicrokernelTester() 12045 .mr(4) 12046 .nr(8) 12047 .kr(4) 12048 .sr(1) 12049 .m(4) 12050 .n(8) 12051 .k(k) 12052 .a_stride(19) 12053 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12054 } 12055 } 12056 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD2R,k_gt_8_subtile)12057 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD2R, k_gt_8_subtile) { 12058 TEST_REQUIRES_ARM_NEON; 12059 for (size_t k = 9; k < 16; k++) { 12060 for (uint32_t n = 1; n <= 8; n++) { 12061 for (uint32_t m = 1; m <= 4; m++) { 12062 GemmMicrokernelTester() 12063 .mr(4) 12064 .nr(8) 12065 .kr(4) 12066 .sr(1) 12067 .m(m) 12068 .n(n) 12069 .k(k) 12070 .iterations(1) 12071 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12072 } 12073 } 12074 } 12075 } 12076 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD2R,k_div_8)12077 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD2R, k_div_8) { 12078 TEST_REQUIRES_ARM_NEON; 12079 for (size_t k = 16; k <= 80; k += 8) { 12080 GemmMicrokernelTester() 12081 .mr(4) 12082 .nr(8) 12083 .kr(4) 12084 .sr(1) 12085 .m(4) 12086 .n(8) 12087 .k(k) 12088 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld2r, 
xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12089 } 12090 } 12091 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD2R,k_div_8_strided_a)12092 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD2R, k_div_8_strided_a) { 12093 TEST_REQUIRES_ARM_NEON; 12094 for (size_t k = 16; k <= 80; k += 8) { 12095 GemmMicrokernelTester() 12096 .mr(4) 12097 .nr(8) 12098 .kr(4) 12099 .sr(1) 12100 .m(4) 12101 .n(8) 12102 .k(k) 12103 .a_stride(83) 12104 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12105 } 12106 } 12107 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD2R,k_div_8_subtile)12108 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD2R, k_div_8_subtile) { 12109 TEST_REQUIRES_ARM_NEON; 12110 for (size_t k = 16; k <= 80; k += 8) { 12111 for (uint32_t n = 1; n <= 8; n++) { 12112 for (uint32_t m = 1; m <= 4; m++) { 12113 GemmMicrokernelTester() 12114 .mr(4) 12115 .nr(8) 12116 .kr(4) 12117 .sr(1) 12118 .m(m) 12119 .n(n) 12120 .k(k) 12121 .iterations(1) 12122 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12123 } 12124 } 12125 } 12126 } 12127 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD2R,n_gt_8)12128 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD2R, n_gt_8) { 12129 TEST_REQUIRES_ARM_NEON; 12130 for (uint32_t n = 9; n < 16; n++) { 12131 for (size_t k = 1; k <= 40; k += 9) { 12132 GemmMicrokernelTester() 12133 .mr(4) 12134 .nr(8) 12135 .kr(4) 12136 .sr(1) 12137 .m(4) 12138 .n(n) 12139 .k(k) 12140 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12141 } 12142 } 12143 } 12144 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD2R,n_gt_8_strided_cn)12145 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD2R, n_gt_8_strided_cn) { 12146 TEST_REQUIRES_ARM_NEON; 12147 for (uint32_t n = 9; n < 16; n++) { 12148 for (size_t k = 1; k <= 
40; k += 9) { 12149 GemmMicrokernelTester() 12150 .mr(4) 12151 .nr(8) 12152 .kr(4) 12153 .sr(1) 12154 .m(4) 12155 .n(n) 12156 .k(k) 12157 .cn_stride(11) 12158 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12159 } 12160 } 12161 } 12162 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD2R,n_gt_8_strided_a)12163 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD2R, n_gt_8_strided_a) { 12164 TEST_REQUIRES_ARM_NEON; 12165 for (uint32_t n = 9; n < 16; n++) { 12166 for (size_t k = 1; k <= 40; k += 9) { 12167 GemmMicrokernelTester() 12168 .mr(4) 12169 .nr(8) 12170 .kr(4) 12171 .sr(1) 12172 .m(4) 12173 .n(n) 12174 .k(k) 12175 .a_stride(43) 12176 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12177 } 12178 } 12179 } 12180 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD2R,n_gt_8_subtile)12181 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD2R, n_gt_8_subtile) { 12182 TEST_REQUIRES_ARM_NEON; 12183 for (uint32_t n = 9; n < 16; n++) { 12184 for (size_t k = 1; k <= 40; k += 9) { 12185 for (uint32_t m = 1; m <= 4; m++) { 12186 GemmMicrokernelTester() 12187 .mr(4) 12188 .nr(8) 12189 .kr(4) 12190 .sr(1) 12191 .m(m) 12192 .n(n) 12193 .k(k) 12194 .iterations(1) 12195 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12196 } 12197 } 12198 } 12199 } 12200 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD2R,n_div_8)12201 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD2R, n_div_8) { 12202 TEST_REQUIRES_ARM_NEON; 12203 for (uint32_t n = 16; n <= 24; n += 8) { 12204 for (size_t k = 1; k <= 40; k += 9) { 12205 GemmMicrokernelTester() 12206 .mr(4) 12207 .nr(8) 12208 .kr(4) 12209 .sr(1) 12210 .m(4) 12211 .n(n) 12212 .k(k) 12213 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, 
xnn_qs8_requantize_rndnu); 12214 } 12215 } 12216 } 12217 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD2R,n_div_8_strided_cn)12218 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD2R, n_div_8_strided_cn) { 12219 TEST_REQUIRES_ARM_NEON; 12220 for (uint32_t n = 16; n <= 24; n += 8) { 12221 for (size_t k = 1; k <= 40; k += 9) { 12222 GemmMicrokernelTester() 12223 .mr(4) 12224 .nr(8) 12225 .kr(4) 12226 .sr(1) 12227 .m(4) 12228 .n(n) 12229 .k(k) 12230 .cn_stride(11) 12231 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12232 } 12233 } 12234 } 12235 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD2R,n_div_8_strided_a)12236 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD2R, n_div_8_strided_a) { 12237 TEST_REQUIRES_ARM_NEON; 12238 for (uint32_t n = 16; n <= 24; n += 8) { 12239 for (size_t k = 1; k <= 40; k += 9) { 12240 GemmMicrokernelTester() 12241 .mr(4) 12242 .nr(8) 12243 .kr(4) 12244 .sr(1) 12245 .m(4) 12246 .n(n) 12247 .k(k) 12248 .a_stride(43) 12249 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12250 } 12251 } 12252 } 12253 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD2R,n_div_8_subtile)12254 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD2R, n_div_8_subtile) { 12255 TEST_REQUIRES_ARM_NEON; 12256 for (uint32_t n = 16; n <= 24; n += 8) { 12257 for (size_t k = 1; k <= 40; k += 9) { 12258 for (uint32_t m = 1; m <= 4; m++) { 12259 GemmMicrokernelTester() 12260 .mr(4) 12261 .nr(8) 12262 .kr(4) 12263 .sr(1) 12264 .m(m) 12265 .n(n) 12266 .k(k) 12267 .iterations(1) 12268 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12269 } 12270 } 12271 } 12272 } 12273 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD2R,strided_cm_subtile)12274 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD2R, strided_cm_subtile) { 12275 
TEST_REQUIRES_ARM_NEON; 12276 for (size_t k = 1; k <= 40; k += 9) { 12277 for (uint32_t n = 1; n <= 8; n++) { 12278 for (uint32_t m = 1; m <= 4; m++) { 12279 GemmMicrokernelTester() 12280 .mr(4) 12281 .nr(8) 12282 .kr(4) 12283 .sr(1) 12284 .m(m) 12285 .n(n) 12286 .k(k) 12287 .cm_stride(11) 12288 .iterations(1) 12289 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12290 } 12291 } 12292 } 12293 } 12294 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD2R,qmin)12295 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD2R, qmin) { 12296 TEST_REQUIRES_ARM_NEON; 12297 GemmMicrokernelTester() 12298 .mr(4) 12299 .nr(8) 12300 .kr(4) 12301 .sr(1) 12302 .m(4) 12303 .n(8) 12304 .k(8) 12305 .qmin(128) 12306 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12307 } 12308 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD2R,qmax)12309 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD2R, qmax) { 12310 TEST_REQUIRES_ARM_NEON; 12311 GemmMicrokernelTester() 12312 .mr(4) 12313 .nr(8) 12314 .kr(4) 12315 .sr(1) 12316 .m(4) 12317 .n(8) 12318 .k(8) 12319 .qmax(128) 12320 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12321 } 12322 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD2R,strided_cm)12323 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD2R, strided_cm) { 12324 TEST_REQUIRES_ARM_NEON; 12325 GemmMicrokernelTester() 12326 .mr(4) 12327 .nr(8) 12328 .kr(4) 12329 .sr(1) 12330 .m(4) 12331 .n(8) 12332 .k(8) 12333 .cm_stride(11) 12334 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12335 } 12336 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 12337 12338 12339 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD2R,k_eq_8)12340 
// Tests for xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r, from
// k_eq_8 through k_lt_8_strided_a. Every tester below is configured with
// mr=4, nr=16, kr=2, sr=1.

TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD2R, k_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  // Single m = 4, n = 16, k = 8 run.
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(2).sr(1)
    .m(4).n(16).k(8)
    .Test(
      xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
}

TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD2R, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  // Single m = 4, n = 16, k = 8 run with cn_stride set to 19.
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(2).sr(1)
    .m(4).n(16).k(8)
    .cn_stride(19)
    .Test(
      xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
}

TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD2R, k_eq_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  // Single m = 4, n = 16, k = 8 run with a_stride set to 11.
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(2).sr(1)
    .m(4).n(16).k(8)
    .a_stride(11)
    .Test(
      xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
}

TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD2R, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // k = 8, n = 1..16, m = 1..4; iterations set to 1 per configuration.
  for (uint32_t n = 1; n <= 16; ++n) {
    for (uint32_t m = 1; m <= 4; ++m) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(2).sr(1)
        .m(m).n(n).k(8)
        .iterations(1)
        .Test(
          xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}

TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD2R, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  // k = 8, n = 16, m = 1..4; iterations set to 1.
  for (uint32_t m = 1; m <= 4; ++m) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(2).sr(1)
      .m(m).n(16).k(8)
      .iterations(1)
      .Test(
        xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}

TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD2R, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  // k = 8, m = 4, n = 1..16; iterations set to 1.
  for (uint32_t n = 1; n <= 16; ++n) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(2).sr(1)
      .m(4).n(n).k(8)
      .iterations(1)
      .Test(
        xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}

TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD2R, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  // k = 1..7 with m = 4 and n = 16.
  for (size_t k = 1; k < 8; ++k) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(2).sr(1)
      .m(4).n(16).k(k)
      .Test(
        xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}

TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD2R, k_lt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  // k = 1..7 with m = 4, n = 16 and a_stride set to 11.
  for (size_t k = 1; k < 8; ++k) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(2).sr(1)
      .m(4).n(16).k(k)
      .a_stride(11)
      .Test(
        xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}

// Tests for xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r, from
// k_lt_8_subtile through qmax. Every tester below is configured with mr=4,
// nr=16, kr=2, sr=1.

TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD2R, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // k = 1..7, n = 1..16, m = 1..4; iterations set to 1 per configuration.
  for (size_t k = 1; k < 8; ++k) {
    for (uint32_t n = 1; n <= 16; ++n) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(2).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(
            xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
      }
    }
  }
}

TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD2R, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  // k = 9..15 with m = 4 and n = 16.
  for (size_t k = 9; k < 16; ++k) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(2).sr(1)
      .m(4).n(16).k(k)
      .Test(
        xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}

TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD2R, k_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  // k = 9..15 with m = 4, n = 16 and a_stride set to 19.
  for (size_t k = 9; k < 16; ++k) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(2).sr(1)
      .m(4).n(16).k(k)
      .a_stride(19)
      .Test(
        xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}

TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD2R, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // k = 9..15, n = 1..16, m = 1..4; iterations set to 1.
  for (size_t k = 9; k < 16; ++k) {
    for (uint32_t n = 1; n <= 16; ++n) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(2).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(
            xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
      }
    }
  }
}

TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD2R, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  // k = 16, 24, ..., 80 with m = 4 and n = 16.
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(2).sr(1)
      .m(4).n(16).k(k)
      .Test(
        xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}

TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD2R, k_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  // k = 16, 24, ..., 80 with m = 4, n = 16 and a_stride set to 83.
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(2).sr(1)
      .m(4).n(16).k(k)
      .a_stride(83)
      .Test(
        xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}

TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD2R, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // k = 16, 24, ..., 80, n = 1..16, m = 1..4; iterations set to 1.
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 16; ++n) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(2).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(
            xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
      }
    }
  }
}

TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD2R, n_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  // n = 17..31, k = 1, 10, 19, 28, 37 with m = 4.
  for (uint32_t n = 17; n < 32; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(2).sr(1)
        .m(4).n(n).k(k)
        .Test(
          xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}

TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD2R, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  // n = 17..31, k = 1, 10, 19, 28, 37 with m = 4 and cn_stride set to 19.
  for (uint32_t n = 17; n < 32; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(2).sr(1)
        .m(4).n(n).k(k)
        .cn_stride(19)
        .Test(
          xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}

TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD2R, n_gt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  // n = 17..31, k = 1, 10, 19, 28, 37 with m = 4 and a_stride set to 43.
  for (uint32_t n = 17; n < 32; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(2).sr(1)
        .m(4).n(n).k(k)
        .a_stride(43)
        .Test(
          xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}

TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD2R, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // n = 17..31, k = 1, 10, 19, 28, 37, m = 1..4; iterations set to 1.
  for (uint32_t n = 17; n < 32; ++n) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(2).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(
            xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
      }
    }
  }
}

TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD2R, n_div_16) {
  TEST_REQUIRES_ARM_NEON;
  // n = 32, 48 and k = 1, 10, 19, 28, 37 with m = 4.
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(2).sr(1)
        .m(4).n(n).k(k)
        .Test(
          xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}

TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD2R, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  // n = 32, 48 and k = 1, 10, 19, 28, 37 with m = 4 and cn_stride set to 19.
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(2).sr(1)
        .m(4).n(n).k(k)
        .cn_stride(19)
        .Test(
          xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}

TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD2R, n_div_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  // n = 32, 48 and k = 1, 10, 19, 28, 37 with m = 4 and a_stride set to 43.
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(2).sr(1)
        .m(4).n(n).k(k)
        .a_stride(43)
        .Test(
          xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}

TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD2R, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // n = 32, 48, k = 1, 10, 19, 28, 37, m = 1..4; iterations set to 1.
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(2).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(
            xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
      }
    }
  }
}

TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD2R, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // k = 1, 10, 19, 28, 37, n = 1..16, m = 1..4 with cm_stride set to 19;
  // iterations set to 1.
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 16; ++n) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(2).sr(1)
          .m(m).n(n).k(k)
          .cm_stride(19)
          .iterations(1)
          .Test(
            xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
      }
    }
  }
}

TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD2R, qmin) {
  TEST_REQUIRES_ARM_NEON;
  // Single m = 4, n = 16, k = 8 run with qmin set to 128.
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(2).sr(1)
    .m(4).n(16).k(8)
    .qmin(128)
    .Test(
      xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
}

TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD2R, qmax) {
  TEST_REQUIRES_ARM_NEON;
  // Single m = 4, n = 16, k = 8 run with qmax set to 128.
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(2).sr(1)
    .m(4).n(16).k(8)
    .qmax(128)
    .Test(
      xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
}

TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD2R,strided_cm)12779 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD2R, strided_cm) { 12780 TEST_REQUIRES_ARM_NEON; 12781 GemmMicrokernelTester() 12782 .mr(4) 12783 .nr(16) 12784 .kr(2) 12785 .sr(1) 12786 .m(4) 12787 .n(16) 12788 .k(8) 12789 .cm_stride(19) 12790 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12791 } 12792 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 12793 12794 12795 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD1R,k_eq_16)12796 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD1R, k_eq_16) { 12797 TEST_REQUIRES_ARM_NEON; 12798 GemmMicrokernelTester() 12799 .mr(4) 12800 .nr(16) 12801 .kr(4) 12802 .sr(1) 12803 .m(4) 12804 .n(16) 12805 .k(16) 12806 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12807 } 12808 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD1R,strided_cn)12809 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD1R, strided_cn) { 12810 TEST_REQUIRES_ARM_NEON; 12811 GemmMicrokernelTester() 12812 .mr(4) 12813 .nr(16) 12814 .kr(4) 12815 .sr(1) 12816 .m(4) 12817 .n(16) 12818 .k(16) 12819 .cn_stride(19) 12820 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12821 } 12822 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD1R,k_eq_16_strided_a)12823 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD1R, k_eq_16_strided_a) { 12824 TEST_REQUIRES_ARM_NEON; 12825 GemmMicrokernelTester() 12826 .mr(4) 12827 .nr(16) 12828 .kr(4) 12829 .sr(1) 12830 .m(4) 12831 .n(16) 12832 .k(16) 12833 .a_stride(19) 12834 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12835 } 12836 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD1R,k_eq_16_subtile)12837 
// Tests for xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r, from
// k_eq_16_subtile through n_div_16_strided_cn. Every tester below is
// configured with mr=4, nr=16, kr=4, sr=1.

TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD1R, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // k = 16, n = 1..16, m = 1..4; iterations set to 1 per configuration.
  for (uint32_t n = 1; n <= 16; ++n) {
    for (uint32_t m = 1; m <= 4; ++m) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(4).sr(1)
        .m(m).n(n).k(16)
        .iterations(1)
        .Test(
          xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}

TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD1R, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  // k = 16, n = 16, m = 1..4; iterations set to 1.
  for (uint32_t m = 1; m <= 4; ++m) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(4).sr(1)
      .m(m).n(16).k(16)
      .iterations(1)
      .Test(
        xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}

TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD1R, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  // k = 16, m = 4, n = 1..16; iterations set to 1.
  for (uint32_t n = 1; n <= 16; ++n) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(4).sr(1)
      .m(4).n(n).k(16)
      .iterations(1)
      .Test(
        xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}

TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD1R, k_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  // k = 1..15 with m = 4 and n = 16.
  for (size_t k = 1; k < 16; ++k) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(4).sr(1)
      .m(4).n(16).k(k)
      .Test(
        xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}

TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD1R, k_lt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  // k = 1..15 with m = 4, n = 16 and a_stride set to 19.
  for (size_t k = 1; k < 16; ++k) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(4).sr(1)
      .m(4).n(16).k(k)
      .a_stride(19)
      .Test(
        xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}

TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD1R, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // k = 1..15, n = 1..16, m = 1..4; iterations set to 1.
  for (size_t k = 1; k < 16; ++k) {
    for (uint32_t n = 1; n <= 16; ++n) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(4).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(
            xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
      }
    }
  }
}

TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD1R, k_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  // k = 17..31 with m = 4 and n = 16.
  for (size_t k = 17; k < 32; ++k) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(4).sr(1)
      .m(4).n(16).k(k)
      .Test(
        xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}

TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD1R, k_gt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  // k = 17..31 with m = 4, n = 16 and a_stride set to 37.
  for (size_t k = 17; k < 32; ++k) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(4).sr(1)
      .m(4).n(16).k(k)
      .a_stride(37)
      .Test(
        xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}

TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD1R, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // k = 17..31, n = 1..16, m = 1..4; iterations set to 1.
  for (size_t k = 17; k < 32; ++k) {
    for (uint32_t n = 1; n <= 16; ++n) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(4).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(
            xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
      }
    }
  }
}

TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD1R, k_div_16) {
  TEST_REQUIRES_ARM_NEON;
  // k = 32, 48, ..., 160 with m = 4 and n = 16.
  for (size_t k = 32; k <= 160; k += 16) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(4).sr(1)
      .m(4).n(16).k(k)
      .Test(
        xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}

TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD1R, k_div_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  // k = 32, 48, ..., 160 with m = 4, n = 16 and a_stride set to 163.
  for (size_t k = 32; k <= 160; k += 16) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(4).sr(1)
      .m(4).n(16).k(k)
      .a_stride(163)
      .Test(
        xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}

TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD1R, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // k = 32, 48, ..., 160, n = 1..16, m = 1..4; iterations set to 1.
  for (size_t k = 32; k <= 160; k += 16) {
    for (uint32_t n = 1; n <= 16; ++n) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(4).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(
            xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
      }
    }
  }
}

TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD1R, n_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  // n = 17..31, k = 1, 18, 35, 52, 69 with m = 4.
  for (uint32_t n = 17; n < 32; ++n) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(4).sr(1)
        .m(4).n(n).k(k)
        .Test(
          xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}

TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD1R, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  // n = 17..31, k = 1, 18, 35, 52, 69 with m = 4 and cn_stride set to 19.
  for (uint32_t n = 17; n < 32; ++n) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(4).sr(1)
        .m(4).n(n).k(k)
        .cn_stride(19)
        .Test(
          xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}

TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD1R, n_gt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  // n = 17..31, k = 1, 18, 35, 52, 69 with m = 4 and a_stride set to 83.
  for (uint32_t n = 17; n < 32; ++n) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(4).sr(1)
        .m(4).n(n).k(k)
        .a_stride(83)
        .Test(
          xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}

TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD1R, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  // n = 17..31, k = 1, 18, 35, 52, 69, m = 1..4; iterations set to 1.
  for (uint32_t n = 17; n < 32; ++n) {
    for (size_t k = 1; k <= 80; k += 17) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(4).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(
            xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
      }
    }
  }
}

TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD1R, n_div_16) {
  TEST_REQUIRES_ARM_NEON;
  // n = 32, 48 and k = 1, 18, 35, 52, 69 with m = 4.
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(4).sr(1)
        .m(4).n(n).k(k)
        .Test(
          xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}

TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD1R, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  // n = 32, 48 and k = 1, 18, 35, 52, 69 with m = 4 and cn_stride set to 19.
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(4).sr(1)
        .m(4).n(n).k(k)
        .cn_stride(19)
        .Test(
          xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}

TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD1R, n_div_16_strided_a) { 13149 TEST_REQUIRES_ARM_NEON; 13150 for (uint32_t n = 32; n <= 48; n += 16) { 13151 for (size_t k = 1; k <= 80; k += 17) { 13152 GemmMicrokernelTester() 13153 .mr(4) 13154 .nr(16) 13155 .kr(4) 13156 .sr(1) 13157 .m(4) 13158 .n(n) 13159 .k(k) 13160 .a_stride(83) 13161 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13162 } 13163 } 13164 } 13165 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD1R,n_div_16_subtile)13166 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD1R, n_div_16_subtile) { 13167 TEST_REQUIRES_ARM_NEON; 13168 for (uint32_t n = 32; n <= 48; n += 16) { 13169 for (size_t k = 1; k <= 80; k += 17) { 13170 for (uint32_t m = 1; m <= 4; m++) { 13171 GemmMicrokernelTester() 13172 .mr(4) 13173 .nr(16) 13174 .kr(4) 13175 .sr(1) 13176 .m(m) 13177 .n(n) 13178 .k(k) 13179 .iterations(1) 13180 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13181 } 13182 } 13183 } 13184 } 13185 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD1R,strided_cm_subtile)13186 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD1R, strided_cm_subtile) { 13187 TEST_REQUIRES_ARM_NEON; 13188 for (size_t k = 1; k <= 80; k += 17) { 13189 for (uint32_t n = 1; n <= 16; n++) { 13190 for (uint32_t m = 1; m <= 4; m++) { 13191 GemmMicrokernelTester() 13192 .mr(4) 13193 .nr(16) 13194 .kr(4) 13195 .sr(1) 13196 .m(m) 13197 .n(n) 13198 .k(k) 13199 .cm_stride(19) 13200 .iterations(1) 13201 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13202 } 13203 } 13204 } 13205 } 13206 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD1R,qmin)13207 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD1R, qmin) { 13208 TEST_REQUIRES_ARM_NEON; 13209 GemmMicrokernelTester() 13210 .mr(4) 13211 .nr(16) 13212 
.kr(4) 13213 .sr(1) 13214 .m(4) 13215 .n(16) 13216 .k(16) 13217 .qmin(128) 13218 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13219 } 13220 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD1R,qmax)13221 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD1R, qmax) { 13222 TEST_REQUIRES_ARM_NEON; 13223 GemmMicrokernelTester() 13224 .mr(4) 13225 .nr(16) 13226 .kr(4) 13227 .sr(1) 13228 .m(4) 13229 .n(16) 13230 .k(16) 13231 .qmax(128) 13232 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13233 } 13234 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD1R,strided_cm)13235 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD1R, strided_cm) { 13236 TEST_REQUIRES_ARM_NEON; 13237 GemmMicrokernelTester() 13238 .mr(4) 13239 .nr(16) 13240 .kr(4) 13241 .sr(1) 13242 .m(4) 13243 .n(16) 13244 .k(16) 13245 .cm_stride(19) 13246 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13247 } 13248 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 13249 13250 13251 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_DUP,k_eq_8)13252 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_DUP, k_eq_8) { 13253 TEST_REQUIRES_ARM_NEON; 13254 GemmMicrokernelTester() 13255 .mr(4) 13256 .nr(16) 13257 .kr(4) 13258 .sr(1) 13259 .m(4) 13260 .n(16) 13261 .k(8) 13262 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13263 } 13264 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_DUP,strided_cn)13265 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_DUP, strided_cn) { 13266 TEST_REQUIRES_ARM_NEON; 13267 GemmMicrokernelTester() 13268 .mr(4) 13269 .nr(16) 13270 .kr(4) 13271 .sr(1) 13272 .m(4) 13273 .n(16) 13274 .k(8) 13275 .cn_stride(19) 13276 
.Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13277 } 13278 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_DUP,k_eq_8_strided_a)13279 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_DUP, k_eq_8_strided_a) { 13280 TEST_REQUIRES_ARM_NEON; 13281 GemmMicrokernelTester() 13282 .mr(4) 13283 .nr(16) 13284 .kr(4) 13285 .sr(1) 13286 .m(4) 13287 .n(16) 13288 .k(8) 13289 .a_stride(11) 13290 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13291 } 13292 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_DUP,k_eq_8_subtile)13293 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_DUP, k_eq_8_subtile) { 13294 TEST_REQUIRES_ARM_NEON; 13295 for (uint32_t n = 1; n <= 16; n++) { 13296 for (uint32_t m = 1; m <= 4; m++) { 13297 GemmMicrokernelTester() 13298 .mr(4) 13299 .nr(16) 13300 .kr(4) 13301 .sr(1) 13302 .m(m) 13303 .n(n) 13304 .k(8) 13305 .iterations(1) 13306 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13307 } 13308 } 13309 } 13310 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_DUP,k_eq_8_subtile_m)13311 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_DUP, k_eq_8_subtile_m) { 13312 TEST_REQUIRES_ARM_NEON; 13313 for (uint32_t m = 1; m <= 4; m++) { 13314 GemmMicrokernelTester() 13315 .mr(4) 13316 .nr(16) 13317 .kr(4) 13318 .sr(1) 13319 .m(m) 13320 .n(16) 13321 .k(8) 13322 .iterations(1) 13323 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13324 } 13325 } 13326 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_DUP,k_eq_8_subtile_n)13327 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_DUP, k_eq_8_subtile_n) { 13328 TEST_REQUIRES_ARM_NEON; 13329 for (uint32_t n = 1; n <= 16; n++) { 13330 GemmMicrokernelTester() 13331 .mr(4) 13332 .nr(16) 13333 .kr(4) 13334 .sr(1) 13335 
.m(4) 13336 .n(n) 13337 .k(8) 13338 .iterations(1) 13339 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13340 } 13341 } 13342 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_DUP,k_lt_8)13343 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_DUP, k_lt_8) { 13344 TEST_REQUIRES_ARM_NEON; 13345 for (size_t k = 1; k < 8; k++) { 13346 GemmMicrokernelTester() 13347 .mr(4) 13348 .nr(16) 13349 .kr(4) 13350 .sr(1) 13351 .m(4) 13352 .n(16) 13353 .k(k) 13354 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13355 } 13356 } 13357 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_DUP,k_lt_8_strided_a)13358 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_DUP, k_lt_8_strided_a) { 13359 TEST_REQUIRES_ARM_NEON; 13360 for (size_t k = 1; k < 8; k++) { 13361 GemmMicrokernelTester() 13362 .mr(4) 13363 .nr(16) 13364 .kr(4) 13365 .sr(1) 13366 .m(4) 13367 .n(16) 13368 .k(k) 13369 .a_stride(11) 13370 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13371 } 13372 } 13373 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_DUP,k_lt_8_subtile)13374 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_DUP, k_lt_8_subtile) { 13375 TEST_REQUIRES_ARM_NEON; 13376 for (size_t k = 1; k < 8; k++) { 13377 for (uint32_t n = 1; n <= 16; n++) { 13378 for (uint32_t m = 1; m <= 4; m++) { 13379 GemmMicrokernelTester() 13380 .mr(4) 13381 .nr(16) 13382 .kr(4) 13383 .sr(1) 13384 .m(m) 13385 .n(n) 13386 .k(k) 13387 .iterations(1) 13388 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13389 } 13390 } 13391 } 13392 } 13393 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_DUP,k_gt_8)13394 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_DUP, k_gt_8) { 13395 TEST_REQUIRES_ARM_NEON; 13396 for (size_t k = 9; k < 16; k++) 
{ 13397 GemmMicrokernelTester() 13398 .mr(4) 13399 .nr(16) 13400 .kr(4) 13401 .sr(1) 13402 .m(4) 13403 .n(16) 13404 .k(k) 13405 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13406 } 13407 } 13408 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_DUP,k_gt_8_strided_a)13409 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_DUP, k_gt_8_strided_a) { 13410 TEST_REQUIRES_ARM_NEON; 13411 for (size_t k = 9; k < 16; k++) { 13412 GemmMicrokernelTester() 13413 .mr(4) 13414 .nr(16) 13415 .kr(4) 13416 .sr(1) 13417 .m(4) 13418 .n(16) 13419 .k(k) 13420 .a_stride(19) 13421 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13422 } 13423 } 13424 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_DUP,k_gt_8_subtile)13425 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_DUP, k_gt_8_subtile) { 13426 TEST_REQUIRES_ARM_NEON; 13427 for (size_t k = 9; k < 16; k++) { 13428 for (uint32_t n = 1; n <= 16; n++) { 13429 for (uint32_t m = 1; m <= 4; m++) { 13430 GemmMicrokernelTester() 13431 .mr(4) 13432 .nr(16) 13433 .kr(4) 13434 .sr(1) 13435 .m(m) 13436 .n(n) 13437 .k(k) 13438 .iterations(1) 13439 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13440 } 13441 } 13442 } 13443 } 13444 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_DUP,k_div_8)13445 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_DUP, k_div_8) { 13446 TEST_REQUIRES_ARM_NEON; 13447 for (size_t k = 16; k <= 80; k += 8) { 13448 GemmMicrokernelTester() 13449 .mr(4) 13450 .nr(16) 13451 .kr(4) 13452 .sr(1) 13453 .m(4) 13454 .n(16) 13455 .k(k) 13456 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13457 } 13458 } 13459 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_DUP,k_div_8_strided_a)13460 
// m = 4, n = 16, k = 16..80 in steps of 8, with a_stride(83).
TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_DUP, k_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(4)
      .sr(1)
      .m(4)
      .n(16)
      .k(k)
      .a_stride(83)
      .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// k = 16..80 (step 8), every n in 1..16 and m in 1..4, one iteration each.
TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_DUP, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(16)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// m = 4, every n in 17..31, k = 1..40 in steps of 9.
TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_DUP, n_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(4)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// m = 4, every n in 17..31, k = 1..40 (step 9), with cn_stride(19).
TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_DUP, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(4)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .cn_stride(19)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// m = 4, every n in 17..31, k = 1..40 (step 9), with a_stride(43).
TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_DUP, n_gt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(4)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .a_stride(43)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// Every n in 17..31, k = 1..40 (step 9), every m in 1..4, one iteration each.
TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_DUP, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(16)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// m = 4, n in {32, 48}, k = 1..40 in steps of 9.
TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_DUP, n_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(4)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}
TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_DUP, n_div_16_strided_cn) { 13587 TEST_REQUIRES_ARM_NEON; 13588 for (uint32_t n = 32; n <= 48; n += 16) { 13589 for (size_t k = 1; k <= 40; k += 9) { 13590 GemmMicrokernelTester() 13591 .mr(4) 13592 .nr(16) 13593 .kr(4) 13594 .sr(1) 13595 .m(4) 13596 .n(n) 13597 .k(k) 13598 .cn_stride(19) 13599 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13600 } 13601 } 13602 } 13603 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_DUP,n_div_16_strided_a)13604 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_DUP, n_div_16_strided_a) { 13605 TEST_REQUIRES_ARM_NEON; 13606 for (uint32_t n = 32; n <= 48; n += 16) { 13607 for (size_t k = 1; k <= 40; k += 9) { 13608 GemmMicrokernelTester() 13609 .mr(4) 13610 .nr(16) 13611 .kr(4) 13612 .sr(1) 13613 .m(4) 13614 .n(n) 13615 .k(k) 13616 .a_stride(43) 13617 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13618 } 13619 } 13620 } 13621 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_DUP,n_div_16_subtile)13622 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_DUP, n_div_16_subtile) { 13623 TEST_REQUIRES_ARM_NEON; 13624 for (uint32_t n = 32; n <= 48; n += 16) { 13625 for (size_t k = 1; k <= 40; k += 9) { 13626 for (uint32_t m = 1; m <= 4; m++) { 13627 GemmMicrokernelTester() 13628 .mr(4) 13629 .nr(16) 13630 .kr(4) 13631 .sr(1) 13632 .m(m) 13633 .n(n) 13634 .k(k) 13635 .iterations(1) 13636 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13637 } 13638 } 13639 } 13640 } 13641 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_DUP,strided_cm_subtile)13642 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_DUP, strided_cm_subtile) { 13643 TEST_REQUIRES_ARM_NEON; 13644 for (size_t k = 1; k <= 40; k += 9) { 13645 for (uint32_t n = 1; n <= 16; n++) { 13646 for (uint32_t m = 1; m 
<= 4; m++) { 13647 GemmMicrokernelTester() 13648 .mr(4) 13649 .nr(16) 13650 .kr(4) 13651 .sr(1) 13652 .m(m) 13653 .n(n) 13654 .k(k) 13655 .cm_stride(19) 13656 .iterations(1) 13657 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13658 } 13659 } 13660 } 13661 } 13662 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_DUP,qmin)13663 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_DUP, qmin) { 13664 TEST_REQUIRES_ARM_NEON; 13665 GemmMicrokernelTester() 13666 .mr(4) 13667 .nr(16) 13668 .kr(4) 13669 .sr(1) 13670 .m(4) 13671 .n(16) 13672 .k(8) 13673 .qmin(128) 13674 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13675 } 13676 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_DUP,qmax)13677 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_DUP, qmax) { 13678 TEST_REQUIRES_ARM_NEON; 13679 GemmMicrokernelTester() 13680 .mr(4) 13681 .nr(16) 13682 .kr(4) 13683 .sr(1) 13684 .m(4) 13685 .n(16) 13686 .k(8) 13687 .qmax(128) 13688 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13689 } 13690 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_DUP,strided_cm)13691 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_DUP, strided_cm) { 13692 TEST_REQUIRES_ARM_NEON; 13693 GemmMicrokernelTester() 13694 .mr(4) 13695 .nr(16) 13696 .kr(4) 13697 .sr(1) 13698 .m(4) 13699 .n(16) 13700 .k(8) 13701 .cm_stride(19) 13702 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13703 } 13704 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 13705 13706 13707 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD1R,k_eq_8)13708 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD1R, k_eq_8) { 13709 TEST_REQUIRES_ARM_NEON; 13710 GemmMicrokernelTester() 13711 .mr(4) 13712 .nr(16) 13713 
.kr(4) 13714 .sr(1) 13715 .m(4) 13716 .n(16) 13717 .k(8) 13718 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13719 } 13720 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD1R,strided_cn)13721 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD1R, strided_cn) { 13722 TEST_REQUIRES_ARM_NEON; 13723 GemmMicrokernelTester() 13724 .mr(4) 13725 .nr(16) 13726 .kr(4) 13727 .sr(1) 13728 .m(4) 13729 .n(16) 13730 .k(8) 13731 .cn_stride(19) 13732 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13733 } 13734 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD1R,k_eq_8_strided_a)13735 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD1R, k_eq_8_strided_a) { 13736 TEST_REQUIRES_ARM_NEON; 13737 GemmMicrokernelTester() 13738 .mr(4) 13739 .nr(16) 13740 .kr(4) 13741 .sr(1) 13742 .m(4) 13743 .n(16) 13744 .k(8) 13745 .a_stride(11) 13746 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13747 } 13748 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD1R,k_eq_8_subtile)13749 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD1R, k_eq_8_subtile) { 13750 TEST_REQUIRES_ARM_NEON; 13751 for (uint32_t n = 1; n <= 16; n++) { 13752 for (uint32_t m = 1; m <= 4; m++) { 13753 GemmMicrokernelTester() 13754 .mr(4) 13755 .nr(16) 13756 .kr(4) 13757 .sr(1) 13758 .m(m) 13759 .n(n) 13760 .k(8) 13761 .iterations(1) 13762 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13763 } 13764 } 13765 } 13766 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD1R,k_eq_8_subtile_m)13767 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD1R, k_eq_8_subtile_m) { 13768 TEST_REQUIRES_ARM_NEON; 13769 for (uint32_t m = 1; m <= 4; m++) { 13770 GemmMicrokernelTester() 13771 .mr(4) 13772 .nr(16) 13773 .kr(4) 13774 
.sr(1) 13775 .m(m) 13776 .n(16) 13777 .k(8) 13778 .iterations(1) 13779 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13780 } 13781 } 13782 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD1R,k_eq_8_subtile_n)13783 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD1R, k_eq_8_subtile_n) { 13784 TEST_REQUIRES_ARM_NEON; 13785 for (uint32_t n = 1; n <= 16; n++) { 13786 GemmMicrokernelTester() 13787 .mr(4) 13788 .nr(16) 13789 .kr(4) 13790 .sr(1) 13791 .m(4) 13792 .n(n) 13793 .k(8) 13794 .iterations(1) 13795 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13796 } 13797 } 13798 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD1R,k_lt_8)13799 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD1R, k_lt_8) { 13800 TEST_REQUIRES_ARM_NEON; 13801 for (size_t k = 1; k < 8; k++) { 13802 GemmMicrokernelTester() 13803 .mr(4) 13804 .nr(16) 13805 .kr(4) 13806 .sr(1) 13807 .m(4) 13808 .n(16) 13809 .k(k) 13810 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13811 } 13812 } 13813 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD1R,k_lt_8_strided_a)13814 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD1R, k_lt_8_strided_a) { 13815 TEST_REQUIRES_ARM_NEON; 13816 for (size_t k = 1; k < 8; k++) { 13817 GemmMicrokernelTester() 13818 .mr(4) 13819 .nr(16) 13820 .kr(4) 13821 .sr(1) 13822 .m(4) 13823 .n(16) 13824 .k(k) 13825 .a_stride(11) 13826 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13827 } 13828 } 13829 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD1R,k_lt_8_subtile)13830 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD1R, k_lt_8_subtile) { 13831 TEST_REQUIRES_ARM_NEON; 13832 for (size_t k = 1; k < 8; k++) { 13833 for (uint32_t n = 1; n <= 16; n++) { 13834 
for (uint32_t m = 1; m <= 4; m++) { 13835 GemmMicrokernelTester() 13836 .mr(4) 13837 .nr(16) 13838 .kr(4) 13839 .sr(1) 13840 .m(m) 13841 .n(n) 13842 .k(k) 13843 .iterations(1) 13844 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13845 } 13846 } 13847 } 13848 } 13849 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD1R,k_gt_8)13850 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD1R, k_gt_8) { 13851 TEST_REQUIRES_ARM_NEON; 13852 for (size_t k = 9; k < 16; k++) { 13853 GemmMicrokernelTester() 13854 .mr(4) 13855 .nr(16) 13856 .kr(4) 13857 .sr(1) 13858 .m(4) 13859 .n(16) 13860 .k(k) 13861 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13862 } 13863 } 13864 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD1R,k_gt_8_strided_a)13865 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD1R, k_gt_8_strided_a) { 13866 TEST_REQUIRES_ARM_NEON; 13867 for (size_t k = 9; k < 16; k++) { 13868 GemmMicrokernelTester() 13869 .mr(4) 13870 .nr(16) 13871 .kr(4) 13872 .sr(1) 13873 .m(4) 13874 .n(16) 13875 .k(k) 13876 .a_stride(19) 13877 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13878 } 13879 } 13880 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD1R,k_gt_8_subtile)13881 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD1R, k_gt_8_subtile) { 13882 TEST_REQUIRES_ARM_NEON; 13883 for (size_t k = 9; k < 16; k++) { 13884 for (uint32_t n = 1; n <= 16; n++) { 13885 for (uint32_t m = 1; m <= 4; m++) { 13886 GemmMicrokernelTester() 13887 .mr(4) 13888 .nr(16) 13889 .kr(4) 13890 .sr(1) 13891 .m(m) 13892 .n(n) 13893 .k(k) 13894 .iterations(1) 13895 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13896 } 13897 } 13898 } 13899 } 13900 
// m = 4, n = 16, k = 16..80 in steps of 8.
TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD1R, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(4)
      .sr(1)
      .m(4)
      .n(16)
      .k(k)
      .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// m = 4, n = 16, k = 16..80 in steps of 8, with a_stride(83).
TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD1R, k_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(4)
      .sr(1)
      .m(4)
      .n(16)
      .k(k)
      .a_stride(83)
      .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// k = 16..80 (step 8), every n in 1..16 and m in 1..4, one iteration each.
TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD1R, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(16)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// m = 4, every n in 17..31, k = 1..40 in steps of 9.
TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD1R, n_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(4)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// m = 4, every n in 17..31, k = 1..40 (step 9), with cn_stride(19).
TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD1R, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(4)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .cn_stride(19)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// m = 4, every n in 17..31, k = 1..40 (step 9), with a_stride(43).
TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD1R, n_gt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(4)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .a_stride(43)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// Every n in 17..31, k = 1..40 (step 9), every m in 1..4, one iteration each.
TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD1R, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(16)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}
TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD1R, n_div_16) { 14026 TEST_REQUIRES_ARM_NEON; 14027 for (uint32_t n = 32; n <= 48; n += 16) { 14028 for (size_t k = 1; k <= 40; k += 9) { 14029 GemmMicrokernelTester() 14030 .mr(4) 14031 .nr(16) 14032 .kr(4) 14033 .sr(1) 14034 .m(4) 14035 .n(n) 14036 .k(k) 14037 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14038 } 14039 } 14040 } 14041 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD1R,n_div_16_strided_cn)14042 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD1R, n_div_16_strided_cn) { 14043 TEST_REQUIRES_ARM_NEON; 14044 for (uint32_t n = 32; n <= 48; n += 16) { 14045 for (size_t k = 1; k <= 40; k += 9) { 14046 GemmMicrokernelTester() 14047 .mr(4) 14048 .nr(16) 14049 .kr(4) 14050 .sr(1) 14051 .m(4) 14052 .n(n) 14053 .k(k) 14054 .cn_stride(19) 14055 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14056 } 14057 } 14058 } 14059 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD1R,n_div_16_strided_a)14060 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD1R, n_div_16_strided_a) { 14061 TEST_REQUIRES_ARM_NEON; 14062 for (uint32_t n = 32; n <= 48; n += 16) { 14063 for (size_t k = 1; k <= 40; k += 9) { 14064 GemmMicrokernelTester() 14065 .mr(4) 14066 .nr(16) 14067 .kr(4) 14068 .sr(1) 14069 .m(4) 14070 .n(n) 14071 .k(k) 14072 .a_stride(43) 14073 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14074 } 14075 } 14076 } 14077 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD1R,n_div_16_subtile)14078 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD1R, n_div_16_subtile) { 14079 TEST_REQUIRES_ARM_NEON; 14080 for (uint32_t n = 32; n <= 48; n += 16) { 14081 for (size_t k = 1; k <= 40; k += 9) { 14082 for (uint32_t m = 1; m <= 4; m++) { 14083 GemmMicrokernelTester() 14084 .mr(4) 14085 
.nr(16) 14086 .kr(4) 14087 .sr(1) 14088 .m(m) 14089 .n(n) 14090 .k(k) 14091 .iterations(1) 14092 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14093 } 14094 } 14095 } 14096 } 14097 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD1R,strided_cm_subtile)14098 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD1R, strided_cm_subtile) { 14099 TEST_REQUIRES_ARM_NEON; 14100 for (size_t k = 1; k <= 40; k += 9) { 14101 for (uint32_t n = 1; n <= 16; n++) { 14102 for (uint32_t m = 1; m <= 4; m++) { 14103 GemmMicrokernelTester() 14104 .mr(4) 14105 .nr(16) 14106 .kr(4) 14107 .sr(1) 14108 .m(m) 14109 .n(n) 14110 .k(k) 14111 .cm_stride(19) 14112 .iterations(1) 14113 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14114 } 14115 } 14116 } 14117 } 14118 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD1R,qmin)14119 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD1R, qmin) { 14120 TEST_REQUIRES_ARM_NEON; 14121 GemmMicrokernelTester() 14122 .mr(4) 14123 .nr(16) 14124 .kr(4) 14125 .sr(1) 14126 .m(4) 14127 .n(16) 14128 .k(8) 14129 .qmin(128) 14130 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14131 } 14132 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD1R,qmax)14133 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD1R, qmax) { 14134 TEST_REQUIRES_ARM_NEON; 14135 GemmMicrokernelTester() 14136 .mr(4) 14137 .nr(16) 14138 .kr(4) 14139 .sr(1) 14140 .m(4) 14141 .n(16) 14142 .k(8) 14143 .qmax(128) 14144 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14145 } 14146 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD1R,strided_cm)14147 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD1R, strided_cm) { 14148 TEST_REQUIRES_ARM_NEON; 14149 GemmMicrokernelTester() 14150 
.mr(4) 14151 .nr(16) 14152 .kr(4) 14153 .sr(1) 14154 .m(4) 14155 .n(16) 14156 .k(8) 14157 .cm_stride(19) 14158 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14159 } 14160 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 14161 14162 14163 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4S2__NEON_MULL,k_eq_8)14164 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4S2__NEON_MULL, k_eq_8) { 14165 TEST_REQUIRES_ARM_NEON; 14166 GemmMicrokernelTester() 14167 .mr(4) 14168 .nr(16) 14169 .kr(4) 14170 .sr(2) 14171 .m(4) 14172 .n(16) 14173 .k(8) 14174 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14175 } 14176 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4S2__NEON_MULL,strided_cn)14177 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4S2__NEON_MULL, strided_cn) { 14178 TEST_REQUIRES_ARM_NEON; 14179 GemmMicrokernelTester() 14180 .mr(4) 14181 .nr(16) 14182 .kr(4) 14183 .sr(2) 14184 .m(4) 14185 .n(16) 14186 .k(8) 14187 .cn_stride(19) 14188 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14189 } 14190 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4S2__NEON_MULL,k_eq_8_strided_a)14191 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4S2__NEON_MULL, k_eq_8_strided_a) { 14192 TEST_REQUIRES_ARM_NEON; 14193 GemmMicrokernelTester() 14194 .mr(4) 14195 .nr(16) 14196 .kr(4) 14197 .sr(2) 14198 .m(4) 14199 .n(16) 14200 .k(8) 14201 .a_stride(11) 14202 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14203 } 14204 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4S2__NEON_MULL,k_eq_8_subtile)14205 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4S2__NEON_MULL, k_eq_8_subtile) { 14206 TEST_REQUIRES_ARM_NEON; 14207 for (uint32_t n = 1; n <= 16; n++) { 14208 for (uint32_t m = 1; m <= 4; m++) { 14209 GemmMicrokernelTester() 14210 .mr(4) 14211 .nr(16) 14212 
.kr(4) 14213 .sr(2) 14214 .m(m) 14215 .n(n) 14216 .k(8) 14217 .iterations(1) 14218 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14219 } 14220 } 14221 } 14222 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4S2__NEON_MULL,k_eq_8_subtile_m)14223 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4S2__NEON_MULL, k_eq_8_subtile_m) { 14224 TEST_REQUIRES_ARM_NEON; 14225 for (uint32_t m = 1; m <= 4; m++) { 14226 GemmMicrokernelTester() 14227 .mr(4) 14228 .nr(16) 14229 .kr(4) 14230 .sr(2) 14231 .m(m) 14232 .n(16) 14233 .k(8) 14234 .iterations(1) 14235 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14236 } 14237 } 14238 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4S2__NEON_MULL,k_eq_8_subtile_n)14239 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4S2__NEON_MULL, k_eq_8_subtile_n) { 14240 TEST_REQUIRES_ARM_NEON; 14241 for (uint32_t n = 1; n <= 16; n++) { 14242 GemmMicrokernelTester() 14243 .mr(4) 14244 .nr(16) 14245 .kr(4) 14246 .sr(2) 14247 .m(4) 14248 .n(n) 14249 .k(8) 14250 .iterations(1) 14251 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14252 } 14253 } 14254 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4S2__NEON_MULL,k_lt_8)14255 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4S2__NEON_MULL, k_lt_8) { 14256 TEST_REQUIRES_ARM_NEON; 14257 for (size_t k = 1; k < 8; k++) { 14258 GemmMicrokernelTester() 14259 .mr(4) 14260 .nr(16) 14261 .kr(4) 14262 .sr(2) 14263 .m(4) 14264 .n(16) 14265 .k(k) 14266 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14267 } 14268 } 14269 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4S2__NEON_MULL,k_lt_8_strided_a)14270 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4S2__NEON_MULL, k_lt_8_strided_a) { 14271 TEST_REQUIRES_ARM_NEON; 14272 for (size_t k = 1; k < 8; k++) { 14273 GemmMicrokernelTester() 14274 .mr(4) 14275 .nr(16) 
14276 .kr(4) 14277 .sr(2) 14278 .m(4) 14279 .n(16) 14280 .k(k) 14281 .a_stride(11) 14282 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14283 } 14284 } 14285 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4S2__NEON_MULL,k_lt_8_subtile)14286 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4S2__NEON_MULL, k_lt_8_subtile) { 14287 TEST_REQUIRES_ARM_NEON; 14288 for (size_t k = 1; k < 8; k++) { 14289 for (uint32_t n = 1; n <= 16; n++) { 14290 for (uint32_t m = 1; m <= 4; m++) { 14291 GemmMicrokernelTester() 14292 .mr(4) 14293 .nr(16) 14294 .kr(4) 14295 .sr(2) 14296 .m(m) 14297 .n(n) 14298 .k(k) 14299 .iterations(1) 14300 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14301 } 14302 } 14303 } 14304 } 14305 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4S2__NEON_MULL,k_gt_8)14306 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4S2__NEON_MULL, k_gt_8) { 14307 TEST_REQUIRES_ARM_NEON; 14308 for (size_t k = 9; k < 16; k++) { 14309 GemmMicrokernelTester() 14310 .mr(4) 14311 .nr(16) 14312 .kr(4) 14313 .sr(2) 14314 .m(4) 14315 .n(16) 14316 .k(k) 14317 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14318 } 14319 } 14320 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4S2__NEON_MULL,k_gt_8_strided_a)14321 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4S2__NEON_MULL, k_gt_8_strided_a) { 14322 TEST_REQUIRES_ARM_NEON; 14323 for (size_t k = 9; k < 16; k++) { 14324 GemmMicrokernelTester() 14325 .mr(4) 14326 .nr(16) 14327 .kr(4) 14328 .sr(2) 14329 .m(4) 14330 .n(16) 14331 .k(k) 14332 .a_stride(19) 14333 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14334 } 14335 } 14336 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4S2__NEON_MULL,k_gt_8_subtile)14337 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4S2__NEON_MULL, k_gt_8_subtile) { 14338 TEST_REQUIRES_ARM_NEON; 14339 for 
(size_t k = 9; k < 16; k++) { 14340 for (uint32_t n = 1; n <= 16; n++) { 14341 for (uint32_t m = 1; m <= 4; m++) { 14342 GemmMicrokernelTester() 14343 .mr(4) 14344 .nr(16) 14345 .kr(4) 14346 .sr(2) 14347 .m(m) 14348 .n(n) 14349 .k(k) 14350 .iterations(1) 14351 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14352 } 14353 } 14354 } 14355 } 14356 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4S2__NEON_MULL,k_div_8)14357 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4S2__NEON_MULL, k_div_8) { 14358 TEST_REQUIRES_ARM_NEON; 14359 for (size_t k = 16; k <= 80; k += 8) { 14360 GemmMicrokernelTester() 14361 .mr(4) 14362 .nr(16) 14363 .kr(4) 14364 .sr(2) 14365 .m(4) 14366 .n(16) 14367 .k(k) 14368 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14369 } 14370 } 14371 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4S2__NEON_MULL,k_div_8_strided_a)14372 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4S2__NEON_MULL, k_div_8_strided_a) { 14373 TEST_REQUIRES_ARM_NEON; 14374 for (size_t k = 16; k <= 80; k += 8) { 14375 GemmMicrokernelTester() 14376 .mr(4) 14377 .nr(16) 14378 .kr(4) 14379 .sr(2) 14380 .m(4) 14381 .n(16) 14382 .k(k) 14383 .a_stride(83) 14384 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14385 } 14386 } 14387 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4S2__NEON_MULL,k_div_8_subtile)14388 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4S2__NEON_MULL, k_div_8_subtile) { 14389 TEST_REQUIRES_ARM_NEON; 14390 for (size_t k = 16; k <= 80; k += 8) { 14391 for (uint32_t n = 1; n <= 16; n++) { 14392 for (uint32_t m = 1; m <= 4; m++) { 14393 GemmMicrokernelTester() 14394 .mr(4) 14395 .nr(16) 14396 .kr(4) 14397 .sr(2) 14398 .m(m) 14399 .n(n) 14400 .k(k) 14401 .iterations(1) 14402 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 
14403 } 14404 } 14405 } 14406 } 14407 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4S2__NEON_MULL,n_gt_16)14408 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4S2__NEON_MULL, n_gt_16) { 14409 TEST_REQUIRES_ARM_NEON; 14410 for (uint32_t n = 17; n < 32; n++) { 14411 for (size_t k = 1; k <= 40; k += 9) { 14412 GemmMicrokernelTester() 14413 .mr(4) 14414 .nr(16) 14415 .kr(4) 14416 .sr(2) 14417 .m(4) 14418 .n(n) 14419 .k(k) 14420 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14421 } 14422 } 14423 } 14424 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4S2__NEON_MULL,n_gt_16_strided_cn)14425 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4S2__NEON_MULL, n_gt_16_strided_cn) { 14426 TEST_REQUIRES_ARM_NEON; 14427 for (uint32_t n = 17; n < 32; n++) { 14428 for (size_t k = 1; k <= 40; k += 9) { 14429 GemmMicrokernelTester() 14430 .mr(4) 14431 .nr(16) 14432 .kr(4) 14433 .sr(2) 14434 .m(4) 14435 .n(n) 14436 .k(k) 14437 .cn_stride(19) 14438 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14439 } 14440 } 14441 } 14442 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4S2__NEON_MULL,n_gt_16_strided_a)14443 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4S2__NEON_MULL, n_gt_16_strided_a) { 14444 TEST_REQUIRES_ARM_NEON; 14445 for (uint32_t n = 17; n < 32; n++) { 14446 for (size_t k = 1; k <= 40; k += 9) { 14447 GemmMicrokernelTester() 14448 .mr(4) 14449 .nr(16) 14450 .kr(4) 14451 .sr(2) 14452 .m(4) 14453 .n(n) 14454 .k(k) 14455 .a_stride(43) 14456 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14457 } 14458 } 14459 } 14460 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4S2__NEON_MULL,n_gt_16_subtile)14461 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4S2__NEON_MULL, n_gt_16_subtile) { 14462 TEST_REQUIRES_ARM_NEON; 14463 for (uint32_t n = 17; n < 32; n++) { 14464 for (size_t k = 1; k <= 40; k += 9) { 14465 for (uint32_t m = 1; m <= 4; m++) { 14466 
GemmMicrokernelTester() 14467 .mr(4) 14468 .nr(16) 14469 .kr(4) 14470 .sr(2) 14471 .m(m) 14472 .n(n) 14473 .k(k) 14474 .iterations(1) 14475 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14476 } 14477 } 14478 } 14479 } 14480 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4S2__NEON_MULL,n_div_16)14481 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4S2__NEON_MULL, n_div_16) { 14482 TEST_REQUIRES_ARM_NEON; 14483 for (uint32_t n = 32; n <= 48; n += 16) { 14484 for (size_t k = 1; k <= 40; k += 9) { 14485 GemmMicrokernelTester() 14486 .mr(4) 14487 .nr(16) 14488 .kr(4) 14489 .sr(2) 14490 .m(4) 14491 .n(n) 14492 .k(k) 14493 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14494 } 14495 } 14496 } 14497 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4S2__NEON_MULL,n_div_16_strided_cn)14498 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4S2__NEON_MULL, n_div_16_strided_cn) { 14499 TEST_REQUIRES_ARM_NEON; 14500 for (uint32_t n = 32; n <= 48; n += 16) { 14501 for (size_t k = 1; k <= 40; k += 9) { 14502 GemmMicrokernelTester() 14503 .mr(4) 14504 .nr(16) 14505 .kr(4) 14506 .sr(2) 14507 .m(4) 14508 .n(n) 14509 .k(k) 14510 .cn_stride(19) 14511 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14512 } 14513 } 14514 } 14515 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4S2__NEON_MULL,n_div_16_strided_a)14516 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4S2__NEON_MULL, n_div_16_strided_a) { 14517 TEST_REQUIRES_ARM_NEON; 14518 for (uint32_t n = 32; n <= 48; n += 16) { 14519 for (size_t k = 1; k <= 40; k += 9) { 14520 GemmMicrokernelTester() 14521 .mr(4) 14522 .nr(16) 14523 .kr(4) 14524 .sr(2) 14525 .m(4) 14526 .n(n) 14527 .k(k) 14528 .a_stride(43) 14529 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14530 } 14531 } 14532 } 14533 
TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4S2__NEON_MULL,n_div_16_subtile)14534 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4S2__NEON_MULL, n_div_16_subtile) { 14535 TEST_REQUIRES_ARM_NEON; 14536 for (uint32_t n = 32; n <= 48; n += 16) { 14537 for (size_t k = 1; k <= 40; k += 9) { 14538 for (uint32_t m = 1; m <= 4; m++) { 14539 GemmMicrokernelTester() 14540 .mr(4) 14541 .nr(16) 14542 .kr(4) 14543 .sr(2) 14544 .m(m) 14545 .n(n) 14546 .k(k) 14547 .iterations(1) 14548 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14549 } 14550 } 14551 } 14552 } 14553 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4S2__NEON_MULL,strided_cm_subtile)14554 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4S2__NEON_MULL, strided_cm_subtile) { 14555 TEST_REQUIRES_ARM_NEON; 14556 for (size_t k = 1; k <= 40; k += 9) { 14557 for (uint32_t n = 1; n <= 16; n++) { 14558 for (uint32_t m = 1; m <= 4; m++) { 14559 GemmMicrokernelTester() 14560 .mr(4) 14561 .nr(16) 14562 .kr(4) 14563 .sr(2) 14564 .m(m) 14565 .n(n) 14566 .k(k) 14567 .cm_stride(19) 14568 .iterations(1) 14569 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14570 } 14571 } 14572 } 14573 } 14574 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4S2__NEON_MULL,qmin)14575 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4S2__NEON_MULL, qmin) { 14576 TEST_REQUIRES_ARM_NEON; 14577 GemmMicrokernelTester() 14578 .mr(4) 14579 .nr(16) 14580 .kr(4) 14581 .sr(2) 14582 .m(4) 14583 .n(16) 14584 .k(8) 14585 .qmin(128) 14586 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14587 } 14588 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4S2__NEON_MULL,qmax)14589 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4S2__NEON_MULL, qmax) { 14590 TEST_REQUIRES_ARM_NEON; 14591 GemmMicrokernelTester() 14592 .mr(4) 14593 .nr(16) 14594 .kr(4) 14595 .sr(2) 14596 .m(4) 14597 .n(16) 14598 .k(8) 14599 .qmax(128) 14600 
.Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14601 } 14602 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4S2__NEON_MULL,strided_cm)14603 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4S2__NEON_MULL, strided_cm) { 14604 TEST_REQUIRES_ARM_NEON; 14605 GemmMicrokernelTester() 14606 .mr(4) 14607 .nr(16) 14608 .kr(4) 14609 .sr(2) 14610 .m(4) 14611 .n(16) 14612 .k(8) 14613 .cm_stride(19) 14614 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14615 } 14616 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 14617 14618 14619 #if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53,k_eq_16)14620 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_eq_16) { 14621 TEST_REQUIRES_ARM_NEON; 14622 GemmMicrokernelTester() 14623 .mr(2) 14624 .nr(8) 14625 .kr(8) 14626 .sr(1) 14627 .m(2) 14628 .n(8) 14629 .k(16) 14630 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14631 } 14632 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53,strided_cn)14633 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, strided_cn) { 14634 TEST_REQUIRES_ARM_NEON; 14635 GemmMicrokernelTester() 14636 .mr(2) 14637 .nr(8) 14638 .kr(8) 14639 .sr(1) 14640 .m(2) 14641 .n(8) 14642 .k(16) 14643 .cn_stride(11) 14644 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14645 } 14646 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53,k_eq_16_strided_a)14647 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_eq_16_strided_a) { 14648 TEST_REQUIRES_ARM_NEON; 14649 GemmMicrokernelTester() 14650 .mr(2) 14651 .nr(8) 14652 .kr(8) 14653 .sr(1) 14654 .m(2) 14655 .n(8) 14656 .k(16) 14657 
.a_stride(19) 14658 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14659 } 14660 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53,k_eq_16_subtile)14661 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_eq_16_subtile) { 14662 TEST_REQUIRES_ARM_NEON; 14663 for (uint32_t n = 1; n <= 8; n++) { 14664 for (uint32_t m = 1; m <= 2; m++) { 14665 GemmMicrokernelTester() 14666 .mr(2) 14667 .nr(8) 14668 .kr(8) 14669 .sr(1) 14670 .m(m) 14671 .n(n) 14672 .k(16) 14673 .iterations(1) 14674 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14675 } 14676 } 14677 } 14678 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53,k_eq_16_subtile_m)14679 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_eq_16_subtile_m) { 14680 TEST_REQUIRES_ARM_NEON; 14681 for (uint32_t m = 1; m <= 2; m++) { 14682 GemmMicrokernelTester() 14683 .mr(2) 14684 .nr(8) 14685 .kr(8) 14686 .sr(1) 14687 .m(m) 14688 .n(8) 14689 .k(16) 14690 .iterations(1) 14691 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14692 } 14693 } 14694 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53,k_eq_16_subtile_n)14695 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_eq_16_subtile_n) { 14696 TEST_REQUIRES_ARM_NEON; 14697 for (uint32_t n = 1; n <= 8; n++) { 14698 GemmMicrokernelTester() 14699 .mr(2) 14700 .nr(8) 14701 .kr(8) 14702 .sr(1) 14703 .m(2) 14704 .n(n) 14705 .k(16) 14706 .iterations(1) 14707 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14708 } 14709 } 14710 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53,k_lt_16)14711 
// m=2, n=8; sweeps k over 1..15.
TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(8)
      .kr(8)
      .sr(1)
      .m(2)
      .n(8)
      .k(k)
      .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// m=2, n=8; sweeps k over 1..15 with a_stride(19) set on the tester.
TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_lt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(8)
      .kr(8)
      .sr(1)
      .m(2)
      .n(8)
      .k(k)
      .a_stride(19)
      .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// Sweeps k over 1..15, n over 1..8 and m over 1..2, one iteration per
// combination.
TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 16; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(8)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// m=2, n=8; sweeps k over 17..31.
TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 17; k < 32; k++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(8)
      .kr(8)
      .sr(1)
      .m(2)
      .n(8)
      .k(k)
      .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// m=2, n=8; sweeps k over 17..31 with a_stride(37) set on the tester.
TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_gt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 17; k < 32; k++) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(8)
      .kr(8)
      .sr(1)
      .m(2)
      .n(8)
      .k(k)
      .a_stride(37)
      .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// Sweeps k over 17..31, n over 1..8 and m over 1..2, one iteration per
// combination.
TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 17; k < 32; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(8)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// m=2, n=8; sweeps k over 32..160 in steps of 16.
TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 32; k <= 160; k += 16) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(8)
      .kr(8)
      .sr(1)
      .m(2)
      .n(8)
      .k(k)
      .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_div_16_strided_a) { 14829 TEST_REQUIRES_ARM_NEON; 14830 for (size_t k = 32; k <= 160; k += 16) { 14831 GemmMicrokernelTester() 14832 .mr(2) 14833 .nr(8) 14834 .kr(8) 14835 .sr(1) 14836 .m(2) 14837 .n(8) 14838 .k(k) 14839 .a_stride(163) 14840 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14841 } 14842 } 14843 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53,k_div_16_subtile)14844 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_div_16_subtile) { 14845 TEST_REQUIRES_ARM_NEON; 14846 for (size_t k = 32; k <= 160; k += 16) { 14847 for (uint32_t n = 1; n <= 8; n++) { 14848 for (uint32_t m = 1; m <= 2; m++) { 14849 GemmMicrokernelTester() 14850 .mr(2) 14851 .nr(8) 14852 .kr(8) 14853 .sr(1) 14854 .m(m) 14855 .n(n) 14856 .k(k) 14857 .iterations(1) 14858 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14859 } 14860 } 14861 } 14862 } 14863 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53,n_gt_8)14864 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, n_gt_8) { 14865 TEST_REQUIRES_ARM_NEON; 14866 for (uint32_t n = 9; n < 16; n++) { 14867 for (size_t k = 1; k <= 80; k += 17) { 14868 GemmMicrokernelTester() 14869 .mr(2) 14870 .nr(8) 14871 .kr(8) 14872 .sr(1) 14873 .m(2) 14874 .n(n) 14875 .k(k) 14876 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14877 } 14878 } 14879 } 14880 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53,n_gt_8_strided_cn)14881 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, n_gt_8_strided_cn) { 14882 TEST_REQUIRES_ARM_NEON; 14883 for (uint32_t n = 9; n < 16; n++) { 14884 for (size_t k = 1; k <= 80; k += 17) { 
14885 GemmMicrokernelTester() 14886 .mr(2) 14887 .nr(8) 14888 .kr(8) 14889 .sr(1) 14890 .m(2) 14891 .n(n) 14892 .k(k) 14893 .cn_stride(11) 14894 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14895 } 14896 } 14897 } 14898 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53,n_gt_8_strided_a)14899 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, n_gt_8_strided_a) { 14900 TEST_REQUIRES_ARM_NEON; 14901 for (uint32_t n = 9; n < 16; n++) { 14902 for (size_t k = 1; k <= 80; k += 17) { 14903 GemmMicrokernelTester() 14904 .mr(2) 14905 .nr(8) 14906 .kr(8) 14907 .sr(1) 14908 .m(2) 14909 .n(n) 14910 .k(k) 14911 .a_stride(83) 14912 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14913 } 14914 } 14915 } 14916 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53,n_gt_8_subtile)14917 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, n_gt_8_subtile) { 14918 TEST_REQUIRES_ARM_NEON; 14919 for (uint32_t n = 9; n < 16; n++) { 14920 for (size_t k = 1; k <= 80; k += 17) { 14921 for (uint32_t m = 1; m <= 2; m++) { 14922 GemmMicrokernelTester() 14923 .mr(2) 14924 .nr(8) 14925 .kr(8) 14926 .sr(1) 14927 .m(m) 14928 .n(n) 14929 .k(k) 14930 .iterations(1) 14931 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14932 } 14933 } 14934 } 14935 } 14936 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53,n_div_8)14937 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, n_div_8) { 14938 TEST_REQUIRES_ARM_NEON; 14939 for (uint32_t n = 16; n <= 24; n += 8) { 14940 for (size_t k = 1; k <= 80; k += 17) { 14941 GemmMicrokernelTester() 14942 .mr(2) 14943 .nr(8) 14944 .kr(8) 14945 .sr(1) 14946 .m(2) 14947 .n(n) 14948 .k(k) 14949 
.Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14950 } 14951 } 14952 } 14953 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53,n_div_8_strided_cn)14954 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, n_div_8_strided_cn) { 14955 TEST_REQUIRES_ARM_NEON; 14956 for (uint32_t n = 16; n <= 24; n += 8) { 14957 for (size_t k = 1; k <= 80; k += 17) { 14958 GemmMicrokernelTester() 14959 .mr(2) 14960 .nr(8) 14961 .kr(8) 14962 .sr(1) 14963 .m(2) 14964 .n(n) 14965 .k(k) 14966 .cn_stride(11) 14967 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14968 } 14969 } 14970 } 14971 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53,n_div_8_strided_a)14972 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, n_div_8_strided_a) { 14973 TEST_REQUIRES_ARM_NEON; 14974 for (uint32_t n = 16; n <= 24; n += 8) { 14975 for (size_t k = 1; k <= 80; k += 17) { 14976 GemmMicrokernelTester() 14977 .mr(2) 14978 .nr(8) 14979 .kr(8) 14980 .sr(1) 14981 .m(2) 14982 .n(n) 14983 .k(k) 14984 .a_stride(83) 14985 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14986 } 14987 } 14988 } 14989 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53,n_div_8_subtile)14990 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, n_div_8_subtile) { 14991 TEST_REQUIRES_ARM_NEON; 14992 for (uint32_t n = 16; n <= 24; n += 8) { 14993 for (size_t k = 1; k <= 80; k += 17) { 14994 for (uint32_t m = 1; m <= 2; m++) { 14995 GemmMicrokernelTester() 14996 .mr(2) 14997 .nr(8) 14998 .kr(8) 14999 .sr(1) 15000 .m(m) 15001 .n(n) 15002 .k(k) 15003 .iterations(1) 15004 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53, 
xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15005 } 15006 } 15007 } 15008 } 15009 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53,strided_cm_subtile)15010 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, strided_cm_subtile) { 15011 TEST_REQUIRES_ARM_NEON; 15012 for (size_t k = 1; k <= 80; k += 17) { 15013 for (uint32_t n = 1; n <= 8; n++) { 15014 for (uint32_t m = 1; m <= 2; m++) { 15015 GemmMicrokernelTester() 15016 .mr(2) 15017 .nr(8) 15018 .kr(8) 15019 .sr(1) 15020 .m(m) 15021 .n(n) 15022 .k(k) 15023 .cm_stride(11) 15024 .iterations(1) 15025 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15026 } 15027 } 15028 } 15029 } 15030 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53,qmin)15031 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, qmin) { 15032 TEST_REQUIRES_ARM_NEON; 15033 GemmMicrokernelTester() 15034 .mr(2) 15035 .nr(8) 15036 .kr(8) 15037 .sr(1) 15038 .m(2) 15039 .n(8) 15040 .k(16) 15041 .qmin(128) 15042 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15043 } 15044 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53,qmax)15045 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, qmax) { 15046 TEST_REQUIRES_ARM_NEON; 15047 GemmMicrokernelTester() 15048 .mr(2) 15049 .nr(8) 15050 .kr(8) 15051 .sr(1) 15052 .m(2) 15053 .n(8) 15054 .k(16) 15055 .qmax(128) 15056 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15057 } 15058 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53,strided_cm)15059 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_CORTEX_A53, strided_cm) { 15060 TEST_REQUIRES_ARM_NEON; 15061 GemmMicrokernelTester() 15062 .mr(2) 15063 .nr(8) 
15064 .kr(8) 15065 .sr(1) 15066 .m(2) 15067 .n(8) 15068 .k(16) 15069 .cm_stride(11) 15070 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15071 } 15072 #endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY 15073 15074 15075 #if XNN_ENABLE_ARM_DOTPROD && XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD32,k_eq_4)15076 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD32, k_eq_4) { 15077 TEST_REQUIRES_ARM_NEON_DOT; 15078 GemmMicrokernelTester() 15079 .mr(4) 15080 .nr(16) 15081 .kr(4) 15082 .sr(1) 15083 .m(4) 15084 .n(16) 15085 .k(4) 15086 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15087 } 15088 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD32,strided_cn)15089 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD32, strided_cn) { 15090 TEST_REQUIRES_ARM_NEON_DOT; 15091 GemmMicrokernelTester() 15092 .mr(4) 15093 .nr(16) 15094 .kr(4) 15095 .sr(1) 15096 .m(4) 15097 .n(16) 15098 .k(4) 15099 .cn_stride(19) 15100 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15101 } 15102 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD32,k_eq_4_strided_a)15103 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD32, k_eq_4_strided_a) { 15104 TEST_REQUIRES_ARM_NEON_DOT; 15105 GemmMicrokernelTester() 15106 .mr(4) 15107 .nr(16) 15108 .kr(4) 15109 .sr(1) 15110 .m(4) 15111 .n(16) 15112 .k(4) 15113 .a_stride(7) 15114 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15115 } 15116 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD32,k_eq_4_subtile)15117 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD32, k_eq_4_subtile) { 15118 TEST_REQUIRES_ARM_NEON_DOT; 15119 
for (uint32_t n = 1; n <= 16; n++) { 15120 for (uint32_t m = 1; m <= 4; m++) { 15121 GemmMicrokernelTester() 15122 .mr(4) 15123 .nr(16) 15124 .kr(4) 15125 .sr(1) 15126 .m(m) 15127 .n(n) 15128 .k(4) 15129 .iterations(1) 15130 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15131 } 15132 } 15133 } 15134 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD32,k_eq_4_subtile_m)15135 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD32, k_eq_4_subtile_m) { 15136 TEST_REQUIRES_ARM_NEON_DOT; 15137 for (uint32_t m = 1; m <= 4; m++) { 15138 GemmMicrokernelTester() 15139 .mr(4) 15140 .nr(16) 15141 .kr(4) 15142 .sr(1) 15143 .m(m) 15144 .n(16) 15145 .k(4) 15146 .iterations(1) 15147 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15148 } 15149 } 15150 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD32,k_eq_4_subtile_n)15151 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD32, k_eq_4_subtile_n) { 15152 TEST_REQUIRES_ARM_NEON_DOT; 15153 for (uint32_t n = 1; n <= 16; n++) { 15154 GemmMicrokernelTester() 15155 .mr(4) 15156 .nr(16) 15157 .kr(4) 15158 .sr(1) 15159 .m(4) 15160 .n(n) 15161 .k(4) 15162 .iterations(1) 15163 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15164 } 15165 } 15166 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD32,k_lt_4)15167 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD32, k_lt_4) { 15168 TEST_REQUIRES_ARM_NEON_DOT; 15169 for (size_t k = 1; k < 4; k++) { 15170 GemmMicrokernelTester() 15171 .mr(4) 15172 .nr(16) 15173 .kr(4) 15174 .sr(1) 15175 .m(4) 15176 .n(16) 15177 .k(k) 15178 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15179 } 15180 } 15181 
// Sweeps k over 1..3 with m=4, n=16 and a_stride fixed at 7.
TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD32, k_lt_4_strided_a) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_dim = 1; k_dim < 4; ++k_dim) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(16).kr(4).sr(1);
    tester.m(4).n(16).k(k_dim).a_stride(7);
    tester.Test(
        xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld32,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}

// Sweeps k over 1..3, n over 1..16 and m over 1..4; one iteration per combination.
TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD32, k_lt_4_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_dim = 1; k_dim < 4; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(16).kr(4).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(
            xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld32,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// Sweeps k over 5..7 with m=4, n=16.
TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD32, k_gt_4) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_dim = 5; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(16).kr(4).sr(1);
    tester.m(4).n(16).k(k_dim);
    tester.Test(
        xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld32,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}

// Sweeps k over 5..7 with m=4, n=16 and a_stride fixed at 11.
TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD32, k_gt_4_strided_a) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_dim = 5; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(16).kr(4).sr(1);
    tester.m(4).n(16).k(k_dim).a_stride(11);
    tester.Test(
        xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld32,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}

// Sweeps k over 5..7, n over 1..16 and m over 1..4; one iteration per combination.
TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD32, k_gt_4_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_dim = 5; k_dim < 8; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim <= 16; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(16).kr(4).sr(1);
        tester.m(m_dim).n(n_dim).k(k_dim).iterations(1);
        tester.Test(
            xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld32,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// Sweeps k over 8, 12, ..., 40 with m=4, n=16.
TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD32, k_div_4) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_dim = 8; k_dim <= 40; k_dim += 4) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(16).kr(4).sr(1);
    tester.m(4).n(16).k(k_dim);
    tester.Test(
        xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld32,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}

// Sweeps k over 8, 12, ..., 40 with m=4, n=16 and a_stride fixed at 43.
TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD32, k_div_4_strided_a) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k_dim = 8; k_dim <= 40; k_dim += 4) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(16).kr(4).sr(1);
    tester.m(4).n(16).k(k_dim).a_stride(43);
    tester.Test(
        xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld32,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}

TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD32, k_div_4_subtile) { 15301 TEST_REQUIRES_ARM_NEON_DOT; 15302 for (size_t k = 8; k <= 40; k += 4) { 15303 for (uint32_t n = 1; n <= 16; n++) { 15304 for (uint32_t m = 1; m <= 4; m++) { 15305 GemmMicrokernelTester() 15306 .mr(4) 15307 .nr(16) 15308 .kr(4) 15309 .sr(1) 15310 .m(m) 15311 .n(n) 15312 .k(k) 15313 .iterations(1) 15314 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15315 } 15316 } 15317 } 15318 } 15319 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD32,n_gt_16)15320 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD32, n_gt_16) { 15321 TEST_REQUIRES_ARM_NEON_DOT; 15322 for (uint32_t n = 17; n < 32; n++) { 15323 for (size_t k = 1; k <= 20; k += 5) { 15324 GemmMicrokernelTester() 15325 .mr(4) 15326 .nr(16) 15327 .kr(4) 15328 .sr(1) 15329 .m(4) 15330 .n(n) 15331 .k(k) 15332 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15333 } 15334 } 15335 } 15336 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD32,n_gt_16_strided_cn)15337 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD32, n_gt_16_strided_cn) { 15338 TEST_REQUIRES_ARM_NEON_DOT; 15339 for (uint32_t n = 17; n < 32; n++) { 15340 for (size_t k = 1; k <= 20; k += 5) { 15341 GemmMicrokernelTester() 15342 .mr(4) 15343 .nr(16) 15344 .kr(4) 15345 .sr(1) 15346 .m(4) 15347 .n(n) 15348 .k(k) 15349 .cn_stride(19) 15350 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15351 } 15352 } 15353 } 15354 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD32,n_gt_16_strided_a)15355 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD32, n_gt_16_strided_a) { 15356 TEST_REQUIRES_ARM_NEON_DOT; 15357 for (uint32_t n = 17; n < 32; n++) { 15358 for (size_t k = 1; k <= 20; k += 5) { 
15359 GemmMicrokernelTester() 15360 .mr(4) 15361 .nr(16) 15362 .kr(4) 15363 .sr(1) 15364 .m(4) 15365 .n(n) 15366 .k(k) 15367 .a_stride(23) 15368 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15369 } 15370 } 15371 } 15372 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD32,n_gt_16_subtile)15373 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD32, n_gt_16_subtile) { 15374 TEST_REQUIRES_ARM_NEON_DOT; 15375 for (uint32_t n = 17; n < 32; n++) { 15376 for (size_t k = 1; k <= 20; k += 5) { 15377 for (uint32_t m = 1; m <= 4; m++) { 15378 GemmMicrokernelTester() 15379 .mr(4) 15380 .nr(16) 15381 .kr(4) 15382 .sr(1) 15383 .m(m) 15384 .n(n) 15385 .k(k) 15386 .iterations(1) 15387 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15388 } 15389 } 15390 } 15391 } 15392 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD32,n_div_16)15393 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD32, n_div_16) { 15394 TEST_REQUIRES_ARM_NEON_DOT; 15395 for (uint32_t n = 32; n <= 48; n += 16) { 15396 for (size_t k = 1; k <= 20; k += 5) { 15397 GemmMicrokernelTester() 15398 .mr(4) 15399 .nr(16) 15400 .kr(4) 15401 .sr(1) 15402 .m(4) 15403 .n(n) 15404 .k(k) 15405 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15406 } 15407 } 15408 } 15409 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD32,n_div_16_strided_cn)15410 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD32, n_div_16_strided_cn) { 15411 TEST_REQUIRES_ARM_NEON_DOT; 15412 for (uint32_t n = 32; n <= 48; n += 16) { 15413 for (size_t k = 1; k <= 20; k += 5) { 15414 GemmMicrokernelTester() 15415 .mr(4) 15416 .nr(16) 15417 .kr(4) 15418 .sr(1) 15419 .m(4) 15420 .n(n) 15421 .k(k) 15422 .cn_stride(19) 15423 
.Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15424 } 15425 } 15426 } 15427 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD32,n_div_16_strided_a)15428 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD32, n_div_16_strided_a) { 15429 TEST_REQUIRES_ARM_NEON_DOT; 15430 for (uint32_t n = 32; n <= 48; n += 16) { 15431 for (size_t k = 1; k <= 20; k += 5) { 15432 GemmMicrokernelTester() 15433 .mr(4) 15434 .nr(16) 15435 .kr(4) 15436 .sr(1) 15437 .m(4) 15438 .n(n) 15439 .k(k) 15440 .a_stride(23) 15441 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15442 } 15443 } 15444 } 15445 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD32,n_div_16_subtile)15446 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD32, n_div_16_subtile) { 15447 TEST_REQUIRES_ARM_NEON_DOT; 15448 for (uint32_t n = 32; n <= 48; n += 16) { 15449 for (size_t k = 1; k <= 20; k += 5) { 15450 for (uint32_t m = 1; m <= 4; m++) { 15451 GemmMicrokernelTester() 15452 .mr(4) 15453 .nr(16) 15454 .kr(4) 15455 .sr(1) 15456 .m(m) 15457 .n(n) 15458 .k(k) 15459 .iterations(1) 15460 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15461 } 15462 } 15463 } 15464 } 15465 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD32,strided_cm_subtile)15466 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD32, strided_cm_subtile) { 15467 TEST_REQUIRES_ARM_NEON_DOT; 15468 for (size_t k = 1; k <= 20; k += 5) { 15469 for (uint32_t n = 1; n <= 16; n++) { 15470 for (uint32_t m = 1; m <= 4; m++) { 15471 GemmMicrokernelTester() 15472 .mr(4) 15473 .nr(16) 15474 .kr(4) 15475 .sr(1) 15476 .m(m) 15477 .n(n) 15478 .k(k) 15479 .cm_stride(19) 15480 .iterations(1) 15481 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld32, 
xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15482 } 15483 } 15484 } 15485 } 15486 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD32,qmin)15487 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD32, qmin) { 15488 TEST_REQUIRES_ARM_NEON_DOT; 15489 GemmMicrokernelTester() 15490 .mr(4) 15491 .nr(16) 15492 .kr(4) 15493 .sr(1) 15494 .m(4) 15495 .n(16) 15496 .k(4) 15497 .qmin(128) 15498 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15499 } 15500 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD32,qmax)15501 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD32, qmax) { 15502 TEST_REQUIRES_ARM_NEON_DOT; 15503 GemmMicrokernelTester() 15504 .mr(4) 15505 .nr(16) 15506 .kr(4) 15507 .sr(1) 15508 .m(4) 15509 .n(16) 15510 .k(4) 15511 .qmax(128) 15512 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15513 } 15514 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD32,strided_cm)15515 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_LD32, strided_cm) { 15516 TEST_REQUIRES_ARM_NEON_DOT; 15517 GemmMicrokernelTester() 15518 .mr(4) 15519 .nr(16) 15520 .kr(4) 15521 .sr(1) 15522 .m(4) 15523 .n(16) 15524 .k(4) 15525 .cm_stride(19) 15526 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15527 } 15528 #endif // XNN_ENABLE_ARM_DOTPROD && XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY 15529 15530 15531 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE,k_eq_8)15532 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, k_eq_8) { 15533 TEST_REQUIRES_ARM_NEON; 15534 GemmMicrokernelTester() 15535 .mr(2) 15536 .nr(16) 15537 .kr(1) 15538 .sr(1) 15539 .m(2) 15540 .n(16) 15541 .k(8) 15542 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane, 
xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15543 } 15544 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE,strided_cn)15545 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, strided_cn) { 15546 TEST_REQUIRES_ARM_NEON; 15547 GemmMicrokernelTester() 15548 .mr(2) 15549 .nr(16) 15550 .kr(1) 15551 .sr(1) 15552 .m(2) 15553 .n(16) 15554 .k(8) 15555 .cn_stride(19) 15556 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15557 } 15558 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE,k_eq_8_strided_a)15559 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, k_eq_8_strided_a) { 15560 TEST_REQUIRES_ARM_NEON; 15561 GemmMicrokernelTester() 15562 .mr(2) 15563 .nr(16) 15564 .kr(1) 15565 .sr(1) 15566 .m(2) 15567 .n(16) 15568 .k(8) 15569 .a_stride(11) 15570 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15571 } 15572 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE,k_eq_8_subtile)15573 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, k_eq_8_subtile) { 15574 TEST_REQUIRES_ARM_NEON; 15575 for (uint32_t n = 1; n <= 16; n++) { 15576 for (uint32_t m = 1; m <= 2; m++) { 15577 GemmMicrokernelTester() 15578 .mr(2) 15579 .nr(16) 15580 .kr(1) 15581 .sr(1) 15582 .m(m) 15583 .n(n) 15584 .k(8) 15585 .iterations(1) 15586 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15587 } 15588 } 15589 } 15590 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE,k_eq_8_subtile_m)15591 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, k_eq_8_subtile_m) { 15592 TEST_REQUIRES_ARM_NEON; 15593 for (uint32_t m = 1; m <= 2; m++) { 15594 GemmMicrokernelTester() 15595 .mr(2) 15596 .nr(16) 15597 .kr(1) 15598 .sr(1) 15599 .m(m) 15600 .n(16) 15601 .k(8) 15602 .iterations(1) 15603 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane, 
xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15604 } 15605 } 15606 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE,k_eq_8_subtile_n)15607 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, k_eq_8_subtile_n) { 15608 TEST_REQUIRES_ARM_NEON; 15609 for (uint32_t n = 1; n <= 16; n++) { 15610 GemmMicrokernelTester() 15611 .mr(2) 15612 .nr(16) 15613 .kr(1) 15614 .sr(1) 15615 .m(2) 15616 .n(n) 15617 .k(8) 15618 .iterations(1) 15619 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15620 } 15621 } 15622 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE,k_lt_8)15623 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, k_lt_8) { 15624 TEST_REQUIRES_ARM_NEON; 15625 for (size_t k = 1; k < 8; k++) { 15626 GemmMicrokernelTester() 15627 .mr(2) 15628 .nr(16) 15629 .kr(1) 15630 .sr(1) 15631 .m(2) 15632 .n(16) 15633 .k(k) 15634 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15635 } 15636 } 15637 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE,k_lt_8_strided_a)15638 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, k_lt_8_strided_a) { 15639 TEST_REQUIRES_ARM_NEON; 15640 for (size_t k = 1; k < 8; k++) { 15641 GemmMicrokernelTester() 15642 .mr(2) 15643 .nr(16) 15644 .kr(1) 15645 .sr(1) 15646 .m(2) 15647 .n(16) 15648 .k(k) 15649 .a_stride(11) 15650 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15651 } 15652 } 15653 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE,k_lt_8_subtile)15654 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, k_lt_8_subtile) { 15655 TEST_REQUIRES_ARM_NEON; 15656 for (size_t k = 1; k < 8; k++) { 15657 for (uint32_t n = 1; n <= 16; n++) { 15658 for (uint32_t m = 1; m <= 2; m++) { 15659 GemmMicrokernelTester() 15660 .mr(2) 15661 .nr(16) 15662 .kr(1) 15663 .sr(1) 15664 .m(m) 15665 .n(n) 15666 .k(k) 
15667 .iterations(1) 15668 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15669 } 15670 } 15671 } 15672 } 15673 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE,k_gt_8)15674 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, k_gt_8) { 15675 TEST_REQUIRES_ARM_NEON; 15676 for (size_t k = 9; k < 16; k++) { 15677 GemmMicrokernelTester() 15678 .mr(2) 15679 .nr(16) 15680 .kr(1) 15681 .sr(1) 15682 .m(2) 15683 .n(16) 15684 .k(k) 15685 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15686 } 15687 } 15688 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE,k_gt_8_strided_a)15689 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, k_gt_8_strided_a) { 15690 TEST_REQUIRES_ARM_NEON; 15691 for (size_t k = 9; k < 16; k++) { 15692 GemmMicrokernelTester() 15693 .mr(2) 15694 .nr(16) 15695 .kr(1) 15696 .sr(1) 15697 .m(2) 15698 .n(16) 15699 .k(k) 15700 .a_stride(19) 15701 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15702 } 15703 } 15704 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE,k_gt_8_subtile)15705 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, k_gt_8_subtile) { 15706 TEST_REQUIRES_ARM_NEON; 15707 for (size_t k = 9; k < 16; k++) { 15708 for (uint32_t n = 1; n <= 16; n++) { 15709 for (uint32_t m = 1; m <= 2; m++) { 15710 GemmMicrokernelTester() 15711 .mr(2) 15712 .nr(16) 15713 .kr(1) 15714 .sr(1) 15715 .m(m) 15716 .n(n) 15717 .k(k) 15718 .iterations(1) 15719 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15720 } 15721 } 15722 } 15723 } 15724 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE,k_div_8)15725 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, k_div_8) { 15726 TEST_REQUIRES_ARM_NEON; 15727 for (size_t k = 16; k <= 80; k += 8) { 15728 
GemmMicrokernelTester() 15729 .mr(2) 15730 .nr(16) 15731 .kr(1) 15732 .sr(1) 15733 .m(2) 15734 .n(16) 15735 .k(k) 15736 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15737 } 15738 } 15739 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE,k_div_8_strided_a)15740 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, k_div_8_strided_a) { 15741 TEST_REQUIRES_ARM_NEON; 15742 for (size_t k = 16; k <= 80; k += 8) { 15743 GemmMicrokernelTester() 15744 .mr(2) 15745 .nr(16) 15746 .kr(1) 15747 .sr(1) 15748 .m(2) 15749 .n(16) 15750 .k(k) 15751 .a_stride(83) 15752 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15753 } 15754 } 15755 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE,k_div_8_subtile)15756 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, k_div_8_subtile) { 15757 TEST_REQUIRES_ARM_NEON; 15758 for (size_t k = 16; k <= 80; k += 8) { 15759 for (uint32_t n = 1; n <= 16; n++) { 15760 for (uint32_t m = 1; m <= 2; m++) { 15761 GemmMicrokernelTester() 15762 .mr(2) 15763 .nr(16) 15764 .kr(1) 15765 .sr(1) 15766 .m(m) 15767 .n(n) 15768 .k(k) 15769 .iterations(1) 15770 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15771 } 15772 } 15773 } 15774 } 15775 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE,n_gt_16)15776 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, n_gt_16) { 15777 TEST_REQUIRES_ARM_NEON; 15778 for (uint32_t n = 17; n < 32; n++) { 15779 for (size_t k = 1; k <= 40; k += 9) { 15780 GemmMicrokernelTester() 15781 .mr(2) 15782 .nr(16) 15783 .kr(1) 15784 .sr(1) 15785 .m(2) 15786 .n(n) 15787 .k(k) 15788 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15789 } 15790 } 15791 } 15792 
TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE,n_gt_16_strided_cn)15793 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, n_gt_16_strided_cn) { 15794 TEST_REQUIRES_ARM_NEON; 15795 for (uint32_t n = 17; n < 32; n++) { 15796 for (size_t k = 1; k <= 40; k += 9) { 15797 GemmMicrokernelTester() 15798 .mr(2) 15799 .nr(16) 15800 .kr(1) 15801 .sr(1) 15802 .m(2) 15803 .n(n) 15804 .k(k) 15805 .cn_stride(19) 15806 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15807 } 15808 } 15809 } 15810 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE,n_gt_16_strided_a)15811 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, n_gt_16_strided_a) { 15812 TEST_REQUIRES_ARM_NEON; 15813 for (uint32_t n = 17; n < 32; n++) { 15814 for (size_t k = 1; k <= 40; k += 9) { 15815 GemmMicrokernelTester() 15816 .mr(2) 15817 .nr(16) 15818 .kr(1) 15819 .sr(1) 15820 .m(2) 15821 .n(n) 15822 .k(k) 15823 .a_stride(43) 15824 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15825 } 15826 } 15827 } 15828 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE,n_gt_16_subtile)15829 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, n_gt_16_subtile) { 15830 TEST_REQUIRES_ARM_NEON; 15831 for (uint32_t n = 17; n < 32; n++) { 15832 for (size_t k = 1; k <= 40; k += 9) { 15833 for (uint32_t m = 1; m <= 2; m++) { 15834 GemmMicrokernelTester() 15835 .mr(2) 15836 .nr(16) 15837 .kr(1) 15838 .sr(1) 15839 .m(m) 15840 .n(n) 15841 .k(k) 15842 .iterations(1) 15843 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15844 } 15845 } 15846 } 15847 } 15848 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE,n_div_16)15849 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, n_div_16) { 15850 TEST_REQUIRES_ARM_NEON; 15851 for (uint32_t n = 32; n <= 48; n += 16) { 15852 for (size_t k = 1; k <= 40; k += 9) { 
15853 GemmMicrokernelTester() 15854 .mr(2) 15855 .nr(16) 15856 .kr(1) 15857 .sr(1) 15858 .m(2) 15859 .n(n) 15860 .k(k) 15861 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15862 } 15863 } 15864 } 15865 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE,n_div_16_strided_cn)15866 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, n_div_16_strided_cn) { 15867 TEST_REQUIRES_ARM_NEON; 15868 for (uint32_t n = 32; n <= 48; n += 16) { 15869 for (size_t k = 1; k <= 40; k += 9) { 15870 GemmMicrokernelTester() 15871 .mr(2) 15872 .nr(16) 15873 .kr(1) 15874 .sr(1) 15875 .m(2) 15876 .n(n) 15877 .k(k) 15878 .cn_stride(19) 15879 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15880 } 15881 } 15882 } 15883 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE,n_div_16_strided_a)15884 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, n_div_16_strided_a) { 15885 TEST_REQUIRES_ARM_NEON; 15886 for (uint32_t n = 32; n <= 48; n += 16) { 15887 for (size_t k = 1; k <= 40; k += 9) { 15888 GemmMicrokernelTester() 15889 .mr(2) 15890 .nr(16) 15891 .kr(1) 15892 .sr(1) 15893 .m(2) 15894 .n(n) 15895 .k(k) 15896 .a_stride(43) 15897 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15898 } 15899 } 15900 } 15901 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE,n_div_16_subtile)15902 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, n_div_16_subtile) { 15903 TEST_REQUIRES_ARM_NEON; 15904 for (uint32_t n = 32; n <= 48; n += 16) { 15905 for (size_t k = 1; k <= 40; k += 9) { 15906 for (uint32_t m = 1; m <= 2; m++) { 15907 GemmMicrokernelTester() 15908 .mr(2) 15909 .nr(16) 15910 .kr(1) 15911 .sr(1) 15912 .m(m) 15913 .n(n) 15914 .k(k) 15915 .iterations(1) 15916 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, 
xnn_qs8_requantize_rndnu); 15917 } 15918 } 15919 } 15920 } 15921 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE,strided_cm_subtile)15922 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, strided_cm_subtile) { 15923 TEST_REQUIRES_ARM_NEON; 15924 for (size_t k = 1; k <= 40; k += 9) { 15925 for (uint32_t n = 1; n <= 16; n++) { 15926 for (uint32_t m = 1; m <= 2; m++) { 15927 GemmMicrokernelTester() 15928 .mr(2) 15929 .nr(16) 15930 .kr(1) 15931 .sr(1) 15932 .m(m) 15933 .n(n) 15934 .k(k) 15935 .cm_stride(19) 15936 .iterations(1) 15937 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15938 } 15939 } 15940 } 15941 } 15942 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE,qmin)15943 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, qmin) { 15944 TEST_REQUIRES_ARM_NEON; 15945 GemmMicrokernelTester() 15946 .mr(2) 15947 .nr(16) 15948 .kr(1) 15949 .sr(1) 15950 .m(2) 15951 .n(16) 15952 .k(8) 15953 .qmin(128) 15954 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15955 } 15956 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE,qmax)15957 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, qmax) { 15958 TEST_REQUIRES_ARM_NEON; 15959 GemmMicrokernelTester() 15960 .mr(2) 15961 .nr(16) 15962 .kr(1) 15963 .sr(1) 15964 .m(2) 15965 .n(16) 15966 .k(8) 15967 .qmax(128) 15968 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15969 } 15970 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE,strided_cm)15971 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, strided_cm) { 15972 TEST_REQUIRES_ARM_NEON; 15973 GemmMicrokernelTester() 15974 .mr(2) 15975 .nr(16) 15976 .kr(1) 15977 .sr(1) 15978 .m(2) 15979 .n(16) 15980 .k(8) 15981 .cm_stride(19) 15982 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, 
xnn_qs8_requantize_rndnu); 15983 } 15984 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 15985 15986 15987 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM,k_eq_8)15988 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM, k_eq_8) { 15989 TEST_REQUIRES_ARM_NEON; 15990 GemmMicrokernelTester() 15991 .mr(2) 15992 .nr(16) 15993 .kr(1) 15994 .sr(1) 15995 .m(2) 15996 .n(16) 15997 .k(8) 15998 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15999 } 16000 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM,strided_cn)16001 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM, strided_cn) { 16002 TEST_REQUIRES_ARM_NEON; 16003 GemmMicrokernelTester() 16004 .mr(2) 16005 .nr(16) 16006 .kr(1) 16007 .sr(1) 16008 .m(2) 16009 .n(16) 16010 .k(8) 16011 .cn_stride(19) 16012 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16013 } 16014 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM,k_eq_8_strided_a)16015 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM, k_eq_8_strided_a) { 16016 TEST_REQUIRES_ARM_NEON; 16017 GemmMicrokernelTester() 16018 .mr(2) 16019 .nr(16) 16020 .kr(1) 16021 .sr(1) 16022 .m(2) 16023 .n(16) 16024 .k(8) 16025 .a_stride(11) 16026 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16027 } 16028 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM,k_eq_8_subtile)16029 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM, k_eq_8_subtile) { 16030 TEST_REQUIRES_ARM_NEON; 16031 for (uint32_t n = 1; n <= 16; n++) { 16032 for (uint32_t m = 1; m <= 2; m++) { 16033 GemmMicrokernelTester() 16034 .mr(2) 16035 .nr(16) 16036 .kr(1) 16037 .sr(1) 16038 .m(m) 16039 .n(n) 16040 .k(8) 16041 .iterations(1) 16042 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane_prfm, 
xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16043 } 16044 } 16045 } 16046 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM,k_eq_8_subtile_m)16047 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_m) { 16048 TEST_REQUIRES_ARM_NEON; 16049 for (uint32_t m = 1; m <= 2; m++) { 16050 GemmMicrokernelTester() 16051 .mr(2) 16052 .nr(16) 16053 .kr(1) 16054 .sr(1) 16055 .m(m) 16056 .n(16) 16057 .k(8) 16058 .iterations(1) 16059 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16060 } 16061 } 16062 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM,k_eq_8_subtile_n)16063 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_n) { 16064 TEST_REQUIRES_ARM_NEON; 16065 for (uint32_t n = 1; n <= 16; n++) { 16066 GemmMicrokernelTester() 16067 .mr(2) 16068 .nr(16) 16069 .kr(1) 16070 .sr(1) 16071 .m(2) 16072 .n(n) 16073 .k(8) 16074 .iterations(1) 16075 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16076 } 16077 } 16078 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM,k_lt_8)16079 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM, k_lt_8) { 16080 TEST_REQUIRES_ARM_NEON; 16081 for (size_t k = 1; k < 8; k++) { 16082 GemmMicrokernelTester() 16083 .mr(2) 16084 .nr(16) 16085 .kr(1) 16086 .sr(1) 16087 .m(2) 16088 .n(16) 16089 .k(k) 16090 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16091 } 16092 } 16093 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM,k_lt_8_strided_a)16094 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM, k_lt_8_strided_a) { 16095 TEST_REQUIRES_ARM_NEON; 16096 for (size_t k = 1; k < 8; k++) { 16097 GemmMicrokernelTester() 16098 .mr(2) 16099 .nr(16) 16100 .kr(1) 16101 .sr(1) 16102 .m(2) 16103 .n(16) 16104 .k(k) 16105 
.a_stride(11) 16106 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16107 } 16108 } 16109 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM,k_lt_8_subtile)16110 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM, k_lt_8_subtile) { 16111 TEST_REQUIRES_ARM_NEON; 16112 for (size_t k = 1; k < 8; k++) { 16113 for (uint32_t n = 1; n <= 16; n++) { 16114 for (uint32_t m = 1; m <= 2; m++) { 16115 GemmMicrokernelTester() 16116 .mr(2) 16117 .nr(16) 16118 .kr(1) 16119 .sr(1) 16120 .m(m) 16121 .n(n) 16122 .k(k) 16123 .iterations(1) 16124 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16125 } 16126 } 16127 } 16128 } 16129 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM,k_gt_8)16130 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM, k_gt_8) { 16131 TEST_REQUIRES_ARM_NEON; 16132 for (size_t k = 9; k < 16; k++) { 16133 GemmMicrokernelTester() 16134 .mr(2) 16135 .nr(16) 16136 .kr(1) 16137 .sr(1) 16138 .m(2) 16139 .n(16) 16140 .k(k) 16141 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16142 } 16143 } 16144 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM,k_gt_8_strided_a)16145 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM, k_gt_8_strided_a) { 16146 TEST_REQUIRES_ARM_NEON; 16147 for (size_t k = 9; k < 16; k++) { 16148 GemmMicrokernelTester() 16149 .mr(2) 16150 .nr(16) 16151 .kr(1) 16152 .sr(1) 16153 .m(2) 16154 .n(16) 16155 .k(k) 16156 .a_stride(19) 16157 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16158 } 16159 } 16160 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM,k_gt_8_subtile)16161 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM, k_gt_8_subtile) { 16162 TEST_REQUIRES_ARM_NEON; 16163 for 
(size_t k = 9; k < 16; k++) { 16164 for (uint32_t n = 1; n <= 16; n++) { 16165 for (uint32_t m = 1; m <= 2; m++) { 16166 GemmMicrokernelTester() 16167 .mr(2) 16168 .nr(16) 16169 .kr(1) 16170 .sr(1) 16171 .m(m) 16172 .n(n) 16173 .k(k) 16174 .iterations(1) 16175 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16176 } 16177 } 16178 } 16179 } 16180 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM,k_div_8)16181 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM, k_div_8) { 16182 TEST_REQUIRES_ARM_NEON; 16183 for (size_t k = 16; k <= 80; k += 8) { 16184 GemmMicrokernelTester() 16185 .mr(2) 16186 .nr(16) 16187 .kr(1) 16188 .sr(1) 16189 .m(2) 16190 .n(16) 16191 .k(k) 16192 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16193 } 16194 } 16195 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM,k_div_8_strided_a)16196 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM, k_div_8_strided_a) { 16197 TEST_REQUIRES_ARM_NEON; 16198 for (size_t k = 16; k <= 80; k += 8) { 16199 GemmMicrokernelTester() 16200 .mr(2) 16201 .nr(16) 16202 .kr(1) 16203 .sr(1) 16204 .m(2) 16205 .n(16) 16206 .k(k) 16207 .a_stride(83) 16208 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16209 } 16210 } 16211 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM,k_div_8_subtile)16212 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM, k_div_8_subtile) { 16213 TEST_REQUIRES_ARM_NEON; 16214 for (size_t k = 16; k <= 80; k += 8) { 16215 for (uint32_t n = 1; n <= 16; n++) { 16216 for (uint32_t m = 1; m <= 2; m++) { 16217 GemmMicrokernelTester() 16218 .mr(2) 16219 .nr(16) 16220 .kr(1) 16221 .sr(1) 16222 .m(m) 16223 .n(n) 16224 .k(k) 16225 .iterations(1) 16226 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane_prfm, 
xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16227 } 16228 } 16229 } 16230 } 16231 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM,n_gt_16)16232 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM, n_gt_16) { 16233 TEST_REQUIRES_ARM_NEON; 16234 for (uint32_t n = 17; n < 32; n++) { 16235 for (size_t k = 1; k <= 40; k += 9) { 16236 GemmMicrokernelTester() 16237 .mr(2) 16238 .nr(16) 16239 .kr(1) 16240 .sr(1) 16241 .m(2) 16242 .n(n) 16243 .k(k) 16244 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16245 } 16246 } 16247 } 16248 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM,n_gt_16_strided_cn)16249 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM, n_gt_16_strided_cn) { 16250 TEST_REQUIRES_ARM_NEON; 16251 for (uint32_t n = 17; n < 32; n++) { 16252 for (size_t k = 1; k <= 40; k += 9) { 16253 GemmMicrokernelTester() 16254 .mr(2) 16255 .nr(16) 16256 .kr(1) 16257 .sr(1) 16258 .m(2) 16259 .n(n) 16260 .k(k) 16261 .cn_stride(19) 16262 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16263 } 16264 } 16265 } 16266 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM,n_gt_16_strided_a)16267 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM, n_gt_16_strided_a) { 16268 TEST_REQUIRES_ARM_NEON; 16269 for (uint32_t n = 17; n < 32; n++) { 16270 for (size_t k = 1; k <= 40; k += 9) { 16271 GemmMicrokernelTester() 16272 .mr(2) 16273 .nr(16) 16274 .kr(1) 16275 .sr(1) 16276 .m(2) 16277 .n(n) 16278 .k(k) 16279 .a_stride(43) 16280 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16281 } 16282 } 16283 } 16284 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM,n_gt_16_subtile)16285 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM, n_gt_16_subtile) { 16286 TEST_REQUIRES_ARM_NEON; 
16287 for (uint32_t n = 17; n < 32; n++) { 16288 for (size_t k = 1; k <= 40; k += 9) { 16289 for (uint32_t m = 1; m <= 2; m++) { 16290 GemmMicrokernelTester() 16291 .mr(2) 16292 .nr(16) 16293 .kr(1) 16294 .sr(1) 16295 .m(m) 16296 .n(n) 16297 .k(k) 16298 .iterations(1) 16299 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16300 } 16301 } 16302 } 16303 } 16304 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM,n_div_16)16305 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM, n_div_16) { 16306 TEST_REQUIRES_ARM_NEON; 16307 for (uint32_t n = 32; n <= 48; n += 16) { 16308 for (size_t k = 1; k <= 40; k += 9) { 16309 GemmMicrokernelTester() 16310 .mr(2) 16311 .nr(16) 16312 .kr(1) 16313 .sr(1) 16314 .m(2) 16315 .n(n) 16316 .k(k) 16317 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16318 } 16319 } 16320 } 16321 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM,n_div_16_strided_cn)16322 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM, n_div_16_strided_cn) { 16323 TEST_REQUIRES_ARM_NEON; 16324 for (uint32_t n = 32; n <= 48; n += 16) { 16325 for (size_t k = 1; k <= 40; k += 9) { 16326 GemmMicrokernelTester() 16327 .mr(2) 16328 .nr(16) 16329 .kr(1) 16330 .sr(1) 16331 .m(2) 16332 .n(n) 16333 .k(k) 16334 .cn_stride(19) 16335 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16336 } 16337 } 16338 } 16339 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM,n_div_16_strided_a)16340 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM, n_div_16_strided_a) { 16341 TEST_REQUIRES_ARM_NEON; 16342 for (uint32_t n = 32; n <= 48; n += 16) { 16343 for (size_t k = 1; k <= 40; k += 9) { 16344 GemmMicrokernelTester() 16345 .mr(2) 16346 .nr(16) 16347 .kr(1) 16348 .sr(1) 16349 .m(2) 16350 .n(n) 16351 .k(k) 16352 
.a_stride(43) 16353 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16354 } 16355 } 16356 } 16357 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM,n_div_16_subtile)16358 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM, n_div_16_subtile) { 16359 TEST_REQUIRES_ARM_NEON; 16360 for (uint32_t n = 32; n <= 48; n += 16) { 16361 for (size_t k = 1; k <= 40; k += 9) { 16362 for (uint32_t m = 1; m <= 2; m++) { 16363 GemmMicrokernelTester() 16364 .mr(2) 16365 .nr(16) 16366 .kr(1) 16367 .sr(1) 16368 .m(m) 16369 .n(n) 16370 .k(k) 16371 .iterations(1) 16372 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16373 } 16374 } 16375 } 16376 } 16377 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM,strided_cm_subtile)16378 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM, strided_cm_subtile) { 16379 TEST_REQUIRES_ARM_NEON; 16380 for (size_t k = 1; k <= 40; k += 9) { 16381 for (uint32_t n = 1; n <= 16; n++) { 16382 for (uint32_t m = 1; m <= 2; m++) { 16383 GemmMicrokernelTester() 16384 .mr(2) 16385 .nr(16) 16386 .kr(1) 16387 .sr(1) 16388 .m(m) 16389 .n(n) 16390 .k(k) 16391 .cm_stride(19) 16392 .iterations(1) 16393 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16394 } 16395 } 16396 } 16397 } 16398 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM,qmin)16399 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM, qmin) { 16400 TEST_REQUIRES_ARM_NEON; 16401 GemmMicrokernelTester() 16402 .mr(2) 16403 .nr(16) 16404 .kr(1) 16405 .sr(1) 16406 .m(2) 16407 .n(16) 16408 .k(8) 16409 .qmin(128) 16410 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16411 } 16412 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM,qmax)16413 
TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM, qmax) { 16414 TEST_REQUIRES_ARM_NEON; 16415 GemmMicrokernelTester() 16416 .mr(2) 16417 .nr(16) 16418 .kr(1) 16419 .sr(1) 16420 .m(2) 16421 .n(16) 16422 .k(8) 16423 .qmax(128) 16424 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16425 } 16426 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM,strided_cm)16427 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE_PRFM, strided_cm) { 16428 TEST_REQUIRES_ARM_NEON; 16429 GemmMicrokernelTester() 16430 .mr(2) 16431 .nr(16) 16432 .kr(1) 16433 .sr(1) 16434 .m(2) 16435 .n(16) 16436 .k(8) 16437 .cm_stride(19) 16438 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16439 } 16440 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 16441 16442 16443 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL,k_eq_16)16444 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL, k_eq_16) { 16445 TEST_REQUIRES_ARM_NEON; 16446 GemmMicrokernelTester() 16447 .mr(2) 16448 .nr(16) 16449 .kr(16) 16450 .sr(1) 16451 .m(2) 16452 .n(16) 16453 .k(16) 16454 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16455 } 16456 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL,strided_cn)16457 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL, strided_cn) { 16458 TEST_REQUIRES_ARM_NEON; 16459 GemmMicrokernelTester() 16460 .mr(2) 16461 .nr(16) 16462 .kr(16) 16463 .sr(1) 16464 .m(2) 16465 .n(16) 16466 .k(16) 16467 .cn_stride(19) 16468 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16469 } 16470 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL,k_eq_16_strided_a)16471 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL, k_eq_16_strided_a) { 16472 TEST_REQUIRES_ARM_NEON; 16473 
GemmMicrokernelTester() 16474 .mr(2) 16475 .nr(16) 16476 .kr(16) 16477 .sr(1) 16478 .m(2) 16479 .n(16) 16480 .k(16) 16481 .a_stride(19) 16482 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16483 } 16484 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL,k_eq_16_subtile)16485 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL, k_eq_16_subtile) { 16486 TEST_REQUIRES_ARM_NEON; 16487 for (uint32_t n = 1; n <= 16; n++) { 16488 for (uint32_t m = 1; m <= 2; m++) { 16489 GemmMicrokernelTester() 16490 .mr(2) 16491 .nr(16) 16492 .kr(16) 16493 .sr(1) 16494 .m(m) 16495 .n(n) 16496 .k(16) 16497 .iterations(1) 16498 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16499 } 16500 } 16501 } 16502 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL,k_eq_16_subtile_m)16503 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL, k_eq_16_subtile_m) { 16504 TEST_REQUIRES_ARM_NEON; 16505 for (uint32_t m = 1; m <= 2; m++) { 16506 GemmMicrokernelTester() 16507 .mr(2) 16508 .nr(16) 16509 .kr(16) 16510 .sr(1) 16511 .m(m) 16512 .n(16) 16513 .k(16) 16514 .iterations(1) 16515 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16516 } 16517 } 16518 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL,k_eq_16_subtile_n)16519 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL, k_eq_16_subtile_n) { 16520 TEST_REQUIRES_ARM_NEON; 16521 for (uint32_t n = 1; n <= 16; n++) { 16522 GemmMicrokernelTester() 16523 .mr(2) 16524 .nr(16) 16525 .kr(16) 16526 .sr(1) 16527 .m(2) 16528 .n(n) 16529 .k(16) 16530 .iterations(1) 16531 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16532 } 16533 } 16534 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL,k_lt_16)16535 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL, k_lt_16) { 16536 
TEST_REQUIRES_ARM_NEON; 16537 for (size_t k = 1; k < 16; k++) { 16538 GemmMicrokernelTester() 16539 .mr(2) 16540 .nr(16) 16541 .kr(16) 16542 .sr(1) 16543 .m(2) 16544 .n(16) 16545 .k(k) 16546 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16547 } 16548 } 16549 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL,k_lt_16_strided_a)16550 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL, k_lt_16_strided_a) { 16551 TEST_REQUIRES_ARM_NEON; 16552 for (size_t k = 1; k < 16; k++) { 16553 GemmMicrokernelTester() 16554 .mr(2) 16555 .nr(16) 16556 .kr(16) 16557 .sr(1) 16558 .m(2) 16559 .n(16) 16560 .k(k) 16561 .a_stride(19) 16562 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16563 } 16564 } 16565 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL,k_lt_16_subtile)16566 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL, k_lt_16_subtile) { 16567 TEST_REQUIRES_ARM_NEON; 16568 for (size_t k = 1; k < 16; k++) { 16569 for (uint32_t n = 1; n <= 16; n++) { 16570 for (uint32_t m = 1; m <= 2; m++) { 16571 GemmMicrokernelTester() 16572 .mr(2) 16573 .nr(16) 16574 .kr(16) 16575 .sr(1) 16576 .m(m) 16577 .n(n) 16578 .k(k) 16579 .iterations(1) 16580 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16581 } 16582 } 16583 } 16584 } 16585 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL,k_gt_16)16586 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL, k_gt_16) { 16587 TEST_REQUIRES_ARM_NEON; 16588 for (size_t k = 17; k < 32; k++) { 16589 GemmMicrokernelTester() 16590 .mr(2) 16591 .nr(16) 16592 .kr(16) 16593 .sr(1) 16594 .m(2) 16595 .n(16) 16596 .k(k) 16597 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16598 } 16599 } 16600 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL,k_gt_16_strided_a)16601 
TEST(QS8_GEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL, k_gt_16_strided_a) { 16602 TEST_REQUIRES_ARM_NEON; 16603 for (size_t k = 17; k < 32; k++) { 16604 GemmMicrokernelTester() 16605 .mr(2) 16606 .nr(16) 16607 .kr(16) 16608 .sr(1) 16609 .m(2) 16610 .n(16) 16611 .k(k) 16612 .a_stride(37) 16613 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16614 } 16615 } 16616 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL,k_gt_16_subtile)16617 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL, k_gt_16_subtile) { 16618 TEST_REQUIRES_ARM_NEON; 16619 for (size_t k = 17; k < 32; k++) { 16620 for (uint32_t n = 1; n <= 16; n++) { 16621 for (uint32_t m = 1; m <= 2; m++) { 16622 GemmMicrokernelTester() 16623 .mr(2) 16624 .nr(16) 16625 .kr(16) 16626 .sr(1) 16627 .m(m) 16628 .n(n) 16629 .k(k) 16630 .iterations(1) 16631 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16632 } 16633 } 16634 } 16635 } 16636 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL,k_div_16)16637 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL, k_div_16) { 16638 TEST_REQUIRES_ARM_NEON; 16639 for (size_t k = 32; k <= 160; k += 16) { 16640 GemmMicrokernelTester() 16641 .mr(2) 16642 .nr(16) 16643 .kr(16) 16644 .sr(1) 16645 .m(2) 16646 .n(16) 16647 .k(k) 16648 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16649 } 16650 } 16651 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL,k_div_16_strided_a)16652 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL, k_div_16_strided_a) { 16653 TEST_REQUIRES_ARM_NEON; 16654 for (size_t k = 32; k <= 160; k += 16) { 16655 GemmMicrokernelTester() 16656 .mr(2) 16657 .nr(16) 16658 .kr(16) 16659 .sr(1) 16660 .m(2) 16661 .n(16) 16662 .k(k) 16663 .a_stride(163) 16664 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, 
xnn_qs8_requantize_rndnu); 16665 } 16666 } 16667 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL,k_div_16_subtile)16668 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL, k_div_16_subtile) { 16669 TEST_REQUIRES_ARM_NEON; 16670 for (size_t k = 32; k <= 160; k += 16) { 16671 for (uint32_t n = 1; n <= 16; n++) { 16672 for (uint32_t m = 1; m <= 2; m++) { 16673 GemmMicrokernelTester() 16674 .mr(2) 16675 .nr(16) 16676 .kr(16) 16677 .sr(1) 16678 .m(m) 16679 .n(n) 16680 .k(k) 16681 .iterations(1) 16682 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16683 } 16684 } 16685 } 16686 } 16687 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL,n_gt_16)16688 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL, n_gt_16) { 16689 TEST_REQUIRES_ARM_NEON; 16690 for (uint32_t n = 17; n < 32; n++) { 16691 for (size_t k = 1; k <= 80; k += 17) { 16692 GemmMicrokernelTester() 16693 .mr(2) 16694 .nr(16) 16695 .kr(16) 16696 .sr(1) 16697 .m(2) 16698 .n(n) 16699 .k(k) 16700 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16701 } 16702 } 16703 } 16704 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL,n_gt_16_strided_cn)16705 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL, n_gt_16_strided_cn) { 16706 TEST_REQUIRES_ARM_NEON; 16707 for (uint32_t n = 17; n < 32; n++) { 16708 for (size_t k = 1; k <= 80; k += 17) { 16709 GemmMicrokernelTester() 16710 .mr(2) 16711 .nr(16) 16712 .kr(16) 16713 .sr(1) 16714 .m(2) 16715 .n(n) 16716 .k(k) 16717 .cn_stride(19) 16718 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16719 } 16720 } 16721 } 16722 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL,n_gt_16_strided_a)16723 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL, n_gt_16_strided_a) { 16724 TEST_REQUIRES_ARM_NEON; 16725 for (uint32_t n = 17; n < 32; n++) { 16726 for (size_t k = 1; k <= 80; k 
+= 17) { 16727 GemmMicrokernelTester() 16728 .mr(2) 16729 .nr(16) 16730 .kr(16) 16731 .sr(1) 16732 .m(2) 16733 .n(n) 16734 .k(k) 16735 .a_stride(83) 16736 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16737 } 16738 } 16739 } 16740 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL,n_gt_16_subtile)16741 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL, n_gt_16_subtile) { 16742 TEST_REQUIRES_ARM_NEON; 16743 for (uint32_t n = 17; n < 32; n++) { 16744 for (size_t k = 1; k <= 80; k += 17) { 16745 for (uint32_t m = 1; m <= 2; m++) { 16746 GemmMicrokernelTester() 16747 .mr(2) 16748 .nr(16) 16749 .kr(16) 16750 .sr(1) 16751 .m(m) 16752 .n(n) 16753 .k(k) 16754 .iterations(1) 16755 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16756 } 16757 } 16758 } 16759 } 16760 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL,n_div_16)16761 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL, n_div_16) { 16762 TEST_REQUIRES_ARM_NEON; 16763 for (uint32_t n = 32; n <= 48; n += 16) { 16764 for (size_t k = 1; k <= 80; k += 17) { 16765 GemmMicrokernelTester() 16766 .mr(2) 16767 .nr(16) 16768 .kr(16) 16769 .sr(1) 16770 .m(2) 16771 .n(n) 16772 .k(k) 16773 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16774 } 16775 } 16776 } 16777 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL,n_div_16_strided_cn)16778 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL, n_div_16_strided_cn) { 16779 TEST_REQUIRES_ARM_NEON; 16780 for (uint32_t n = 32; n <= 48; n += 16) { 16781 for (size_t k = 1; k <= 80; k += 17) { 16782 GemmMicrokernelTester() 16783 .mr(2) 16784 .nr(16) 16785 .kr(16) 16786 .sr(1) 16787 .m(2) 16788 .n(n) 16789 .k(k) 16790 .cn_stride(19) 16791 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 
16792 } 16793 } 16794 } 16795 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL,n_div_16_strided_a)16796 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL, n_div_16_strided_a) { 16797 TEST_REQUIRES_ARM_NEON; 16798 for (uint32_t n = 32; n <= 48; n += 16) { 16799 for (size_t k = 1; k <= 80; k += 17) { 16800 GemmMicrokernelTester() 16801 .mr(2) 16802 .nr(16) 16803 .kr(16) 16804 .sr(1) 16805 .m(2) 16806 .n(n) 16807 .k(k) 16808 .a_stride(83) 16809 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16810 } 16811 } 16812 } 16813 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL,n_div_16_subtile)16814 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL, n_div_16_subtile) { 16815 TEST_REQUIRES_ARM_NEON; 16816 for (uint32_t n = 32; n <= 48; n += 16) { 16817 for (size_t k = 1; k <= 80; k += 17) { 16818 for (uint32_t m = 1; m <= 2; m++) { 16819 GemmMicrokernelTester() 16820 .mr(2) 16821 .nr(16) 16822 .kr(16) 16823 .sr(1) 16824 .m(m) 16825 .n(n) 16826 .k(k) 16827 .iterations(1) 16828 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16829 } 16830 } 16831 } 16832 } 16833 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL,strided_cm_subtile)16834 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL, strided_cm_subtile) { 16835 TEST_REQUIRES_ARM_NEON; 16836 for (size_t k = 1; k <= 80; k += 17) { 16837 for (uint32_t n = 1; n <= 16; n++) { 16838 for (uint32_t m = 1; m <= 2; m++) { 16839 GemmMicrokernelTester() 16840 .mr(2) 16841 .nr(16) 16842 .kr(16) 16843 .sr(1) 16844 .m(m) 16845 .n(n) 16846 .k(k) 16847 .cm_stride(19) 16848 .iterations(1) 16849 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16850 } 16851 } 16852 } 16853 } 16854 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL,qmin)16855 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL, qmin) { 16856 TEST_REQUIRES_ARM_NEON; 
16857 GemmMicrokernelTester() 16858 .mr(2) 16859 .nr(16) 16860 .kr(16) 16861 .sr(1) 16862 .m(2) 16863 .n(16) 16864 .k(16) 16865 .qmin(128) 16866 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16867 } 16868 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL,qmax)16869 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL, qmax) { 16870 TEST_REQUIRES_ARM_NEON; 16871 GemmMicrokernelTester() 16872 .mr(2) 16873 .nr(16) 16874 .kr(16) 16875 .sr(1) 16876 .m(2) 16877 .n(16) 16878 .k(16) 16879 .qmax(128) 16880 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16881 } 16882 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL,strided_cm)16883 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL, strided_cm) { 16884 TEST_REQUIRES_ARM_NEON; 16885 GemmMicrokernelTester() 16886 .mr(2) 16887 .nr(16) 16888 .kr(16) 16889 .sr(1) 16890 .m(2) 16891 .n(16) 16892 .k(16) 16893 .cm_stride(19) 16894 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16895 } 16896 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 16897 16898 16899 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_GEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP,k_eq_8)16900 TEST(QS8_GEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, k_eq_8) { 16901 TEST_REQUIRES_ARM_NEON; 16902 GemmMicrokernelTester() 16903 .mr(3) 16904 .nr(8) 16905 .kr(1) 16906 .sr(1) 16907 .m(3) 16908 .n(8) 16909 .k(8) 16910 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16911 } 16912 TEST(QS8_GEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP,strided_cn)16913 TEST(QS8_GEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, strided_cn) { 16914 TEST_REQUIRES_ARM_NEON; 16915 GemmMicrokernelTester() 16916 .mr(3) 16917 .nr(8) 16918 .kr(1) 16919 .sr(1) 16920 .m(3) 16921 .n(8) 16922 .k(8) 16923 .cn_stride(11) 
16924 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16925 } 16926 TEST(QS8_GEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP,k_eq_8_strided_a)16927 TEST(QS8_GEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, k_eq_8_strided_a) { 16928 TEST_REQUIRES_ARM_NEON; 16929 GemmMicrokernelTester() 16930 .mr(3) 16931 .nr(8) 16932 .kr(1) 16933 .sr(1) 16934 .m(3) 16935 .n(8) 16936 .k(8) 16937 .a_stride(11) 16938 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16939 } 16940 TEST(QS8_GEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP,k_eq_8_subtile)16941 TEST(QS8_GEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, k_eq_8_subtile) { 16942 TEST_REQUIRES_ARM_NEON; 16943 for (uint32_t n = 1; n <= 8; n++) { 16944 for (uint32_t m = 1; m <= 3; m++) { 16945 GemmMicrokernelTester() 16946 .mr(3) 16947 .nr(8) 16948 .kr(1) 16949 .sr(1) 16950 .m(m) 16951 .n(n) 16952 .k(8) 16953 .iterations(1) 16954 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16955 } 16956 } 16957 } 16958 TEST(QS8_GEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP,k_eq_8_subtile_m)16959 TEST(QS8_GEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, k_eq_8_subtile_m) { 16960 TEST_REQUIRES_ARM_NEON; 16961 for (uint32_t m = 1; m <= 3; m++) { 16962 GemmMicrokernelTester() 16963 .mr(3) 16964 .nr(8) 16965 .kr(1) 16966 .sr(1) 16967 .m(m) 16968 .n(8) 16969 .k(8) 16970 .iterations(1) 16971 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16972 } 16973 } 16974 TEST(QS8_GEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP,k_eq_8_subtile_n)16975 TEST(QS8_GEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, k_eq_8_subtile_n) { 16976 TEST_REQUIRES_ARM_NEON; 16977 for (uint32_t n = 1; n <= 8; n++) { 16978 GemmMicrokernelTester() 16979 .mr(3) 16980 .nr(8) 16981 
.kr(1) 16982 .sr(1) 16983 .m(3) 16984 .n(n) 16985 .k(8) 16986 .iterations(1) 16987 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16988 } 16989 } 16990 TEST(QS8_GEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP,k_lt_8)16991 TEST(QS8_GEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, k_lt_8) { 16992 TEST_REQUIRES_ARM_NEON; 16993 for (size_t k = 1; k < 8; k++) { 16994 GemmMicrokernelTester() 16995 .mr(3) 16996 .nr(8) 16997 .kr(1) 16998 .sr(1) 16999 .m(3) 17000 .n(8) 17001 .k(k) 17002 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17003 } 17004 } 17005 TEST(QS8_GEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP,k_lt_8_strided_a)17006 TEST(QS8_GEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, k_lt_8_strided_a) { 17007 TEST_REQUIRES_ARM_NEON; 17008 for (size_t k = 1; k < 8; k++) { 17009 GemmMicrokernelTester() 17010 .mr(3) 17011 .nr(8) 17012 .kr(1) 17013 .sr(1) 17014 .m(3) 17015 .n(8) 17016 .k(k) 17017 .a_stride(11) 17018 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17019 } 17020 } 17021 TEST(QS8_GEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP,k_lt_8_subtile)17022 TEST(QS8_GEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, k_lt_8_subtile) { 17023 TEST_REQUIRES_ARM_NEON; 17024 for (size_t k = 1; k < 8; k++) { 17025 for (uint32_t n = 1; n <= 8; n++) { 17026 for (uint32_t m = 1; m <= 3; m++) { 17027 GemmMicrokernelTester() 17028 .mr(3) 17029 .nr(8) 17030 .kr(1) 17031 .sr(1) 17032 .m(m) 17033 .n(n) 17034 .k(k) 17035 .iterations(1) 17036 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17037 } 17038 } 17039 } 17040 } 17041 TEST(QS8_GEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP,k_gt_8)17042 TEST(QS8_GEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, k_gt_8) { 17043 
TEST_REQUIRES_ARM_NEON; 17044 for (size_t k = 9; k < 16; k++) { 17045 GemmMicrokernelTester() 17046 .mr(3) 17047 .nr(8) 17048 .kr(1) 17049 .sr(1) 17050 .m(3) 17051 .n(8) 17052 .k(k) 17053 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17054 } 17055 } 17056 TEST(QS8_GEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP,k_gt_8_strided_a)17057 TEST(QS8_GEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, k_gt_8_strided_a) { 17058 TEST_REQUIRES_ARM_NEON; 17059 for (size_t k = 9; k < 16; k++) { 17060 GemmMicrokernelTester() 17061 .mr(3) 17062 .nr(8) 17063 .kr(1) 17064 .sr(1) 17065 .m(3) 17066 .n(8) 17067 .k(k) 17068 .a_stride(19) 17069 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17070 } 17071 } 17072 TEST(QS8_GEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP,k_gt_8_subtile)17073 TEST(QS8_GEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, k_gt_8_subtile) { 17074 TEST_REQUIRES_ARM_NEON; 17075 for (size_t k = 9; k < 16; k++) { 17076 for (uint32_t n = 1; n <= 8; n++) { 17077 for (uint32_t m = 1; m <= 3; m++) { 17078 GemmMicrokernelTester() 17079 .mr(3) 17080 .nr(8) 17081 .kr(1) 17082 .sr(1) 17083 .m(m) 17084 .n(n) 17085 .k(k) 17086 .iterations(1) 17087 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17088 } 17089 } 17090 } 17091 } 17092 TEST(QS8_GEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP,k_div_8)17093 TEST(QS8_GEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, k_div_8) { 17094 TEST_REQUIRES_ARM_NEON; 17095 for (size_t k = 16; k <= 80; k += 8) { 17096 GemmMicrokernelTester() 17097 .mr(3) 17098 .nr(8) 17099 .kr(1) 17100 .sr(1) 17101 .m(3) 17102 .n(8) 17103 .k(k) 17104 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17105 } 17106 } 17107 
// Full 3x8 tile for k = 16, 24, ..., 80 with a_stride set to 83.
TEST(QS8_GEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, k_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(3)
      .n(8)
      .k(k)
      .a_stride(83)
      .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// Every (m, n) with m in [1, 3] and n in [1, 8] for k = 16, 24, ..., 80, one iteration each.
TEST(QS8_GEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// m = 3 with n in [9, 15] and k = 1, 10, ..., 37.
TEST(QS8_GEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// m = 3 with n in [9, 15] and k = 1, 10, ..., 37, cn_stride set to 11.
TEST(QS8_GEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// m = 3 with n in [9, 15] and k = 1, 10, ..., 37, a_stride set to 43.
TEST(QS8_GEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .a_stride(43)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// m in [1, 3] with n in [9, 15] and k = 1, 10, ..., 37, one iteration each.
TEST(QS8_GEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// m = 3 with n = 16, 24 and k = 1, 10, ..., 37.
TEST(QS8_GEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

TEST(QS8_GEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP,n_div_8_strided_cn)17234 TEST(QS8_GEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, n_div_8_strided_cn) { 17235 TEST_REQUIRES_ARM_NEON; 17236 for (uint32_t n = 16; n <= 24; n += 8) { 17237 for (size_t k = 1; k <= 40; k += 9) { 17238 GemmMicrokernelTester() 17239 .mr(3) 17240 .nr(8) 17241 .kr(1) 17242 .sr(1) 17243 .m(3) 17244 .n(n) 17245 .k(k) 17246 .cn_stride(11) 17247 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17248 } 17249 } 17250 } 17251 TEST(QS8_GEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP,n_div_8_strided_a)17252 TEST(QS8_GEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, n_div_8_strided_a) { 17253 TEST_REQUIRES_ARM_NEON; 17254 for (uint32_t n = 16; n <= 24; n += 8) { 17255 for (size_t k = 1; k <= 40; k += 9) { 17256 GemmMicrokernelTester() 17257 .mr(3) 17258 .nr(8) 17259 .kr(1) 17260 .sr(1) 17261 .m(3) 17262 .n(n) 17263 .k(k) 17264 .a_stride(43) 17265 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17266 } 17267 } 17268 } 17269 TEST(QS8_GEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP,n_div_8_subtile)17270 TEST(QS8_GEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, n_div_8_subtile) { 17271 TEST_REQUIRES_ARM_NEON; 17272 for (uint32_t n = 16; n <= 24; n += 8) { 17273 for (size_t k = 1; k <= 40; k += 9) { 17274 for (uint32_t m = 1; m <= 3; m++) { 17275 GemmMicrokernelTester() 17276 .mr(3) 17277 .nr(8) 17278 .kr(1) 17279 .sr(1) 17280 .m(m) 17281 .n(n) 17282 .k(k) 17283 .iterations(1) 17284 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17285 } 17286 } 17287 } 17288 } 17289 TEST(QS8_GEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP,strided_cm_subtile)17290 TEST(QS8_GEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, strided_cm_subtile) { 17291 TEST_REQUIRES_ARM_NEON; 17292 for (size_t k = 1; k <= 
40; k += 9) { 17293 for (uint32_t n = 1; n <= 8; n++) { 17294 for (uint32_t m = 1; m <= 3; m++) { 17295 GemmMicrokernelTester() 17296 .mr(3) 17297 .nr(8) 17298 .kr(1) 17299 .sr(1) 17300 .m(m) 17301 .n(n) 17302 .k(k) 17303 .cm_stride(11) 17304 .iterations(1) 17305 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17306 } 17307 } 17308 } 17309 } 17310 TEST(QS8_GEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP,qmin)17311 TEST(QS8_GEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, qmin) { 17312 TEST_REQUIRES_ARM_NEON; 17313 GemmMicrokernelTester() 17314 .mr(3) 17315 .nr(8) 17316 .kr(1) 17317 .sr(1) 17318 .m(3) 17319 .n(8) 17320 .k(8) 17321 .qmin(128) 17322 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17323 } 17324 TEST(QS8_GEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP,qmax)17325 TEST(QS8_GEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, qmax) { 17326 TEST_REQUIRES_ARM_NEON; 17327 GemmMicrokernelTester() 17328 .mr(3) 17329 .nr(8) 17330 .kr(1) 17331 .sr(1) 17332 .m(3) 17333 .n(8) 17334 .k(8) 17335 .qmax(128) 17336 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17337 } 17338 TEST(QS8_GEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP,strided_cm)17339 TEST(QS8_GEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, strided_cm) { 17340 TEST_REQUIRES_ARM_NEON; 17341 GemmMicrokernelTester() 17342 .mr(3) 17343 .nr(8) 17344 .kr(1) 17345 .sr(1) 17346 .m(3) 17347 .n(8) 17348 .k(8) 17349 .cm_stride(11) 17350 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17351 } 17352 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 17353 17354 17355 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_GEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE_PRFM,k_eq_8)17356 TEST(QS8_GEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE_PRFM, 
k_eq_8) { 17357 TEST_REQUIRES_ARM_NEON; 17358 GemmMicrokernelTester() 17359 .mr(3) 17360 .nr(16) 17361 .kr(1) 17362 .sr(1) 17363 .m(3) 17364 .n(16) 17365 .k(8) 17366 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17367 } 17368 TEST(QS8_GEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE_PRFM,strided_cn)17369 TEST(QS8_GEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE_PRFM, strided_cn) { 17370 TEST_REQUIRES_ARM_NEON; 17371 GemmMicrokernelTester() 17372 .mr(3) 17373 .nr(16) 17374 .kr(1) 17375 .sr(1) 17376 .m(3) 17377 .n(16) 17378 .k(8) 17379 .cn_stride(19) 17380 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17381 } 17382 TEST(QS8_GEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE_PRFM,k_eq_8_strided_a)17383 TEST(QS8_GEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE_PRFM, k_eq_8_strided_a) { 17384 TEST_REQUIRES_ARM_NEON; 17385 GemmMicrokernelTester() 17386 .mr(3) 17387 .nr(16) 17388 .kr(1) 17389 .sr(1) 17390 .m(3) 17391 .n(16) 17392 .k(8) 17393 .a_stride(11) 17394 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17395 } 17396 TEST(QS8_GEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE_PRFM,k_eq_8_subtile)17397 TEST(QS8_GEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE_PRFM, k_eq_8_subtile) { 17398 TEST_REQUIRES_ARM_NEON; 17399 for (uint32_t n = 1; n <= 16; n++) { 17400 for (uint32_t m = 1; m <= 3; m++) { 17401 GemmMicrokernelTester() 17402 .mr(3) 17403 .nr(16) 17404 .kr(1) 17405 .sr(1) 17406 .m(m) 17407 .n(n) 17408 .k(8) 17409 .iterations(1) 17410 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17411 } 17412 } 17413 } 17414 TEST(QS8_GEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE_PRFM,k_eq_8_subtile_m)17415 TEST(QS8_GEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_m) { 17416 
TEST_REQUIRES_ARM_NEON; 17417 for (uint32_t m = 1; m <= 3; m++) { 17418 GemmMicrokernelTester() 17419 .mr(3) 17420 .nr(16) 17421 .kr(1) 17422 .sr(1) 17423 .m(m) 17424 .n(16) 17425 .k(8) 17426 .iterations(1) 17427 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17428 } 17429 } 17430 TEST(QS8_GEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE_PRFM,k_eq_8_subtile_n)17431 TEST(QS8_GEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_n) { 17432 TEST_REQUIRES_ARM_NEON; 17433 for (uint32_t n = 1; n <= 16; n++) { 17434 GemmMicrokernelTester() 17435 .mr(3) 17436 .nr(16) 17437 .kr(1) 17438 .sr(1) 17439 .m(3) 17440 .n(n) 17441 .k(8) 17442 .iterations(1) 17443 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17444 } 17445 } 17446 TEST(QS8_GEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE_PRFM,k_lt_8)17447 TEST(QS8_GEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE_PRFM, k_lt_8) { 17448 TEST_REQUIRES_ARM_NEON; 17449 for (size_t k = 1; k < 8; k++) { 17450 GemmMicrokernelTester() 17451 .mr(3) 17452 .nr(16) 17453 .kr(1) 17454 .sr(1) 17455 .m(3) 17456 .n(16) 17457 .k(k) 17458 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17459 } 17460 } 17461 TEST(QS8_GEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE_PRFM,k_lt_8_strided_a)17462 TEST(QS8_GEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE_PRFM, k_lt_8_strided_a) { 17463 TEST_REQUIRES_ARM_NEON; 17464 for (size_t k = 1; k < 8; k++) { 17465 GemmMicrokernelTester() 17466 .mr(3) 17467 .nr(16) 17468 .kr(1) 17469 .sr(1) 17470 .m(3) 17471 .n(16) 17472 .k(k) 17473 .a_stride(11) 17474 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17475 } 17476 } 17477 TEST(QS8_GEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE_PRFM,k_lt_8_subtile)17478 
// 3x16 neon_mlal_lane_prfm (continued): every case uses mr(3), nr(16), kr(1), sr(1).

// k in 1..7, n in 1..16, m in 1..3, one iteration each.
TEST(QS8_GEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE_PRFM, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size < 8; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
            .mr(3).nr(16).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane_prfm,
                  xnn_init_qs8_conv_minmax_rndnu_neon_params,
                  xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// k in 9..15, m = 3, n = 16.
TEST(QS8_GEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE_PRFM, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester()
        .mr(3).nr(16).kr(1).sr(1)
        .m(3).n(16).k(k_size)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane_prfm,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
  }
}

// k in 9..15, m = 3, n = 16 with a_stride(19).
TEST(QS8_GEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE_PRFM, k_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester()
        .mr(3).nr(16).kr(1).sr(1)
        .m(3).n(16).k(k_size)
        .a_stride(19)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane_prfm,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
  }
}

// k in 9..15, n in 1..16, m in 1..3, one iteration each.
TEST(QS8_GEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE_PRFM, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
            .mr(3).nr(16).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane_prfm,
                  xnn_init_qs8_conv_minmax_rndnu_neon_params,
                  xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// k in {16, 24, ..., 80}, m = 3, n = 16.
TEST(QS8_GEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE_PRFM, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester()
        .mr(3).nr(16).kr(1).sr(1)
        .m(3).n(16).k(k_size)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane_prfm,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
  }
}

// k in {16, 24, ..., 80}, m = 3, n = 16 with a_stride(83).
TEST(QS8_GEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE_PRFM, k_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester()
        .mr(3).nr(16).kr(1).sr(1)
        .m(3).n(16).k(k_size)
        .a_stride(83)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane_prfm,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
  }
}

// k in {16, 24, ..., 80}, n in 1..16, m in 1..3, one iteration each.
TEST(QS8_GEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE_PRFM, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
            .mr(3).nr(16).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane_prfm,
                  xnn_init_qs8_conv_minmax_rndnu_neon_params,
                  xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// n in 17..31, k in {1, 10, 19, 28, 37}, m = 3.
TEST(QS8_GEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE_PRFM, n_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 17; n_size < 32; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(16).kr(1).sr(1)
          .m(3).n(n_size).k(k_size)
          .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane_prfm,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
    }
  }
}

// n in 17..31, k in {1, 10, 19, 28, 37}, m = 3, cn_stride(19).
TEST(QS8_GEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE_PRFM, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 17; n_size < 32; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(16).kr(1).sr(1)
          .m(3).n(n_size).k(k_size)
          .cn_stride(19)
          .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane_prfm,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
    }
  }
}

// n in 17..31, k in {1, 10, 19, 28, 37}, m = 3, a_stride(43).
TEST(QS8_GEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE_PRFM, n_gt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 17; n_size < 32; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(16).kr(1).sr(1)
          .m(3).n(n_size).k(k_size)
          .a_stride(43)
          .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane_prfm,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
    }
  }
}

// n in 17..31, k in {1, 10, 19, 28, 37}, m in 1..3, one iteration each.
TEST(QS8_GEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE_PRFM, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 17; n_size < 32; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 3; ++m_size) {
        GemmMicrokernelTester()
            .mr(3).nr(16).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane_prfm,
                  xnn_init_qs8_conv_minmax_rndnu_neon_params,
                  xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// n in {32, 48}, k in {1, 10, 19, 28, 37}, m = 3.
TEST(QS8_GEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE_PRFM, n_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(16).kr(1).sr(1)
          .m(3).n(n_size).k(k_size)
          .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane_prfm,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
    }
  }
}

// n in {32, 48}, k in {1, 10, 19, 28, 37}, m = 3, cn_stride(19).
TEST(QS8_GEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE_PRFM, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(16).kr(1).sr(1)
          .m(3).n(n_size).k(k_size)
          .cn_stride(19)
          .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane_prfm,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
    }
  }
}

// n in {32, 48}, k in {1, 10, 19, 28, 37}, m = 3, a_stride(43).
TEST(QS8_GEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE_PRFM, n_div_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(3).nr(16).kr(1).sr(1)
          .m(3).n(n_size).k(k_size)
          .a_stride(43)
          .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane_prfm,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
    }
  }
}
TEST(QS8_GEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE_PRFM,n_div_16_subtile)17726 TEST(QS8_GEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE_PRFM, n_div_16_subtile) { 17727 TEST_REQUIRES_ARM_NEON; 17728 for (uint32_t n = 32; n <= 48; n += 16) { 17729 for (size_t k = 1; k <= 40; k += 9) { 17730 for (uint32_t m = 1; m <= 3; m++) { 17731 GemmMicrokernelTester() 17732 .mr(3) 17733 .nr(16) 17734 .kr(1) 17735 .sr(1) 17736 .m(m) 17737 .n(n) 17738 .k(k) 17739 .iterations(1) 17740 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17741 } 17742 } 17743 } 17744 } 17745 TEST(QS8_GEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE_PRFM,strided_cm_subtile)17746 TEST(QS8_GEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE_PRFM, strided_cm_subtile) { 17747 TEST_REQUIRES_ARM_NEON; 17748 for (size_t k = 1; k <= 40; k += 9) { 17749 for (uint32_t n = 1; n <= 16; n++) { 17750 for (uint32_t m = 1; m <= 3; m++) { 17751 GemmMicrokernelTester() 17752 .mr(3) 17753 .nr(16) 17754 .kr(1) 17755 .sr(1) 17756 .m(m) 17757 .n(n) 17758 .k(k) 17759 .cm_stride(19) 17760 .iterations(1) 17761 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17762 } 17763 } 17764 } 17765 } 17766 TEST(QS8_GEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE_PRFM,qmin)17767 TEST(QS8_GEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE_PRFM, qmin) { 17768 TEST_REQUIRES_ARM_NEON; 17769 GemmMicrokernelTester() 17770 .mr(3) 17771 .nr(16) 17772 .kr(1) 17773 .sr(1) 17774 .m(3) 17775 .n(16) 17776 .k(8) 17777 .qmin(128) 17778 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17779 } 17780 TEST(QS8_GEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE_PRFM,qmax)17781 TEST(QS8_GEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE_PRFM, qmax) { 17782 TEST_REQUIRES_ARM_NEON; 17783 GemmMicrokernelTester() 17784 .mr(3) 17785 .nr(16) 17786 .kr(1) 17787 .sr(1) 17788 .m(3) 17789 
.n(16) 17790 .k(8) 17791 .qmax(128) 17792 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17793 } 17794 TEST(QS8_GEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE_PRFM,strided_cm)17795 TEST(QS8_GEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE_PRFM, strided_cm) { 17796 TEST_REQUIRES_ARM_NEON; 17797 GemmMicrokernelTester() 17798 .mr(3) 17799 .nr(16) 17800 .kr(1) 17801 .sr(1) 17802 .m(3) 17803 .n(16) 17804 .k(8) 17805 .cm_stride(19) 17806 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17807 } 17808 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 17809 17810 17811 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL,k_eq_16)17812 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, k_eq_16) { 17813 TEST_REQUIRES_ARM_NEON; 17814 GemmMicrokernelTester() 17815 .mr(4) 17816 .nr(8) 17817 .kr(8) 17818 .sr(1) 17819 .m(4) 17820 .n(8) 17821 .k(16) 17822 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17823 } 17824 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL,strided_cn)17825 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, strided_cn) { 17826 TEST_REQUIRES_ARM_NEON; 17827 GemmMicrokernelTester() 17828 .mr(4) 17829 .nr(8) 17830 .kr(8) 17831 .sr(1) 17832 .m(4) 17833 .n(8) 17834 .k(16) 17835 .cn_stride(11) 17836 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17837 } 17838 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL,k_eq_16_strided_a)17839 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, k_eq_16_strided_a) { 17840 TEST_REQUIRES_ARM_NEON; 17841 GemmMicrokernelTester() 17842 .mr(4) 17843 .nr(8) 17844 .kr(8) 17845 .sr(1) 17846 .m(4) 17847 .n(8) 17848 .k(16) 17849 .a_stride(19) 17850 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c8__neon_mlal, 
xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17851 } 17852 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL,k_eq_16_subtile)17853 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, k_eq_16_subtile) { 17854 TEST_REQUIRES_ARM_NEON; 17855 for (uint32_t n = 1; n <= 8; n++) { 17856 for (uint32_t m = 1; m <= 4; m++) { 17857 GemmMicrokernelTester() 17858 .mr(4) 17859 .nr(8) 17860 .kr(8) 17861 .sr(1) 17862 .m(m) 17863 .n(n) 17864 .k(16) 17865 .iterations(1) 17866 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17867 } 17868 } 17869 } 17870 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL,k_eq_16_subtile_m)17871 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, k_eq_16_subtile_m) { 17872 TEST_REQUIRES_ARM_NEON; 17873 for (uint32_t m = 1; m <= 4; m++) { 17874 GemmMicrokernelTester() 17875 .mr(4) 17876 .nr(8) 17877 .kr(8) 17878 .sr(1) 17879 .m(m) 17880 .n(8) 17881 .k(16) 17882 .iterations(1) 17883 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17884 } 17885 } 17886 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL,k_eq_16_subtile_n)17887 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, k_eq_16_subtile_n) { 17888 TEST_REQUIRES_ARM_NEON; 17889 for (uint32_t n = 1; n <= 8; n++) { 17890 GemmMicrokernelTester() 17891 .mr(4) 17892 .nr(8) 17893 .kr(8) 17894 .sr(1) 17895 .m(4) 17896 .n(n) 17897 .k(16) 17898 .iterations(1) 17899 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17900 } 17901 } 17902 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL,k_lt_16)17903 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, k_lt_16) { 17904 TEST_REQUIRES_ARM_NEON; 17905 for (size_t k = 1; k < 16; k++) { 17906 GemmMicrokernelTester() 17907 .mr(4) 17908 .nr(8) 17909 .kr(8) 17910 .sr(1) 17911 .m(4) 17912 .n(8) 17913 .k(k) 17914 
.Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17915 } 17916 } 17917 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL,k_lt_16_strided_a)17918 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, k_lt_16_strided_a) { 17919 TEST_REQUIRES_ARM_NEON; 17920 for (size_t k = 1; k < 16; k++) { 17921 GemmMicrokernelTester() 17922 .mr(4) 17923 .nr(8) 17924 .kr(8) 17925 .sr(1) 17926 .m(4) 17927 .n(8) 17928 .k(k) 17929 .a_stride(19) 17930 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17931 } 17932 } 17933 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL,k_lt_16_subtile)17934 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, k_lt_16_subtile) { 17935 TEST_REQUIRES_ARM_NEON; 17936 for (size_t k = 1; k < 16; k++) { 17937 for (uint32_t n = 1; n <= 8; n++) { 17938 for (uint32_t m = 1; m <= 4; m++) { 17939 GemmMicrokernelTester() 17940 .mr(4) 17941 .nr(8) 17942 .kr(8) 17943 .sr(1) 17944 .m(m) 17945 .n(n) 17946 .k(k) 17947 .iterations(1) 17948 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17949 } 17950 } 17951 } 17952 } 17953 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL,k_gt_16)17954 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, k_gt_16) { 17955 TEST_REQUIRES_ARM_NEON; 17956 for (size_t k = 17; k < 32; k++) { 17957 GemmMicrokernelTester() 17958 .mr(4) 17959 .nr(8) 17960 .kr(8) 17961 .sr(1) 17962 .m(4) 17963 .n(8) 17964 .k(k) 17965 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17966 } 17967 } 17968 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL,k_gt_16_strided_a)17969 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, k_gt_16_strided_a) { 17970 TEST_REQUIRES_ARM_NEON; 17971 for (size_t k = 17; k < 32; k++) { 17972 GemmMicrokernelTester() 17973 .mr(4) 17974 .nr(8) 17975 .kr(8) 17976 .sr(1) 17977 
.m(4) 17978 .n(8) 17979 .k(k) 17980 .a_stride(37) 17981 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17982 } 17983 } 17984 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL,k_gt_16_subtile)17985 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, k_gt_16_subtile) { 17986 TEST_REQUIRES_ARM_NEON; 17987 for (size_t k = 17; k < 32; k++) { 17988 for (uint32_t n = 1; n <= 8; n++) { 17989 for (uint32_t m = 1; m <= 4; m++) { 17990 GemmMicrokernelTester() 17991 .mr(4) 17992 .nr(8) 17993 .kr(8) 17994 .sr(1) 17995 .m(m) 17996 .n(n) 17997 .k(k) 17998 .iterations(1) 17999 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 18000 } 18001 } 18002 } 18003 } 18004 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL,k_div_16)18005 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, k_div_16) { 18006 TEST_REQUIRES_ARM_NEON; 18007 for (size_t k = 32; k <= 160; k += 16) { 18008 GemmMicrokernelTester() 18009 .mr(4) 18010 .nr(8) 18011 .kr(8) 18012 .sr(1) 18013 .m(4) 18014 .n(8) 18015 .k(k) 18016 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 18017 } 18018 } 18019 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL,k_div_16_strided_a)18020 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, k_div_16_strided_a) { 18021 TEST_REQUIRES_ARM_NEON; 18022 for (size_t k = 32; k <= 160; k += 16) { 18023 GemmMicrokernelTester() 18024 .mr(4) 18025 .nr(8) 18026 .kr(8) 18027 .sr(1) 18028 .m(4) 18029 .n(8) 18030 .k(k) 18031 .a_stride(163) 18032 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 18033 } 18034 } 18035 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL,k_div_16_subtile)18036 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, k_div_16_subtile) { 18037 TEST_REQUIRES_ARM_NEON; 18038 for (size_t k = 32; k <= 160; k += 16) { 18039 for 
(uint32_t n = 1; n <= 8; n++) { 18040 for (uint32_t m = 1; m <= 4; m++) { 18041 GemmMicrokernelTester() 18042 .mr(4) 18043 .nr(8) 18044 .kr(8) 18045 .sr(1) 18046 .m(m) 18047 .n(n) 18048 .k(k) 18049 .iterations(1) 18050 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 18051 } 18052 } 18053 } 18054 } 18055 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL,n_gt_8)18056 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, n_gt_8) { 18057 TEST_REQUIRES_ARM_NEON; 18058 for (uint32_t n = 9; n < 16; n++) { 18059 for (size_t k = 1; k <= 80; k += 17) { 18060 GemmMicrokernelTester() 18061 .mr(4) 18062 .nr(8) 18063 .kr(8) 18064 .sr(1) 18065 .m(4) 18066 .n(n) 18067 .k(k) 18068 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 18069 } 18070 } 18071 } 18072 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL,n_gt_8_strided_cn)18073 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, n_gt_8_strided_cn) { 18074 TEST_REQUIRES_ARM_NEON; 18075 for (uint32_t n = 9; n < 16; n++) { 18076 for (size_t k = 1; k <= 80; k += 17) { 18077 GemmMicrokernelTester() 18078 .mr(4) 18079 .nr(8) 18080 .kr(8) 18081 .sr(1) 18082 .m(4) 18083 .n(n) 18084 .k(k) 18085 .cn_stride(11) 18086 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 18087 } 18088 } 18089 } 18090 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL,n_gt_8_strided_a)18091 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, n_gt_8_strided_a) { 18092 TEST_REQUIRES_ARM_NEON; 18093 for (uint32_t n = 9; n < 16; n++) { 18094 for (size_t k = 1; k <= 80; k += 17) { 18095 GemmMicrokernelTester() 18096 .mr(4) 18097 .nr(8) 18098 .kr(8) 18099 .sr(1) 18100 .m(4) 18101 .n(n) 18102 .k(k) 18103 .a_stride(83) 18104 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 18105 } 18106 } 18107 } 
18108 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL,n_gt_8_subtile)18109 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, n_gt_8_subtile) { 18110 TEST_REQUIRES_ARM_NEON; 18111 for (uint32_t n = 9; n < 16; n++) { 18112 for (size_t k = 1; k <= 80; k += 17) { 18113 for (uint32_t m = 1; m <= 4; m++) { 18114 GemmMicrokernelTester() 18115 .mr(4) 18116 .nr(8) 18117 .kr(8) 18118 .sr(1) 18119 .m(m) 18120 .n(n) 18121 .k(k) 18122 .iterations(1) 18123 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 18124 } 18125 } 18126 } 18127 } 18128 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL,n_div_8)18129 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, n_div_8) { 18130 TEST_REQUIRES_ARM_NEON; 18131 for (uint32_t n = 16; n <= 24; n += 8) { 18132 for (size_t k = 1; k <= 80; k += 17) { 18133 GemmMicrokernelTester() 18134 .mr(4) 18135 .nr(8) 18136 .kr(8) 18137 .sr(1) 18138 .m(4) 18139 .n(n) 18140 .k(k) 18141 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 18142 } 18143 } 18144 } 18145 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL,n_div_8_strided_cn)18146 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, n_div_8_strided_cn) { 18147 TEST_REQUIRES_ARM_NEON; 18148 for (uint32_t n = 16; n <= 24; n += 8) { 18149 for (size_t k = 1; k <= 80; k += 17) { 18150 GemmMicrokernelTester() 18151 .mr(4) 18152 .nr(8) 18153 .kr(8) 18154 .sr(1) 18155 .m(4) 18156 .n(n) 18157 .k(k) 18158 .cn_stride(11) 18159 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 18160 } 18161 } 18162 } 18163 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL,n_div_8_strided_a)18164 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, n_div_8_strided_a) { 18165 TEST_REQUIRES_ARM_NEON; 18166 for (uint32_t n = 16; n <= 24; n += 8) { 18167 for (size_t k = 1; k <= 80; k += 17) { 18168 GemmMicrokernelTester() 18169 .mr(4) 18170 .nr(8) 
18171 .kr(8) 18172 .sr(1) 18173 .m(4) 18174 .n(n) 18175 .k(k) 18176 .a_stride(83) 18177 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 18178 } 18179 } 18180 } 18181 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL,n_div_8_subtile)18182 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, n_div_8_subtile) { 18183 TEST_REQUIRES_ARM_NEON; 18184 for (uint32_t n = 16; n <= 24; n += 8) { 18185 for (size_t k = 1; k <= 80; k += 17) { 18186 for (uint32_t m = 1; m <= 4; m++) { 18187 GemmMicrokernelTester() 18188 .mr(4) 18189 .nr(8) 18190 .kr(8) 18191 .sr(1) 18192 .m(m) 18193 .n(n) 18194 .k(k) 18195 .iterations(1) 18196 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 18197 } 18198 } 18199 } 18200 } 18201 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL,strided_cm_subtile)18202 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, strided_cm_subtile) { 18203 TEST_REQUIRES_ARM_NEON; 18204 for (size_t k = 1; k <= 80; k += 17) { 18205 for (uint32_t n = 1; n <= 8; n++) { 18206 for (uint32_t m = 1; m <= 4; m++) { 18207 GemmMicrokernelTester() 18208 .mr(4) 18209 .nr(8) 18210 .kr(8) 18211 .sr(1) 18212 .m(m) 18213 .n(n) 18214 .k(k) 18215 .cm_stride(11) 18216 .iterations(1) 18217 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 18218 } 18219 } 18220 } 18221 } 18222 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL,qmin)18223 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, qmin) { 18224 TEST_REQUIRES_ARM_NEON; 18225 GemmMicrokernelTester() 18226 .mr(4) 18227 .nr(8) 18228 .kr(8) 18229 .sr(1) 18230 .m(4) 18231 .n(8) 18232 .k(16) 18233 .qmin(128) 18234 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 18235 } 18236 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL,qmax)18237 
TEST(QS8_GEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, qmax) { 18238 TEST_REQUIRES_ARM_NEON; 18239 GemmMicrokernelTester() 18240 .mr(4) 18241 .nr(8) 18242 .kr(8) 18243 .sr(1) 18244 .m(4) 18245 .n(8) 18246 .k(16) 18247 .qmax(128) 18248 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 18249 } 18250 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL,strided_cm)18251 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, strided_cm) { 18252 TEST_REQUIRES_ARM_NEON; 18253 GemmMicrokernelTester() 18254 .mr(4) 18255 .nr(8) 18256 .kr(8) 18257 .sr(1) 18258 .m(4) 18259 .n(8) 18260 .k(16) 18261 .cm_stride(11) 18262 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 18263 } 18264 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 18265 18266 18267 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_GEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE,k_eq_8)18268 TEST(QS8_GEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, k_eq_8) { 18269 TEST_REQUIRES_ARM_NEON; 18270 GemmMicrokernelTester() 18271 .mr(4) 18272 .nr(16) 18273 .kr(1) 18274 .sr(1) 18275 .m(4) 18276 .n(16) 18277 .k(8) 18278 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 18279 } 18280 TEST(QS8_GEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE,strided_cn)18281 TEST(QS8_GEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, strided_cn) { 18282 TEST_REQUIRES_ARM_NEON; 18283 GemmMicrokernelTester() 18284 .mr(4) 18285 .nr(16) 18286 .kr(1) 18287 .sr(1) 18288 .m(4) 18289 .n(16) 18290 .k(8) 18291 .cn_stride(19) 18292 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 18293 } 18294 TEST(QS8_GEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE,k_eq_8_strided_a)18295 TEST(QS8_GEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, k_eq_8_strided_a) { 18296 TEST_REQUIRES_ARM_NEON; 18297 GemmMicrokernelTester() 18298 .mr(4) 18299 
.nr(16) 18300 .kr(1) 18301 .sr(1) 18302 .m(4) 18303 .n(16) 18304 .k(8) 18305 .a_stride(11) 18306 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 18307 } 18308 TEST(QS8_GEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE,k_eq_8_subtile)18309 TEST(QS8_GEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, k_eq_8_subtile) { 18310 TEST_REQUIRES_ARM_NEON; 18311 for (uint32_t n = 1; n <= 16; n++) { 18312 for (uint32_t m = 1; m <= 4; m++) { 18313 GemmMicrokernelTester() 18314 .mr(4) 18315 .nr(16) 18316 .kr(1) 18317 .sr(1) 18318 .m(m) 18319 .n(n) 18320 .k(8) 18321 .iterations(1) 18322 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 18323 } 18324 } 18325 } 18326 TEST(QS8_GEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE,k_eq_8_subtile_m)18327 TEST(QS8_GEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, k_eq_8_subtile_m) { 18328 TEST_REQUIRES_ARM_NEON; 18329 for (uint32_t m = 1; m <= 4; m++) { 18330 GemmMicrokernelTester() 18331 .mr(4) 18332 .nr(16) 18333 .kr(1) 18334 .sr(1) 18335 .m(m) 18336 .n(16) 18337 .k(8) 18338 .iterations(1) 18339 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 18340 } 18341 } 18342 TEST(QS8_GEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE,k_eq_8_subtile_n)18343 TEST(QS8_GEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, k_eq_8_subtile_n) { 18344 TEST_REQUIRES_ARM_NEON; 18345 for (uint32_t n = 1; n <= 16; n++) { 18346 GemmMicrokernelTester() 18347 .mr(4) 18348 .nr(16) 18349 .kr(1) 18350 .sr(1) 18351 .m(4) 18352 .n(n) 18353 .k(8) 18354 .iterations(1) 18355 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 18356 } 18357 } 18358 TEST(QS8_GEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE,k_lt_8)18359 TEST(QS8_GEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, k_lt_8) { 18360 TEST_REQUIRES_ARM_NEON; 18361 for (size_t 
k = 1; k < 8; k++) { 18362 GemmMicrokernelTester() 18363 .mr(4) 18364 .nr(16) 18365 .kr(1) 18366 .sr(1) 18367 .m(4) 18368 .n(16) 18369 .k(k) 18370 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 18371 } 18372 } 18373 TEST(QS8_GEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE,k_lt_8_strided_a)18374 TEST(QS8_GEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, k_lt_8_strided_a) { 18375 TEST_REQUIRES_ARM_NEON; 18376 for (size_t k = 1; k < 8; k++) { 18377 GemmMicrokernelTester() 18378 .mr(4) 18379 .nr(16) 18380 .kr(1) 18381 .sr(1) 18382 .m(4) 18383 .n(16) 18384 .k(k) 18385 .a_stride(11) 18386 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 18387 } 18388 } 18389 TEST(QS8_GEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE,k_lt_8_subtile)18390 TEST(QS8_GEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, k_lt_8_subtile) { 18391 TEST_REQUIRES_ARM_NEON; 18392 for (size_t k = 1; k < 8; k++) { 18393 for (uint32_t n = 1; n <= 16; n++) { 18394 for (uint32_t m = 1; m <= 4; m++) { 18395 GemmMicrokernelTester() 18396 .mr(4) 18397 .nr(16) 18398 .kr(1) 18399 .sr(1) 18400 .m(m) 18401 .n(n) 18402 .k(k) 18403 .iterations(1) 18404 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 18405 } 18406 } 18407 } 18408 } 18409 TEST(QS8_GEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE,k_gt_8)18410 TEST(QS8_GEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, k_gt_8) { 18411 TEST_REQUIRES_ARM_NEON; 18412 for (size_t k = 9; k < 16; k++) { 18413 GemmMicrokernelTester() 18414 .mr(4) 18415 .nr(16) 18416 .kr(1) 18417 .sr(1) 18418 .m(4) 18419 .n(16) 18420 .k(k) 18421 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 18422 } 18423 } 18424 TEST(QS8_GEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE,k_gt_8_strided_a)18425 
// QS8 GEMM (minmax, rndnu requantization) tests for the 4x16 NEON MLAL-lane
// microkernel: k_gt_8_strided_a through strided_cm_subtile.
//
// Every case configures a GemmMicrokernelTester with mr=4, nr=16, kr=1, sr=1
// and hands the kernel, the params initializer and the reference requantizer
// to Test(). The comment on each case lists the values it sweeps.
//
// NOTE(review): the file header marks this file as generated from
// test/qs8-gemm-minmax-rndnu.yaml; lasting changes presumably belong in the
// generator rather than here.

// k = 9..15 on the full 4x16 tile, with a_stride(19).
TEST(QS8_GEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, k_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(4)
      .n(16)
      .k(k)
      .a_stride(19)
      .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// k = 9..15, every m in 1..4 and n in 1..16, one iteration per combination.
TEST(QS8_GEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// k = 16, 24, ..., 80 on the full 4x16 tile.
TEST(QS8_GEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(4)
      .n(16)
      .k(k)
      .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// k = 16, 24, ..., 80 on the full 4x16 tile, with a_stride(83).
TEST(QS8_GEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, k_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(1)
      .sr(1)
      .m(4)
      .n(16)
      .k(k)
      .a_stride(83)
      .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// k = 16, 24, ..., 80, every m in 1..4 and n in 1..16, one iteration each.
TEST(QS8_GEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// n = 17..31 with m = 4, k = 1, 10, 19, 28, 37.
TEST(QS8_GEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, n_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// n = 17..31 with m = 4, k = 1, 10, ..., 37, and cn_stride(19).
TEST(QS8_GEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .cn_stride(19)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// n = 17..31 with m = 4, k = 1, 10, ..., 37, and a_stride(43).
TEST(QS8_GEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, n_gt_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .a_stride(43)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// n = 17..31, k = 1, 10, ..., 37, every m in 1..4, one iteration each.
TEST(QS8_GEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// n = 32 and 48 with m = 4, k = 1, 10, ..., 37.
TEST(QS8_GEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, n_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// n = 32 and 48 with m = 4, k = 1, 10, ..., 37, and cn_stride(19).
TEST(QS8_GEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .cn_stride(19)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// n = 32 and 48 with m = 4, k = 1, 10, ..., 37, and a_stride(43).
TEST(QS8_GEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, n_div_16_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .a_stride(43)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// n = 32 and 48, k = 1, 10, ..., 37, every m in 1..4, one iteration each.
TEST(QS8_GEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// k = 1, 10, ..., 37, every m in 1..4 and n in 1..16, with cm_stride(19),
// one iteration per combination.
TEST(QS8_GEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(16)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(19)
          .iterations(1)
          .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}
TEST(QS8_GEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, qmin) { 18680 TEST_REQUIRES_ARM_NEON; 18681 GemmMicrokernelTester() 18682 .mr(4) 18683 .nr(16) 18684 .kr(1) 18685 .sr(1) 18686 .m(4) 18687 .n(16) 18688 .k(8) 18689 .qmin(128) 18690 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 18691 } 18692 TEST(QS8_GEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE,qmax)18693 TEST(QS8_GEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, qmax) { 18694 TEST_REQUIRES_ARM_NEON; 18695 GemmMicrokernelTester() 18696 .mr(4) 18697 .nr(16) 18698 .kr(1) 18699 .sr(1) 18700 .m(4) 18701 .n(16) 18702 .k(8) 18703 .qmax(128) 18704 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 18705 } 18706 TEST(QS8_GEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE,strided_cm)18707 TEST(QS8_GEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, strided_cm) { 18708 TEST_REQUIRES_ARM_NEON; 18709 GemmMicrokernelTester() 18710 .mr(4) 18711 .nr(16) 18712 .kr(1) 18713 .sr(1) 18714 .m(4) 18715 .n(16) 18716 .k(8) 18717 .cm_stride(19) 18718 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 18719 } 18720 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 18721 18722 18723 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_GEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM,k_eq_8)18724 TEST(QS8_GEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM, k_eq_8) { 18725 TEST_REQUIRES_ARM_NEON; 18726 GemmMicrokernelTester() 18727 .mr(6) 18728 .nr(8) 18729 .kr(1) 18730 .sr(1) 18731 .m(6) 18732 .n(8) 18733 .k(8) 18734 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 18735 } 18736 TEST(QS8_GEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM,strided_cn)18737 TEST(QS8_GEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM, strided_cn) { 18738 TEST_REQUIRES_ARM_NEON; 18739 GemmMicrokernelTester() 18740 
.mr(6) 18741 .nr(8) 18742 .kr(1) 18743 .sr(1) 18744 .m(6) 18745 .n(8) 18746 .k(8) 18747 .cn_stride(11) 18748 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 18749 } 18750 TEST(QS8_GEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM,k_eq_8_strided_a)18751 TEST(QS8_GEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM, k_eq_8_strided_a) { 18752 TEST_REQUIRES_ARM_NEON; 18753 GemmMicrokernelTester() 18754 .mr(6) 18755 .nr(8) 18756 .kr(1) 18757 .sr(1) 18758 .m(6) 18759 .n(8) 18760 .k(8) 18761 .a_stride(11) 18762 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 18763 } 18764 TEST(QS8_GEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM,k_eq_8_subtile)18765 TEST(QS8_GEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM, k_eq_8_subtile) { 18766 TEST_REQUIRES_ARM_NEON; 18767 for (uint32_t n = 1; n <= 8; n++) { 18768 for (uint32_t m = 1; m <= 6; m++) { 18769 GemmMicrokernelTester() 18770 .mr(6) 18771 .nr(8) 18772 .kr(1) 18773 .sr(1) 18774 .m(m) 18775 .n(n) 18776 .k(8) 18777 .iterations(1) 18778 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 18779 } 18780 } 18781 } 18782 TEST(QS8_GEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM,k_eq_8_subtile_m)18783 TEST(QS8_GEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_m) { 18784 TEST_REQUIRES_ARM_NEON; 18785 for (uint32_t m = 1; m <= 6; m++) { 18786 GemmMicrokernelTester() 18787 .mr(6) 18788 .nr(8) 18789 .kr(1) 18790 .sr(1) 18791 .m(m) 18792 .n(8) 18793 .k(8) 18794 .iterations(1) 18795 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 18796 } 18797 } 18798 TEST(QS8_GEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM,k_eq_8_subtile_n)18799 TEST(QS8_GEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_n) { 18800 
TEST_REQUIRES_ARM_NEON; 18801 for (uint32_t n = 1; n <= 8; n++) { 18802 GemmMicrokernelTester() 18803 .mr(6) 18804 .nr(8) 18805 .kr(1) 18806 .sr(1) 18807 .m(6) 18808 .n(n) 18809 .k(8) 18810 .iterations(1) 18811 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 18812 } 18813 } 18814 TEST(QS8_GEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM,k_lt_8)18815 TEST(QS8_GEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM, k_lt_8) { 18816 TEST_REQUIRES_ARM_NEON; 18817 for (size_t k = 1; k < 8; k++) { 18818 GemmMicrokernelTester() 18819 .mr(6) 18820 .nr(8) 18821 .kr(1) 18822 .sr(1) 18823 .m(6) 18824 .n(8) 18825 .k(k) 18826 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 18827 } 18828 } 18829 TEST(QS8_GEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM,k_lt_8_strided_a)18830 TEST(QS8_GEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM, k_lt_8_strided_a) { 18831 TEST_REQUIRES_ARM_NEON; 18832 for (size_t k = 1; k < 8; k++) { 18833 GemmMicrokernelTester() 18834 .mr(6) 18835 .nr(8) 18836 .kr(1) 18837 .sr(1) 18838 .m(6) 18839 .n(8) 18840 .k(k) 18841 .a_stride(11) 18842 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 18843 } 18844 } 18845 TEST(QS8_GEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM,k_lt_8_subtile)18846 TEST(QS8_GEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM, k_lt_8_subtile) { 18847 TEST_REQUIRES_ARM_NEON; 18848 for (size_t k = 1; k < 8; k++) { 18849 for (uint32_t n = 1; n <= 8; n++) { 18850 for (uint32_t m = 1; m <= 6; m++) { 18851 GemmMicrokernelTester() 18852 .mr(6) 18853 .nr(8) 18854 .kr(1) 18855 .sr(1) 18856 .m(m) 18857 .n(n) 18858 .k(k) 18859 .iterations(1) 18860 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 18861 } 18862 } 18863 } 18864 } 18865 
// QS8 GEMM (minmax, rndnu requantization) tests for the 6x8 NEON MLAL-lane
// PRFM microkernel: k_gt_8 through n_div_8_subtile.
//
// Every case configures a GemmMicrokernelTester with mr=6, nr=8, kr=1, sr=1
// and hands the kernel, the params initializer and the reference requantizer
// to Test(). The comment on each case lists the values it sweeps.
//
// NOTE(review): the file header marks this file as generated from
// test/qs8-gemm-minmax-rndnu.yaml; lasting changes presumably belong in the
// generator rather than here.

// k = 9..15 on the full 6x8 tile.
TEST(QS8_GEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(6)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(6)
      .n(8)
      .k(k)
      .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// k = 9..15 on the full 6x8 tile, with a_stride(19).
TEST(QS8_GEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM, k_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(6)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(6)
      .n(8)
      .k(k)
      .a_stride(19)
      .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// k = 9..15, every m in 1..6 and n in 1..8, one iteration per combination.
TEST(QS8_GEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 6; m++) {
        GemmMicrokernelTester()
          .mr(6)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// k = 16, 24, ..., 80 on the full 6x8 tile.
TEST(QS8_GEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(6)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(6)
      .n(8)
      .k(k)
      .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// k = 16, 24, ..., 80 on the full 6x8 tile, with a_stride(83).
TEST(QS8_GEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM, k_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(6)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(6)
      .n(8)
      .k(k)
      .a_stride(83)
      .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// k = 16, 24, ..., 80, every m in 1..6 and n in 1..8, one iteration each.
TEST(QS8_GEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 6; m++) {
        GemmMicrokernelTester()
          .mr(6)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// n = 9..15 with m = 6, k = 1, 10, 19, 28, 37.
TEST(QS8_GEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(6)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(6)
        .n(n)
        .k(k)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// n = 9..15 with m = 6, k = 1, 10, ..., 37, and cn_stride(11).
TEST(QS8_GEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(6)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(6)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// n = 9..15 with m = 6, k = 1, 10, ..., 37, and a_stride(43).
TEST(QS8_GEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(6)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(6)
        .n(n)
        .k(k)
        .a_stride(43)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// n = 9..15, k = 1, 10, ..., 37, every m in 1..6, one iteration each.
TEST(QS8_GEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 6; m++) {
        GemmMicrokernelTester()
          .mr(6)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// n = 16 and 24 with m = 6, k = 1, 10, ..., 37.
TEST(QS8_GEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(6)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(6)
        .n(n)
        .k(k)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// n = 16 and 24 with m = 6, k = 1, 10, ..., 37, and cn_stride(11).
TEST(QS8_GEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(6)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(6)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// n = 16 and 24 with m = 6, k = 1, 10, ..., 37, and a_stride(43).
TEST(QS8_GEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(6)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(6)
        .n(n)
        .k(k)
        .a_stride(43)
        .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// n = 16 and 24, k = 1, 10, ..., 37, every m in 1..6, one iteration each.
TEST(QS8_GEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 6; m++) {
        GemmMicrokernelTester()
          .mr(6)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}
// Final four cases of the 6x8 NEON MLAL-lane PRFM suite: strided_cm_subtile,
// qmin, qmax and strided_cm.
//
// Every case configures a GemmMicrokernelTester with mr=6, nr=8, kr=1, sr=1
// and hands the kernel, the params initializer and the reference requantizer
// to Test().
//
// NOTE(review): the file header marks this file as generated from
// test/qs8-gemm-minmax-rndnu.yaml; lasting changes presumably belong in the
// generator rather than here.

// k = 1, 10, ..., 37, every m in 1..6 and n in 1..8, with cm_stride(11),
// one iteration per combination.
TEST(QS8_GEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 6; m++) {
        GemmMicrokernelTester()
          .mr(6)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// Full 6x8 tile at k = 8 with qmin(128).
TEST(QS8_GEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(6)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(6)
    .n(8)
    .k(8)
    .qmin(128)
    .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}

// Full 6x8 tile at k = 8 with qmax(128).
TEST(QS8_GEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(6)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(6)
    .n(8)
    .k(8)
    .qmax(128)
    .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}

// Full 6x8 tile at k = 8 with cm_stride(11).
TEST(QS8_GEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(6)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(6)
    .n(8)
    .k(8)
    .cm_stride(11)
    .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
19176 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 19177