1 // Copyright (c) Facebook, Inc. and its affiliates. 2 // All rights reserved. 3 // 4 // Copyright 2019 Google LLC 5 // 6 // This source code is licensed under the BSD-style license found in the 7 // LICENSE file in the root directory of this source tree. 8 // 9 // Auto-generated file. Do not edit! 10 // Specification: test/f16-gemm.yaml 11 // Generator: tools/generate-gemm-test.py 12 13 14 #include <gtest/gtest.h> 15 16 #include <xnnpack/common.h> 17 #include <xnnpack/isa-checks.h> 18 19 #include <xnnpack/gemm.h> 20 #include <xnnpack/igemm.h> 21 #include <xnnpack/ppmm.h> 22 #include "gemm-microkernel-tester.h" 23 24 25 #if XNN_ARCH_ARM64 TEST(F16_GEMM_4X8__NEONFP16ARITH_LD64,k_eq_4)26 TEST(F16_GEMM_4X8__NEONFP16ARITH_LD64, k_eq_4) { 27 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 28 GemmMicrokernelTester() 29 .mr(4) 30 .nr(8) 31 .kr(1) 32 .sr(1) 33 .m(4) 34 .n(8) 35 .k(4) 36 .Test(xnn_f16_gemm_ukernel_4x8__neonfp16arith_ld64); 37 } 38 TEST(F16_GEMM_4X8__NEONFP16ARITH_LD64,strided_cn)39 TEST(F16_GEMM_4X8__NEONFP16ARITH_LD64, strided_cn) { 40 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 41 GemmMicrokernelTester() 42 .mr(4) 43 .nr(8) 44 .kr(1) 45 .sr(1) 46 .m(4) 47 .n(8) 48 .k(4) 49 .cn_stride(11) 50 .Test(xnn_f16_gemm_ukernel_4x8__neonfp16arith_ld64); 51 } 52 TEST(F16_GEMM_4X8__NEONFP16ARITH_LD64,k_eq_4_strided_a)53 TEST(F16_GEMM_4X8__NEONFP16ARITH_LD64, k_eq_4_strided_a) { 54 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 55 GemmMicrokernelTester() 56 .mr(4) 57 .nr(8) 58 .kr(1) 59 .sr(1) 60 .m(4) 61 .n(8) 62 .k(4) 63 .a_stride(7) 64 .Test(xnn_f16_gemm_ukernel_4x8__neonfp16arith_ld64); 65 } 66 TEST(F16_GEMM_4X8__NEONFP16ARITH_LD64,k_eq_4_subtile)67 TEST(F16_GEMM_4X8__NEONFP16ARITH_LD64, k_eq_4_subtile) { 68 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 69 for (uint32_t m = 1; m <= 4; m++) { 70 for (uint32_t n = 1; n <= 8; n++) { 71 GemmMicrokernelTester() 72 .mr(4) 73 .nr(8) 74 .kr(1) 75 .sr(1) 76 .m(m) 77 .n(n) 78 .k(4) 79 .iterations(1) 80 .Test(xnn_f16_gemm_ukernel_4x8__neonfp16arith_ld64); 81 } 82 } 83 
} 84 TEST(F16_GEMM_4X8__NEONFP16ARITH_LD64,k_eq_4_subtile_m)85 TEST(F16_GEMM_4X8__NEONFP16ARITH_LD64, k_eq_4_subtile_m) { 86 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 87 for (uint32_t m = 1; m <= 4; m++) { 88 GemmMicrokernelTester() 89 .mr(4) 90 .nr(8) 91 .kr(1) 92 .sr(1) 93 .m(m) 94 .n(8) 95 .k(4) 96 .iterations(1) 97 .Test(xnn_f16_gemm_ukernel_4x8__neonfp16arith_ld64); 98 } 99 } 100 TEST(F16_GEMM_4X8__NEONFP16ARITH_LD64,k_eq_4_subtile_n)101 TEST(F16_GEMM_4X8__NEONFP16ARITH_LD64, k_eq_4_subtile_n) { 102 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 103 for (uint32_t n = 1; n <= 8; n++) { 104 GemmMicrokernelTester() 105 .mr(4) 106 .nr(8) 107 .kr(1) 108 .sr(1) 109 .m(4) 110 .n(n) 111 .k(4) 112 .iterations(1) 113 .Test(xnn_f16_gemm_ukernel_4x8__neonfp16arith_ld64); 114 } 115 } 116 TEST(F16_GEMM_4X8__NEONFP16ARITH_LD64,k_lt_4)117 TEST(F16_GEMM_4X8__NEONFP16ARITH_LD64, k_lt_4) { 118 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 119 for (size_t k = 1; k < 4; k++) { 120 GemmMicrokernelTester() 121 .mr(4) 122 .nr(8) 123 .kr(1) 124 .sr(1) 125 .m(4) 126 .n(8) 127 .k(k) 128 .Test(xnn_f16_gemm_ukernel_4x8__neonfp16arith_ld64); 129 } 130 } 131 TEST(F16_GEMM_4X8__NEONFP16ARITH_LD64,k_lt_4_strided_a)132 TEST(F16_GEMM_4X8__NEONFP16ARITH_LD64, k_lt_4_strided_a) { 133 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 134 for (size_t k = 1; k < 4; k++) { 135 GemmMicrokernelTester() 136 .mr(4) 137 .nr(8) 138 .kr(1) 139 .sr(1) 140 .m(4) 141 .n(8) 142 .k(k) 143 .a_stride(7) 144 .Test(xnn_f16_gemm_ukernel_4x8__neonfp16arith_ld64); 145 } 146 } 147 TEST(F16_GEMM_4X8__NEONFP16ARITH_LD64,k_lt_4_subtile)148 TEST(F16_GEMM_4X8__NEONFP16ARITH_LD64, k_lt_4_subtile) { 149 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 150 for (size_t k = 1; k < 4; k++) { 151 for (uint32_t m = 1; m <= 4; m++) { 152 for (uint32_t n = 1; n <= 8; n++) { 153 GemmMicrokernelTester() 154 .mr(4) 155 .nr(8) 156 .kr(1) 157 .sr(1) 158 .m(m) 159 .n(n) 160 .k(k) 161 .iterations(1) 162 .Test(xnn_f16_gemm_ukernel_4x8__neonfp16arith_ld64); 163 } 164 } 165 } 166 } 167 
// k in [5, 7] on the 4x8 shape.
TEST(F16_GEMM_4X8__NEONFP16ARITH_LD64, k_gt_4) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t k = 5; k < 8; ++k) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(k)
        .Test(xnn_f16_gemm_ukernel_4x8__neonfp16arith_ld64);
  }
}

// k in [5, 7] with a_stride set to 11.
TEST(F16_GEMM_4X8__NEONFP16ARITH_LD64, k_gt_4_strided_a) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t k = 5; k < 8; ++k) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(k)
        .a_stride(11)
        .Test(xnn_f16_gemm_ukernel_4x8__neonfp16arith_ld64);
  }
}

// k in [5, 7] crossed with every m in [1, 4] and n in [1, 8].
TEST(F16_GEMM_4X8__NEONFP16ARITH_LD64, k_gt_4_subtile) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t k = 5; k < 8; ++k) {
    for (uint32_t m = 1; m <= 4; ++m) {
      for (uint32_t n = 1; n <= 8; ++n) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_f16_gemm_ukernel_4x8__neonfp16arith_ld64);
      }
    }
  }
}

// k = 8, 12, ..., 40 on the 4x8 shape.
TEST(F16_GEMM_4X8__NEONFP16ARITH_LD64, k_div_4) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t k = 8; k <= 40; k += 4) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(k)
        .Test(xnn_f16_gemm_ukernel_4x8__neonfp16arith_ld64);
  }
}

// k = 8, 12, ..., 40 with a_stride set to 43.
TEST(F16_GEMM_4X8__NEONFP16ARITH_LD64, k_div_4_strided_a) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t k = 8; k <= 40; k += 4) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(k)
        .a_stride(43)
        .Test(xnn_f16_gemm_ukernel_4x8__neonfp16arith_ld64);
  }
}
TEST(F16_GEMM_4X8__NEONFP16ARITH_LD64,k_div_4_subtile)250 TEST(F16_GEMM_4X8__NEONFP16ARITH_LD64, k_div_4_subtile) { 251 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 252 for (size_t k = 8; k <= 40; k += 4) { 253 for (uint32_t m = 1; m <= 4; m++) { 254 for (uint32_t n = 1; n <= 8; n++) { 255 GemmMicrokernelTester() 256 .mr(4) 257 .nr(8) 258 .kr(1) 259 .sr(1) 260 .m(m) 261 .n(n) 262 .k(k) 263 .iterations(1) 264 .Test(xnn_f16_gemm_ukernel_4x8__neonfp16arith_ld64); 265 } 266 } 267 } 268 } 269 TEST(F16_GEMM_4X8__NEONFP16ARITH_LD64,n_gt_8)270 TEST(F16_GEMM_4X8__NEONFP16ARITH_LD64, n_gt_8) { 271 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 272 for (uint32_t n = 9; n < 16; n++) { 273 for (size_t k = 1; k <= 20; k += 5) { 274 GemmMicrokernelTester() 275 .mr(4) 276 .nr(8) 277 .kr(1) 278 .sr(1) 279 .m(4) 280 .n(8) 281 .k(k) 282 .Test(xnn_f16_gemm_ukernel_4x8__neonfp16arith_ld64); 283 } 284 } 285 } 286 TEST(F16_GEMM_4X8__NEONFP16ARITH_LD64,n_gt_8_strided_cn)287 TEST(F16_GEMM_4X8__NEONFP16ARITH_LD64, n_gt_8_strided_cn) { 288 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 289 for (uint32_t n = 9; n < 16; n++) { 290 for (size_t k = 1; k <= 20; k += 5) { 291 GemmMicrokernelTester() 292 .mr(4) 293 .nr(8) 294 .kr(1) 295 .sr(1) 296 .m(4) 297 .n(8) 298 .k(k) 299 .cn_stride(11) 300 .Test(xnn_f16_gemm_ukernel_4x8__neonfp16arith_ld64); 301 } 302 } 303 } 304 TEST(F16_GEMM_4X8__NEONFP16ARITH_LD64,n_gt_8_strided_a)305 TEST(F16_GEMM_4X8__NEONFP16ARITH_LD64, n_gt_8_strided_a) { 306 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 307 for (uint32_t n = 9; n < 16; n++) { 308 for (size_t k = 1; k <= 20; k += 5) { 309 GemmMicrokernelTester() 310 .mr(4) 311 .nr(8) 312 .kr(1) 313 .sr(1) 314 .m(4) 315 .n(n) 316 .k(k) 317 .a_stride(23) 318 .Test(xnn_f16_gemm_ukernel_4x8__neonfp16arith_ld64); 319 } 320 } 321 } 322 TEST(F16_GEMM_4X8__NEONFP16ARITH_LD64,n_gt_8_subtile)323 TEST(F16_GEMM_4X8__NEONFP16ARITH_LD64, n_gt_8_subtile) { 324 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 325 for (uint32_t n = 9; n < 16; n++) { 326 for (size_t k = 1; k <= 20; k += 5) { 
327 for (uint32_t m = 1; m <= 4; m++) { 328 GemmMicrokernelTester() 329 .mr(4) 330 .nr(8) 331 .kr(1) 332 .sr(1) 333 .m(m) 334 .n(n) 335 .k(k) 336 .iterations(1) 337 .Test(xnn_f16_gemm_ukernel_4x8__neonfp16arith_ld64); 338 } 339 } 340 } 341 } 342 TEST(F16_GEMM_4X8__NEONFP16ARITH_LD64,n_div_8)343 TEST(F16_GEMM_4X8__NEONFP16ARITH_LD64, n_div_8) { 344 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 345 for (uint32_t n = 16; n <= 24; n += 8) { 346 for (size_t k = 1; k <= 20; k += 5) { 347 GemmMicrokernelTester() 348 .mr(4) 349 .nr(8) 350 .kr(1) 351 .sr(1) 352 .m(4) 353 .n(8) 354 .k(k) 355 .Test(xnn_f16_gemm_ukernel_4x8__neonfp16arith_ld64); 356 } 357 } 358 } 359 TEST(F16_GEMM_4X8__NEONFP16ARITH_LD64,n_div_8_strided_cn)360 TEST(F16_GEMM_4X8__NEONFP16ARITH_LD64, n_div_8_strided_cn) { 361 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 362 for (uint32_t n = 16; n <= 24; n += 8) { 363 for (size_t k = 1; k <= 20; k += 5) { 364 GemmMicrokernelTester() 365 .mr(4) 366 .nr(8) 367 .kr(1) 368 .sr(1) 369 .m(4) 370 .n(n) 371 .k(k) 372 .cn_stride(11) 373 .Test(xnn_f16_gemm_ukernel_4x8__neonfp16arith_ld64); 374 } 375 } 376 } 377 TEST(F16_GEMM_4X8__NEONFP16ARITH_LD64,n_div_8_strided_a)378 TEST(F16_GEMM_4X8__NEONFP16ARITH_LD64, n_div_8_strided_a) { 379 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 380 for (uint32_t n = 16; n <= 24; n += 8) { 381 for (size_t k = 1; k <= 20; k += 5) { 382 GemmMicrokernelTester() 383 .mr(4) 384 .nr(8) 385 .kr(1) 386 .sr(1) 387 .m(4) 388 .n(n) 389 .k(k) 390 .a_stride(23) 391 .Test(xnn_f16_gemm_ukernel_4x8__neonfp16arith_ld64); 392 } 393 } 394 } 395 TEST(F16_GEMM_4X8__NEONFP16ARITH_LD64,n_div_8_subtile)396 TEST(F16_GEMM_4X8__NEONFP16ARITH_LD64, n_div_8_subtile) { 397 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 398 for (uint32_t n = 16; n <= 24; n += 8) { 399 for (size_t k = 1; k <= 20; k += 5) { 400 for (uint32_t m = 1; m <= 4; m++) { 401 GemmMicrokernelTester() 402 .mr(4) 403 .nr(8) 404 .kr(1) 405 .sr(1) 406 .m(m) 407 .n(n) 408 .k(k) 409 .iterations(1) 410 
.Test(xnn_f16_gemm_ukernel_4x8__neonfp16arith_ld64); 411 } 412 } 413 } 414 } 415 TEST(F16_GEMM_4X8__NEONFP16ARITH_LD64,strided_cm_subtile)416 TEST(F16_GEMM_4X8__NEONFP16ARITH_LD64, strided_cm_subtile) { 417 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 418 for (size_t k = 1; k <= 20; k += 5) { 419 for (uint32_t m = 1; m <= 4; m++) { 420 for (uint32_t n = 1; n <= 8; n++) { 421 GemmMicrokernelTester() 422 .mr(4) 423 .nr(8) 424 .kr(1) 425 .sr(1) 426 .m(m) 427 .n(n) 428 .k(k) 429 .cm_stride(11) 430 .iterations(1) 431 .Test(xnn_f16_gemm_ukernel_4x8__neonfp16arith_ld64); 432 } 433 } 434 } 435 } 436 TEST(F16_GEMM_4X8__NEONFP16ARITH_LD64,qmin)437 TEST(F16_GEMM_4X8__NEONFP16ARITH_LD64, qmin) { 438 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 439 GemmMicrokernelTester() 440 .mr(4) 441 .nr(8) 442 .kr(1) 443 .sr(1) 444 .m(4) 445 .n(8) 446 .k(4) 447 .qmin(128) 448 .Test(xnn_f16_gemm_ukernel_4x8__neonfp16arith_ld64); 449 } 450 TEST(F16_GEMM_4X8__NEONFP16ARITH_LD64,qmax)451 TEST(F16_GEMM_4X8__NEONFP16ARITH_LD64, qmax) { 452 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 453 GemmMicrokernelTester() 454 .mr(4) 455 .nr(8) 456 .kr(1) 457 .sr(1) 458 .m(4) 459 .n(8) 460 .k(4) 461 .qmax(128) 462 .Test(xnn_f16_gemm_ukernel_4x8__neonfp16arith_ld64); 463 } 464 TEST(F16_GEMM_4X8__NEONFP16ARITH_LD64,strided_cm)465 TEST(F16_GEMM_4X8__NEONFP16ARITH_LD64, strided_cm) { 466 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 467 GemmMicrokernelTester() 468 .mr(4) 469 .nr(8) 470 .kr(1) 471 .sr(1) 472 .m(4) 473 .n(8) 474 .k(4) 475 .cm_stride(11) 476 .Test(xnn_f16_gemm_ukernel_4x8__neonfp16arith_ld64); 477 } 478 #endif // XNN_ARCH_ARM64 479 480 481 #if XNN_ARCH_ARM64 TEST(F16_GEMM_6X8__NEONFP16ARITH_LD64,k_eq_4)482 TEST(F16_GEMM_6X8__NEONFP16ARITH_LD64, k_eq_4) { 483 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 484 GemmMicrokernelTester() 485 .mr(6) 486 .nr(8) 487 .kr(1) 488 .sr(1) 489 .m(6) 490 .n(8) 491 .k(4) 492 .Test(xnn_f16_gemm_ukernel_6x8__neonfp16arith_ld64); 493 } 494 TEST(F16_GEMM_6X8__NEONFP16ARITH_LD64,strided_cn)495 
TEST(F16_GEMM_6X8__NEONFP16ARITH_LD64, strided_cn) { 496 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 497 GemmMicrokernelTester() 498 .mr(6) 499 .nr(8) 500 .kr(1) 501 .sr(1) 502 .m(6) 503 .n(8) 504 .k(4) 505 .cn_stride(11) 506 .Test(xnn_f16_gemm_ukernel_6x8__neonfp16arith_ld64); 507 } 508 TEST(F16_GEMM_6X8__NEONFP16ARITH_LD64,k_eq_4_strided_a)509 TEST(F16_GEMM_6X8__NEONFP16ARITH_LD64, k_eq_4_strided_a) { 510 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 511 GemmMicrokernelTester() 512 .mr(6) 513 .nr(8) 514 .kr(1) 515 .sr(1) 516 .m(6) 517 .n(8) 518 .k(4) 519 .a_stride(7) 520 .Test(xnn_f16_gemm_ukernel_6x8__neonfp16arith_ld64); 521 } 522 TEST(F16_GEMM_6X8__NEONFP16ARITH_LD64,k_eq_4_subtile)523 TEST(F16_GEMM_6X8__NEONFP16ARITH_LD64, k_eq_4_subtile) { 524 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 525 for (uint32_t m = 1; m <= 6; m++) { 526 for (uint32_t n = 1; n <= 8; n++) { 527 GemmMicrokernelTester() 528 .mr(6) 529 .nr(8) 530 .kr(1) 531 .sr(1) 532 .m(m) 533 .n(n) 534 .k(4) 535 .iterations(1) 536 .Test(xnn_f16_gemm_ukernel_6x8__neonfp16arith_ld64); 537 } 538 } 539 } 540 TEST(F16_GEMM_6X8__NEONFP16ARITH_LD64,k_eq_4_subtile_m)541 TEST(F16_GEMM_6X8__NEONFP16ARITH_LD64, k_eq_4_subtile_m) { 542 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 543 for (uint32_t m = 1; m <= 6; m++) { 544 GemmMicrokernelTester() 545 .mr(6) 546 .nr(8) 547 .kr(1) 548 .sr(1) 549 .m(m) 550 .n(8) 551 .k(4) 552 .iterations(1) 553 .Test(xnn_f16_gemm_ukernel_6x8__neonfp16arith_ld64); 554 } 555 } 556 TEST(F16_GEMM_6X8__NEONFP16ARITH_LD64,k_eq_4_subtile_n)557 TEST(F16_GEMM_6X8__NEONFP16ARITH_LD64, k_eq_4_subtile_n) { 558 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 559 for (uint32_t n = 1; n <= 8; n++) { 560 GemmMicrokernelTester() 561 .mr(6) 562 .nr(8) 563 .kr(1) 564 .sr(1) 565 .m(6) 566 .n(n) 567 .k(4) 568 .iterations(1) 569 .Test(xnn_f16_gemm_ukernel_6x8__neonfp16arith_ld64); 570 } 571 } 572 TEST(F16_GEMM_6X8__NEONFP16ARITH_LD64,k_lt_4)573 TEST(F16_GEMM_6X8__NEONFP16ARITH_LD64, k_lt_4) { 574 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 575 for (size_t k = 1; k 
< 4; k++) { 576 GemmMicrokernelTester() 577 .mr(6) 578 .nr(8) 579 .kr(1) 580 .sr(1) 581 .m(6) 582 .n(8) 583 .k(k) 584 .Test(xnn_f16_gemm_ukernel_6x8__neonfp16arith_ld64); 585 } 586 } 587 TEST(F16_GEMM_6X8__NEONFP16ARITH_LD64,k_lt_4_strided_a)588 TEST(F16_GEMM_6X8__NEONFP16ARITH_LD64, k_lt_4_strided_a) { 589 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 590 for (size_t k = 1; k < 4; k++) { 591 GemmMicrokernelTester() 592 .mr(6) 593 .nr(8) 594 .kr(1) 595 .sr(1) 596 .m(6) 597 .n(8) 598 .k(k) 599 .a_stride(7) 600 .Test(xnn_f16_gemm_ukernel_6x8__neonfp16arith_ld64); 601 } 602 } 603 TEST(F16_GEMM_6X8__NEONFP16ARITH_LD64,k_lt_4_subtile)604 TEST(F16_GEMM_6X8__NEONFP16ARITH_LD64, k_lt_4_subtile) { 605 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 606 for (size_t k = 1; k < 4; k++) { 607 for (uint32_t m = 1; m <= 6; m++) { 608 for (uint32_t n = 1; n <= 8; n++) { 609 GemmMicrokernelTester() 610 .mr(6) 611 .nr(8) 612 .kr(1) 613 .sr(1) 614 .m(m) 615 .n(n) 616 .k(k) 617 .iterations(1) 618 .Test(xnn_f16_gemm_ukernel_6x8__neonfp16arith_ld64); 619 } 620 } 621 } 622 } 623 TEST(F16_GEMM_6X8__NEONFP16ARITH_LD64,k_gt_4)624 TEST(F16_GEMM_6X8__NEONFP16ARITH_LD64, k_gt_4) { 625 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 626 for (size_t k = 5; k < 8; k++) { 627 GemmMicrokernelTester() 628 .mr(6) 629 .nr(8) 630 .kr(1) 631 .sr(1) 632 .m(6) 633 .n(8) 634 .k(k) 635 .Test(xnn_f16_gemm_ukernel_6x8__neonfp16arith_ld64); 636 } 637 } 638 TEST(F16_GEMM_6X8__NEONFP16ARITH_LD64,k_gt_4_strided_a)639 TEST(F16_GEMM_6X8__NEONFP16ARITH_LD64, k_gt_4_strided_a) { 640 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 641 for (size_t k = 5; k < 8; k++) { 642 GemmMicrokernelTester() 643 .mr(6) 644 .nr(8) 645 .kr(1) 646 .sr(1) 647 .m(6) 648 .n(8) 649 .k(k) 650 .a_stride(11) 651 .Test(xnn_f16_gemm_ukernel_6x8__neonfp16arith_ld64); 652 } 653 } 654 TEST(F16_GEMM_6X8__NEONFP16ARITH_LD64,k_gt_4_subtile)655 TEST(F16_GEMM_6X8__NEONFP16ARITH_LD64, k_gt_4_subtile) { 656 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 657 for (size_t k = 5; k < 8; k++) { 658 for (uint32_t m = 1; 
m <= 6; m++) { 659 for (uint32_t n = 1; n <= 8; n++) { 660 GemmMicrokernelTester() 661 .mr(6) 662 .nr(8) 663 .kr(1) 664 .sr(1) 665 .m(m) 666 .n(n) 667 .k(k) 668 .iterations(1) 669 .Test(xnn_f16_gemm_ukernel_6x8__neonfp16arith_ld64); 670 } 671 } 672 } 673 } 674 TEST(F16_GEMM_6X8__NEONFP16ARITH_LD64,k_div_4)675 TEST(F16_GEMM_6X8__NEONFP16ARITH_LD64, k_div_4) { 676 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 677 for (size_t k = 8; k <= 40; k += 4) { 678 GemmMicrokernelTester() 679 .mr(6) 680 .nr(8) 681 .kr(1) 682 .sr(1) 683 .m(6) 684 .n(8) 685 .k(k) 686 .Test(xnn_f16_gemm_ukernel_6x8__neonfp16arith_ld64); 687 } 688 } 689 TEST(F16_GEMM_6X8__NEONFP16ARITH_LD64,k_div_4_strided_a)690 TEST(F16_GEMM_6X8__NEONFP16ARITH_LD64, k_div_4_strided_a) { 691 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 692 for (size_t k = 8; k <= 40; k += 4) { 693 GemmMicrokernelTester() 694 .mr(6) 695 .nr(8) 696 .kr(1) 697 .sr(1) 698 .m(6) 699 .n(8) 700 .k(k) 701 .a_stride(43) 702 .Test(xnn_f16_gemm_ukernel_6x8__neonfp16arith_ld64); 703 } 704 } 705 TEST(F16_GEMM_6X8__NEONFP16ARITH_LD64,k_div_4_subtile)706 TEST(F16_GEMM_6X8__NEONFP16ARITH_LD64, k_div_4_subtile) { 707 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 708 for (size_t k = 8; k <= 40; k += 4) { 709 for (uint32_t m = 1; m <= 6; m++) { 710 for (uint32_t n = 1; n <= 8; n++) { 711 GemmMicrokernelTester() 712 .mr(6) 713 .nr(8) 714 .kr(1) 715 .sr(1) 716 .m(m) 717 .n(n) 718 .k(k) 719 .iterations(1) 720 .Test(xnn_f16_gemm_ukernel_6x8__neonfp16arith_ld64); 721 } 722 } 723 } 724 } 725 TEST(F16_GEMM_6X8__NEONFP16ARITH_LD64,n_gt_8)726 TEST(F16_GEMM_6X8__NEONFP16ARITH_LD64, n_gt_8) { 727 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 728 for (uint32_t n = 9; n < 16; n++) { 729 for (size_t k = 1; k <= 20; k += 5) { 730 GemmMicrokernelTester() 731 .mr(6) 732 .nr(8) 733 .kr(1) 734 .sr(1) 735 .m(6) 736 .n(8) 737 .k(k) 738 .Test(xnn_f16_gemm_ukernel_6x8__neonfp16arith_ld64); 739 } 740 } 741 } 742 TEST(F16_GEMM_6X8__NEONFP16ARITH_LD64,n_gt_8_strided_cn)743 TEST(F16_GEMM_6X8__NEONFP16ARITH_LD64, 
n_gt_8_strided_cn) { 744 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 745 for (uint32_t n = 9; n < 16; n++) { 746 for (size_t k = 1; k <= 20; k += 5) { 747 GemmMicrokernelTester() 748 .mr(6) 749 .nr(8) 750 .kr(1) 751 .sr(1) 752 .m(6) 753 .n(8) 754 .k(k) 755 .cn_stride(11) 756 .Test(xnn_f16_gemm_ukernel_6x8__neonfp16arith_ld64); 757 } 758 } 759 } 760 TEST(F16_GEMM_6X8__NEONFP16ARITH_LD64,n_gt_8_strided_a)761 TEST(F16_GEMM_6X8__NEONFP16ARITH_LD64, n_gt_8_strided_a) { 762 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 763 for (uint32_t n = 9; n < 16; n++) { 764 for (size_t k = 1; k <= 20; k += 5) { 765 GemmMicrokernelTester() 766 .mr(6) 767 .nr(8) 768 .kr(1) 769 .sr(1) 770 .m(6) 771 .n(n) 772 .k(k) 773 .a_stride(23) 774 .Test(xnn_f16_gemm_ukernel_6x8__neonfp16arith_ld64); 775 } 776 } 777 } 778 TEST(F16_GEMM_6X8__NEONFP16ARITH_LD64,n_gt_8_subtile)779 TEST(F16_GEMM_6X8__NEONFP16ARITH_LD64, n_gt_8_subtile) { 780 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 781 for (uint32_t n = 9; n < 16; n++) { 782 for (size_t k = 1; k <= 20; k += 5) { 783 for (uint32_t m = 1; m <= 6; m++) { 784 GemmMicrokernelTester() 785 .mr(6) 786 .nr(8) 787 .kr(1) 788 .sr(1) 789 .m(m) 790 .n(n) 791 .k(k) 792 .iterations(1) 793 .Test(xnn_f16_gemm_ukernel_6x8__neonfp16arith_ld64); 794 } 795 } 796 } 797 } 798 TEST(F16_GEMM_6X8__NEONFP16ARITH_LD64,n_div_8)799 TEST(F16_GEMM_6X8__NEONFP16ARITH_LD64, n_div_8) { 800 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 801 for (uint32_t n = 16; n <= 24; n += 8) { 802 for (size_t k = 1; k <= 20; k += 5) { 803 GemmMicrokernelTester() 804 .mr(6) 805 .nr(8) 806 .kr(1) 807 .sr(1) 808 .m(6) 809 .n(8) 810 .k(k) 811 .Test(xnn_f16_gemm_ukernel_6x8__neonfp16arith_ld64); 812 } 813 } 814 } 815 TEST(F16_GEMM_6X8__NEONFP16ARITH_LD64,n_div_8_strided_cn)816 TEST(F16_GEMM_6X8__NEONFP16ARITH_LD64, n_div_8_strided_cn) { 817 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 818 for (uint32_t n = 16; n <= 24; n += 8) { 819 for (size_t k = 1; k <= 20; k += 5) { 820 GemmMicrokernelTester() 821 .mr(6) 822 .nr(8) 823 .kr(1) 824 .sr(1) 825 .m(6) 
826 .n(n) 827 .k(k) 828 .cn_stride(11) 829 .Test(xnn_f16_gemm_ukernel_6x8__neonfp16arith_ld64); 830 } 831 } 832 } 833 TEST(F16_GEMM_6X8__NEONFP16ARITH_LD64,n_div_8_strided_a)834 TEST(F16_GEMM_6X8__NEONFP16ARITH_LD64, n_div_8_strided_a) { 835 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 836 for (uint32_t n = 16; n <= 24; n += 8) { 837 for (size_t k = 1; k <= 20; k += 5) { 838 GemmMicrokernelTester() 839 .mr(6) 840 .nr(8) 841 .kr(1) 842 .sr(1) 843 .m(6) 844 .n(n) 845 .k(k) 846 .a_stride(23) 847 .Test(xnn_f16_gemm_ukernel_6x8__neonfp16arith_ld64); 848 } 849 } 850 } 851 TEST(F16_GEMM_6X8__NEONFP16ARITH_LD64,n_div_8_subtile)852 TEST(F16_GEMM_6X8__NEONFP16ARITH_LD64, n_div_8_subtile) { 853 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 854 for (uint32_t n = 16; n <= 24; n += 8) { 855 for (size_t k = 1; k <= 20; k += 5) { 856 for (uint32_t m = 1; m <= 6; m++) { 857 GemmMicrokernelTester() 858 .mr(6) 859 .nr(8) 860 .kr(1) 861 .sr(1) 862 .m(m) 863 .n(n) 864 .k(k) 865 .iterations(1) 866 .Test(xnn_f16_gemm_ukernel_6x8__neonfp16arith_ld64); 867 } 868 } 869 } 870 } 871 TEST(F16_GEMM_6X8__NEONFP16ARITH_LD64,strided_cm_subtile)872 TEST(F16_GEMM_6X8__NEONFP16ARITH_LD64, strided_cm_subtile) { 873 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 874 for (size_t k = 1; k <= 20; k += 5) { 875 for (uint32_t m = 1; m <= 6; m++) { 876 for (uint32_t n = 1; n <= 8; n++) { 877 GemmMicrokernelTester() 878 .mr(6) 879 .nr(8) 880 .kr(1) 881 .sr(1) 882 .m(m) 883 .n(n) 884 .k(k) 885 .cm_stride(11) 886 .iterations(1) 887 .Test(xnn_f16_gemm_ukernel_6x8__neonfp16arith_ld64); 888 } 889 } 890 } 891 } 892 TEST(F16_GEMM_6X8__NEONFP16ARITH_LD64,qmin)893 TEST(F16_GEMM_6X8__NEONFP16ARITH_LD64, qmin) { 894 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 895 GemmMicrokernelTester() 896 .mr(6) 897 .nr(8) 898 .kr(1) 899 .sr(1) 900 .m(6) 901 .n(8) 902 .k(4) 903 .qmin(128) 904 .Test(xnn_f16_gemm_ukernel_6x8__neonfp16arith_ld64); 905 } 906 TEST(F16_GEMM_6X8__NEONFP16ARITH_LD64,qmax)907 TEST(F16_GEMM_6X8__NEONFP16ARITH_LD64, qmax) { 908 
TEST_REQUIRES_ARM_NEON_FP16_ARITH; 909 GemmMicrokernelTester() 910 .mr(6) 911 .nr(8) 912 .kr(1) 913 .sr(1) 914 .m(6) 915 .n(8) 916 .k(4) 917 .qmax(128) 918 .Test(xnn_f16_gemm_ukernel_6x8__neonfp16arith_ld64); 919 } 920 TEST(F16_GEMM_6X8__NEONFP16ARITH_LD64,strided_cm)921 TEST(F16_GEMM_6X8__NEONFP16ARITH_LD64, strided_cm) { 922 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 923 GemmMicrokernelTester() 924 .mr(6) 925 .nr(8) 926 .kr(1) 927 .sr(1) 928 .m(6) 929 .n(8) 930 .k(4) 931 .cm_stride(11) 932 .Test(xnn_f16_gemm_ukernel_6x8__neonfp16arith_ld64); 933 } 934 #endif // XNN_ARCH_ARM64 935 936 937 #if XNN_ARCH_ARM64 TEST(F16_GEMM_8X8__NEONFP16ARITH_LD64,k_eq_4)938 TEST(F16_GEMM_8X8__NEONFP16ARITH_LD64, k_eq_4) { 939 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 940 GemmMicrokernelTester() 941 .mr(8) 942 .nr(8) 943 .kr(1) 944 .sr(1) 945 .m(8) 946 .n(8) 947 .k(4) 948 .Test(xnn_f16_gemm_ukernel_8x8__neonfp16arith_ld64); 949 } 950 TEST(F16_GEMM_8X8__NEONFP16ARITH_LD64,strided_cn)951 TEST(F16_GEMM_8X8__NEONFP16ARITH_LD64, strided_cn) { 952 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 953 GemmMicrokernelTester() 954 .mr(8) 955 .nr(8) 956 .kr(1) 957 .sr(1) 958 .m(8) 959 .n(8) 960 .k(4) 961 .cn_stride(11) 962 .Test(xnn_f16_gemm_ukernel_8x8__neonfp16arith_ld64); 963 } 964 TEST(F16_GEMM_8X8__NEONFP16ARITH_LD64,k_eq_4_strided_a)965 TEST(F16_GEMM_8X8__NEONFP16ARITH_LD64, k_eq_4_strided_a) { 966 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 967 GemmMicrokernelTester() 968 .mr(8) 969 .nr(8) 970 .kr(1) 971 .sr(1) 972 .m(8) 973 .n(8) 974 .k(4) 975 .a_stride(7) 976 .Test(xnn_f16_gemm_ukernel_8x8__neonfp16arith_ld64); 977 } 978 TEST(F16_GEMM_8X8__NEONFP16ARITH_LD64,k_eq_4_subtile)979 TEST(F16_GEMM_8X8__NEONFP16ARITH_LD64, k_eq_4_subtile) { 980 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 981 for (uint32_t m = 1; m <= 8; m++) { 982 for (uint32_t n = 1; n <= 8; n++) { 983 GemmMicrokernelTester() 984 .mr(8) 985 .nr(8) 986 .kr(1) 987 .sr(1) 988 .m(m) 989 .n(n) 990 .k(4) 991 .iterations(1) 992 
.Test(xnn_f16_gemm_ukernel_8x8__neonfp16arith_ld64); 993 } 994 } 995 } 996 TEST(F16_GEMM_8X8__NEONFP16ARITH_LD64,k_eq_4_subtile_m)997 TEST(F16_GEMM_8X8__NEONFP16ARITH_LD64, k_eq_4_subtile_m) { 998 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 999 for (uint32_t m = 1; m <= 8; m++) { 1000 GemmMicrokernelTester() 1001 .mr(8) 1002 .nr(8) 1003 .kr(1) 1004 .sr(1) 1005 .m(m) 1006 .n(8) 1007 .k(4) 1008 .iterations(1) 1009 .Test(xnn_f16_gemm_ukernel_8x8__neonfp16arith_ld64); 1010 } 1011 } 1012 TEST(F16_GEMM_8X8__NEONFP16ARITH_LD64,k_eq_4_subtile_n)1013 TEST(F16_GEMM_8X8__NEONFP16ARITH_LD64, k_eq_4_subtile_n) { 1014 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1015 for (uint32_t n = 1; n <= 8; n++) { 1016 GemmMicrokernelTester() 1017 .mr(8) 1018 .nr(8) 1019 .kr(1) 1020 .sr(1) 1021 .m(8) 1022 .n(n) 1023 .k(4) 1024 .iterations(1) 1025 .Test(xnn_f16_gemm_ukernel_8x8__neonfp16arith_ld64); 1026 } 1027 } 1028 TEST(F16_GEMM_8X8__NEONFP16ARITH_LD64,k_lt_4)1029 TEST(F16_GEMM_8X8__NEONFP16ARITH_LD64, k_lt_4) { 1030 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1031 for (size_t k = 1; k < 4; k++) { 1032 GemmMicrokernelTester() 1033 .mr(8) 1034 .nr(8) 1035 .kr(1) 1036 .sr(1) 1037 .m(8) 1038 .n(8) 1039 .k(k) 1040 .Test(xnn_f16_gemm_ukernel_8x8__neonfp16arith_ld64); 1041 } 1042 } 1043 TEST(F16_GEMM_8X8__NEONFP16ARITH_LD64,k_lt_4_strided_a)1044 TEST(F16_GEMM_8X8__NEONFP16ARITH_LD64, k_lt_4_strided_a) { 1045 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1046 for (size_t k = 1; k < 4; k++) { 1047 GemmMicrokernelTester() 1048 .mr(8) 1049 .nr(8) 1050 .kr(1) 1051 .sr(1) 1052 .m(8) 1053 .n(8) 1054 .k(k) 1055 .a_stride(7) 1056 .Test(xnn_f16_gemm_ukernel_8x8__neonfp16arith_ld64); 1057 } 1058 } 1059 TEST(F16_GEMM_8X8__NEONFP16ARITH_LD64,k_lt_4_subtile)1060 TEST(F16_GEMM_8X8__NEONFP16ARITH_LD64, k_lt_4_subtile) { 1061 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1062 for (size_t k = 1; k < 4; k++) { 1063 for (uint32_t m = 1; m <= 8; m++) { 1064 for (uint32_t n = 1; n <= 8; n++) { 1065 GemmMicrokernelTester() 1066 .mr(8) 1067 .nr(8) 1068 .kr(1) 1069 
.sr(1) 1070 .m(m) 1071 .n(n) 1072 .k(k) 1073 .iterations(1) 1074 .Test(xnn_f16_gemm_ukernel_8x8__neonfp16arith_ld64); 1075 } 1076 } 1077 } 1078 } 1079 TEST(F16_GEMM_8X8__NEONFP16ARITH_LD64,k_gt_4)1080 TEST(F16_GEMM_8X8__NEONFP16ARITH_LD64, k_gt_4) { 1081 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1082 for (size_t k = 5; k < 8; k++) { 1083 GemmMicrokernelTester() 1084 .mr(8) 1085 .nr(8) 1086 .kr(1) 1087 .sr(1) 1088 .m(8) 1089 .n(8) 1090 .k(k) 1091 .Test(xnn_f16_gemm_ukernel_8x8__neonfp16arith_ld64); 1092 } 1093 } 1094 TEST(F16_GEMM_8X8__NEONFP16ARITH_LD64,k_gt_4_strided_a)1095 TEST(F16_GEMM_8X8__NEONFP16ARITH_LD64, k_gt_4_strided_a) { 1096 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1097 for (size_t k = 5; k < 8; k++) { 1098 GemmMicrokernelTester() 1099 .mr(8) 1100 .nr(8) 1101 .kr(1) 1102 .sr(1) 1103 .m(8) 1104 .n(8) 1105 .k(k) 1106 .a_stride(11) 1107 .Test(xnn_f16_gemm_ukernel_8x8__neonfp16arith_ld64); 1108 } 1109 } 1110 TEST(F16_GEMM_8X8__NEONFP16ARITH_LD64,k_gt_4_subtile)1111 TEST(F16_GEMM_8X8__NEONFP16ARITH_LD64, k_gt_4_subtile) { 1112 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1113 for (size_t k = 5; k < 8; k++) { 1114 for (uint32_t m = 1; m <= 8; m++) { 1115 for (uint32_t n = 1; n <= 8; n++) { 1116 GemmMicrokernelTester() 1117 .mr(8) 1118 .nr(8) 1119 .kr(1) 1120 .sr(1) 1121 .m(m) 1122 .n(n) 1123 .k(k) 1124 .iterations(1) 1125 .Test(xnn_f16_gemm_ukernel_8x8__neonfp16arith_ld64); 1126 } 1127 } 1128 } 1129 } 1130 TEST(F16_GEMM_8X8__NEONFP16ARITH_LD64,k_div_4)1131 TEST(F16_GEMM_8X8__NEONFP16ARITH_LD64, k_div_4) { 1132 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1133 for (size_t k = 8; k <= 40; k += 4) { 1134 GemmMicrokernelTester() 1135 .mr(8) 1136 .nr(8) 1137 .kr(1) 1138 .sr(1) 1139 .m(8) 1140 .n(8) 1141 .k(k) 1142 .Test(xnn_f16_gemm_ukernel_8x8__neonfp16arith_ld64); 1143 } 1144 } 1145 TEST(F16_GEMM_8X8__NEONFP16ARITH_LD64,k_div_4_strided_a)1146 TEST(F16_GEMM_8X8__NEONFP16ARITH_LD64, k_div_4_strided_a) { 1147 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1148 for (size_t k = 8; k <= 40; k += 4) { 1149 
GemmMicrokernelTester() 1150 .mr(8) 1151 .nr(8) 1152 .kr(1) 1153 .sr(1) 1154 .m(8) 1155 .n(8) 1156 .k(k) 1157 .a_stride(43) 1158 .Test(xnn_f16_gemm_ukernel_8x8__neonfp16arith_ld64); 1159 } 1160 } 1161 TEST(F16_GEMM_8X8__NEONFP16ARITH_LD64,k_div_4_subtile)1162 TEST(F16_GEMM_8X8__NEONFP16ARITH_LD64, k_div_4_subtile) { 1163 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1164 for (size_t k = 8; k <= 40; k += 4) { 1165 for (uint32_t m = 1; m <= 8; m++) { 1166 for (uint32_t n = 1; n <= 8; n++) { 1167 GemmMicrokernelTester() 1168 .mr(8) 1169 .nr(8) 1170 .kr(1) 1171 .sr(1) 1172 .m(m) 1173 .n(n) 1174 .k(k) 1175 .iterations(1) 1176 .Test(xnn_f16_gemm_ukernel_8x8__neonfp16arith_ld64); 1177 } 1178 } 1179 } 1180 } 1181 TEST(F16_GEMM_8X8__NEONFP16ARITH_LD64,n_gt_8)1182 TEST(F16_GEMM_8X8__NEONFP16ARITH_LD64, n_gt_8) { 1183 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1184 for (uint32_t n = 9; n < 16; n++) { 1185 for (size_t k = 1; k <= 20; k += 5) { 1186 GemmMicrokernelTester() 1187 .mr(8) 1188 .nr(8) 1189 .kr(1) 1190 .sr(1) 1191 .m(8) 1192 .n(8) 1193 .k(k) 1194 .Test(xnn_f16_gemm_ukernel_8x8__neonfp16arith_ld64); 1195 } 1196 } 1197 } 1198 TEST(F16_GEMM_8X8__NEONFP16ARITH_LD64,n_gt_8_strided_cn)1199 TEST(F16_GEMM_8X8__NEONFP16ARITH_LD64, n_gt_8_strided_cn) { 1200 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1201 for (uint32_t n = 9; n < 16; n++) { 1202 for (size_t k = 1; k <= 20; k += 5) { 1203 GemmMicrokernelTester() 1204 .mr(8) 1205 .nr(8) 1206 .kr(1) 1207 .sr(1) 1208 .m(8) 1209 .n(8) 1210 .k(k) 1211 .cn_stride(11) 1212 .Test(xnn_f16_gemm_ukernel_8x8__neonfp16arith_ld64); 1213 } 1214 } 1215 } 1216 TEST(F16_GEMM_8X8__NEONFP16ARITH_LD64,n_gt_8_strided_a)1217 TEST(F16_GEMM_8X8__NEONFP16ARITH_LD64, n_gt_8_strided_a) { 1218 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1219 for (uint32_t n = 9; n < 16; n++) { 1220 for (size_t k = 1; k <= 20; k += 5) { 1221 GemmMicrokernelTester() 1222 .mr(8) 1223 .nr(8) 1224 .kr(1) 1225 .sr(1) 1226 .m(8) 1227 .n(n) 1228 .k(k) 1229 .a_stride(23) 1230 
.Test(xnn_f16_gemm_ukernel_8x8__neonfp16arith_ld64); 1231 } 1232 } 1233 } 1234 TEST(F16_GEMM_8X8__NEONFP16ARITH_LD64,n_gt_8_subtile)1235 TEST(F16_GEMM_8X8__NEONFP16ARITH_LD64, n_gt_8_subtile) { 1236 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1237 for (uint32_t n = 9; n < 16; n++) { 1238 for (size_t k = 1; k <= 20; k += 5) { 1239 for (uint32_t m = 1; m <= 8; m++) { 1240 GemmMicrokernelTester() 1241 .mr(8) 1242 .nr(8) 1243 .kr(1) 1244 .sr(1) 1245 .m(m) 1246 .n(n) 1247 .k(k) 1248 .iterations(1) 1249 .Test(xnn_f16_gemm_ukernel_8x8__neonfp16arith_ld64); 1250 } 1251 } 1252 } 1253 } 1254 TEST(F16_GEMM_8X8__NEONFP16ARITH_LD64,n_div_8)1255 TEST(F16_GEMM_8X8__NEONFP16ARITH_LD64, n_div_8) { 1256 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1257 for (uint32_t n = 16; n <= 24; n += 8) { 1258 for (size_t k = 1; k <= 20; k += 5) { 1259 GemmMicrokernelTester() 1260 .mr(8) 1261 .nr(8) 1262 .kr(1) 1263 .sr(1) 1264 .m(8) 1265 .n(8) 1266 .k(k) 1267 .Test(xnn_f16_gemm_ukernel_8x8__neonfp16arith_ld64); 1268 } 1269 } 1270 } 1271 TEST(F16_GEMM_8X8__NEONFP16ARITH_LD64,n_div_8_strided_cn)1272 TEST(F16_GEMM_8X8__NEONFP16ARITH_LD64, n_div_8_strided_cn) { 1273 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1274 for (uint32_t n = 16; n <= 24; n += 8) { 1275 for (size_t k = 1; k <= 20; k += 5) { 1276 GemmMicrokernelTester() 1277 .mr(8) 1278 .nr(8) 1279 .kr(1) 1280 .sr(1) 1281 .m(8) 1282 .n(n) 1283 .k(k) 1284 .cn_stride(11) 1285 .Test(xnn_f16_gemm_ukernel_8x8__neonfp16arith_ld64); 1286 } 1287 } 1288 } 1289 TEST(F16_GEMM_8X8__NEONFP16ARITH_LD64,n_div_8_strided_a)1290 TEST(F16_GEMM_8X8__NEONFP16ARITH_LD64, n_div_8_strided_a) { 1291 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1292 for (uint32_t n = 16; n <= 24; n += 8) { 1293 for (size_t k = 1; k <= 20; k += 5) { 1294 GemmMicrokernelTester() 1295 .mr(8) 1296 .nr(8) 1297 .kr(1) 1298 .sr(1) 1299 .m(8) 1300 .n(n) 1301 .k(k) 1302 .a_stride(23) 1303 .Test(xnn_f16_gemm_ukernel_8x8__neonfp16arith_ld64); 1304 } 1305 } 1306 } 1307 TEST(F16_GEMM_8X8__NEONFP16ARITH_LD64,n_div_8_subtile)1308 
TEST(F16_GEMM_8X8__NEONFP16ARITH_LD64, n_div_8_subtile) { 1309 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1310 for (uint32_t n = 16; n <= 24; n += 8) { 1311 for (size_t k = 1; k <= 20; k += 5) { 1312 for (uint32_t m = 1; m <= 8; m++) { 1313 GemmMicrokernelTester() 1314 .mr(8) 1315 .nr(8) 1316 .kr(1) 1317 .sr(1) 1318 .m(m) 1319 .n(n) 1320 .k(k) 1321 .iterations(1) 1322 .Test(xnn_f16_gemm_ukernel_8x8__neonfp16arith_ld64); 1323 } 1324 } 1325 } 1326 } 1327 TEST(F16_GEMM_8X8__NEONFP16ARITH_LD64,strided_cm_subtile)1328 TEST(F16_GEMM_8X8__NEONFP16ARITH_LD64, strided_cm_subtile) { 1329 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1330 for (size_t k = 1; k <= 20; k += 5) { 1331 for (uint32_t m = 1; m <= 8; m++) { 1332 for (uint32_t n = 1; n <= 8; n++) { 1333 GemmMicrokernelTester() 1334 .mr(8) 1335 .nr(8) 1336 .kr(1) 1337 .sr(1) 1338 .m(m) 1339 .n(n) 1340 .k(k) 1341 .cm_stride(11) 1342 .iterations(1) 1343 .Test(xnn_f16_gemm_ukernel_8x8__neonfp16arith_ld64); 1344 } 1345 } 1346 } 1347 } 1348 TEST(F16_GEMM_8X8__NEONFP16ARITH_LD64,qmin)1349 TEST(F16_GEMM_8X8__NEONFP16ARITH_LD64, qmin) { 1350 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1351 GemmMicrokernelTester() 1352 .mr(8) 1353 .nr(8) 1354 .kr(1) 1355 .sr(1) 1356 .m(8) 1357 .n(8) 1358 .k(4) 1359 .qmin(128) 1360 .Test(xnn_f16_gemm_ukernel_8x8__neonfp16arith_ld64); 1361 } 1362 TEST(F16_GEMM_8X8__NEONFP16ARITH_LD64,qmax)1363 TEST(F16_GEMM_8X8__NEONFP16ARITH_LD64, qmax) { 1364 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1365 GemmMicrokernelTester() 1366 .mr(8) 1367 .nr(8) 1368 .kr(1) 1369 .sr(1) 1370 .m(8) 1371 .n(8) 1372 .k(4) 1373 .qmax(128) 1374 .Test(xnn_f16_gemm_ukernel_8x8__neonfp16arith_ld64); 1375 } 1376 TEST(F16_GEMM_8X8__NEONFP16ARITH_LD64,strided_cm)1377 TEST(F16_GEMM_8X8__NEONFP16ARITH_LD64, strided_cm) { 1378 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1379 GemmMicrokernelTester() 1380 .mr(8) 1381 .nr(8) 1382 .kr(1) 1383 .sr(1) 1384 .m(8) 1385 .n(8) 1386 .k(4) 1387 .cm_stride(11) 1388 .Test(xnn_f16_gemm_ukernel_8x8__neonfp16arith_ld64); 1389 } 1390 #endif // 
XNN_ARCH_ARM64 1391