1 // Copyright 2019 Google LLC 2 // 3 // This source code is licensed under the BSD-style license found in the 4 // LICENSE file in the root directory of this source tree. 5 // 6 // Auto-generated file. Do not edit! 7 // Specification: test/f16-spmm-minmax.yaml 8 // Generator: tools/generate-spmm-test.py 9 10 11 #include <gtest/gtest.h> 12 13 #include <xnnpack/common.h> 14 #include <xnnpack/isa-checks.h> 15 16 #include <xnnpack/spmm.h> 17 #include "spmm-microkernel-tester.h" 18 19 20 #if XNN_ARCH_ARM64 TEST(F16_SPMM_MINMAX_8X1__NEONFP16ARITH,k_eq_1)21 TEST(F16_SPMM_MINMAX_8X1__NEONFP16ARITH, k_eq_1) { 22 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 23 SpMMMicrokernelTester() 24 .mr(8) 25 .nr(1) 26 .m(8) 27 .n(1) 28 .k(1) 29 .sparsity(0.0f) 30 .Test(xnn_f16_spmm_minmax_ukernel_8x1__neonfp16arith); 31 } 32 TEST(F16_SPMM_MINMAX_8X1__NEONFP16ARITH,k_gt_1)33 TEST(F16_SPMM_MINMAX_8X1__NEONFP16ARITH, k_gt_1) { 34 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 35 for (size_t k = 2; k < 10; k++) { 36 SpMMMicrokernelTester() 37 .mr(8) 38 .nr(1) 39 .m(8) 40 .n(1) 41 .k(k) 42 .sparsity(0.0f) 43 .Test(xnn_f16_spmm_minmax_ukernel_8x1__neonfp16arith); 44 } 45 } 46 TEST(F16_SPMM_MINMAX_8X1__NEONFP16ARITH,n_gt_1)47 TEST(F16_SPMM_MINMAX_8X1__NEONFP16ARITH, n_gt_1) { 48 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 49 for (uint32_t n = 2; n < 10; n++) { 50 for (size_t k = 1; k <= 5; k += 2) { 51 SpMMMicrokernelTester() 52 .mr(8) 53 .nr(1) 54 .m(8) 55 .n(n) 56 .k(k) 57 .sparsity(0.0f) 58 .Test(xnn_f16_spmm_minmax_ukernel_8x1__neonfp16arith); 59 } 60 } 61 } 62 TEST(F16_SPMM_MINMAX_8X1__NEONFP16ARITH,m_lt_8)63 TEST(F16_SPMM_MINMAX_8X1__NEONFP16ARITH, m_lt_8) { 64 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 65 for (uint32_t m = 1; m < 8; m++) { 66 for (uint32_t n = 1; n < 10; n += 2) { 67 for (size_t k = 1; k <= 5; k += 2) { 68 SpMMMicrokernelTester() 69 .mr(8) 70 .nr(1) 71 .m(m) 72 .n(n) 73 .k(k) 74 .sparsity(0.0f) 75 .Test(xnn_f16_spmm_minmax_ukernel_8x1__neonfp16arith); 76 } 77 } 78 } 79 } 80 
TEST(F16_SPMM_MINMAX_8X1__NEONFP16ARITH,m_div_8)81 TEST(F16_SPMM_MINMAX_8X1__NEONFP16ARITH, m_div_8) { 82 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 83 for (uint32_t m = 16; m <= 24; m += 8) { 84 for (uint32_t n = 1; n < 10; n += 2) { 85 for (size_t k = 1; k <= 5; k += 2) { 86 SpMMMicrokernelTester() 87 .mr(8) 88 .nr(1) 89 .m(m) 90 .n(n) 91 .k(k) 92 .sparsity(0.0f) 93 .Test(xnn_f16_spmm_minmax_ukernel_8x1__neonfp16arith); 94 } 95 } 96 } 97 } 98 TEST(F16_SPMM_MINMAX_8X1__NEONFP16ARITH,m_gt_8)99 TEST(F16_SPMM_MINMAX_8X1__NEONFP16ARITH, m_gt_8) { 100 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 101 for (uint32_t m = 9; m < 16; m++) { 102 for (uint32_t n = 1; n < 10; n += 2) { 103 for (size_t k = 1; k <= 5; k += 2) { 104 SpMMMicrokernelTester() 105 .mr(8) 106 .nr(1) 107 .m(m) 108 .n(n) 109 .k(k) 110 .sparsity(0.0f) 111 .Test(xnn_f16_spmm_minmax_ukernel_8x1__neonfp16arith); 112 } 113 } 114 } 115 } 116 TEST(F16_SPMM_MINMAX_8X1__NEONFP16ARITH,output_stride)117 TEST(F16_SPMM_MINMAX_8X1__NEONFP16ARITH, output_stride) { 118 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 119 for (uint32_t n = 1; n < 10; n += 2) { 120 for (size_t k = 1; k <= 5; k += 2) { 121 SpMMMicrokernelTester() 122 .mr(8) 123 .nr(1) 124 .m(16) 125 .n(n) 126 .k(k) 127 .output_stride(19) 128 .sparsity(0.0f) 129 .Test(xnn_f16_spmm_minmax_ukernel_8x1__neonfp16arith); 130 } 131 } 132 } 133 TEST(F16_SPMM_MINMAX_8X1__NEONFP16ARITH,qmin)134 TEST(F16_SPMM_MINMAX_8X1__NEONFP16ARITH, qmin) { 135 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 136 for (uint32_t n = 1; n < 10; n += 2) { 137 for (size_t k = 1; k <= 5; k += 2) { 138 SpMMMicrokernelTester() 139 .mr(8) 140 .nr(1) 141 .m(16) 142 .n(n) 143 .k(k) 144 .sparsity(0.0f) 145 .qmin(128) 146 .Test(xnn_f16_spmm_minmax_ukernel_8x1__neonfp16arith); 147 } 148 } 149 } 150 TEST(F16_SPMM_MINMAX_8X1__NEONFP16ARITH,qmax)151 TEST(F16_SPMM_MINMAX_8X1__NEONFP16ARITH, qmax) { 152 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 153 for (uint32_t n = 1; n < 10; n += 2) { 154 for (size_t k = 1; k <= 5; k += 2) { 155 
SpMMMicrokernelTester() 156 .mr(8) 157 .nr(1) 158 .m(16) 159 .n(n) 160 .k(k) 161 .sparsity(0.0f) 162 .qmax(128) 163 .Test(xnn_f16_spmm_minmax_ukernel_8x1__neonfp16arith); 164 } 165 } 166 } 167 TEST(F16_SPMM_MINMAX_8X1__NEONFP16ARITH,half_sparse)168 TEST(F16_SPMM_MINMAX_8X1__NEONFP16ARITH, half_sparse) { 169 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 170 for (uint32_t n = 1; n < 10; n += 2) { 171 for (size_t k = 1; k <= 5; k += 2) { 172 SpMMMicrokernelTester() 173 .mr(8) 174 .nr(1) 175 .m(16) 176 .n(n) 177 .k(k) 178 .sparsity(0.5f) 179 .Test(xnn_f16_spmm_minmax_ukernel_8x1__neonfp16arith); 180 } 181 } 182 } 183 TEST(F16_SPMM_MINMAX_8X1__NEONFP16ARITH,zero_weights)184 TEST(F16_SPMM_MINMAX_8X1__NEONFP16ARITH, zero_weights) { 185 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 186 for (uint32_t n = 1; n < 10; n += 2) { 187 for (size_t k = 1; k <= 5; k += 2) { 188 SpMMMicrokernelTester() 189 .mr(8) 190 .nr(1) 191 .m(16) 192 .n(n) 193 .k(k) 194 .sparsity(1.0f) 195 .Test(xnn_f16_spmm_minmax_ukernel_8x1__neonfp16arith); 196 } 197 } 198 } 199 #endif // XNN_ARCH_ARM64 200 201 202 #if XNN_ARCH_ARM64 TEST(F16_SPMM_MINMAX_8X1__NEONFP16ARITH_X2,k_eq_2)203 TEST(F16_SPMM_MINMAX_8X1__NEONFP16ARITH_X2, k_eq_2) { 204 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 205 SpMMMicrokernelTester() 206 .mr(8) 207 .nr(1) 208 .m(8) 209 .n(1) 210 .k(2) 211 .sparsity(0.0f) 212 .Test(xnn_f16_spmm_minmax_ukernel_8x1__neonfp16arith_x2); 213 } 214 TEST(F16_SPMM_MINMAX_8X1__NEONFP16ARITH_X2,k_lt_2)215 TEST(F16_SPMM_MINMAX_8X1__NEONFP16ARITH_X2, k_lt_2) { 216 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 217 for (size_t k = 1; k < 2; k++) { 218 SpMMMicrokernelTester() 219 .mr(8) 220 .nr(1) 221 .m(8) 222 .n(1) 223 .k(k) 224 .sparsity(0.0f) 225 .Test(xnn_f16_spmm_minmax_ukernel_8x1__neonfp16arith_x2); 226 } 227 } 228 TEST(F16_SPMM_MINMAX_8X1__NEONFP16ARITH_X2,k_gt_2)229 TEST(F16_SPMM_MINMAX_8X1__NEONFP16ARITH_X2, k_gt_2) { 230 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 231 for (size_t k = 3; k < 4; k++) { 232 SpMMMicrokernelTester() 233 .mr(8) 234 .nr(1) 
235 .m(8) 236 .n(1) 237 .k(k) 238 .sparsity(0.0f) 239 .Test(xnn_f16_spmm_minmax_ukernel_8x1__neonfp16arith_x2); 240 } 241 } 242 TEST(F16_SPMM_MINMAX_8X1__NEONFP16ARITH_X2,k_div_2)243 TEST(F16_SPMM_MINMAX_8X1__NEONFP16ARITH_X2, k_div_2) { 244 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 245 for (size_t k = 4; k <= 20; k += 2) { 246 SpMMMicrokernelTester() 247 .mr(8) 248 .nr(1) 249 .m(8) 250 .n(1) 251 .k(k) 252 .sparsity(0.0f) 253 .Test(xnn_f16_spmm_minmax_ukernel_8x1__neonfp16arith_x2); 254 } 255 } 256 TEST(F16_SPMM_MINMAX_8X1__NEONFP16ARITH_X2,n_gt_1)257 TEST(F16_SPMM_MINMAX_8X1__NEONFP16ARITH_X2, n_gt_1) { 258 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 259 for (uint32_t n = 2; n < 10; n++) { 260 for (size_t k = 1; k <= 10; k += 3) { 261 SpMMMicrokernelTester() 262 .mr(8) 263 .nr(1) 264 .m(8) 265 .n(n) 266 .k(k) 267 .sparsity(0.0f) 268 .Test(xnn_f16_spmm_minmax_ukernel_8x1__neonfp16arith_x2); 269 } 270 } 271 } 272 TEST(F16_SPMM_MINMAX_8X1__NEONFP16ARITH_X2,m_lt_8)273 TEST(F16_SPMM_MINMAX_8X1__NEONFP16ARITH_X2, m_lt_8) { 274 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 275 for (uint32_t m = 1; m < 8; m++) { 276 for (uint32_t n = 1; n < 10; n += 2) { 277 for (size_t k = 1; k <= 10; k += 3) { 278 SpMMMicrokernelTester() 279 .mr(8) 280 .nr(1) 281 .m(m) 282 .n(n) 283 .k(k) 284 .sparsity(0.0f) 285 .Test(xnn_f16_spmm_minmax_ukernel_8x1__neonfp16arith_x2); 286 } 287 } 288 } 289 } 290 TEST(F16_SPMM_MINMAX_8X1__NEONFP16ARITH_X2,m_div_8)291 TEST(F16_SPMM_MINMAX_8X1__NEONFP16ARITH_X2, m_div_8) { 292 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 293 for (uint32_t m = 16; m <= 24; m += 8) { 294 for (uint32_t n = 1; n < 10; n += 2) { 295 for (size_t k = 1; k <= 10; k += 3) { 296 SpMMMicrokernelTester() 297 .mr(8) 298 .nr(1) 299 .m(m) 300 .n(n) 301 .k(k) 302 .sparsity(0.0f) 303 .Test(xnn_f16_spmm_minmax_ukernel_8x1__neonfp16arith_x2); 304 } 305 } 306 } 307 } 308 TEST(F16_SPMM_MINMAX_8X1__NEONFP16ARITH_X2,m_gt_8)309 TEST(F16_SPMM_MINMAX_8X1__NEONFP16ARITH_X2, m_gt_8) { 310 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 311 for 
(uint32_t m = 9; m < 16; m++) { 312 for (uint32_t n = 1; n < 10; n += 2) { 313 for (size_t k = 1; k <= 10; k += 3) { 314 SpMMMicrokernelTester() 315 .mr(8) 316 .nr(1) 317 .m(m) 318 .n(n) 319 .k(k) 320 .sparsity(0.0f) 321 .Test(xnn_f16_spmm_minmax_ukernel_8x1__neonfp16arith_x2); 322 } 323 } 324 } 325 } 326 TEST(F16_SPMM_MINMAX_8X1__NEONFP16ARITH_X2,output_stride)327 TEST(F16_SPMM_MINMAX_8X1__NEONFP16ARITH_X2, output_stride) { 328 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 329 for (uint32_t n = 1; n < 10; n += 2) { 330 for (size_t k = 1; k <= 10; k += 3) { 331 SpMMMicrokernelTester() 332 .mr(8) 333 .nr(1) 334 .m(16) 335 .n(n) 336 .k(k) 337 .output_stride(19) 338 .sparsity(0.0f) 339 .Test(xnn_f16_spmm_minmax_ukernel_8x1__neonfp16arith_x2); 340 } 341 } 342 } 343 TEST(F16_SPMM_MINMAX_8X1__NEONFP16ARITH_X2,qmin)344 TEST(F16_SPMM_MINMAX_8X1__NEONFP16ARITH_X2, qmin) { 345 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 346 for (uint32_t n = 1; n < 10; n += 2) { 347 for (size_t k = 1; k <= 10; k += 3) { 348 SpMMMicrokernelTester() 349 .mr(8) 350 .nr(1) 351 .m(16) 352 .n(n) 353 .k(k) 354 .sparsity(0.0f) 355 .qmin(128) 356 .Test(xnn_f16_spmm_minmax_ukernel_8x1__neonfp16arith_x2); 357 } 358 } 359 } 360 TEST(F16_SPMM_MINMAX_8X1__NEONFP16ARITH_X2,qmax)361 TEST(F16_SPMM_MINMAX_8X1__NEONFP16ARITH_X2, qmax) { 362 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 363 for (uint32_t n = 1; n < 10; n += 2) { 364 for (size_t k = 1; k <= 10; k += 3) { 365 SpMMMicrokernelTester() 366 .mr(8) 367 .nr(1) 368 .m(16) 369 .n(n) 370 .k(k) 371 .sparsity(0.0f) 372 .qmax(128) 373 .Test(xnn_f16_spmm_minmax_ukernel_8x1__neonfp16arith_x2); 374 } 375 } 376 } 377 TEST(F16_SPMM_MINMAX_8X1__NEONFP16ARITH_X2,half_sparse)378 TEST(F16_SPMM_MINMAX_8X1__NEONFP16ARITH_X2, half_sparse) { 379 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 380 for (uint32_t n = 1; n < 10; n += 2) { 381 for (size_t k = 1; k <= 10; k += 3) { 382 SpMMMicrokernelTester() 383 .mr(8) 384 .nr(1) 385 .m(16) 386 .n(n) 387 .k(k) 388 .sparsity(0.5f) 389 
.Test(xnn_f16_spmm_minmax_ukernel_8x1__neonfp16arith_x2); 390 } 391 } 392 } 393 TEST(F16_SPMM_MINMAX_8X1__NEONFP16ARITH_X2,zero_weights)394 TEST(F16_SPMM_MINMAX_8X1__NEONFP16ARITH_X2, zero_weights) { 395 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 396 for (uint32_t n = 1; n < 10; n += 2) { 397 for (size_t k = 1; k <= 10; k += 3) { 398 SpMMMicrokernelTester() 399 .mr(8) 400 .nr(1) 401 .m(16) 402 .n(n) 403 .k(k) 404 .sparsity(1.0f) 405 .Test(xnn_f16_spmm_minmax_ukernel_8x1__neonfp16arith_x2); 406 } 407 } 408 } 409 #endif // XNN_ARCH_ARM64 410 411 412 #if XNN_ARCH_ARM64 TEST(F16_SPMM_MINMAX_16X1__NEONFP16ARITH,k_eq_1)413 TEST(F16_SPMM_MINMAX_16X1__NEONFP16ARITH, k_eq_1) { 414 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 415 SpMMMicrokernelTester() 416 .mr(16) 417 .nr(1) 418 .m(16) 419 .n(1) 420 .k(1) 421 .sparsity(0.0f) 422 .Test(xnn_f16_spmm_minmax_ukernel_16x1__neonfp16arith); 423 } 424 TEST(F16_SPMM_MINMAX_16X1__NEONFP16ARITH,k_gt_1)425 TEST(F16_SPMM_MINMAX_16X1__NEONFP16ARITH, k_gt_1) { 426 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 427 for (size_t k = 2; k < 10; k++) { 428 SpMMMicrokernelTester() 429 .mr(16) 430 .nr(1) 431 .m(16) 432 .n(1) 433 .k(k) 434 .sparsity(0.0f) 435 .Test(xnn_f16_spmm_minmax_ukernel_16x1__neonfp16arith); 436 } 437 } 438 TEST(F16_SPMM_MINMAX_16X1__NEONFP16ARITH,n_gt_1)439 TEST(F16_SPMM_MINMAX_16X1__NEONFP16ARITH, n_gt_1) { 440 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 441 for (uint32_t n = 2; n < 10; n++) { 442 for (size_t k = 1; k <= 5; k += 2) { 443 SpMMMicrokernelTester() 444 .mr(16) 445 .nr(1) 446 .m(16) 447 .n(n) 448 .k(k) 449 .sparsity(0.0f) 450 .Test(xnn_f16_spmm_minmax_ukernel_16x1__neonfp16arith); 451 } 452 } 453 } 454 TEST(F16_SPMM_MINMAX_16X1__NEONFP16ARITH,m_lt_16)455 TEST(F16_SPMM_MINMAX_16X1__NEONFP16ARITH, m_lt_16) { 456 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 457 for (uint32_t m = 1; m < 16; m++) { 458 for (uint32_t n = 1; n < 10; n += 2) { 459 for (size_t k = 1; k <= 5; k += 2) { 460 SpMMMicrokernelTester() 461 .mr(16) 462 .nr(1) 463 .m(m) 464 .n(n) 465 .k(k) 
466 .sparsity(0.0f) 467 .Test(xnn_f16_spmm_minmax_ukernel_16x1__neonfp16arith); 468 } 469 } 470 } 471 } 472 TEST(F16_SPMM_MINMAX_16X1__NEONFP16ARITH,m_div_16)473 TEST(F16_SPMM_MINMAX_16X1__NEONFP16ARITH, m_div_16) { 474 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 475 for (uint32_t m = 32; m <= 48; m += 16) { 476 for (uint32_t n = 1; n < 10; n += 2) { 477 for (size_t k = 1; k <= 5; k += 2) { 478 SpMMMicrokernelTester() 479 .mr(16) 480 .nr(1) 481 .m(m) 482 .n(n) 483 .k(k) 484 .sparsity(0.0f) 485 .Test(xnn_f16_spmm_minmax_ukernel_16x1__neonfp16arith); 486 } 487 } 488 } 489 } 490 TEST(F16_SPMM_MINMAX_16X1__NEONFP16ARITH,m_gt_16)491 TEST(F16_SPMM_MINMAX_16X1__NEONFP16ARITH, m_gt_16) { 492 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 493 for (uint32_t m = 17; m < 32; m++) { 494 for (uint32_t n = 1; n < 10; n += 2) { 495 for (size_t k = 1; k <= 5; k += 2) { 496 SpMMMicrokernelTester() 497 .mr(16) 498 .nr(1) 499 .m(m) 500 .n(n) 501 .k(k) 502 .sparsity(0.0f) 503 .Test(xnn_f16_spmm_minmax_ukernel_16x1__neonfp16arith); 504 } 505 } 506 } 507 } 508 TEST(F16_SPMM_MINMAX_16X1__NEONFP16ARITH,output_stride)509 TEST(F16_SPMM_MINMAX_16X1__NEONFP16ARITH, output_stride) { 510 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 511 for (uint32_t n = 1; n < 10; n += 2) { 512 for (size_t k = 1; k <= 5; k += 2) { 513 SpMMMicrokernelTester() 514 .mr(16) 515 .nr(1) 516 .m(32) 517 .n(n) 518 .k(k) 519 .output_stride(37) 520 .sparsity(0.0f) 521 .Test(xnn_f16_spmm_minmax_ukernel_16x1__neonfp16arith); 522 } 523 } 524 } 525 TEST(F16_SPMM_MINMAX_16X1__NEONFP16ARITH,qmin)526 TEST(F16_SPMM_MINMAX_16X1__NEONFP16ARITH, qmin) { 527 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 528 for (uint32_t n = 1; n < 10; n += 2) { 529 for (size_t k = 1; k <= 5; k += 2) { 530 SpMMMicrokernelTester() 531 .mr(16) 532 .nr(1) 533 .m(32) 534 .n(n) 535 .k(k) 536 .sparsity(0.0f) 537 .qmin(128) 538 .Test(xnn_f16_spmm_minmax_ukernel_16x1__neonfp16arith); 539 } 540 } 541 } 542 TEST(F16_SPMM_MINMAX_16X1__NEONFP16ARITH,qmax)543 TEST(F16_SPMM_MINMAX_16X1__NEONFP16ARITH, 
qmax) { 544 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 545 for (uint32_t n = 1; n < 10; n += 2) { 546 for (size_t k = 1; k <= 5; k += 2) { 547 SpMMMicrokernelTester() 548 .mr(16) 549 .nr(1) 550 .m(32) 551 .n(n) 552 .k(k) 553 .sparsity(0.0f) 554 .qmax(128) 555 .Test(xnn_f16_spmm_minmax_ukernel_16x1__neonfp16arith); 556 } 557 } 558 } 559 TEST(F16_SPMM_MINMAX_16X1__NEONFP16ARITH,half_sparse)560 TEST(F16_SPMM_MINMAX_16X1__NEONFP16ARITH, half_sparse) { 561 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 562 for (uint32_t n = 1; n < 10; n += 2) { 563 for (size_t k = 1; k <= 5; k += 2) { 564 SpMMMicrokernelTester() 565 .mr(16) 566 .nr(1) 567 .m(32) 568 .n(n) 569 .k(k) 570 .sparsity(0.5f) 571 .Test(xnn_f16_spmm_minmax_ukernel_16x1__neonfp16arith); 572 } 573 } 574 } 575 TEST(F16_SPMM_MINMAX_16X1__NEONFP16ARITH,zero_weights)576 TEST(F16_SPMM_MINMAX_16X1__NEONFP16ARITH, zero_weights) { 577 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 578 for (uint32_t n = 1; n < 10; n += 2) { 579 for (size_t k = 1; k <= 5; k += 2) { 580 SpMMMicrokernelTester() 581 .mr(16) 582 .nr(1) 583 .m(32) 584 .n(n) 585 .k(k) 586 .sparsity(1.0f) 587 .Test(xnn_f16_spmm_minmax_ukernel_16x1__neonfp16arith); 588 } 589 } 590 } 591 #endif // XNN_ARCH_ARM64 592 593 594 #if XNN_ARCH_ARM64 TEST(F16_SPMM_MINMAX_16X1__NEONFP16ARITH_X2,k_eq_2)595 TEST(F16_SPMM_MINMAX_16X1__NEONFP16ARITH_X2, k_eq_2) { 596 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 597 SpMMMicrokernelTester() 598 .mr(16) 599 .nr(1) 600 .m(16) 601 .n(1) 602 .k(2) 603 .sparsity(0.0f) 604 .Test(xnn_f16_spmm_minmax_ukernel_16x1__neonfp16arith_x2); 605 } 606 TEST(F16_SPMM_MINMAX_16X1__NEONFP16ARITH_X2,k_lt_2)607 TEST(F16_SPMM_MINMAX_16X1__NEONFP16ARITH_X2, k_lt_2) { 608 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 609 for (size_t k = 1; k < 2; k++) { 610 SpMMMicrokernelTester() 611 .mr(16) 612 .nr(1) 613 .m(16) 614 .n(1) 615 .k(k) 616 .sparsity(0.0f) 617 .Test(xnn_f16_spmm_minmax_ukernel_16x1__neonfp16arith_x2); 618 } 619 } 620 TEST(F16_SPMM_MINMAX_16X1__NEONFP16ARITH_X2,k_gt_2)621 
TEST(F16_SPMM_MINMAX_16X1__NEONFP16ARITH_X2, k_gt_2) { 622 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 623 for (size_t k = 3; k < 4; k++) { 624 SpMMMicrokernelTester() 625 .mr(16) 626 .nr(1) 627 .m(16) 628 .n(1) 629 .k(k) 630 .sparsity(0.0f) 631 .Test(xnn_f16_spmm_minmax_ukernel_16x1__neonfp16arith_x2); 632 } 633 } 634 TEST(F16_SPMM_MINMAX_16X1__NEONFP16ARITH_X2,k_div_2)635 TEST(F16_SPMM_MINMAX_16X1__NEONFP16ARITH_X2, k_div_2) { 636 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 637 for (size_t k = 4; k <= 20; k += 2) { 638 SpMMMicrokernelTester() 639 .mr(16) 640 .nr(1) 641 .m(16) 642 .n(1) 643 .k(k) 644 .sparsity(0.0f) 645 .Test(xnn_f16_spmm_minmax_ukernel_16x1__neonfp16arith_x2); 646 } 647 } 648 TEST(F16_SPMM_MINMAX_16X1__NEONFP16ARITH_X2,n_gt_1)649 TEST(F16_SPMM_MINMAX_16X1__NEONFP16ARITH_X2, n_gt_1) { 650 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 651 for (uint32_t n = 2; n < 10; n++) { 652 for (size_t k = 1; k <= 10; k += 3) { 653 SpMMMicrokernelTester() 654 .mr(16) 655 .nr(1) 656 .m(16) 657 .n(n) 658 .k(k) 659 .sparsity(0.0f) 660 .Test(xnn_f16_spmm_minmax_ukernel_16x1__neonfp16arith_x2); 661 } 662 } 663 } 664 TEST(F16_SPMM_MINMAX_16X1__NEONFP16ARITH_X2,m_lt_16)665 TEST(F16_SPMM_MINMAX_16X1__NEONFP16ARITH_X2, m_lt_16) { 666 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 667 for (uint32_t m = 1; m < 16; m++) { 668 for (uint32_t n = 1; n < 10; n += 2) { 669 for (size_t k = 1; k <= 10; k += 3) { 670 SpMMMicrokernelTester() 671 .mr(16) 672 .nr(1) 673 .m(m) 674 .n(n) 675 .k(k) 676 .sparsity(0.0f) 677 .Test(xnn_f16_spmm_minmax_ukernel_16x1__neonfp16arith_x2); 678 } 679 } 680 } 681 } 682 TEST(F16_SPMM_MINMAX_16X1__NEONFP16ARITH_X2,m_div_16)683 TEST(F16_SPMM_MINMAX_16X1__NEONFP16ARITH_X2, m_div_16) { 684 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 685 for (uint32_t m = 32; m <= 48; m += 16) { 686 for (uint32_t n = 1; n < 10; n += 2) { 687 for (size_t k = 1; k <= 10; k += 3) { 688 SpMMMicrokernelTester() 689 .mr(16) 690 .nr(1) 691 .m(m) 692 .n(n) 693 .k(k) 694 .sparsity(0.0f) 695 
.Test(xnn_f16_spmm_minmax_ukernel_16x1__neonfp16arith_x2); 696 } 697 } 698 } 699 } 700 TEST(F16_SPMM_MINMAX_16X1__NEONFP16ARITH_X2,m_gt_16)701 TEST(F16_SPMM_MINMAX_16X1__NEONFP16ARITH_X2, m_gt_16) { 702 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 703 for (uint32_t m = 17; m < 32; m++) { 704 for (uint32_t n = 1; n < 10; n += 2) { 705 for (size_t k = 1; k <= 10; k += 3) { 706 SpMMMicrokernelTester() 707 .mr(16) 708 .nr(1) 709 .m(m) 710 .n(n) 711 .k(k) 712 .sparsity(0.0f) 713 .Test(xnn_f16_spmm_minmax_ukernel_16x1__neonfp16arith_x2); 714 } 715 } 716 } 717 } 718 TEST(F16_SPMM_MINMAX_16X1__NEONFP16ARITH_X2,output_stride)719 TEST(F16_SPMM_MINMAX_16X1__NEONFP16ARITH_X2, output_stride) { 720 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 721 for (uint32_t n = 1; n < 10; n += 2) { 722 for (size_t k = 1; k <= 10; k += 3) { 723 SpMMMicrokernelTester() 724 .mr(16) 725 .nr(1) 726 .m(32) 727 .n(n) 728 .k(k) 729 .output_stride(37) 730 .sparsity(0.0f) 731 .Test(xnn_f16_spmm_minmax_ukernel_16x1__neonfp16arith_x2); 732 } 733 } 734 } 735 TEST(F16_SPMM_MINMAX_16X1__NEONFP16ARITH_X2,qmin)736 TEST(F16_SPMM_MINMAX_16X1__NEONFP16ARITH_X2, qmin) { 737 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 738 for (uint32_t n = 1; n < 10; n += 2) { 739 for (size_t k = 1; k <= 10; k += 3) { 740 SpMMMicrokernelTester() 741 .mr(16) 742 .nr(1) 743 .m(32) 744 .n(n) 745 .k(k) 746 .sparsity(0.0f) 747 .qmin(128) 748 .Test(xnn_f16_spmm_minmax_ukernel_16x1__neonfp16arith_x2); 749 } 750 } 751 } 752 TEST(F16_SPMM_MINMAX_16X1__NEONFP16ARITH_X2,qmax)753 TEST(F16_SPMM_MINMAX_16X1__NEONFP16ARITH_X2, qmax) { 754 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 755 for (uint32_t n = 1; n < 10; n += 2) { 756 for (size_t k = 1; k <= 10; k += 3) { 757 SpMMMicrokernelTester() 758 .mr(16) 759 .nr(1) 760 .m(32) 761 .n(n) 762 .k(k) 763 .sparsity(0.0f) 764 .qmax(128) 765 .Test(xnn_f16_spmm_minmax_ukernel_16x1__neonfp16arith_x2); 766 } 767 } 768 } 769 TEST(F16_SPMM_MINMAX_16X1__NEONFP16ARITH_X2,half_sparse)770 TEST(F16_SPMM_MINMAX_16X1__NEONFP16ARITH_X2, half_sparse) { 
771 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 772 for (uint32_t n = 1; n < 10; n += 2) { 773 for (size_t k = 1; k <= 10; k += 3) { 774 SpMMMicrokernelTester() 775 .mr(16) 776 .nr(1) 777 .m(32) 778 .n(n) 779 .k(k) 780 .sparsity(0.5f) 781 .Test(xnn_f16_spmm_minmax_ukernel_16x1__neonfp16arith_x2); 782 } 783 } 784 } 785 TEST(F16_SPMM_MINMAX_16X1__NEONFP16ARITH_X2,zero_weights)786 TEST(F16_SPMM_MINMAX_16X1__NEONFP16ARITH_X2, zero_weights) { 787 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 788 for (uint32_t n = 1; n < 10; n += 2) { 789 for (size_t k = 1; k <= 10; k += 3) { 790 SpMMMicrokernelTester() 791 .mr(16) 792 .nr(1) 793 .m(32) 794 .n(n) 795 .k(k) 796 .sparsity(1.0f) 797 .Test(xnn_f16_spmm_minmax_ukernel_16x1__neonfp16arith_x2); 798 } 799 } 800 } 801 #endif // XNN_ARCH_ARM64 802 803 804 #if XNN_ARCH_ARM64 TEST(F16_SPMM_MINMAX_24X1__NEONFP16ARITH,k_eq_1)805 TEST(F16_SPMM_MINMAX_24X1__NEONFP16ARITH, k_eq_1) { 806 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 807 SpMMMicrokernelTester() 808 .mr(24) 809 .nr(1) 810 .m(24) 811 .n(1) 812 .k(1) 813 .sparsity(0.0f) 814 .Test(xnn_f16_spmm_minmax_ukernel_24x1__neonfp16arith); 815 } 816 TEST(F16_SPMM_MINMAX_24X1__NEONFP16ARITH,k_gt_1)817 TEST(F16_SPMM_MINMAX_24X1__NEONFP16ARITH, k_gt_1) { 818 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 819 for (size_t k = 2; k < 10; k++) { 820 SpMMMicrokernelTester() 821 .mr(24) 822 .nr(1) 823 .m(24) 824 .n(1) 825 .k(k) 826 .sparsity(0.0f) 827 .Test(xnn_f16_spmm_minmax_ukernel_24x1__neonfp16arith); 828 } 829 } 830 TEST(F16_SPMM_MINMAX_24X1__NEONFP16ARITH,n_gt_1)831 TEST(F16_SPMM_MINMAX_24X1__NEONFP16ARITH, n_gt_1) { 832 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 833 for (uint32_t n = 2; n < 10; n++) { 834 for (size_t k = 1; k <= 5; k += 2) { 835 SpMMMicrokernelTester() 836 .mr(24) 837 .nr(1) 838 .m(24) 839 .n(n) 840 .k(k) 841 .sparsity(0.0f) 842 .Test(xnn_f16_spmm_minmax_ukernel_24x1__neonfp16arith); 843 } 844 } 845 } 846 TEST(F16_SPMM_MINMAX_24X1__NEONFP16ARITH,m_lt_24)847 TEST(F16_SPMM_MINMAX_24X1__NEONFP16ARITH, m_lt_24) { 848 
TEST_REQUIRES_ARM_NEON_FP16_ARITH; 849 for (uint32_t m = 1; m < 24; m++) { 850 for (uint32_t n = 1; n < 10; n += 2) { 851 for (size_t k = 1; k <= 5; k += 2) { 852 SpMMMicrokernelTester() 853 .mr(24) 854 .nr(1) 855 .m(m) 856 .n(n) 857 .k(k) 858 .sparsity(0.0f) 859 .Test(xnn_f16_spmm_minmax_ukernel_24x1__neonfp16arith); 860 } 861 } 862 } 863 } 864 TEST(F16_SPMM_MINMAX_24X1__NEONFP16ARITH,m_div_24)865 TEST(F16_SPMM_MINMAX_24X1__NEONFP16ARITH, m_div_24) { 866 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 867 for (uint32_t m = 48; m <= 72; m += 24) { 868 for (uint32_t n = 1; n < 10; n += 2) { 869 for (size_t k = 1; k <= 5; k += 2) { 870 SpMMMicrokernelTester() 871 .mr(24) 872 .nr(1) 873 .m(m) 874 .n(n) 875 .k(k) 876 .sparsity(0.0f) 877 .Test(xnn_f16_spmm_minmax_ukernel_24x1__neonfp16arith); 878 } 879 } 880 } 881 } 882 TEST(F16_SPMM_MINMAX_24X1__NEONFP16ARITH,m_gt_24)883 TEST(F16_SPMM_MINMAX_24X1__NEONFP16ARITH, m_gt_24) { 884 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 885 for (uint32_t m = 25; m < 48; m++) { 886 for (uint32_t n = 1; n < 10; n += 2) { 887 for (size_t k = 1; k <= 5; k += 2) { 888 SpMMMicrokernelTester() 889 .mr(24) 890 .nr(1) 891 .m(m) 892 .n(n) 893 .k(k) 894 .sparsity(0.0f) 895 .Test(xnn_f16_spmm_minmax_ukernel_24x1__neonfp16arith); 896 } 897 } 898 } 899 } 900 TEST(F16_SPMM_MINMAX_24X1__NEONFP16ARITH,output_stride)901 TEST(F16_SPMM_MINMAX_24X1__NEONFP16ARITH, output_stride) { 902 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 903 for (uint32_t n = 1; n < 10; n += 2) { 904 for (size_t k = 1; k <= 5; k += 2) { 905 SpMMMicrokernelTester() 906 .mr(24) 907 .nr(1) 908 .m(48) 909 .n(n) 910 .k(k) 911 .output_stride(53) 912 .sparsity(0.0f) 913 .Test(xnn_f16_spmm_minmax_ukernel_24x1__neonfp16arith); 914 } 915 } 916 } 917 TEST(F16_SPMM_MINMAX_24X1__NEONFP16ARITH,qmin)918 TEST(F16_SPMM_MINMAX_24X1__NEONFP16ARITH, qmin) { 919 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 920 for (uint32_t n = 1; n < 10; n += 2) { 921 for (size_t k = 1; k <= 5; k += 2) { 922 SpMMMicrokernelTester() 923 .mr(24) 924 .nr(1) 925 
.m(48) 926 .n(n) 927 .k(k) 928 .sparsity(0.0f) 929 .qmin(128) 930 .Test(xnn_f16_spmm_minmax_ukernel_24x1__neonfp16arith); 931 } 932 } 933 } 934 TEST(F16_SPMM_MINMAX_24X1__NEONFP16ARITH,qmax)935 TEST(F16_SPMM_MINMAX_24X1__NEONFP16ARITH, qmax) { 936 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 937 for (uint32_t n = 1; n < 10; n += 2) { 938 for (size_t k = 1; k <= 5; k += 2) { 939 SpMMMicrokernelTester() 940 .mr(24) 941 .nr(1) 942 .m(48) 943 .n(n) 944 .k(k) 945 .sparsity(0.0f) 946 .qmax(128) 947 .Test(xnn_f16_spmm_minmax_ukernel_24x1__neonfp16arith); 948 } 949 } 950 } 951 TEST(F16_SPMM_MINMAX_24X1__NEONFP16ARITH,half_sparse)952 TEST(F16_SPMM_MINMAX_24X1__NEONFP16ARITH, half_sparse) { 953 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 954 for (uint32_t n = 1; n < 10; n += 2) { 955 for (size_t k = 1; k <= 5; k += 2) { 956 SpMMMicrokernelTester() 957 .mr(24) 958 .nr(1) 959 .m(48) 960 .n(n) 961 .k(k) 962 .sparsity(0.5f) 963 .Test(xnn_f16_spmm_minmax_ukernel_24x1__neonfp16arith); 964 } 965 } 966 } 967 TEST(F16_SPMM_MINMAX_24X1__NEONFP16ARITH,zero_weights)968 TEST(F16_SPMM_MINMAX_24X1__NEONFP16ARITH, zero_weights) { 969 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 970 for (uint32_t n = 1; n < 10; n += 2) { 971 for (size_t k = 1; k <= 5; k += 2) { 972 SpMMMicrokernelTester() 973 .mr(24) 974 .nr(1) 975 .m(48) 976 .n(n) 977 .k(k) 978 .sparsity(1.0f) 979 .Test(xnn_f16_spmm_minmax_ukernel_24x1__neonfp16arith); 980 } 981 } 982 } 983 #endif // XNN_ARCH_ARM64 984 985 986 #if XNN_ARCH_ARM64 TEST(F16_SPMM_MINMAX_24X1__NEONFP16ARITH_X2,k_eq_2)987 TEST(F16_SPMM_MINMAX_24X1__NEONFP16ARITH_X2, k_eq_2) { 988 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 989 SpMMMicrokernelTester() 990 .mr(24) 991 .nr(1) 992 .m(24) 993 .n(1) 994 .k(2) 995 .sparsity(0.0f) 996 .Test(xnn_f16_spmm_minmax_ukernel_24x1__neonfp16arith_x2); 997 } 998 TEST(F16_SPMM_MINMAX_24X1__NEONFP16ARITH_X2,k_lt_2)999 TEST(F16_SPMM_MINMAX_24X1__NEONFP16ARITH_X2, k_lt_2) { 1000 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1001 for (size_t k = 1; k < 2; k++) { 1002 
SpMMMicrokernelTester() 1003 .mr(24) 1004 .nr(1) 1005 .m(24) 1006 .n(1) 1007 .k(k) 1008 .sparsity(0.0f) 1009 .Test(xnn_f16_spmm_minmax_ukernel_24x1__neonfp16arith_x2); 1010 } 1011 } 1012 TEST(F16_SPMM_MINMAX_24X1__NEONFP16ARITH_X2,k_gt_2)1013 TEST(F16_SPMM_MINMAX_24X1__NEONFP16ARITH_X2, k_gt_2) { 1014 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1015 for (size_t k = 3; k < 4; k++) { 1016 SpMMMicrokernelTester() 1017 .mr(24) 1018 .nr(1) 1019 .m(24) 1020 .n(1) 1021 .k(k) 1022 .sparsity(0.0f) 1023 .Test(xnn_f16_spmm_minmax_ukernel_24x1__neonfp16arith_x2); 1024 } 1025 } 1026 TEST(F16_SPMM_MINMAX_24X1__NEONFP16ARITH_X2,k_div_2)1027 TEST(F16_SPMM_MINMAX_24X1__NEONFP16ARITH_X2, k_div_2) { 1028 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1029 for (size_t k = 4; k <= 20; k += 2) { 1030 SpMMMicrokernelTester() 1031 .mr(24) 1032 .nr(1) 1033 .m(24) 1034 .n(1) 1035 .k(k) 1036 .sparsity(0.0f) 1037 .Test(xnn_f16_spmm_minmax_ukernel_24x1__neonfp16arith_x2); 1038 } 1039 } 1040 TEST(F16_SPMM_MINMAX_24X1__NEONFP16ARITH_X2,n_gt_1)1041 TEST(F16_SPMM_MINMAX_24X1__NEONFP16ARITH_X2, n_gt_1) { 1042 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1043 for (uint32_t n = 2; n < 10; n++) { 1044 for (size_t k = 1; k <= 10; k += 3) { 1045 SpMMMicrokernelTester() 1046 .mr(24) 1047 .nr(1) 1048 .m(24) 1049 .n(n) 1050 .k(k) 1051 .sparsity(0.0f) 1052 .Test(xnn_f16_spmm_minmax_ukernel_24x1__neonfp16arith_x2); 1053 } 1054 } 1055 } 1056 TEST(F16_SPMM_MINMAX_24X1__NEONFP16ARITH_X2,m_lt_24)1057 TEST(F16_SPMM_MINMAX_24X1__NEONFP16ARITH_X2, m_lt_24) { 1058 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1059 for (uint32_t m = 1; m < 24; m++) { 1060 for (uint32_t n = 1; n < 10; n += 2) { 1061 for (size_t k = 1; k <= 10; k += 3) { 1062 SpMMMicrokernelTester() 1063 .mr(24) 1064 .nr(1) 1065 .m(m) 1066 .n(n) 1067 .k(k) 1068 .sparsity(0.0f) 1069 .Test(xnn_f16_spmm_minmax_ukernel_24x1__neonfp16arith_x2); 1070 } 1071 } 1072 } 1073 } 1074 TEST(F16_SPMM_MINMAX_24X1__NEONFP16ARITH_X2,m_div_24)1075 TEST(F16_SPMM_MINMAX_24X1__NEONFP16ARITH_X2, m_div_24) { 1076 
TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1077 for (uint32_t m = 48; m <= 72; m += 24) { 1078 for (uint32_t n = 1; n < 10; n += 2) { 1079 for (size_t k = 1; k <= 10; k += 3) { 1080 SpMMMicrokernelTester() 1081 .mr(24) 1082 .nr(1) 1083 .m(m) 1084 .n(n) 1085 .k(k) 1086 .sparsity(0.0f) 1087 .Test(xnn_f16_spmm_minmax_ukernel_24x1__neonfp16arith_x2); 1088 } 1089 } 1090 } 1091 } 1092 TEST(F16_SPMM_MINMAX_24X1__NEONFP16ARITH_X2,m_gt_24)1093 TEST(F16_SPMM_MINMAX_24X1__NEONFP16ARITH_X2, m_gt_24) { 1094 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1095 for (uint32_t m = 25; m < 48; m++) { 1096 for (uint32_t n = 1; n < 10; n += 2) { 1097 for (size_t k = 1; k <= 10; k += 3) { 1098 SpMMMicrokernelTester() 1099 .mr(24) 1100 .nr(1) 1101 .m(m) 1102 .n(n) 1103 .k(k) 1104 .sparsity(0.0f) 1105 .Test(xnn_f16_spmm_minmax_ukernel_24x1__neonfp16arith_x2); 1106 } 1107 } 1108 } 1109 } 1110 TEST(F16_SPMM_MINMAX_24X1__NEONFP16ARITH_X2,output_stride)1111 TEST(F16_SPMM_MINMAX_24X1__NEONFP16ARITH_X2, output_stride) { 1112 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1113 for (uint32_t n = 1; n < 10; n += 2) { 1114 for (size_t k = 1; k <= 10; k += 3) { 1115 SpMMMicrokernelTester() 1116 .mr(24) 1117 .nr(1) 1118 .m(48) 1119 .n(n) 1120 .k(k) 1121 .output_stride(53) 1122 .sparsity(0.0f) 1123 .Test(xnn_f16_spmm_minmax_ukernel_24x1__neonfp16arith_x2); 1124 } 1125 } 1126 } 1127 TEST(F16_SPMM_MINMAX_24X1__NEONFP16ARITH_X2,qmin)1128 TEST(F16_SPMM_MINMAX_24X1__NEONFP16ARITH_X2, qmin) { 1129 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1130 for (uint32_t n = 1; n < 10; n += 2) { 1131 for (size_t k = 1; k <= 10; k += 3) { 1132 SpMMMicrokernelTester() 1133 .mr(24) 1134 .nr(1) 1135 .m(48) 1136 .n(n) 1137 .k(k) 1138 .sparsity(0.0f) 1139 .qmin(128) 1140 .Test(xnn_f16_spmm_minmax_ukernel_24x1__neonfp16arith_x2); 1141 } 1142 } 1143 } 1144 TEST(F16_SPMM_MINMAX_24X1__NEONFP16ARITH_X2,qmax)1145 TEST(F16_SPMM_MINMAX_24X1__NEONFP16ARITH_X2, qmax) { 1146 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1147 for (uint32_t n = 1; n < 10; n += 2) { 1148 for (size_t k = 1; 
k <= 10; k += 3) { 1149 SpMMMicrokernelTester() 1150 .mr(24) 1151 .nr(1) 1152 .m(48) 1153 .n(n) 1154 .k(k) 1155 .sparsity(0.0f) 1156 .qmax(128) 1157 .Test(xnn_f16_spmm_minmax_ukernel_24x1__neonfp16arith_x2); 1158 } 1159 } 1160 } 1161 TEST(F16_SPMM_MINMAX_24X1__NEONFP16ARITH_X2,half_sparse)1162 TEST(F16_SPMM_MINMAX_24X1__NEONFP16ARITH_X2, half_sparse) { 1163 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1164 for (uint32_t n = 1; n < 10; n += 2) { 1165 for (size_t k = 1; k <= 10; k += 3) { 1166 SpMMMicrokernelTester() 1167 .mr(24) 1168 .nr(1) 1169 .m(48) 1170 .n(n) 1171 .k(k) 1172 .sparsity(0.5f) 1173 .Test(xnn_f16_spmm_minmax_ukernel_24x1__neonfp16arith_x2); 1174 } 1175 } 1176 } 1177 TEST(F16_SPMM_MINMAX_24X1__NEONFP16ARITH_X2,zero_weights)1178 TEST(F16_SPMM_MINMAX_24X1__NEONFP16ARITH_X2, zero_weights) { 1179 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1180 for (uint32_t n = 1; n < 10; n += 2) { 1181 for (size_t k = 1; k <= 10; k += 3) { 1182 SpMMMicrokernelTester() 1183 .mr(24) 1184 .nr(1) 1185 .m(48) 1186 .n(n) 1187 .k(k) 1188 .sparsity(1.0f) 1189 .Test(xnn_f16_spmm_minmax_ukernel_24x1__neonfp16arith_x2); 1190 } 1191 } 1192 } 1193 #endif // XNN_ARCH_ARM64 1194 1195 1196 #if XNN_ARCH_ARM64 TEST(F16_SPMM_MINMAX_32X1__NEONFP16ARITH,k_eq_1)1197 TEST(F16_SPMM_MINMAX_32X1__NEONFP16ARITH, k_eq_1) { 1198 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1199 SpMMMicrokernelTester() 1200 .mr(32) 1201 .nr(1) 1202 .m(32) 1203 .n(1) 1204 .k(1) 1205 .sparsity(0.0f) 1206 .Test(xnn_f16_spmm_minmax_ukernel_32x1__neonfp16arith); 1207 } 1208 TEST(F16_SPMM_MINMAX_32X1__NEONFP16ARITH,k_gt_1)1209 TEST(F16_SPMM_MINMAX_32X1__NEONFP16ARITH, k_gt_1) { 1210 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1211 for (size_t k = 2; k < 10; k++) { 1212 SpMMMicrokernelTester() 1213 .mr(32) 1214 .nr(1) 1215 .m(32) 1216 .n(1) 1217 .k(k) 1218 .sparsity(0.0f) 1219 .Test(xnn_f16_spmm_minmax_ukernel_32x1__neonfp16arith); 1220 } 1221 } 1222 TEST(F16_SPMM_MINMAX_32X1__NEONFP16ARITH,n_gt_1)1223 TEST(F16_SPMM_MINMAX_32X1__NEONFP16ARITH, n_gt_1) { 1224 
TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1225 for (uint32_t n = 2; n < 10; n++) { 1226 for (size_t k = 1; k <= 5; k += 2) { 1227 SpMMMicrokernelTester() 1228 .mr(32) 1229 .nr(1) 1230 .m(32) 1231 .n(n) 1232 .k(k) 1233 .sparsity(0.0f) 1234 .Test(xnn_f16_spmm_minmax_ukernel_32x1__neonfp16arith); 1235 } 1236 } 1237 } 1238 TEST(F16_SPMM_MINMAX_32X1__NEONFP16ARITH,m_lt_32)1239 TEST(F16_SPMM_MINMAX_32X1__NEONFP16ARITH, m_lt_32) { 1240 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1241 for (uint32_t m = 1; m < 32; m++) { 1242 for (uint32_t n = 1; n < 10; n += 2) { 1243 for (size_t k = 1; k <= 5; k += 2) { 1244 SpMMMicrokernelTester() 1245 .mr(32) 1246 .nr(1) 1247 .m(m) 1248 .n(n) 1249 .k(k) 1250 .sparsity(0.0f) 1251 .Test(xnn_f16_spmm_minmax_ukernel_32x1__neonfp16arith); 1252 } 1253 } 1254 } 1255 } 1256 TEST(F16_SPMM_MINMAX_32X1__NEONFP16ARITH,m_div_32)1257 TEST(F16_SPMM_MINMAX_32X1__NEONFP16ARITH, m_div_32) { 1258 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1259 for (uint32_t m = 64; m <= 96; m += 32) { 1260 for (uint32_t n = 1; n < 10; n += 2) { 1261 for (size_t k = 1; k <= 5; k += 2) { 1262 SpMMMicrokernelTester() 1263 .mr(32) 1264 .nr(1) 1265 .m(m) 1266 .n(n) 1267 .k(k) 1268 .sparsity(0.0f) 1269 .Test(xnn_f16_spmm_minmax_ukernel_32x1__neonfp16arith); 1270 } 1271 } 1272 } 1273 } 1274 TEST(F16_SPMM_MINMAX_32X1__NEONFP16ARITH,m_gt_32)1275 TEST(F16_SPMM_MINMAX_32X1__NEONFP16ARITH, m_gt_32) { 1276 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1277 for (uint32_t m = 33; m < 64; m++) { 1278 for (uint32_t n = 1; n < 10; n += 2) { 1279 for (size_t k = 1; k <= 5; k += 2) { 1280 SpMMMicrokernelTester() 1281 .mr(32) 1282 .nr(1) 1283 .m(m) 1284 .n(n) 1285 .k(k) 1286 .sparsity(0.0f) 1287 .Test(xnn_f16_spmm_minmax_ukernel_32x1__neonfp16arith); 1288 } 1289 } 1290 } 1291 } 1292 TEST(F16_SPMM_MINMAX_32X1__NEONFP16ARITH,output_stride)1293 TEST(F16_SPMM_MINMAX_32X1__NEONFP16ARITH, output_stride) { 1294 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1295 for (uint32_t n = 1; n < 10; n += 2) { 1296 for (size_t k = 1; k <= 5; k += 2) { 
1297 SpMMMicrokernelTester() 1298 .mr(32) 1299 .nr(1) 1300 .m(64) 1301 .n(n) 1302 .k(k) 1303 .output_stride(67) 1304 .sparsity(0.0f) 1305 .Test(xnn_f16_spmm_minmax_ukernel_32x1__neonfp16arith); 1306 } 1307 } 1308 } 1309 TEST(F16_SPMM_MINMAX_32X1__NEONFP16ARITH,qmin)1310 TEST(F16_SPMM_MINMAX_32X1__NEONFP16ARITH, qmin) { 1311 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1312 for (uint32_t n = 1; n < 10; n += 2) { 1313 for (size_t k = 1; k <= 5; k += 2) { 1314 SpMMMicrokernelTester() 1315 .mr(32) 1316 .nr(1) 1317 .m(64) 1318 .n(n) 1319 .k(k) 1320 .sparsity(0.0f) 1321 .qmin(128) 1322 .Test(xnn_f16_spmm_minmax_ukernel_32x1__neonfp16arith); 1323 } 1324 } 1325 } 1326 TEST(F16_SPMM_MINMAX_32X1__NEONFP16ARITH,qmax)1327 TEST(F16_SPMM_MINMAX_32X1__NEONFP16ARITH, qmax) { 1328 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1329 for (uint32_t n = 1; n < 10; n += 2) { 1330 for (size_t k = 1; k <= 5; k += 2) { 1331 SpMMMicrokernelTester() 1332 .mr(32) 1333 .nr(1) 1334 .m(64) 1335 .n(n) 1336 .k(k) 1337 .sparsity(0.0f) 1338 .qmax(128) 1339 .Test(xnn_f16_spmm_minmax_ukernel_32x1__neonfp16arith); 1340 } 1341 } 1342 } 1343 TEST(F16_SPMM_MINMAX_32X1__NEONFP16ARITH,half_sparse)1344 TEST(F16_SPMM_MINMAX_32X1__NEONFP16ARITH, half_sparse) { 1345 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1346 for (uint32_t n = 1; n < 10; n += 2) { 1347 for (size_t k = 1; k <= 5; k += 2) { 1348 SpMMMicrokernelTester() 1349 .mr(32) 1350 .nr(1) 1351 .m(64) 1352 .n(n) 1353 .k(k) 1354 .sparsity(0.5f) 1355 .Test(xnn_f16_spmm_minmax_ukernel_32x1__neonfp16arith); 1356 } 1357 } 1358 } 1359 TEST(F16_SPMM_MINMAX_32X1__NEONFP16ARITH,zero_weights)1360 TEST(F16_SPMM_MINMAX_32X1__NEONFP16ARITH, zero_weights) { 1361 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1362 for (uint32_t n = 1; n < 10; n += 2) { 1363 for (size_t k = 1; k <= 5; k += 2) { 1364 SpMMMicrokernelTester() 1365 .mr(32) 1366 .nr(1) 1367 .m(64) 1368 .n(n) 1369 .k(k) 1370 .sparsity(1.0f) 1371 .Test(xnn_f16_spmm_minmax_ukernel_32x1__neonfp16arith); 1372 } 1373 } 1374 } 1375 #endif // XNN_ARCH_ARM64 
1376 1377 1378 #if XNN_ARCH_ARM64 TEST(F16_SPMM_MINMAX_32X1__NEONFP16ARITH_X2,k_eq_2)1379 TEST(F16_SPMM_MINMAX_32X1__NEONFP16ARITH_X2, k_eq_2) { 1380 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1381 SpMMMicrokernelTester() 1382 .mr(32) 1383 .nr(1) 1384 .m(32) 1385 .n(1) 1386 .k(2) 1387 .sparsity(0.0f) 1388 .Test(xnn_f16_spmm_minmax_ukernel_32x1__neonfp16arith_x2); 1389 } 1390 TEST(F16_SPMM_MINMAX_32X1__NEONFP16ARITH_X2,k_lt_2)1391 TEST(F16_SPMM_MINMAX_32X1__NEONFP16ARITH_X2, k_lt_2) { 1392 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1393 for (size_t k = 1; k < 2; k++) { 1394 SpMMMicrokernelTester() 1395 .mr(32) 1396 .nr(1) 1397 .m(32) 1398 .n(1) 1399 .k(k) 1400 .sparsity(0.0f) 1401 .Test(xnn_f16_spmm_minmax_ukernel_32x1__neonfp16arith_x2); 1402 } 1403 } 1404 TEST(F16_SPMM_MINMAX_32X1__NEONFP16ARITH_X2,k_gt_2)1405 TEST(F16_SPMM_MINMAX_32X1__NEONFP16ARITH_X2, k_gt_2) { 1406 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1407 for (size_t k = 3; k < 4; k++) { 1408 SpMMMicrokernelTester() 1409 .mr(32) 1410 .nr(1) 1411 .m(32) 1412 .n(1) 1413 .k(k) 1414 .sparsity(0.0f) 1415 .Test(xnn_f16_spmm_minmax_ukernel_32x1__neonfp16arith_x2); 1416 } 1417 } 1418 TEST(F16_SPMM_MINMAX_32X1__NEONFP16ARITH_X2,k_div_2)1419 TEST(F16_SPMM_MINMAX_32X1__NEONFP16ARITH_X2, k_div_2) { 1420 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1421 for (size_t k = 4; k <= 20; k += 2) { 1422 SpMMMicrokernelTester() 1423 .mr(32) 1424 .nr(1) 1425 .m(32) 1426 .n(1) 1427 .k(k) 1428 .sparsity(0.0f) 1429 .Test(xnn_f16_spmm_minmax_ukernel_32x1__neonfp16arith_x2); 1430 } 1431 } 1432 TEST(F16_SPMM_MINMAX_32X1__NEONFP16ARITH_X2,n_gt_1)1433 TEST(F16_SPMM_MINMAX_32X1__NEONFP16ARITH_X2, n_gt_1) { 1434 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1435 for (uint32_t n = 2; n < 10; n++) { 1436 for (size_t k = 1; k <= 10; k += 3) { 1437 SpMMMicrokernelTester() 1438 .mr(32) 1439 .nr(1) 1440 .m(32) 1441 .n(n) 1442 .k(k) 1443 .sparsity(0.0f) 1444 .Test(xnn_f16_spmm_minmax_ukernel_32x1__neonfp16arith_x2); 1445 } 1446 } 1447 } 1448 
// xnn_f16_spmm_minmax_ukernel_32x1__neonfp16arith_x2: m sweeps and output stride.
// Every test here uses mr = 32, nr = 1, sparsity = 0.0f, n in {1,3,5,7,9} and
// k in {1,4,7,10}.
  // m = 1..31 (below mr).
  TEST(F16_SPMM_MINMAX_32X1__NEONFP16ARITH_X2, m_lt_32) {
    TEST_REQUIRES_ARM_NEON_FP16_ARITH;
    for (uint32_t m = 1; m < 32; m++) {
      for (uint32_t n = 1; n < 10; n += 2) {
        for (size_t k = 1; k <= 10; k += 3) {
          SpMMMicrokernelTester()
            .mr(32)
            .nr(1)
            .m(m)
            .n(n)
            .k(k)
            .sparsity(0.0f)
            .Test(xnn_f16_spmm_minmax_ukernel_32x1__neonfp16arith_x2);
        }
      }
    }
  }

  // m in {64, 96} (multiples of mr).
  TEST(F16_SPMM_MINMAX_32X1__NEONFP16ARITH_X2, m_div_32) {
    TEST_REQUIRES_ARM_NEON_FP16_ARITH;
    for (uint32_t m = 64; m <= 96; m += 32) {
      for (uint32_t n = 1; n < 10; n += 2) {
        for (size_t k = 1; k <= 10; k += 3) {
          SpMMMicrokernelTester()
            .mr(32)
            .nr(1)
            .m(m)
            .n(n)
            .k(k)
            .sparsity(0.0f)
            .Test(xnn_f16_spmm_minmax_ukernel_32x1__neonfp16arith_x2);
        }
      }
    }
  }

  // m = 33..63 (between mr and 2*mr).
  TEST(F16_SPMM_MINMAX_32X1__NEONFP16ARITH_X2, m_gt_32) {
    TEST_REQUIRES_ARM_NEON_FP16_ARITH;
    for (uint32_t m = 33; m < 64; m++) {
      for (uint32_t n = 1; n < 10; n += 2) {
        for (size_t k = 1; k <= 10; k += 3) {
          SpMMMicrokernelTester()
            .mr(32)
            .nr(1)
            .m(m)
            .n(n)
            .k(k)
            .sparsity(0.0f)
            .Test(xnn_f16_spmm_minmax_ukernel_32x1__neonfp16arith_x2);
        }
      }
    }
  }

  // output_stride = 67 with m = 64.
  TEST(F16_SPMM_MINMAX_32X1__NEONFP16ARITH_X2, output_stride) {
    TEST_REQUIRES_ARM_NEON_FP16_ARITH;
    for (uint32_t n = 1; n < 10; n += 2) {
      for (size_t k = 1; k <= 10; k += 3) {
        SpMMMicrokernelTester()
          .mr(32)
          .nr(1)
          .m(64)
          .n(n)
          .k(k)
          .output_stride(67)
          .sparsity(0.0f)
          .Test(xnn_f16_spmm_minmax_ukernel_32x1__neonfp16arith_x2);
      }
    }
  }

TEST(F16_SPMM_MINMAX_32X1__NEONFP16ARITH_X2, qmin) { 1521 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1522 for (uint32_t n = 1; n < 10; n += 2) { 1523 for (size_t k = 1; k <= 10; k += 3) { 1524 SpMMMicrokernelTester() 1525 .mr(32) 1526 .nr(1) 1527 .m(64) 1528 .n(n) 1529 .k(k) 1530 .sparsity(0.0f) 1531 .qmin(128) 1532 .Test(xnn_f16_spmm_minmax_ukernel_32x1__neonfp16arith_x2); 1533 } 1534 } 1535 } 1536 TEST(F16_SPMM_MINMAX_32X1__NEONFP16ARITH_X2,qmax)1537 TEST(F16_SPMM_MINMAX_32X1__NEONFP16ARITH_X2, qmax) { 1538 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1539 for (uint32_t n = 1; n < 10; n += 2) { 1540 for (size_t k = 1; k <= 10; k += 3) { 1541 SpMMMicrokernelTester() 1542 .mr(32) 1543 .nr(1) 1544 .m(64) 1545 .n(n) 1546 .k(k) 1547 .sparsity(0.0f) 1548 .qmax(128) 1549 .Test(xnn_f16_spmm_minmax_ukernel_32x1__neonfp16arith_x2); 1550 } 1551 } 1552 } 1553 TEST(F16_SPMM_MINMAX_32X1__NEONFP16ARITH_X2,half_sparse)1554 TEST(F16_SPMM_MINMAX_32X1__NEONFP16ARITH_X2, half_sparse) { 1555 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1556 for (uint32_t n = 1; n < 10; n += 2) { 1557 for (size_t k = 1; k <= 10; k += 3) { 1558 SpMMMicrokernelTester() 1559 .mr(32) 1560 .nr(1) 1561 .m(64) 1562 .n(n) 1563 .k(k) 1564 .sparsity(0.5f) 1565 .Test(xnn_f16_spmm_minmax_ukernel_32x1__neonfp16arith_x2); 1566 } 1567 } 1568 } 1569 TEST(F16_SPMM_MINMAX_32X1__NEONFP16ARITH_X2,zero_weights)1570 TEST(F16_SPMM_MINMAX_32X1__NEONFP16ARITH_X2, zero_weights) { 1571 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1572 for (uint32_t n = 1; n < 10; n += 2) { 1573 for (size_t k = 1; k <= 10; k += 3) { 1574 SpMMMicrokernelTester() 1575 .mr(32) 1576 .nr(1) 1577 .m(64) 1578 .n(n) 1579 .k(k) 1580 .sparsity(1.0f) 1581 .Test(xnn_f16_spmm_minmax_ukernel_32x1__neonfp16arith_x2); 1582 } 1583 } 1584 } 1585 #endif // XNN_ARCH_ARM64 1586