// Copyright (c) Facebook, Inc. and its affiliates.
// All rights reserved.
//
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
//
// Auto-generated file. Do not edit!
//   Specification: test/f32-igemm.yaml
//   Generator: tools/generate-gemm-test.py


#include <cstddef>
#include <cstdint>

#include <gtest/gtest.h>

#include <xnnpack/allocator.h>
#include <xnnpack/common.h>
#include <xnnpack/isa-checks.h>
#include <xnnpack/microparams-init.h>

#include <xnnpack/gemm.h>
#include <xnnpack/igemm.h>
#include <xnnpack/ppmm.h>
#include "gemm-microkernel-tester.h"


#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Test suite for xnn_f32_igemm_ukernel_1x8__wasmsimd_loadsplat. It is built
// only when XNN_ARCH_WASMSIMD or XNN_ARCH_WASMRELAXEDSIMD is set. Every case
// configures a GemmMicrokernelTester with mr=1, nr=8, kr=1, sr=1 and then
// varies m, n, k, the strides and the remaining tester settings.
// NOTE(review): mr/nr/kr/sr presumably describe the microkernel tile shape;
// their exact meaning is defined by gemm-microkernel-tester.h -- confirm there.

// m=1, n=8, k=1.
TEST(F32_IGEMM_1X8__WASMSIMD_LOADSPLAT, k_eq_1) {
  GemmMicrokernelTester()
    .mr(1)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(1)
    .n(8)
    .k(1)
    .Test(xnn_f32_igemm_ukernel_1x8__wasmsimd_loadsplat);
}

// m=1, n=8, k=1 with cn_stride set to 11.
TEST(F32_IGEMM_1X8__WASMSIMD_LOADSPLAT, strided_cn) {
  GemmMicrokernelTester()
    .mr(1)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(1)
    .n(8)
    .k(1)
    .cn_stride(11)
    .Test(xnn_f32_igemm_ukernel_1x8__wasmsimd_loadsplat);
}

// k=1; n sweeps 1..8 and m sweeps 1..1, with iterations(1).
TEST(F32_IGEMM_1X8__WASMSIMD_LOADSPLAT, k_eq_1_subtile) {
  for (uint32_t n = 1; n <= 8; n++) {
    for (uint32_t m = 1; m <= 1; m++) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(m)
        .n(n)
        .k(1)
        .iterations(1)
        .Test(xnn_f32_igemm_ukernel_1x8__wasmsimd_loadsplat);
    }
  }
}

// k=1, n=8; m sweeps 1..1, with iterations(1).
TEST(F32_IGEMM_1X8__WASMSIMD_LOADSPLAT, k_eq_1_subtile_m) {
  for (uint32_t m = 1; m <= 1; m++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(m)
      .n(8)
      .k(1)
      .iterations(1)
      .Test(xnn_f32_igemm_ukernel_1x8__wasmsimd_loadsplat);
  }
}

// k=1, m=1; n sweeps 1..8, with iterations(1).
TEST(F32_IGEMM_1X8__WASMSIMD_LOADSPLAT, k_eq_1_subtile_n) {
  for (uint32_t n = 1; n <= 8; n++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(1)
      .n(n)
      .k(1)
      .iterations(1)
      .Test(xnn_f32_igemm_ukernel_1x8__wasmsimd_loadsplat);
  }
}

// m=1, n=8; k sweeps 2..9.
TEST(F32_IGEMM_1X8__WASMSIMD_LOADSPLAT, k_gt_1) {
  for (size_t k = 2; k < 10; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(1)
      .n(8)
      .k(k)
      .Test(xnn_f32_igemm_ukernel_1x8__wasmsimd_loadsplat);
  }
}

// k sweeps 2..9, n sweeps 1..8, m sweeps 1..1, with iterations(1).
TEST(F32_IGEMM_1X8__WASMSIMD_LOADSPLAT, k_gt_1_subtile) {
  for (size_t k = 2; k < 10; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_1x8__wasmsimd_loadsplat);
      }
    }
  }
}

// m=1; n sweeps 9..15 and k takes 1, 3, 5.
TEST(F32_IGEMM_1X8__WASMSIMD_LOADSPLAT, n_gt_8) {
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .Test(xnn_f32_igemm_ukernel_1x8__wasmsimd_loadsplat);
    }
  }
}

// Same sweep as n_gt_8, with cn_stride set to 11.
TEST(F32_IGEMM_1X8__WASMSIMD_LOADSPLAT, n_gt_8_strided_cn) {
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_f32_igemm_ukernel_1x8__wasmsimd_loadsplat);
    }
  }
}

// n sweeps 9..15, k takes 1, 3, 5, m sweeps 1..1, with iterations(1).
TEST(F32_IGEMM_1X8__WASMSIMD_LOADSPLAT, n_gt_8_subtile) {
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_1x8__wasmsimd_loadsplat);
      }
    }
  }
}

// m=1; n takes 16 and 24, k takes 1, 3, 5.
TEST(F32_IGEMM_1X8__WASMSIMD_LOADSPLAT, n_div_8) {
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .Test(xnn_f32_igemm_ukernel_1x8__wasmsimd_loadsplat);
    }
  }
}

// Same sweep as n_div_8, with cn_stride set to 11.
TEST(F32_IGEMM_1X8__WASMSIMD_LOADSPLAT, n_div_8_strided_cn) {
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_f32_igemm_ukernel_1x8__wasmsimd_loadsplat);
    }
  }
}

// n takes 16 and 24, k takes 1, 3, 5, m sweeps 1..1, with iterations(1).
TEST(F32_IGEMM_1X8__WASMSIMD_LOADSPLAT, n_div_8_subtile) {
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_1x8__wasmsimd_loadsplat);
      }
    }
  }
}

// m=1, n=8, ks=3; k takes 1, 3, 5.
TEST(F32_IGEMM_1X8__WASMSIMD_LOADSPLAT, small_kernel) {
  for (size_t k = 1; k <= 5; k += 2) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(1)
      .n(8)
      .k(k)
      .ks(3)
      .Test(xnn_f32_igemm_ukernel_1x8__wasmsimd_loadsplat);
  }
}

// ks=3; k takes 1, 3, 5, n sweeps 1..8, m sweeps 1..1, with iterations(1).
TEST(F32_IGEMM_1X8__WASMSIMD_LOADSPLAT, small_kernel_subtile) {
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_1x8__wasmsimd_loadsplat);
      }
    }
  }
}

// m=1, ks=3; n sweeps 9..15 and k takes 1, 3, 5.
TEST(F32_IGEMM_1X8__WASMSIMD_LOADSPLAT, n_gt_8_small_kernel) {
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_f32_igemm_ukernel_1x8__wasmsimd_loadsplat);
    }
  }
}

// m=1, ks=3; n takes 16 and 24, k takes 1, 3, 5.
TEST(F32_IGEMM_1X8__WASMSIMD_LOADSPLAT, n_div_8_small_kernel) {
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_f32_igemm_ukernel_1x8__wasmsimd_loadsplat);
    }
  }
}

// cm_stride=11; k takes 1, 3, 5, n sweeps 1..8, m sweeps 1..1, iterations(1).
TEST(F32_IGEMM_1X8__WASMSIMD_LOADSPLAT, strided_cm_subtile) {
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_1x8__wasmsimd_loadsplat);
      }
    }
  }
}

// m=1, n=8, ks=3, a_offset=7; k takes 1, 3, 5.
TEST(F32_IGEMM_1X8__WASMSIMD_LOADSPLAT, a_offset) {
  for (size_t k = 1; k <= 5; k += 2) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(1)
      .n(8)
      .k(k)
      .ks(3)
      .a_offset(7)
      .Test(xnn_f32_igemm_ukernel_1x8__wasmsimd_loadsplat);
  }
}

// As a_offset, additionally passing zero_index(mz) for mz in 0..0.
TEST(F32_IGEMM_1X8__WASMSIMD_LOADSPLAT, zero) {
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t mz = 0; mz < 1; mz++) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(1)
        .n(8)
        .k(k)
        .ks(3)
        .a_offset(7)
        .zero_index(mz)
        .Test(xnn_f32_igemm_ukernel_1x8__wasmsimd_loadsplat);
    }
  }
}

// m=1, n=8, k=1 with cm_stride set to 11.
TEST(F32_IGEMM_1X8__WASMSIMD_LOADSPLAT, strided_cm) {
  GemmMicrokernelTester()
    .mr(1)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(1)
    .n(8)
    .k(1)
    .cm_stride(11)
    .Test(xnn_f32_igemm_ukernel_1x8__wasmsimd_loadsplat);
}
#endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD


#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Test suite for xnn_f32_igemm_ukernel_1x8__wasmsimd_splat. It is built only
// when XNN_ARCH_WASMSIMD or XNN_ARCH_WASMRELAXEDSIMD is set. Every case
// configures a GemmMicrokernelTester with mr=1, nr=8, kr=1, sr=1 and then
// varies m, n, k, the strides and the remaining tester settings.
// NOTE(review): mr/nr/kr/sr presumably describe the microkernel tile shape;
// their exact meaning is defined by gemm-microkernel-tester.h -- confirm there.

// m=1, n=8, k=4.
TEST(F32_IGEMM_1X8__WASMSIMD_SPLAT, k_eq_4) {
  GemmMicrokernelTester()
    .mr(1)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(1)
    .n(8)
    .k(4)
    .Test(xnn_f32_igemm_ukernel_1x8__wasmsimd_splat);
}

// m=1, n=8, k=4 with cn_stride set to 11.
TEST(F32_IGEMM_1X8__WASMSIMD_SPLAT, strided_cn) {
  GemmMicrokernelTester()
    .mr(1)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(1)
    .n(8)
    .k(4)
    .cn_stride(11)
    .Test(xnn_f32_igemm_ukernel_1x8__wasmsimd_splat);
}

// k=4; n sweeps 1..8 and m sweeps 1..1, with iterations(1).
TEST(F32_IGEMM_1X8__WASMSIMD_SPLAT, k_eq_4_subtile) {
  for (uint32_t n = 1; n <= 8; n++) {
    for (uint32_t m = 1; m <= 1; m++) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(m)
        .n(n)
        .k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_ukernel_1x8__wasmsimd_splat);
    }
  }
}

// k=4, n=8; m sweeps 1..1, with iterations(1).
TEST(F32_IGEMM_1X8__WASMSIMD_SPLAT, k_eq_4_subtile_m) {
  for (uint32_t m = 1; m <= 1; m++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(m)
      .n(8)
      .k(4)
      .iterations(1)
      .Test(xnn_f32_igemm_ukernel_1x8__wasmsimd_splat);
  }
}

// k=4, m=1; n sweeps 1..8, with iterations(1).
TEST(F32_IGEMM_1X8__WASMSIMD_SPLAT, k_eq_4_subtile_n) {
  for (uint32_t n = 1; n <= 8; n++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(1)
      .n(n)
      .k(4)
      .iterations(1)
      .Test(xnn_f32_igemm_ukernel_1x8__wasmsimd_splat);
  }
}

// m=1, n=8; k sweeps 1..3.
TEST(F32_IGEMM_1X8__WASMSIMD_SPLAT, k_lt_4) {
  for (size_t k = 1; k < 4; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(1)
      .n(8)
      .k(k)
      .Test(xnn_f32_igemm_ukernel_1x8__wasmsimd_splat);
  }
}

// k sweeps 1..3, n sweeps 1..8, m sweeps 1..1, with iterations(1).
TEST(F32_IGEMM_1X8__WASMSIMD_SPLAT, k_lt_4_subtile) {
  for (size_t k = 1; k < 4; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_1x8__wasmsimd_splat);
      }
    }
  }
}

// m=1, n=8; k sweeps 5..7.
TEST(F32_IGEMM_1X8__WASMSIMD_SPLAT, k_gt_4) {
  for (size_t k = 5; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(1)
      .n(8)
      .k(k)
      .Test(xnn_f32_igemm_ukernel_1x8__wasmsimd_splat);
  }
}

// k sweeps 5..7, n sweeps 1..8, m sweeps 1..1, with iterations(1).
TEST(F32_IGEMM_1X8__WASMSIMD_SPLAT, k_gt_4_subtile) {
  for (size_t k = 5; k < 8; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_1x8__wasmsimd_splat);
      }
    }
  }
}

// m=1, n=8; k sweeps 8..40 in steps of 4.
TEST(F32_IGEMM_1X8__WASMSIMD_SPLAT, k_div_4) {
  for (size_t k = 8; k <= 40; k += 4) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(1)
      .n(8)
      .k(k)
      .Test(xnn_f32_igemm_ukernel_1x8__wasmsimd_splat);
  }
}

// k sweeps 8..40 in steps of 4, n sweeps 1..8, m sweeps 1..1, iterations(1).
TEST(F32_IGEMM_1X8__WASMSIMD_SPLAT, k_div_4_subtile) {
  for (size_t k = 8; k <= 40; k += 4) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_1x8__wasmsimd_splat);
      }
    }
  }
}

// m=1; n sweeps 9..15 and k takes 1, 6, 11, 16.
TEST(F32_IGEMM_1X8__WASMSIMD_SPLAT, n_gt_8) {
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .Test(xnn_f32_igemm_ukernel_1x8__wasmsimd_splat);
    }
  }
}

// Same sweep as n_gt_8, with cn_stride set to 11.
TEST(F32_IGEMM_1X8__WASMSIMD_SPLAT, n_gt_8_strided_cn) {
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_f32_igemm_ukernel_1x8__wasmsimd_splat);
    }
  }
}

// n sweeps 9..15, k takes 1, 6, 11, 16, m sweeps 1..1, with iterations(1).
TEST(F32_IGEMM_1X8__WASMSIMD_SPLAT, n_gt_8_subtile) {
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_1x8__wasmsimd_splat);
      }
    }
  }
}

// m=1; n takes 16 and 24, k takes 1, 6, 11, 16.
TEST(F32_IGEMM_1X8__WASMSIMD_SPLAT, n_div_8) {
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .Test(xnn_f32_igemm_ukernel_1x8__wasmsimd_splat);
    }
  }
}

// Same sweep as n_div_8, with cn_stride set to 11.
TEST(F32_IGEMM_1X8__WASMSIMD_SPLAT, n_div_8_strided_cn) {
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_f32_igemm_ukernel_1x8__wasmsimd_splat);
    }
  }
}

// n takes 16 and 24, k takes 1, 6, 11, 16, m sweeps 1..1, with iterations(1).
TEST(F32_IGEMM_1X8__WASMSIMD_SPLAT, n_div_8_subtile) {
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_1x8__wasmsimd_splat);
      }
    }
  }
}

// m=1, n=8, ks=3; k takes 1, 6, 11, 16.
TEST(F32_IGEMM_1X8__WASMSIMD_SPLAT, small_kernel) {
  for (size_t k = 1; k <= 20; k += 5) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(1)
      .n(8)
      .k(k)
      .ks(3)
      .Test(xnn_f32_igemm_ukernel_1x8__wasmsimd_splat);
  }
}

// ks=3; k takes 1, 6, 11, 16, n sweeps 1..8, m sweeps 1..1, iterations(1).
TEST(F32_IGEMM_1X8__WASMSIMD_SPLAT, small_kernel_subtile) {
  for (size_t k = 1; k <= 20; k += 5) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_1x8__wasmsimd_splat);
      }
    }
  }
}

// m=1, ks=3; n sweeps 9..15 and k takes 1, 6, 11, 16.
TEST(F32_IGEMM_1X8__WASMSIMD_SPLAT, n_gt_8_small_kernel) {
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_f32_igemm_ukernel_1x8__wasmsimd_splat);
    }
  }
}

// m=1, ks=3; n takes 16 and 24, k takes 1, 6, 11, 16.
TEST(F32_IGEMM_1X8__WASMSIMD_SPLAT, n_div_8_small_kernel) {
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_f32_igemm_ukernel_1x8__wasmsimd_splat);
    }
  }
}

// cm_stride=11; k takes 1, 6, 11, 16, n sweeps 1..8, m sweeps 1..1,
// with iterations(1).
TEST(F32_IGEMM_1X8__WASMSIMD_SPLAT, strided_cm_subtile) {
  for (size_t k = 1; k <= 20; k += 5) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_1x8__wasmsimd_splat);
      }
    }
  }
}

// m=1, n=8, ks=3, a_offset=23; k takes 1, 6, 11, 16.
TEST(F32_IGEMM_1X8__WASMSIMD_SPLAT, a_offset) {
  for (size_t k = 1; k <= 20; k += 5) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(1)
      .n(8)
      .k(k)
      .ks(3)
      .a_offset(23)
      .Test(xnn_f32_igemm_ukernel_1x8__wasmsimd_splat);
  }
}

// As a_offset, additionally passing zero_index(mz) for mz in 0..0.
TEST(F32_IGEMM_1X8__WASMSIMD_SPLAT, zero) {
  for (size_t k = 1; k <= 20; k += 5) {
    for (uint32_t mz = 0; mz < 1; mz++) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(1)
        .n(8)
        .k(k)
        .ks(3)
        .a_offset(23)
        .zero_index(mz)
        .Test(xnn_f32_igemm_ukernel_1x8__wasmsimd_splat);
    }
  }
}

// m=1, n=8, k=4 with cm_stride set to 11.
TEST(F32_IGEMM_1X8__WASMSIMD_SPLAT, strided_cm) {
  GemmMicrokernelTester()
    .mr(1)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(1)
    .n(8)
    .k(4)
    .cm_stride(11)
    .Test(xnn_f32_igemm_ukernel_1x8__wasmsimd_splat);
}
#endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD


#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Test suite for xnn_f32_igemm_ukernel_1x8s4__wasmsimd. It is built only when
// XNN_ARCH_WASMSIMD or XNN_ARCH_WASMRELAXEDSIMD is set. Every case configures
// a GemmMicrokernelTester with mr=1, nr=8, kr=1, sr=4 and then varies m, n, k,
// the strides and the remaining tester settings.
// NOTE(review): mr/nr/kr/sr presumably describe the microkernel tile shape;
// their exact meaning is defined by gemm-microkernel-tester.h -- confirm there.

// m=1, n=8, k=4.
TEST(F32_IGEMM_1X8S4__WASMSIMD, k_eq_4) {
  GemmMicrokernelTester()
    .mr(1)
    .nr(8)
    .kr(1)
    .sr(4)
    .m(1)
    .n(8)
    .k(4)
    .Test(xnn_f32_igemm_ukernel_1x8s4__wasmsimd);
}

// m=1, n=8, k=4 with cn_stride set to 11.
TEST(F32_IGEMM_1X8S4__WASMSIMD, strided_cn) {
  GemmMicrokernelTester()
    .mr(1)
    .nr(8)
    .kr(1)
    .sr(4)
    .m(1)
    .n(8)
    .k(4)
    .cn_stride(11)
    .Test(xnn_f32_igemm_ukernel_1x8s4__wasmsimd);
}

// k=4; n sweeps 1..8 and m sweeps 1..1, with iterations(1).
TEST(F32_IGEMM_1X8S4__WASMSIMD, k_eq_4_subtile) {
  for (uint32_t n = 1; n <= 8; n++) {
    for (uint32_t m = 1; m <= 1; m++) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(1)
        .sr(4)
        .m(m)
        .n(n)
        .k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_ukernel_1x8s4__wasmsimd);
    }
  }
}

// k=4, n=8; m sweeps 1..1, with iterations(1).
TEST(F32_IGEMM_1X8S4__WASMSIMD, k_eq_4_subtile_m) {
  for (uint32_t m = 1; m <= 1; m++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(1)
      .sr(4)
      .m(m)
      .n(8)
      .k(4)
      .iterations(1)
      .Test(xnn_f32_igemm_ukernel_1x8s4__wasmsimd);
  }
}

// k=4, m=1; n sweeps 1..8, with iterations(1).
TEST(F32_IGEMM_1X8S4__WASMSIMD, k_eq_4_subtile_n) {
  for (uint32_t n = 1; n <= 8; n++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(1)
      .sr(4)
      .m(1)
      .n(n)
      .k(4)
      .iterations(1)
      .Test(xnn_f32_igemm_ukernel_1x8s4__wasmsimd);
  }
}

// m=1, n=8; k sweeps 1..3.
TEST(F32_IGEMM_1X8S4__WASMSIMD, k_lt_4) {
  for (size_t k = 1; k < 4; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(1)
      .sr(4)
      .m(1)
      .n(8)
      .k(k)
      .Test(xnn_f32_igemm_ukernel_1x8s4__wasmsimd);
  }
}

// k sweeps 1..3, n sweeps 1..8, m sweeps 1..1, with iterations(1).
TEST(F32_IGEMM_1X8S4__WASMSIMD, k_lt_4_subtile) {
  for (size_t k = 1; k < 4; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(1)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_1x8s4__wasmsimd);
      }
    }
  }
}

// m=1, n=8; k sweeps 5..7.
TEST(F32_IGEMM_1X8S4__WASMSIMD, k_gt_4) {
  for (size_t k = 5; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(1)
      .sr(4)
      .m(1)
      .n(8)
      .k(k)
      .Test(xnn_f32_igemm_ukernel_1x8s4__wasmsimd);
  }
}

// k sweeps 5..7, n sweeps 1..8, m sweeps 1..1, with iterations(1).
TEST(F32_IGEMM_1X8S4__WASMSIMD, k_gt_4_subtile) {
  for (size_t k = 5; k < 8; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(1)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_1x8s4__wasmsimd);
      }
    }
  }
}

// m=1, n=8; k sweeps 8..40 in steps of 4.
TEST(F32_IGEMM_1X8S4__WASMSIMD, k_div_4) {
  for (size_t k = 8; k <= 40; k += 4) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(1)
      .sr(4)
      .m(1)
      .n(8)
      .k(k)
      .Test(xnn_f32_igemm_ukernel_1x8s4__wasmsimd);
  }
}

// k sweeps 8..40 in steps of 4, n sweeps 1..8, m sweeps 1..1, iterations(1).
TEST(F32_IGEMM_1X8S4__WASMSIMD, k_div_4_subtile) {
  for (size_t k = 8; k <= 40; k += 4) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(1)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_1x8s4__wasmsimd);
      }
    }
  }
}

// m=1; n sweeps 9..15 and k takes 1, 6, 11, 16.
TEST(F32_IGEMM_1X8S4__WASMSIMD, n_gt_8) {
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(1)
        .sr(4)
        .m(1)
        .n(n)
        .k(k)
        .Test(xnn_f32_igemm_ukernel_1x8s4__wasmsimd);
    }
  }
}

// Same sweep as n_gt_8, with cn_stride set to 11.
TEST(F32_IGEMM_1X8S4__WASMSIMD, n_gt_8_strided_cn) {
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(1)
        .sr(4)
        .m(1)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_f32_igemm_ukernel_1x8s4__wasmsimd);
    }
  }
}

// n sweeps 9..15, k takes 1, 6, 11, 16, m sweeps 1..1, with iterations(1).
TEST(F32_IGEMM_1X8S4__WASMSIMD, n_gt_8_subtile) {
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(1)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_1x8s4__wasmsimd);
      }
    }
  }
}

// m=1; n takes 16 and 24, k takes 1, 6, 11, 16.
TEST(F32_IGEMM_1X8S4__WASMSIMD, n_div_8) {
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(1)
        .sr(4)
        .m(1)
        .n(n)
        .k(k)
        .Test(xnn_f32_igemm_ukernel_1x8s4__wasmsimd);
    }
  }
}

// Same sweep as n_div_8, with cn_stride set to 11.
TEST(F32_IGEMM_1X8S4__WASMSIMD, n_div_8_strided_cn) {
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(1)
        .sr(4)
        .m(1)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_f32_igemm_ukernel_1x8s4__wasmsimd);
    }
  }
}

// n takes 16 and 24, k takes 1, 6, 11, 16, m sweeps 1..1, with iterations(1).
TEST(F32_IGEMM_1X8S4__WASMSIMD, n_div_8_subtile) {
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(1)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_1x8s4__wasmsimd);
      }
    }
  }
}

// m=1, n=8, ks=3; k takes 1, 6, 11, 16.
TEST(F32_IGEMM_1X8S4__WASMSIMD, small_kernel) {
  for (size_t k = 1; k <= 20; k += 5) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(1)
      .sr(4)
      .m(1)
      .n(8)
      .k(k)
      .ks(3)
      .Test(xnn_f32_igemm_ukernel_1x8s4__wasmsimd);
  }
}

// ks=3; k takes 1, 6, 11, 16, n sweeps 1..8, m sweeps 1..1, iterations(1).
TEST(F32_IGEMM_1X8S4__WASMSIMD, small_kernel_subtile) {
  for (size_t k = 1; k <= 20; k += 5) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(1)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_1x8s4__wasmsimd);
      }
    }
  }
}

// m=1, ks=3; n sweeps 9..15 and k takes 1, 6, 11, 16.
TEST(F32_IGEMM_1X8S4__WASMSIMD, n_gt_8_small_kernel) {
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(1)
        .sr(4)
        .m(1)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_f32_igemm_ukernel_1x8s4__wasmsimd);
    }
  }
}

// m=1, ks=3; n takes 16 and 24, k takes 1, 6, 11, 16.
TEST(F32_IGEMM_1X8S4__WASMSIMD, n_div_8_small_kernel) {
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(1)
        .sr(4)
        .m(1)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_f32_igemm_ukernel_1x8s4__wasmsimd);
    }
  }
}

// cm_stride=11; k takes 1, 6, 11, 16, n sweeps 1..8, m sweeps 1..1,
// with iterations(1).
TEST(F32_IGEMM_1X8S4__WASMSIMD, strided_cm_subtile) {
  for (size_t k = 1; k <= 20; k += 5) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(8)
          .kr(1)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_1x8s4__wasmsimd);
      }
    }
  }
}

// m=1, n=8, ks=3, a_offset=23; k takes 1, 6, 11, 16.
TEST(F32_IGEMM_1X8S4__WASMSIMD, a_offset) {
  for (size_t k = 1; k <= 20; k += 5) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(8)
      .kr(1)
      .sr(4)
      .m(1)
      .n(8)
      .k(k)
      .ks(3)
      .a_offset(23)
      .Test(xnn_f32_igemm_ukernel_1x8s4__wasmsimd);
  }
}

// As a_offset, additionally passing zero_index(mz) for mz in 0..0.
TEST(F32_IGEMM_1X8S4__WASMSIMD, zero) {
  for (size_t k = 1; k <= 20; k += 5) {
    for (uint32_t mz = 0; mz < 1; mz++) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(1)
        .sr(4)
        .m(1)
        .n(8)
        .k(k)
        .ks(3)
        .a_offset(23)
        .zero_index(mz)
        .Test(xnn_f32_igemm_ukernel_1x8s4__wasmsimd);
    }
  }
}

// m=1, n=8, k=4 with cm_stride set to 11.
TEST(F32_IGEMM_1X8S4__WASMSIMD, strided_cm) {
  GemmMicrokernelTester()
    .mr(1)
    .nr(8)
    .kr(1)
    .sr(4)
    .m(1)
    .n(8)
    .k(4)
    .cm_stride(11)
    .Test(xnn_f32_igemm_ukernel_1x8s4__wasmsimd);
}
#endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD


1206 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
TEST(F32_IGEMM_4X8__WASMSIMD_SPLAT,k_eq_4)1207 TEST(F32_IGEMM_4X8__WASMSIMD_SPLAT, k_eq_4) {
1208 GemmMicrokernelTester()
1209 .mr(4)
1210 .nr(8)
1211 .kr(1)
1212 .sr(1)
1213 .m(4)
1214 .n(8)
1215 .k(4)
1216 .Test(xnn_f32_igemm_ukernel_4x8__wasmsimd_splat);
1217 }
1218
TEST(F32_IGEMM_4X8__WASMSIMD_SPLAT,strided_cn)1219 TEST(F32_IGEMM_4X8__WASMSIMD_SPLAT, strided_cn) {
1220 GemmMicrokernelTester()
1221 .mr(4)
1222 .nr(8)
1223 .kr(1)
1224 .sr(1)
1225 .m(4)
1226 .n(8)
1227 .k(4)
1228 .cn_stride(11)
1229 .Test(xnn_f32_igemm_ukernel_4x8__wasmsimd_splat);
1230 }
1231
TEST(F32_IGEMM_4X8__WASMSIMD_SPLAT,k_eq_4_subtile)1232 TEST(F32_IGEMM_4X8__WASMSIMD_SPLAT, k_eq_4_subtile) {
1233 for (uint32_t n = 1; n <= 8; n++) {
1234 for (uint32_t m = 1; m <= 4; m++) {
1235 GemmMicrokernelTester()
1236 .mr(4)
1237 .nr(8)
1238 .kr(1)
1239 .sr(1)
1240 .m(m)
1241 .n(n)
1242 .k(4)
1243 .iterations(1)
1244 .Test(xnn_f32_igemm_ukernel_4x8__wasmsimd_splat);
1245 }
1246 }
1247 }
1248
TEST(F32_IGEMM_4X8__WASMSIMD_SPLAT,k_eq_4_subtile_m)1249 TEST(F32_IGEMM_4X8__WASMSIMD_SPLAT, k_eq_4_subtile_m) {
1250 for (uint32_t m = 1; m <= 4; m++) {
1251 GemmMicrokernelTester()
1252 .mr(4)
1253 .nr(8)
1254 .kr(1)
1255 .sr(1)
1256 .m(m)
1257 .n(8)
1258 .k(4)
1259 .iterations(1)
1260 .Test(xnn_f32_igemm_ukernel_4x8__wasmsimd_splat);
1261 }
1262 }
1263
TEST(F32_IGEMM_4X8__WASMSIMD_SPLAT,k_eq_4_subtile_n)1264 TEST(F32_IGEMM_4X8__WASMSIMD_SPLAT, k_eq_4_subtile_n) {
1265 for (uint32_t n = 1; n <= 8; n++) {
1266 GemmMicrokernelTester()
1267 .mr(4)
1268 .nr(8)
1269 .kr(1)
1270 .sr(1)
1271 .m(4)
1272 .n(n)
1273 .k(4)
1274 .iterations(1)
1275 .Test(xnn_f32_igemm_ukernel_4x8__wasmsimd_splat);
1276 }
1277 }
1278
TEST(F32_IGEMM_4X8__WASMSIMD_SPLAT,k_lt_4)1279 TEST(F32_IGEMM_4X8__WASMSIMD_SPLAT, k_lt_4) {
1280 for (size_t k = 1; k < 4; k++) {
1281 GemmMicrokernelTester()
1282 .mr(4)
1283 .nr(8)
1284 .kr(1)
1285 .sr(1)
1286 .m(4)
1287 .n(8)
1288 .k(k)
1289 .Test(xnn_f32_igemm_ukernel_4x8__wasmsimd_splat);
1290 }
1291 }
1292
TEST(F32_IGEMM_4X8__WASMSIMD_SPLAT,k_lt_4_subtile)1293 TEST(F32_IGEMM_4X8__WASMSIMD_SPLAT, k_lt_4_subtile) {
1294 for (size_t k = 1; k < 4; k++) {
1295 for (uint32_t n = 1; n <= 8; n++) {
1296 for (uint32_t m = 1; m <= 4; m++) {
1297 GemmMicrokernelTester()
1298 .mr(4)
1299 .nr(8)
1300 .kr(1)
1301 .sr(1)
1302 .m(m)
1303 .n(n)
1304 .k(k)
1305 .iterations(1)
1306 .Test(xnn_f32_igemm_ukernel_4x8__wasmsimd_splat);
1307 }
1308 }
1309 }
1310 }
1311
TEST(F32_IGEMM_4X8__WASMSIMD_SPLAT,k_gt_4)1312 TEST(F32_IGEMM_4X8__WASMSIMD_SPLAT, k_gt_4) {
1313 for (size_t k = 5; k < 8; k++) {
1314 GemmMicrokernelTester()
1315 .mr(4)
1316 .nr(8)
1317 .kr(1)
1318 .sr(1)
1319 .m(4)
1320 .n(8)
1321 .k(k)
1322 .Test(xnn_f32_igemm_ukernel_4x8__wasmsimd_splat);
1323 }
1324 }
1325
TEST(F32_IGEMM_4X8__WASMSIMD_SPLAT,k_gt_4_subtile)1326 TEST(F32_IGEMM_4X8__WASMSIMD_SPLAT, k_gt_4_subtile) {
1327 for (size_t k = 5; k < 8; k++) {
1328 for (uint32_t n = 1; n <= 8; n++) {
1329 for (uint32_t m = 1; m <= 4; m++) {
1330 GemmMicrokernelTester()
1331 .mr(4)
1332 .nr(8)
1333 .kr(1)
1334 .sr(1)
1335 .m(m)
1336 .n(n)
1337 .k(k)
1338 .iterations(1)
1339 .Test(xnn_f32_igemm_ukernel_4x8__wasmsimd_splat);
1340 }
1341 }
1342 }
1343 }
1344
TEST(F32_IGEMM_4X8__WASMSIMD_SPLAT,k_div_4)1345 TEST(F32_IGEMM_4X8__WASMSIMD_SPLAT, k_div_4) {
1346 for (size_t k = 8; k <= 40; k += 4) {
1347 GemmMicrokernelTester()
1348 .mr(4)
1349 .nr(8)
1350 .kr(1)
1351 .sr(1)
1352 .m(4)
1353 .n(8)
1354 .k(k)
1355 .Test(xnn_f32_igemm_ukernel_4x8__wasmsimd_splat);
1356 }
1357 }
1358
TEST(F32_IGEMM_4X8__WASMSIMD_SPLAT,k_div_4_subtile)1359 TEST(F32_IGEMM_4X8__WASMSIMD_SPLAT, k_div_4_subtile) {
1360 for (size_t k = 8; k <= 40; k += 4) {
1361 for (uint32_t n = 1; n <= 8; n++) {
1362 for (uint32_t m = 1; m <= 4; m++) {
1363 GemmMicrokernelTester()
1364 .mr(4)
1365 .nr(8)
1366 .kr(1)
1367 .sr(1)
1368 .m(m)
1369 .n(n)
1370 .k(k)
1371 .iterations(1)
1372 .Test(xnn_f32_igemm_ukernel_4x8__wasmsimd_splat);
1373 }
1374 }
1375 }
1376 }
1377
TEST(F32_IGEMM_4X8__WASMSIMD_SPLAT,n_gt_8)1378 TEST(F32_IGEMM_4X8__WASMSIMD_SPLAT, n_gt_8) {
1379 for (uint32_t n = 9; n < 16; n++) {
1380 for (size_t k = 1; k <= 20; k += 5) {
1381 GemmMicrokernelTester()
1382 .mr(4)
1383 .nr(8)
1384 .kr(1)
1385 .sr(1)
1386 .m(4)
1387 .n(n)
1388 .k(k)
1389 .Test(xnn_f32_igemm_ukernel_4x8__wasmsimd_splat);
1390 }
1391 }
1392 }
1393
// Sweeps n = 9..15 with k = 1, 6, 11, 16 at m=4, with cn_stride(11).
TEST(F32_IGEMM_4X8__WASMSIMD_SPLAT, n_gt_8_strided_cn) {
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_f32_igemm_ukernel_4x8__wasmsimd_splat);
    }
  }
}
1410
// Sweeps n = 9..15, k = 1, 6, 11, 16 and m = 1..4, using iterations(1)
// per shape.
TEST(F32_IGEMM_4X8__WASMSIMD_SPLAT, n_gt_8_subtile) {
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_4x8__wasmsimd_splat);
      }
    }
  }
}
1429
// Sweeps n = 16 and 24 (multiples of nr=8) with k = 1, 6, 11, 16 at m=4.
TEST(F32_IGEMM_4X8__WASMSIMD_SPLAT, n_div_8) {
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .Test(xnn_f32_igemm_ukernel_4x8__wasmsimd_splat);
    }
  }
}
1445
// Sweeps n = 16 and 24 with k = 1, 6, 11, 16 at m=4, with cn_stride(11).
TEST(F32_IGEMM_4X8__WASMSIMD_SPLAT, n_div_8_strided_cn) {
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_f32_igemm_ukernel_4x8__wasmsimd_splat);
    }
  }
}
1462
// Sweeps n = 16 and 24, k = 1, 6, 11, 16 and m = 1..4, using iterations(1)
// per shape.
TEST(F32_IGEMM_4X8__WASMSIMD_SPLAT, n_div_8_subtile) {
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_4x8__wasmsimd_splat);
      }
    }
  }
}
1481
// Sweeps k = 1, 6, 11, 16 on the full m=4, n=8 tile with ks(3).
TEST(F32_IGEMM_4X8__WASMSIMD_SPLAT, small_kernel) {
  for (size_t k = 1; k <= 20; k += 5) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(4)
      .n(8)
      .k(k)
      .ks(3)
      .Test(xnn_f32_igemm_ukernel_4x8__wasmsimd_splat);
  }
}
1496
// Sweeps k = 1, 6, 11, 16 with every n in 1..8 and m in 1..4, with ks(3)
// and iterations(1) per shape.
TEST(F32_IGEMM_4X8__WASMSIMD_SPLAT, small_kernel_subtile) {
  for (size_t k = 1; k <= 20; k += 5) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_4x8__wasmsimd_splat);
      }
    }
  }
}
1516
// Sweeps n = 9..15 with k = 1, 6, 11, 16 at m=4, with ks(3).
TEST(F32_IGEMM_4X8__WASMSIMD_SPLAT, n_gt_8_small_kernel) {
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_f32_igemm_ukernel_4x8__wasmsimd_splat);
    }
  }
}
1533
// Sweeps n = 16 and 24 with k = 1, 6, 11, 16 at m=4, with ks(3).
TEST(F32_IGEMM_4X8__WASMSIMD_SPLAT, n_div_8_small_kernel) {
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_f32_igemm_ukernel_4x8__wasmsimd_splat);
    }
  }
}
1550
// Sweeps k = 1, 6, 11, 16 with every n in 1..8 and m in 1..4, with
// cm_stride(11) and iterations(1) per shape.
TEST(F32_IGEMM_4X8__WASMSIMD_SPLAT, strided_cm_subtile) {
  for (size_t k = 1; k <= 20; k += 5) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_4x8__wasmsimd_splat);
      }
    }
  }
}
1570
// Sweeps k = 1, 6, 11, 16 on the full m=4, n=8 tile with ks(3) and
// a_offset(83).
TEST(F32_IGEMM_4X8__WASMSIMD_SPLAT, a_offset) {
  for (size_t k = 1; k <= 20; k += 5) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(4)
      .n(8)
      .k(k)
      .ks(3)
      .a_offset(83)
      .Test(xnn_f32_igemm_ukernel_4x8__wasmsimd_splat);
  }
}
1586
// Sweeps k = 1, 6, 11, 16 and zero_index(mz) for mz = 0..3 on the full
// m=4, n=8 tile, with ks(3) and a_offset(83).
TEST(F32_IGEMM_4X8__WASMSIMD_SPLAT, zero) {
  for (size_t k = 1; k <= 20; k += 5) {
    for (uint32_t mz = 0; mz < 4; mz++) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(4)
        .n(8)
        .k(k)
        .ks(3)
        .a_offset(83)
        .zero_index(mz)
        .Test(xnn_f32_igemm_ukernel_4x8__wasmsimd_splat);
    }
  }
}
1605
// Single run on the full tile (m=4, n=8, k=4) with cm_stride(11).
TEST(F32_IGEMM_4X8__WASMSIMD_SPLAT, strided_cm) {
  GemmMicrokernelTester()
    .mr(4)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(4)
    .n(8)
    .k(4)
    .cm_stride(11)
    .Test(xnn_f32_igemm_ukernel_4x8__wasmsimd_splat);
}
1618 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
1619
1620
1621 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run on the full tile: m=5, n=8, k=4.
TEST(F32_IGEMM_5X8__WASMSIMD_SPLAT, k_eq_4) {
  GemmMicrokernelTester()
    .mr(5)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(5)
    .n(8)
    .k(4)
    .Test(xnn_f32_igemm_ukernel_5x8__wasmsimd_splat);
}
1633
// Single run on the full tile (m=5, n=8, k=4) with cn_stride(11).
TEST(F32_IGEMM_5X8__WASMSIMD_SPLAT, strided_cn) {
  GemmMicrokernelTester()
    .mr(5)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(5)
    .n(8)
    .k(4)
    .cn_stride(11)
    .Test(xnn_f32_igemm_ukernel_5x8__wasmsimd_splat);
}
1646
// Sweeps every n in 1..8 and m in 1..5 at k=4, using iterations(1) per shape.
TEST(F32_IGEMM_5X8__WASMSIMD_SPLAT, k_eq_4_subtile) {
  for (uint32_t n = 1; n <= 8; n++) {
    for (uint32_t m = 1; m <= 5; m++) {
      GemmMicrokernelTester()
        .mr(5)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(m)
        .n(n)
        .k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_ukernel_5x8__wasmsimd_splat);
    }
  }
}
1663
// Sweeps m = 1..5 at n=8, k=4, using iterations(1) per shape.
TEST(F32_IGEMM_5X8__WASMSIMD_SPLAT, k_eq_4_subtile_m) {
  for (uint32_t m = 1; m <= 5; m++) {
    GemmMicrokernelTester()
      .mr(5)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(m)
      .n(8)
      .k(4)
      .iterations(1)
      .Test(xnn_f32_igemm_ukernel_5x8__wasmsimd_splat);
  }
}
1678
// Sweeps n = 1..8 at m=5, k=4, using iterations(1) per shape.
TEST(F32_IGEMM_5X8__WASMSIMD_SPLAT, k_eq_4_subtile_n) {
  for (uint32_t n = 1; n <= 8; n++) {
    GemmMicrokernelTester()
      .mr(5)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(5)
      .n(n)
      .k(4)
      .iterations(1)
      .Test(xnn_f32_igemm_ukernel_5x8__wasmsimd_splat);
  }
}
1693
// Sweeps k = 1..3 on the full m=5, n=8 tile.
TEST(F32_IGEMM_5X8__WASMSIMD_SPLAT, k_lt_4) {
  for (size_t k = 1; k < 4; k++) {
    GemmMicrokernelTester()
      .mr(5)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(5)
      .n(8)
      .k(k)
      .Test(xnn_f32_igemm_ukernel_5x8__wasmsimd_splat);
  }
}
1707
// Sweeps k = 1..3 with every n in 1..8 and m in 1..5, using iterations(1)
// per shape.
TEST(F32_IGEMM_5X8__WASMSIMD_SPLAT, k_lt_4_subtile) {
  for (size_t k = 1; k < 4; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 5; m++) {
        GemmMicrokernelTester()
          .mr(5)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_5x8__wasmsimd_splat);
      }
    }
  }
}
1726
// Sweeps k = 5..7 on the full m=5, n=8 tile.
TEST(F32_IGEMM_5X8__WASMSIMD_SPLAT, k_gt_4) {
  for (size_t k = 5; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(5)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(5)
      .n(8)
      .k(k)
      .Test(xnn_f32_igemm_ukernel_5x8__wasmsimd_splat);
  }
}
1740
// Sweeps k = 5..7 with every n in 1..8 and m in 1..5, using iterations(1)
// per shape.
TEST(F32_IGEMM_5X8__WASMSIMD_SPLAT, k_gt_4_subtile) {
  for (size_t k = 5; k < 8; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 5; m++) {
        GemmMicrokernelTester()
          .mr(5)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_5x8__wasmsimd_splat);
      }
    }
  }
}
1759
// Sweeps k = 8, 12, ..., 40 (multiples of 4) on the full m=5, n=8 tile.
TEST(F32_IGEMM_5X8__WASMSIMD_SPLAT, k_div_4) {
  for (size_t k = 8; k <= 40; k += 4) {
    GemmMicrokernelTester()
      .mr(5)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(5)
      .n(8)
      .k(k)
      .Test(xnn_f32_igemm_ukernel_5x8__wasmsimd_splat);
  }
}
1773
// Sweeps k = 8, 12, ..., 40 with every n in 1..8 and m in 1..5,
// using iterations(1) per shape.
TEST(F32_IGEMM_5X8__WASMSIMD_SPLAT, k_div_4_subtile) {
  for (size_t k = 8; k <= 40; k += 4) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 5; m++) {
        GemmMicrokernelTester()
          .mr(5)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_5x8__wasmsimd_splat);
      }
    }
  }
}
1792
// Sweeps n = 9..15 (above nr=8) with k = 1, 6, 11, 16 at m=5.
TEST(F32_IGEMM_5X8__WASMSIMD_SPLAT, n_gt_8) {
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(5)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(5)
        .n(n)
        .k(k)
        .Test(xnn_f32_igemm_ukernel_5x8__wasmsimd_splat);
    }
  }
}
1808
// Sweeps n = 9..15 with k = 1, 6, 11, 16 at m=5, with cn_stride(11).
TEST(F32_IGEMM_5X8__WASMSIMD_SPLAT, n_gt_8_strided_cn) {
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(5)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(5)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_f32_igemm_ukernel_5x8__wasmsimd_splat);
    }
  }
}
1825
// Sweeps n = 9..15, k = 1, 6, 11, 16 and m = 1..5, using iterations(1)
// per shape.
TEST(F32_IGEMM_5X8__WASMSIMD_SPLAT, n_gt_8_subtile) {
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      for (uint32_t m = 1; m <= 5; m++) {
        GemmMicrokernelTester()
          .mr(5)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_5x8__wasmsimd_splat);
      }
    }
  }
}
1844
// Sweeps n = 16 and 24 (multiples of nr=8) with k = 1, 6, 11, 16 at m=5.
TEST(F32_IGEMM_5X8__WASMSIMD_SPLAT, n_div_8) {
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(5)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(5)
        .n(n)
        .k(k)
        .Test(xnn_f32_igemm_ukernel_5x8__wasmsimd_splat);
    }
  }
}
1860
// Sweeps n = 16 and 24 with k = 1, 6, 11, 16 at m=5, with cn_stride(11).
TEST(F32_IGEMM_5X8__WASMSIMD_SPLAT, n_div_8_strided_cn) {
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(5)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(5)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_f32_igemm_ukernel_5x8__wasmsimd_splat);
    }
  }
}
1877
// Sweeps n = 16 and 24, k = 1, 6, 11, 16 and m = 1..5, using iterations(1)
// per shape.
TEST(F32_IGEMM_5X8__WASMSIMD_SPLAT, n_div_8_subtile) {
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      for (uint32_t m = 1; m <= 5; m++) {
        GemmMicrokernelTester()
          .mr(5)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_5x8__wasmsimd_splat);
      }
    }
  }
}
1896
// Sweeps k = 1, 6, 11, 16 on the full m=5, n=8 tile with ks(3).
TEST(F32_IGEMM_5X8__WASMSIMD_SPLAT, small_kernel) {
  for (size_t k = 1; k <= 20; k += 5) {
    GemmMicrokernelTester()
      .mr(5)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(5)
      .n(8)
      .k(k)
      .ks(3)
      .Test(xnn_f32_igemm_ukernel_5x8__wasmsimd_splat);
  }
}
1911
// Sweeps k = 1, 6, 11, 16 with every n in 1..8 and m in 1..5, with ks(3)
// and iterations(1) per shape.
TEST(F32_IGEMM_5X8__WASMSIMD_SPLAT, small_kernel_subtile) {
  for (size_t k = 1; k <= 20; k += 5) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 5; m++) {
        GemmMicrokernelTester()
          .mr(5)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_5x8__wasmsimd_splat);
      }
    }
  }
}
1931
// Sweeps n = 9..15 with k = 1, 6, 11, 16 at m=5, with ks(3).
TEST(F32_IGEMM_5X8__WASMSIMD_SPLAT, n_gt_8_small_kernel) {
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(5)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(5)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_f32_igemm_ukernel_5x8__wasmsimd_splat);
    }
  }
}
1948
// Sweeps n = 16 and 24 with k = 1, 6, 11, 16 at m=5, with ks(3).
TEST(F32_IGEMM_5X8__WASMSIMD_SPLAT, n_div_8_small_kernel) {
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(5)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(5)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_f32_igemm_ukernel_5x8__wasmsimd_splat);
    }
  }
}
1965
// Sweeps k = 1, 6, 11, 16 with every n in 1..8 and m in 1..5, with
// cm_stride(11) and iterations(1) per shape.
TEST(F32_IGEMM_5X8__WASMSIMD_SPLAT, strided_cm_subtile) {
  for (size_t k = 1; k <= 20; k += 5) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 5; m++) {
        GemmMicrokernelTester()
          .mr(5)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_5x8__wasmsimd_splat);
      }
    }
  }
}
1985
// Sweeps k = 1, 6, 11, 16 on the full m=5, n=8 tile with ks(3) and
// a_offset(103).
TEST(F32_IGEMM_5X8__WASMSIMD_SPLAT, a_offset) {
  for (size_t k = 1; k <= 20; k += 5) {
    GemmMicrokernelTester()
      .mr(5)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(5)
      .n(8)
      .k(k)
      .ks(3)
      .a_offset(103)
      .Test(xnn_f32_igemm_ukernel_5x8__wasmsimd_splat);
  }
}
2001
// Sweeps k = 1, 6, 11, 16 and zero_index(mz) for mz = 0..4 on the full
// m=5, n=8 tile, with ks(3) and a_offset(103).
TEST(F32_IGEMM_5X8__WASMSIMD_SPLAT, zero) {
  for (size_t k = 1; k <= 20; k += 5) {
    for (uint32_t mz = 0; mz < 5; mz++) {
      GemmMicrokernelTester()
        .mr(5)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(5)
        .n(8)
        .k(k)
        .ks(3)
        .a_offset(103)
        .zero_index(mz)
        .Test(xnn_f32_igemm_ukernel_5x8__wasmsimd_splat);
    }
  }
}
2020
// Single run on the full tile (m=5, n=8, k=4) with cm_stride(11).
TEST(F32_IGEMM_5X8__WASMSIMD_SPLAT, strided_cm) {
  GemmMicrokernelTester()
    .mr(5)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(5)
    .n(8)
    .k(4)
    .cm_stride(11)
    .Test(xnn_f32_igemm_ukernel_5x8__wasmsimd_splat);
}
2033 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
2034
2035
2036 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run on the full tile: m=6, n=8, k=1.
TEST(F32_IGEMM_6X8__WASMSIMD_LOADSPLAT, k_eq_1) {
  GemmMicrokernelTester()
    .mr(6)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(6)
    .n(8)
    .k(1)
    .Test(xnn_f32_igemm_ukernel_6x8__wasmsimd_loadsplat);
}
2048
// Single run on the full tile (m=6, n=8, k=1) with cn_stride(11).
TEST(F32_IGEMM_6X8__WASMSIMD_LOADSPLAT, strided_cn) {
  GemmMicrokernelTester()
    .mr(6)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(6)
    .n(8)
    .k(1)
    .cn_stride(11)
    .Test(xnn_f32_igemm_ukernel_6x8__wasmsimd_loadsplat);
}
2061
// Sweeps every n in 1..8 and m in 1..6 at k=1, using iterations(1) per shape.
TEST(F32_IGEMM_6X8__WASMSIMD_LOADSPLAT, k_eq_1_subtile) {
  for (uint32_t n = 1; n <= 8; n++) {
    for (uint32_t m = 1; m <= 6; m++) {
      GemmMicrokernelTester()
        .mr(6)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(m)
        .n(n)
        .k(1)
        .iterations(1)
        .Test(xnn_f32_igemm_ukernel_6x8__wasmsimd_loadsplat);
    }
  }
}
2078
// Sweeps m = 1..6 at n=8, k=1, using iterations(1) per shape.
TEST(F32_IGEMM_6X8__WASMSIMD_LOADSPLAT, k_eq_1_subtile_m) {
  for (uint32_t m = 1; m <= 6; m++) {
    GemmMicrokernelTester()
      .mr(6)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(m)
      .n(8)
      .k(1)
      .iterations(1)
      .Test(xnn_f32_igemm_ukernel_6x8__wasmsimd_loadsplat);
  }
}
2093
// Sweeps n = 1..8 at m=6, k=1, using iterations(1) per shape.
TEST(F32_IGEMM_6X8__WASMSIMD_LOADSPLAT, k_eq_1_subtile_n) {
  for (uint32_t n = 1; n <= 8; n++) {
    GemmMicrokernelTester()
      .mr(6)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(6)
      .n(n)
      .k(1)
      .iterations(1)
      .Test(xnn_f32_igemm_ukernel_6x8__wasmsimd_loadsplat);
  }
}
2108
// Sweeps k = 2..9 on the full m=6, n=8 tile.
TEST(F32_IGEMM_6X8__WASMSIMD_LOADSPLAT, k_gt_1) {
  for (size_t k = 2; k < 10; k++) {
    GemmMicrokernelTester()
      .mr(6)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(6)
      .n(8)
      .k(k)
      .Test(xnn_f32_igemm_ukernel_6x8__wasmsimd_loadsplat);
  }
}
2122
// Sweeps k = 2..9 with every n in 1..8 and m in 1..6, using iterations(1)
// per shape.
TEST(F32_IGEMM_6X8__WASMSIMD_LOADSPLAT, k_gt_1_subtile) {
  for (size_t k = 2; k < 10; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 6; m++) {
        GemmMicrokernelTester()
          .mr(6)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_6x8__wasmsimd_loadsplat);
      }
    }
  }
}
2141
// Sweeps n = 9..15 (above nr=8) with k = 1, 3, 5 at m=6.
TEST(F32_IGEMM_6X8__WASMSIMD_LOADSPLAT, n_gt_8) {
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(6)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(6)
        .n(n)
        .k(k)
        .Test(xnn_f32_igemm_ukernel_6x8__wasmsimd_loadsplat);
    }
  }
}
2157
// Sweeps n = 9..15 with k = 1, 3, 5 at m=6, with cn_stride(11).
TEST(F32_IGEMM_6X8__WASMSIMD_LOADSPLAT, n_gt_8_strided_cn) {
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(6)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(6)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_f32_igemm_ukernel_6x8__wasmsimd_loadsplat);
    }
  }
}
2174
// Sweeps n = 9..15, k = 1, 3, 5 and m = 1..6, using iterations(1) per shape.
TEST(F32_IGEMM_6X8__WASMSIMD_LOADSPLAT, n_gt_8_subtile) {
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 6; m++) {
        GemmMicrokernelTester()
          .mr(6)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_6x8__wasmsimd_loadsplat);
      }
    }
  }
}
2193
// Sweeps n = 16 and 24 (multiples of nr=8) with k = 1, 3, 5 at m=6.
TEST(F32_IGEMM_6X8__WASMSIMD_LOADSPLAT, n_div_8) {
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(6)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(6)
        .n(n)
        .k(k)
        .Test(xnn_f32_igemm_ukernel_6x8__wasmsimd_loadsplat);
    }
  }
}
2209
// Sweeps n = 16 and 24 with k = 1, 3, 5 at m=6, with cn_stride(11).
TEST(F32_IGEMM_6X8__WASMSIMD_LOADSPLAT, n_div_8_strided_cn) {
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(6)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(6)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_f32_igemm_ukernel_6x8__wasmsimd_loadsplat);
    }
  }
}
2226
// Sweeps n = 16 and 24, k = 1, 3, 5 and m = 1..6, using iterations(1)
// per shape.
TEST(F32_IGEMM_6X8__WASMSIMD_LOADSPLAT, n_div_8_subtile) {
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 6; m++) {
        GemmMicrokernelTester()
          .mr(6)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_6x8__wasmsimd_loadsplat);
      }
    }
  }
}
2245
// Sweeps k = 1, 3, 5 on the full m=6, n=8 tile with ks(3).
TEST(F32_IGEMM_6X8__WASMSIMD_LOADSPLAT, small_kernel) {
  for (size_t k = 1; k <= 5; k += 2) {
    GemmMicrokernelTester()
      .mr(6)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(6)
      .n(8)
      .k(k)
      .ks(3)
      .Test(xnn_f32_igemm_ukernel_6x8__wasmsimd_loadsplat);
  }
}
2260
// Sweeps k = 1, 3, 5 with every n in 1..8 and m in 1..6, with ks(3) and
// iterations(1) per shape.
TEST(F32_IGEMM_6X8__WASMSIMD_LOADSPLAT, small_kernel_subtile) {
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 6; m++) {
        GemmMicrokernelTester()
          .mr(6)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_6x8__wasmsimd_loadsplat);
      }
    }
  }
}
2280
// Sweeps n = 9..15 with k = 1, 3, 5 at m=6, with ks(3).
TEST(F32_IGEMM_6X8__WASMSIMD_LOADSPLAT, n_gt_8_small_kernel) {
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(6)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(6)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_f32_igemm_ukernel_6x8__wasmsimd_loadsplat);
    }
  }
}
2297
// Sweeps n = 16 and 24 with k = 1, 3, 5 at m=6, with ks(3).
TEST(F32_IGEMM_6X8__WASMSIMD_LOADSPLAT, n_div_8_small_kernel) {
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(6)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(6)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_f32_igemm_ukernel_6x8__wasmsimd_loadsplat);
    }
  }
}
2314
// Sweeps k = 1, 3, 5 with every n in 1..8 and m in 1..6, with cm_stride(11)
// and iterations(1) per shape.
TEST(F32_IGEMM_6X8__WASMSIMD_LOADSPLAT, strided_cm_subtile) {
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 6; m++) {
        GemmMicrokernelTester()
          .mr(6)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_6x8__wasmsimd_loadsplat);
      }
    }
  }
}
2334
// Sweeps k = 1, 3, 5 on the full m=6, n=8 tile with ks(3) and a_offset(37).
TEST(F32_IGEMM_6X8__WASMSIMD_LOADSPLAT, a_offset) {
  for (size_t k = 1; k <= 5; k += 2) {
    GemmMicrokernelTester()
      .mr(6)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(6)
      .n(8)
      .k(k)
      .ks(3)
      .a_offset(37)
      .Test(xnn_f32_igemm_ukernel_6x8__wasmsimd_loadsplat);
  }
}
2350
// Sweeps k = 1, 3, 5 and zero_index(mz) for mz = 0..5 on the full
// m=6, n=8 tile, with ks(3) and a_offset(37).
TEST(F32_IGEMM_6X8__WASMSIMD_LOADSPLAT, zero) {
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t mz = 0; mz < 6; mz++) {
      GemmMicrokernelTester()
        .mr(6)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(6)
        .n(8)
        .k(k)
        .ks(3)
        .a_offset(37)
        .zero_index(mz)
        .Test(xnn_f32_igemm_ukernel_6x8__wasmsimd_loadsplat);
    }
  }
}
2369
// Single run on the full tile (m=6, n=8, k=1) with cm_stride(11).
TEST(F32_IGEMM_6X8__WASMSIMD_LOADSPLAT, strided_cm) {
  GemmMicrokernelTester()
    .mr(6)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(6)
    .n(8)
    .k(1)
    .cm_stride(11)
    .Test(xnn_f32_igemm_ukernel_6x8__wasmsimd_loadsplat);
}
2382 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
2383
2384
2385 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run on the full tile: m=6, n=8, k=4 (tester sr=4).
TEST(F32_IGEMM_6X8S4__WASMSIMD, k_eq_4) {
  GemmMicrokernelTester()
    .mr(6)
    .nr(8)
    .kr(1)
    .sr(4)
    .m(6)
    .n(8)
    .k(4)
    .Test(xnn_f32_igemm_ukernel_6x8s4__wasmsimd);
}
2397
// Single run on the full tile (m=6, n=8, k=4) with cn_stride(11).
TEST(F32_IGEMM_6X8S4__WASMSIMD, strided_cn) {
  GemmMicrokernelTester()
    .mr(6)
    .nr(8)
    .kr(1)
    .sr(4)
    .m(6)
    .n(8)
    .k(4)
    .cn_stride(11)
    .Test(xnn_f32_igemm_ukernel_6x8s4__wasmsimd);
}
2410
// Sweeps every n in 1..8 and m in 1..6 at k=4, using iterations(1) per shape.
TEST(F32_IGEMM_6X8S4__WASMSIMD, k_eq_4_subtile) {
  for (uint32_t n = 1; n <= 8; n++) {
    for (uint32_t m = 1; m <= 6; m++) {
      GemmMicrokernelTester()
        .mr(6)
        .nr(8)
        .kr(1)
        .sr(4)
        .m(m)
        .n(n)
        .k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_ukernel_6x8s4__wasmsimd);
    }
  }
}
2427
// Sweeps m = 1..6 at n=8, k=4, using iterations(1) per shape.
TEST(F32_IGEMM_6X8S4__WASMSIMD, k_eq_4_subtile_m) {
  for (uint32_t m = 1; m <= 6; m++) {
    GemmMicrokernelTester()
      .mr(6)
      .nr(8)
      .kr(1)
      .sr(4)
      .m(m)
      .n(8)
      .k(4)
      .iterations(1)
      .Test(xnn_f32_igemm_ukernel_6x8s4__wasmsimd);
  }
}
2442
// Sweeps n = 1..8 at m=6, k=4, using iterations(1) per shape.
TEST(F32_IGEMM_6X8S4__WASMSIMD, k_eq_4_subtile_n) {
  for (uint32_t n = 1; n <= 8; n++) {
    GemmMicrokernelTester()
      .mr(6)
      .nr(8)
      .kr(1)
      .sr(4)
      .m(6)
      .n(n)
      .k(4)
      .iterations(1)
      .Test(xnn_f32_igemm_ukernel_6x8s4__wasmsimd);
  }
}
2457
// Sweeps k = 1..3 on the full m=6, n=8 tile.
TEST(F32_IGEMM_6X8S4__WASMSIMD, k_lt_4) {
  for (size_t k = 1; k < 4; k++) {
    GemmMicrokernelTester()
      .mr(6)
      .nr(8)
      .kr(1)
      .sr(4)
      .m(6)
      .n(8)
      .k(k)
      .Test(xnn_f32_igemm_ukernel_6x8s4__wasmsimd);
  }
}
2471
// Sweeps k = 1..3 with every n in 1..8 and m in 1..6, using iterations(1)
// per shape.
TEST(F32_IGEMM_6X8S4__WASMSIMD, k_lt_4_subtile) {
  for (size_t k = 1; k < 4; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 6; m++) {
        GemmMicrokernelTester()
          .mr(6)
          .nr(8)
          .kr(1)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_6x8s4__wasmsimd);
      }
    }
  }
}
2490
// Sweeps k = 5..7 on the full m=6, n=8 tile.
TEST(F32_IGEMM_6X8S4__WASMSIMD, k_gt_4) {
  for (size_t k = 5; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(6)
      .nr(8)
      .kr(1)
      .sr(4)
      .m(6)
      .n(8)
      .k(k)
      .Test(xnn_f32_igemm_ukernel_6x8s4__wasmsimd);
  }
}
2504
// Sweeps k = 5..7 with every n in 1..8 and m in 1..6, using iterations(1)
// per shape.
TEST(F32_IGEMM_6X8S4__WASMSIMD, k_gt_4_subtile) {
  for (size_t k = 5; k < 8; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 6; m++) {
        GemmMicrokernelTester()
          .mr(6)
          .nr(8)
          .kr(1)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_6x8s4__wasmsimd);
      }
    }
  }
}
2523
// Sweeps k = 8, 12, ..., 40 (multiples of 4) on the full m=6, n=8 tile.
TEST(F32_IGEMM_6X8S4__WASMSIMD, k_div_4) {
  for (size_t k = 8; k <= 40; k += 4) {
    GemmMicrokernelTester()
      .mr(6)
      .nr(8)
      .kr(1)
      .sr(4)
      .m(6)
      .n(8)
      .k(k)
      .Test(xnn_f32_igemm_ukernel_6x8s4__wasmsimd);
  }
}
2537
// Sweeps k = 8, 12, ..., 40 with every n in 1..8 and m in 1..6,
// using iterations(1) per shape.
TEST(F32_IGEMM_6X8S4__WASMSIMD, k_div_4_subtile) {
  for (size_t k = 8; k <= 40; k += 4) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 6; m++) {
        GemmMicrokernelTester()
          .mr(6)
          .nr(8)
          .kr(1)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_6x8s4__wasmsimd);
      }
    }
  }
}
2556
// Sweeps n = 9..15 (above nr=8) with k = 1, 6, 11, 16 at m=6.
TEST(F32_IGEMM_6X8S4__WASMSIMD, n_gt_8) {
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(6)
        .nr(8)
        .kr(1)
        .sr(4)
        .m(6)
        .n(n)
        .k(k)
        .Test(xnn_f32_igemm_ukernel_6x8s4__wasmsimd);
    }
  }
}
2572
// Sweeps n = 9..15 with k = 1, 6, 11, 16 at m=6, with cn_stride(11).
TEST(F32_IGEMM_6X8S4__WASMSIMD, n_gt_8_strided_cn) {
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(6)
        .nr(8)
        .kr(1)
        .sr(4)
        .m(6)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_f32_igemm_ukernel_6x8s4__wasmsimd);
    }
  }
}
2589
// Sweeps n = 9..15, k = 1, 6, 11, 16 and m = 1..6, using iterations(1)
// per shape.
TEST(F32_IGEMM_6X8S4__WASMSIMD, n_gt_8_subtile) {
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      for (uint32_t m = 1; m <= 6; m++) {
        GemmMicrokernelTester()
          .mr(6)
          .nr(8)
          .kr(1)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_6x8s4__wasmsimd);
      }
    }
  }
}
2608
// Sweeps n = 16 and 24 (multiples of nr=8) with k = 1, 6, 11, 16 at m=6.
TEST(F32_IGEMM_6X8S4__WASMSIMD, n_div_8) {
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(6)
        .nr(8)
        .kr(1)
        .sr(4)
        .m(6)
        .n(n)
        .k(k)
        .Test(xnn_f32_igemm_ukernel_6x8s4__wasmsimd);
    }
  }
}
2624
// Sweeps n = 16 and 24 with k = 1, 6, 11, 16 at m=6, with cn_stride(11).
TEST(F32_IGEMM_6X8S4__WASMSIMD, n_div_8_strided_cn) {
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(6)
        .nr(8)
        .kr(1)
        .sr(4)
        .m(6)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_f32_igemm_ukernel_6x8s4__wasmsimd);
    }
  }
}
2641
// Sweeps n = 16 and 24, k = 1, 6, 11, 16 and m = 1..6, using iterations(1)
// per shape.
TEST(F32_IGEMM_6X8S4__WASMSIMD, n_div_8_subtile) {
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      for (uint32_t m = 1; m <= 6; m++) {
        GemmMicrokernelTester()
          .mr(6)
          .nr(8)
          .kr(1)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_6x8s4__wasmsimd);
      }
    }
  }
}
2660
// Sweeps k = 1, 6, 11, 16 on the full m=6, n=8 tile with ks(3).
TEST(F32_IGEMM_6X8S4__WASMSIMD, small_kernel) {
  for (size_t k = 1; k <= 20; k += 5) {
    GemmMicrokernelTester()
      .mr(6)
      .nr(8)
      .kr(1)
      .sr(4)
      .m(6)
      .n(8)
      .k(k)
      .ks(3)
      .Test(xnn_f32_igemm_ukernel_6x8s4__wasmsimd);
  }
}
2675
// Sweeps k = 1, 6, 11, 16 with every n in 1..8 and m in 1..6, with ks(3)
// and iterations(1) per shape.
TEST(F32_IGEMM_6X8S4__WASMSIMD, small_kernel_subtile) {
  for (size_t k = 1; k <= 20; k += 5) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 6; m++) {
        GemmMicrokernelTester()
          .mr(6)
          .nr(8)
          .kr(1)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_6x8s4__wasmsimd);
      }
    }
  }
}
2695
// Sweeps n = 9..15 with k = 1, 6, 11, 16 at m=6, with ks(3).
TEST(F32_IGEMM_6X8S4__WASMSIMD, n_gt_8_small_kernel) {
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(6)
        .nr(8)
        .kr(1)
        .sr(4)
        .m(6)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_f32_igemm_ukernel_6x8s4__wasmsimd);
    }
  }
}
2712
// Sweeps n = 16 and 24 with k = 1, 6, 11, 16 at m=6, with ks(3).
TEST(F32_IGEMM_6X8S4__WASMSIMD, n_div_8_small_kernel) {
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(6)
        .nr(8)
        .kr(1)
        .sr(4)
        .m(6)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_f32_igemm_ukernel_6x8s4__wasmsimd);
    }
  }
}
2729
// Sweeps k = 1, 6, 11, 16 with every n in 1..8 and m in 1..6, with
// cm_stride(11) and iterations(1) per shape.
TEST(F32_IGEMM_6X8S4__WASMSIMD, strided_cm_subtile) {
  for (size_t k = 1; k <= 20; k += 5) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 6; m++) {
        GemmMicrokernelTester()
          .mr(6)
          .nr(8)
          .kr(1)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_6x8s4__wasmsimd);
      }
    }
  }
}
2749
// Sweeps k = 1, 6, 11, 16 on the full m=6, n=8 tile with ks(3) and
// a_offset(127).
TEST(F32_IGEMM_6X8S4__WASMSIMD, a_offset) {
  for (size_t k = 1; k <= 20; k += 5) {
    GemmMicrokernelTester()
      .mr(6)
      .nr(8)
      .kr(1)
      .sr(4)
      .m(6)
      .n(8)
      .k(k)
      .ks(3)
      .a_offset(127)
      .Test(xnn_f32_igemm_ukernel_6x8s4__wasmsimd);
  }
}
2765
// Runs the full 6x8 tile with ks(3) and a_offset(127), setting zero_index to
// each value in [0, 5], for k in {1, 6, 11, 16}.
TEST(F32_IGEMM_6X8S4__WASMSIMD, zero) {
  for (size_t kk = 1; kk <= 20; kk += 5) {
    for (uint32_t zi = 0; zi < 6; ++zi) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(4)
        .m(6).n(8).k(kk)
        .ks(3)
        .a_offset(127)
        .zero_index(zi)
        .Test(xnn_f32_igemm_ukernel_6x8s4__wasmsimd);
    }
  }
}
2784
// Runs the full 6x8 tile at k = 4 with cm_stride(11).
TEST(F32_IGEMM_6X8S4__WASMSIMD, strided_cm) {
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(4)
    .m(6).n(8).k(4)
    .cm_stride(11)
    .Test(xnn_f32_igemm_ukernel_6x8s4__wasmsimd);
}
2797 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
2798
2799
2800 #if XNN_ARCH_WASMRELAXEDSIMD
// Runs the full 1x8 tile at k = 1.
TEST(F32_IGEMM_1X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_eq_1) {
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(1)
    .Test(xnn_f32_igemm_ukernel_1x8__wasmrelaxedsimd_fma_loadsplat);
}
2812
// Runs the full 1x8 tile at k = 1 with cn_stride(11).
TEST(F32_IGEMM_1X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, strided_cn) {
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(1)
    .cn_stride(11)
    .Test(xnn_f32_igemm_ukernel_1x8__wasmrelaxedsimd_fma_loadsplat);
}
2825
// Runs k = 1 for every n in [1, 8] and m in [1, 1], one iteration each.
TEST(F32_IGEMM_1X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_eq_1_subtile) {
  for (uint32_t nn = 1; nn <= 8; ++nn) {
    for (uint32_t mm = 1; mm <= 1; ++mm) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(mm).n(nn).k(1)
        .iterations(1)
        .Test(xnn_f32_igemm_ukernel_1x8__wasmrelaxedsimd_fma_loadsplat);
    }
  }
}
2842
// Runs k = 1, n = 8 for every m in [1, 1], one iteration each.
TEST(F32_IGEMM_1X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_eq_1_subtile_m) {
  for (uint32_t mm = 1; mm <= 1; ++mm) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(mm).n(8).k(1)
      .iterations(1)
      .Test(xnn_f32_igemm_ukernel_1x8__wasmrelaxedsimd_fma_loadsplat);
  }
}
2857
// Runs k = 1, m = 1 for every n in [1, 8], one iteration each.
TEST(F32_IGEMM_1X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_eq_1_subtile_n) {
  for (uint32_t nn = 1; nn <= 8; ++nn) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(nn).k(1)
      .iterations(1)
      .Test(xnn_f32_igemm_ukernel_1x8__wasmrelaxedsimd_fma_loadsplat);
  }
}
2872
// Runs the full 1x8 tile for every k in [2, 9].
TEST(F32_IGEMM_1X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_gt_1) {
  for (size_t kk = 2; kk < 10; ++kk) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(kk)
      .Test(xnn_f32_igemm_ukernel_1x8__wasmrelaxedsimd_fma_loadsplat);
  }
}
2886
// Runs every k in [2, 9], n in [1, 8] and m in [1, 1], one iteration each.
TEST(F32_IGEMM_1X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_gt_1_subtile) {
  for (size_t kk = 2; kk < 10; ++kk) {
    for (uint32_t nn = 1; nn <= 8; ++nn) {
      for (uint32_t mm = 1; mm <= 1; ++mm) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(mm).n(nn).k(kk)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_1x8__wasmrelaxedsimd_fma_loadsplat);
      }
    }
  }
}
2905
// Runs m = 1 with n in [9, 15] for k in {1, 3, 5}.
TEST(F32_IGEMM_1X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_gt_8) {
  for (uint32_t nn = 9; nn < 16; ++nn) {
    for (size_t kk = 1; kk <= 5; kk += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(nn).k(kk)
        .Test(xnn_f32_igemm_ukernel_1x8__wasmrelaxedsimd_fma_loadsplat);
    }
  }
}
2921
// Runs m = 1 with n in [9, 15] and cn_stride(11) for k in {1, 3, 5}.
TEST(F32_IGEMM_1X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_gt_8_strided_cn) {
  for (uint32_t nn = 9; nn < 16; ++nn) {
    for (size_t kk = 1; kk <= 5; kk += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(nn).k(kk)
        .cn_stride(11)
        .Test(xnn_f32_igemm_ukernel_1x8__wasmrelaxedsimd_fma_loadsplat);
    }
  }
}
2938
// Runs n in [9, 15], k in {1, 3, 5} and m in [1, 1], one iteration each.
TEST(F32_IGEMM_1X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_gt_8_subtile) {
  for (uint32_t nn = 9; nn < 16; ++nn) {
    for (size_t kk = 1; kk <= 5; kk += 2) {
      for (uint32_t mm = 1; mm <= 1; ++mm) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(mm).n(nn).k(kk)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_1x8__wasmrelaxedsimd_fma_loadsplat);
      }
    }
  }
}
2957
// Runs m = 1 with n in {16, 24} for k in {1, 3, 5}.
TEST(F32_IGEMM_1X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_div_8) {
  for (uint32_t nn = 16; nn <= 24; nn += 8) {
    for (size_t kk = 1; kk <= 5; kk += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(nn).k(kk)
        .Test(xnn_f32_igemm_ukernel_1x8__wasmrelaxedsimd_fma_loadsplat);
    }
  }
}
2973
// Runs m = 1 with n in {16, 24} and cn_stride(11) for k in {1, 3, 5}.
TEST(F32_IGEMM_1X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_div_8_strided_cn) {
  for (uint32_t nn = 16; nn <= 24; nn += 8) {
    for (size_t kk = 1; kk <= 5; kk += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(nn).k(kk)
        .cn_stride(11)
        .Test(xnn_f32_igemm_ukernel_1x8__wasmrelaxedsimd_fma_loadsplat);
    }
  }
}
2990
// Runs n in {16, 24}, k in {1, 3, 5} and m in [1, 1], one iteration each.
TEST(F32_IGEMM_1X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_div_8_subtile) {
  for (uint32_t nn = 16; nn <= 24; nn += 8) {
    for (size_t kk = 1; kk <= 5; kk += 2) {
      for (uint32_t mm = 1; mm <= 1; ++mm) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(mm).n(nn).k(kk)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_1x8__wasmrelaxedsimd_fma_loadsplat);
      }
    }
  }
}
3009
// Runs the full 1x8 tile with ks(3) for k in {1, 3, 5}.
TEST(F32_IGEMM_1X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, small_kernel) {
  for (size_t kk = 1; kk <= 5; kk += 2) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(kk)
      .ks(3)
      .Test(xnn_f32_igemm_ukernel_1x8__wasmrelaxedsimd_fma_loadsplat);
  }
}
3024
// Runs k in {1, 3, 5}, n in [1, 8] and m in [1, 1] with ks(3), one iteration each.
TEST(F32_IGEMM_1X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, small_kernel_subtile) {
  for (size_t kk = 1; kk <= 5; kk += 2) {
    for (uint32_t nn = 1; nn <= 8; ++nn) {
      for (uint32_t mm = 1; mm <= 1; ++mm) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(mm).n(nn).k(kk)
          .ks(3)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_1x8__wasmrelaxedsimd_fma_loadsplat);
      }
    }
  }
}
3044
// Runs m = 1 with n in [9, 15] and ks(3) for k in {1, 3, 5}.
TEST(F32_IGEMM_1X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_gt_8_small_kernel) {
  for (uint32_t nn = 9; nn < 16; ++nn) {
    for (size_t kk = 1; kk <= 5; kk += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(nn).k(kk)
        .ks(3)
        .Test(xnn_f32_igemm_ukernel_1x8__wasmrelaxedsimd_fma_loadsplat);
    }
  }
}
3061
// Runs m = 1 with n in {16, 24} and ks(3) for k in {1, 3, 5}.
TEST(F32_IGEMM_1X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_div_8_small_kernel) {
  for (uint32_t nn = 16; nn <= 24; nn += 8) {
    for (size_t kk = 1; kk <= 5; kk += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(nn).k(kk)
        .ks(3)
        .Test(xnn_f32_igemm_ukernel_1x8__wasmrelaxedsimd_fma_loadsplat);
    }
  }
}
3078
// Runs k in {1, 3, 5}, n in [1, 8] and m in [1, 1] with cm_stride(11),
// one iteration each.
TEST(F32_IGEMM_1X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, strided_cm_subtile) {
  for (size_t kk = 1; kk <= 5; kk += 2) {
    for (uint32_t nn = 1; nn <= 8; ++nn) {
      for (uint32_t mm = 1; mm <= 1; ++mm) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(mm).n(nn).k(kk)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_1x8__wasmrelaxedsimd_fma_loadsplat);
      }
    }
  }
}
3098
// Runs the full 1x8 tile with ks(3) and a_offset(7) for k in {1, 3, 5}.
TEST(F32_IGEMM_1X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, a_offset) {
  for (size_t kk = 1; kk <= 5; kk += 2) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(kk)
      .ks(3)
      .a_offset(7)
      .Test(xnn_f32_igemm_ukernel_1x8__wasmrelaxedsimd_fma_loadsplat);
  }
}
3114
// Runs the full 1x8 tile with ks(3) and a_offset(7), setting zero_index to
// each value in [0, 0], for k in {1, 3, 5}.
TEST(F32_IGEMM_1X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, zero) {
  for (size_t kk = 1; kk <= 5; kk += 2) {
    for (uint32_t zi = 0; zi < 1; ++zi) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(kk)
        .ks(3)
        .a_offset(7)
        .zero_index(zi)
        .Test(xnn_f32_igemm_ukernel_1x8__wasmrelaxedsimd_fma_loadsplat);
    }
  }
}
3133
// Runs the full 1x8 tile at k = 1 with cm_stride(11).
TEST(F32_IGEMM_1X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, strided_cm) {
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(1)
    .cm_stride(11)
    .Test(xnn_f32_igemm_ukernel_1x8__wasmrelaxedsimd_fma_loadsplat);
}
3146 #endif // XNN_ARCH_WASMRELAXEDSIMD
3147
3148
3149 #if XNN_ARCH_WASMRELAXEDSIMD
// Runs the full 3x8 tile at k = 1.
TEST(F32_IGEMM_3X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_eq_1) {
  GemmMicrokernelTester()
    .mr(3).nr(8).kr(1).sr(1)
    .m(3).n(8).k(1)
    .Test(xnn_f32_igemm_ukernel_3x8__wasmrelaxedsimd_fma_loadsplat);
}
3161
// Runs the full 3x8 tile at k = 1 with cn_stride(11).
TEST(F32_IGEMM_3X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, strided_cn) {
  GemmMicrokernelTester()
    .mr(3).nr(8).kr(1).sr(1)
    .m(3).n(8).k(1)
    .cn_stride(11)
    .Test(xnn_f32_igemm_ukernel_3x8__wasmrelaxedsimd_fma_loadsplat);
}
3174
// Runs k = 1 for every n in [1, 8] and m in [1, 3], one iteration each.
TEST(F32_IGEMM_3X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_eq_1_subtile) {
  for (uint32_t nn = 1; nn <= 8; ++nn) {
    for (uint32_t mm = 1; mm <= 3; ++mm) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(mm).n(nn).k(1)
        .iterations(1)
        .Test(xnn_f32_igemm_ukernel_3x8__wasmrelaxedsimd_fma_loadsplat);
    }
  }
}
3191
// Runs k = 1, n = 8 for every m in [1, 3], one iteration each.
TEST(F32_IGEMM_3X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_eq_1_subtile_m) {
  for (uint32_t mm = 1; mm <= 3; ++mm) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(mm).n(8).k(1)
      .iterations(1)
      .Test(xnn_f32_igemm_ukernel_3x8__wasmrelaxedsimd_fma_loadsplat);
  }
}
3206
// Runs k = 1, m = 3 for every n in [1, 8], one iteration each.
TEST(F32_IGEMM_3X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_eq_1_subtile_n) {
  for (uint32_t nn = 1; nn <= 8; ++nn) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(3).n(nn).k(1)
      .iterations(1)
      .Test(xnn_f32_igemm_ukernel_3x8__wasmrelaxedsimd_fma_loadsplat);
  }
}
3221
// Runs the full 3x8 tile for every k in [2, 9].
TEST(F32_IGEMM_3X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_gt_1) {
  for (size_t kk = 2; kk < 10; ++kk) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(kk)
      .Test(xnn_f32_igemm_ukernel_3x8__wasmrelaxedsimd_fma_loadsplat);
  }
}
3235
// Runs every k in [2, 9], n in [1, 8] and m in [1, 3], one iteration each.
TEST(F32_IGEMM_3X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_gt_1_subtile) {
  for (size_t kk = 2; kk < 10; ++kk) {
    for (uint32_t nn = 1; nn <= 8; ++nn) {
      for (uint32_t mm = 1; mm <= 3; ++mm) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(mm).n(nn).k(kk)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_3x8__wasmrelaxedsimd_fma_loadsplat);
      }
    }
  }
}
3254
// Runs m = 3 with n in [9, 15] for k in {1, 3, 5}.
TEST(F32_IGEMM_3X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_gt_8) {
  for (uint32_t nn = 9; nn < 16; ++nn) {
    for (size_t kk = 1; kk <= 5; kk += 2) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(nn).k(kk)
        .Test(xnn_f32_igemm_ukernel_3x8__wasmrelaxedsimd_fma_loadsplat);
    }
  }
}
3270
// Runs m = 3 with n in [9, 15] and cn_stride(11) for k in {1, 3, 5}.
TEST(F32_IGEMM_3X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_gt_8_strided_cn) {
  for (uint32_t nn = 9; nn < 16; ++nn) {
    for (size_t kk = 1; kk <= 5; kk += 2) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(nn).k(kk)
        .cn_stride(11)
        .Test(xnn_f32_igemm_ukernel_3x8__wasmrelaxedsimd_fma_loadsplat);
    }
  }
}
3287
// Runs n in [9, 15], k in {1, 3, 5} and m in [1, 3], one iteration each.
TEST(F32_IGEMM_3X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_gt_8_subtile) {
  for (uint32_t nn = 9; nn < 16; ++nn) {
    for (size_t kk = 1; kk <= 5; kk += 2) {
      for (uint32_t mm = 1; mm <= 3; ++mm) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(mm).n(nn).k(kk)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_3x8__wasmrelaxedsimd_fma_loadsplat);
      }
    }
  }
}
3306
// Runs m = 3 with n in {16, 24} for k in {1, 3, 5}.
TEST(F32_IGEMM_3X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_div_8) {
  for (uint32_t nn = 16; nn <= 24; nn += 8) {
    for (size_t kk = 1; kk <= 5; kk += 2) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(nn).k(kk)
        .Test(xnn_f32_igemm_ukernel_3x8__wasmrelaxedsimd_fma_loadsplat);
    }
  }
}
3322
// Runs m = 3 with n in {16, 24} and cn_stride(11) for k in {1, 3, 5}.
TEST(F32_IGEMM_3X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_div_8_strided_cn) {
  for (uint32_t nn = 16; nn <= 24; nn += 8) {
    for (size_t kk = 1; kk <= 5; kk += 2) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(nn).k(kk)
        .cn_stride(11)
        .Test(xnn_f32_igemm_ukernel_3x8__wasmrelaxedsimd_fma_loadsplat);
    }
  }
}
3339
// Runs n in {16, 24}, k in {1, 3, 5} and m in [1, 3], one iteration each.
TEST(F32_IGEMM_3X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_div_8_subtile) {
  for (uint32_t nn = 16; nn <= 24; nn += 8) {
    for (size_t kk = 1; kk <= 5; kk += 2) {
      for (uint32_t mm = 1; mm <= 3; ++mm) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(mm).n(nn).k(kk)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_3x8__wasmrelaxedsimd_fma_loadsplat);
      }
    }
  }
}
3358
// Runs the full 3x8 tile with ks(3) for k in {1, 3, 5}.
TEST(F32_IGEMM_3X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, small_kernel) {
  for (size_t kk = 1; kk <= 5; kk += 2) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(kk)
      .ks(3)
      .Test(xnn_f32_igemm_ukernel_3x8__wasmrelaxedsimd_fma_loadsplat);
  }
}
3373
// Runs k in {1, 3, 5}, n in [1, 8] and m in [1, 3] with ks(3), one iteration each.
TEST(F32_IGEMM_3X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, small_kernel_subtile) {
  for (size_t kk = 1; kk <= 5; kk += 2) {
    for (uint32_t nn = 1; nn <= 8; ++nn) {
      for (uint32_t mm = 1; mm <= 3; ++mm) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(mm).n(nn).k(kk)
          .ks(3)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_3x8__wasmrelaxedsimd_fma_loadsplat);
      }
    }
  }
}
3393
// Runs m = 3 with n in [9, 15] and ks(3) for k in {1, 3, 5}.
TEST(F32_IGEMM_3X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_gt_8_small_kernel) {
  for (uint32_t nn = 9; nn < 16; ++nn) {
    for (size_t kk = 1; kk <= 5; kk += 2) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(nn).k(kk)
        .ks(3)
        .Test(xnn_f32_igemm_ukernel_3x8__wasmrelaxedsimd_fma_loadsplat);
    }
  }
}
3410
// Runs m = 3 with n in {16, 24} and ks(3) for k in {1, 3, 5}.
TEST(F32_IGEMM_3X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_div_8_small_kernel) {
  for (uint32_t nn = 16; nn <= 24; nn += 8) {
    for (size_t kk = 1; kk <= 5; kk += 2) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(nn).k(kk)
        .ks(3)
        .Test(xnn_f32_igemm_ukernel_3x8__wasmrelaxedsimd_fma_loadsplat);
    }
  }
}
3427
// Runs k in {1, 3, 5}, n in [1, 8] and m in [1, 3] with cm_stride(11),
// one iteration each.
TEST(F32_IGEMM_3X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, strided_cm_subtile) {
  for (size_t kk = 1; kk <= 5; kk += 2) {
    for (uint32_t nn = 1; nn <= 8; ++nn) {
      for (uint32_t mm = 1; mm <= 3; ++mm) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(mm).n(nn).k(kk)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_3x8__wasmrelaxedsimd_fma_loadsplat);
      }
    }
  }
}
3447
// Runs the full 3x8 tile with ks(3) and a_offset(17) for k in {1, 3, 5}.
TEST(F32_IGEMM_3X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, a_offset) {
  for (size_t kk = 1; kk <= 5; kk += 2) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(kk)
      .ks(3)
      .a_offset(17)
      .Test(xnn_f32_igemm_ukernel_3x8__wasmrelaxedsimd_fma_loadsplat);
  }
}
3463
// Runs the full 3x8 tile with ks(3) and a_offset(17), setting zero_index to
// each value in [0, 2], for k in {1, 3, 5}.
TEST(F32_IGEMM_3X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, zero) {
  for (size_t kk = 1; kk <= 5; kk += 2) {
    for (uint32_t zi = 0; zi < 3; ++zi) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(8).k(kk)
        .ks(3)
        .a_offset(17)
        .zero_index(zi)
        .Test(xnn_f32_igemm_ukernel_3x8__wasmrelaxedsimd_fma_loadsplat);
    }
  }
}
3482
// Runs the full 3x8 tile at k = 1 with cm_stride(11).
TEST(F32_IGEMM_3X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, strided_cm) {
  GemmMicrokernelTester()
    .mr(3).nr(8).kr(1).sr(1)
    .m(3).n(8).k(1)
    .cm_stride(11)
    .Test(xnn_f32_igemm_ukernel_3x8__wasmrelaxedsimd_fma_loadsplat);
}
3495 #endif // XNN_ARCH_WASMRELAXEDSIMD
3496
3497
3498 #if XNN_ARCH_WASMRELAXEDSIMD
// Runs the full 3x8 tile at k = 4.
TEST(F32_IGEMM_3X8S4__WASMRELAXEDSIMD_FMA, k_eq_4) {
  GemmMicrokernelTester()
    .mr(3).nr(8).kr(1).sr(4)
    .m(3).n(8).k(4)
    .Test(xnn_f32_igemm_ukernel_3x8s4__wasmrelaxedsimd_fma);
}
3510
// Runs the full 3x8 tile at k = 4 with cn_stride(11).
TEST(F32_IGEMM_3X8S4__WASMRELAXEDSIMD_FMA, strided_cn) {
  GemmMicrokernelTester()
    .mr(3).nr(8).kr(1).sr(4)
    .m(3).n(8).k(4)
    .cn_stride(11)
    .Test(xnn_f32_igemm_ukernel_3x8s4__wasmrelaxedsimd_fma);
}
3523
// Runs k = 4 for every n in [1, 8] and m in [1, 3], one iteration each.
TEST(F32_IGEMM_3X8S4__WASMRELAXEDSIMD_FMA, k_eq_4_subtile) {
  for (uint32_t nn = 1; nn <= 8; ++nn) {
    for (uint32_t mm = 1; mm <= 3; ++mm) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(4)
        .m(mm).n(nn).k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_ukernel_3x8s4__wasmrelaxedsimd_fma);
    }
  }
}
3540
// Runs k = 4, n = 8 for every m in [1, 3], one iteration each.
TEST(F32_IGEMM_3X8S4__WASMRELAXEDSIMD_FMA, k_eq_4_subtile_m) {
  for (uint32_t mm = 1; mm <= 3; ++mm) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(4)
      .m(mm).n(8).k(4)
      .iterations(1)
      .Test(xnn_f32_igemm_ukernel_3x8s4__wasmrelaxedsimd_fma);
  }
}
3555
// Runs k = 4, m = 3 for every n in [1, 8], one iteration each.
TEST(F32_IGEMM_3X8S4__WASMRELAXEDSIMD_FMA, k_eq_4_subtile_n) {
  for (uint32_t nn = 1; nn <= 8; ++nn) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(4)
      .m(3).n(nn).k(4)
      .iterations(1)
      .Test(xnn_f32_igemm_ukernel_3x8s4__wasmrelaxedsimd_fma);
  }
}
3570
// Runs the full 3x8 tile for every k in [1, 3].
TEST(F32_IGEMM_3X8S4__WASMRELAXEDSIMD_FMA, k_lt_4) {
  for (size_t kk = 1; kk < 4; ++kk) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(4)
      .m(3).n(8).k(kk)
      .Test(xnn_f32_igemm_ukernel_3x8s4__wasmrelaxedsimd_fma);
  }
}
3584
// Runs every k in [1, 3], n in [1, 8] and m in [1, 3], one iteration each.
TEST(F32_IGEMM_3X8S4__WASMRELAXEDSIMD_FMA, k_lt_4_subtile) {
  for (size_t kk = 1; kk < 4; ++kk) {
    for (uint32_t nn = 1; nn <= 8; ++nn) {
      for (uint32_t mm = 1; mm <= 3; ++mm) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(4)
          .m(mm).n(nn).k(kk)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_3x8s4__wasmrelaxedsimd_fma);
      }
    }
  }
}
3603
// Runs the full 3x8 tile for every k in [5, 7].
TEST(F32_IGEMM_3X8S4__WASMRELAXEDSIMD_FMA, k_gt_4) {
  for (size_t kk = 5; kk < 8; ++kk) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(4)
      .m(3).n(8).k(kk)
      .Test(xnn_f32_igemm_ukernel_3x8s4__wasmrelaxedsimd_fma);
  }
}
3617
// Runs every k in [5, 7], n in [1, 8] and m in [1, 3], one iteration each.
TEST(F32_IGEMM_3X8S4__WASMRELAXEDSIMD_FMA, k_gt_4_subtile) {
  for (size_t kk = 5; kk < 8; ++kk) {
    for (uint32_t nn = 1; nn <= 8; ++nn) {
      for (uint32_t mm = 1; mm <= 3; ++mm) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(4)
          .m(mm).n(nn).k(kk)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_3x8s4__wasmrelaxedsimd_fma);
      }
    }
  }
}
3636
// Runs the full 3x8 tile for k = 8, 12, ..., 40.
TEST(F32_IGEMM_3X8S4__WASMRELAXEDSIMD_FMA, k_div_4) {
  for (size_t kk = 8; kk <= 40; kk += 4) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(4)
      .m(3).n(8).k(kk)
      .Test(xnn_f32_igemm_ukernel_3x8s4__wasmrelaxedsimd_fma);
  }
}
3650
// Runs k = 8, 12, ..., 40 for every n in [1, 8] and m in [1, 3],
// one iteration each.
TEST(F32_IGEMM_3X8S4__WASMRELAXEDSIMD_FMA, k_div_4_subtile) {
  for (size_t kk = 8; kk <= 40; kk += 4) {
    for (uint32_t nn = 1; nn <= 8; ++nn) {
      for (uint32_t mm = 1; mm <= 3; ++mm) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(4)
          .m(mm).n(nn).k(kk)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_3x8s4__wasmrelaxedsimd_fma);
      }
    }
  }
}
3669
// Runs m = 3 with n in [9, 15] for k in {1, 6, 11, 16}.
TEST(F32_IGEMM_3X8S4__WASMRELAXEDSIMD_FMA, n_gt_8) {
  for (uint32_t nn = 9; nn < 16; ++nn) {
    for (size_t kk = 1; kk <= 20; kk += 5) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(4)
        .m(3).n(nn).k(kk)
        .Test(xnn_f32_igemm_ukernel_3x8s4__wasmrelaxedsimd_fma);
    }
  }
}
3685
// Runs m = 3 with n in [9, 15] and cn_stride(11) for k in {1, 6, 11, 16}.
TEST(F32_IGEMM_3X8S4__WASMRELAXEDSIMD_FMA, n_gt_8_strided_cn) {
  for (uint32_t nn = 9; nn < 16; ++nn) {
    for (size_t kk = 1; kk <= 20; kk += 5) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(4)
        .m(3).n(nn).k(kk)
        .cn_stride(11)
        .Test(xnn_f32_igemm_ukernel_3x8s4__wasmrelaxedsimd_fma);
    }
  }
}
3702
// Runs n in [9, 15], k in {1, 6, 11, 16} and m in [1, 3], one iteration each.
TEST(F32_IGEMM_3X8S4__WASMRELAXEDSIMD_FMA, n_gt_8_subtile) {
  for (uint32_t nn = 9; nn < 16; ++nn) {
    for (size_t kk = 1; kk <= 20; kk += 5) {
      for (uint32_t mm = 1; mm <= 3; ++mm) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(4)
          .m(mm).n(nn).k(kk)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_3x8s4__wasmrelaxedsimd_fma);
      }
    }
  }
}
3721
// Runs m = 3 with n in {16, 24} for k in {1, 6, 11, 16}.
TEST(F32_IGEMM_3X8S4__WASMRELAXEDSIMD_FMA, n_div_8) {
  for (uint32_t nn = 16; nn <= 24; nn += 8) {
    for (size_t kk = 1; kk <= 20; kk += 5) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(4)
        .m(3).n(nn).k(kk)
        .Test(xnn_f32_igemm_ukernel_3x8s4__wasmrelaxedsimd_fma);
    }
  }
}
3737
// Runs m = 3 with n in {16, 24} and cn_stride(11) for k in {1, 6, 11, 16}.
TEST(F32_IGEMM_3X8S4__WASMRELAXEDSIMD_FMA, n_div_8_strided_cn) {
  for (uint32_t nn = 16; nn <= 24; nn += 8) {
    for (size_t kk = 1; kk <= 20; kk += 5) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(4)
        .m(3).n(nn).k(kk)
        .cn_stride(11)
        .Test(xnn_f32_igemm_ukernel_3x8s4__wasmrelaxedsimd_fma);
    }
  }
}
3754
// Runs n in {16, 24}, k in {1, 6, 11, 16} and m in [1, 3], one iteration each.
TEST(F32_IGEMM_3X8S4__WASMRELAXEDSIMD_FMA, n_div_8_subtile) {
  for (uint32_t nn = 16; nn <= 24; nn += 8) {
    for (size_t kk = 1; kk <= 20; kk += 5) {
      for (uint32_t mm = 1; mm <= 3; ++mm) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(4)
          .m(mm).n(nn).k(kk)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_3x8s4__wasmrelaxedsimd_fma);
      }
    }
  }
}
3773
// Runs the full 3x8 tile with ks(3) for k in {1, 6, 11, 16}.
TEST(F32_IGEMM_3X8S4__WASMRELAXEDSIMD_FMA, small_kernel) {
  for (size_t kk = 1; kk <= 20; kk += 5) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(4)
      .m(3).n(8).k(kk)
      .ks(3)
      .Test(xnn_f32_igemm_ukernel_3x8s4__wasmrelaxedsimd_fma);
  }
}
3788
// Runs k in {1, 6, 11, 16}, n in [1, 8] and m in [1, 3] with ks(3),
// one iteration each.
TEST(F32_IGEMM_3X8S4__WASMRELAXEDSIMD_FMA, small_kernel_subtile) {
  for (size_t kk = 1; kk <= 20; kk += 5) {
    for (uint32_t nn = 1; nn <= 8; ++nn) {
      for (uint32_t mm = 1; mm <= 3; ++mm) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(4)
          .m(mm).n(nn).k(kk)
          .ks(3)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_3x8s4__wasmrelaxedsimd_fma);
      }
    }
  }
}
3808
// Runs m = 3 with n in [9, 15] and ks(3) for k in {1, 6, 11, 16}.
TEST(F32_IGEMM_3X8S4__WASMRELAXEDSIMD_FMA, n_gt_8_small_kernel) {
  for (uint32_t nn = 9; nn < 16; ++nn) {
    for (size_t kk = 1; kk <= 20; kk += 5) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(4)
        .m(3).n(nn).k(kk)
        .ks(3)
        .Test(xnn_f32_igemm_ukernel_3x8s4__wasmrelaxedsimd_fma);
    }
  }
}
3825
// Runs m = 3 with n in {16, 24} and ks(3) for k in {1, 6, 11, 16}.
TEST(F32_IGEMM_3X8S4__WASMRELAXEDSIMD_FMA, n_div_8_small_kernel) {
  for (uint32_t nn = 16; nn <= 24; nn += 8) {
    for (size_t kk = 1; kk <= 20; kk += 5) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(4)
        .m(3).n(nn).k(kk)
        .ks(3)
        .Test(xnn_f32_igemm_ukernel_3x8s4__wasmrelaxedsimd_fma);
    }
  }
}
3842
// Runs k in {1, 6, 11, 16}, n in [1, 8] and m in [1, 3] with cm_stride(11),
// one iteration each.
TEST(F32_IGEMM_3X8S4__WASMRELAXEDSIMD_FMA, strided_cm_subtile) {
  for (size_t kk = 1; kk <= 20; kk += 5) {
    for (uint32_t nn = 1; nn <= 8; ++nn) {
      for (uint32_t mm = 1; mm <= 3; ++mm) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(4)
          .m(mm).n(nn).k(kk)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_3x8s4__wasmrelaxedsimd_fma);
      }
    }
  }
}
3862
// Runs the full 3x8 tile with ks(3) and a_offset(67) for k in {1, 6, 11, 16}.
TEST(F32_IGEMM_3X8S4__WASMRELAXEDSIMD_FMA, a_offset) {
  for (size_t kk = 1; kk <= 20; kk += 5) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(4)
      .m(3).n(8).k(kk)
      .ks(3)
      .a_offset(67)
      .Test(xnn_f32_igemm_ukernel_3x8s4__wasmrelaxedsimd_fma);
  }
}
3878
// Runs the full 3x8 tile with ks(3) and a_offset(67), setting zero_index to
// each value in [0, 2], for k in {1, 6, 11, 16}.
TEST(F32_IGEMM_3X8S4__WASMRELAXEDSIMD_FMA, zero) {
  for (size_t kk = 1; kk <= 20; kk += 5) {
    for (uint32_t zi = 0; zi < 3; ++zi) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(4)
        .m(3).n(8).k(kk)
        .ks(3)
        .a_offset(67)
        .zero_index(zi)
        .Test(xnn_f32_igemm_ukernel_3x8s4__wasmrelaxedsimd_fma);
    }
  }
}
3897
// Runs the full 3x8 tile at k = 4 with cm_stride(11).
TEST(F32_IGEMM_3X8S4__WASMRELAXEDSIMD_FMA, strided_cm) {
  GemmMicrokernelTester()
    .mr(3).nr(8).kr(1).sr(4)
    .m(3).n(8).k(4)
    .cm_stride(11)
    .Test(xnn_f32_igemm_ukernel_3x8s4__wasmrelaxedsimd_fma);
}
3910 #endif // XNN_ARCH_WASMRELAXEDSIMD
3911
3912
3913 #if XNN_ARCH_WASMRELAXEDSIMD
// Runs the full 4x8 tile at k = 1.
TEST(F32_IGEMM_4X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_eq_1) {
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(1)
    .Test(xnn_f32_igemm_ukernel_4x8__wasmrelaxedsimd_fma_loadsplat);
}
3925
// Runs the full 4x8 tile at k = 1 with cn_stride(11).
TEST(F32_IGEMM_4X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, strided_cn) {
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(1)
    .cn_stride(11)
    .Test(xnn_f32_igemm_ukernel_4x8__wasmrelaxedsimd_fma_loadsplat);
}
3938
// Runs k = 1 for every n in [1, 8] and m in [1, 4], one iteration each.
TEST(F32_IGEMM_4X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_eq_1_subtile) {
  for (uint32_t nn = 1; nn <= 8; ++nn) {
    for (uint32_t mm = 1; mm <= 4; ++mm) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(mm).n(nn).k(1)
        .iterations(1)
        .Test(xnn_f32_igemm_ukernel_4x8__wasmrelaxedsimd_fma_loadsplat);
    }
  }
}
3955
// Runs k = 1, n = 8 for every m in [1, 4], one iteration each.
TEST(F32_IGEMM_4X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_eq_1_subtile_m) {
  for (uint32_t mm = 1; mm <= 4; ++mm) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(mm).n(8).k(1)
      .iterations(1)
      .Test(xnn_f32_igemm_ukernel_4x8__wasmrelaxedsimd_fma_loadsplat);
  }
}
3970
// Runs k = 1, m = 4 for every n in [1, 8], one iteration each.
TEST(F32_IGEMM_4X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_eq_1_subtile_n) {
  for (uint32_t nn = 1; nn <= 8; ++nn) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(nn).k(1)
      .iterations(1)
      .Test(xnn_f32_igemm_ukernel_4x8__wasmrelaxedsimd_fma_loadsplat);
  }
}
3985
// Runs the full 4x8 tile for every k in [2, 9].
TEST(F32_IGEMM_4X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_gt_1) {
  for (size_t kk = 2; kk < 10; ++kk) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(kk)
      .Test(xnn_f32_igemm_ukernel_4x8__wasmrelaxedsimd_fma_loadsplat);
  }
}
3999
// Runs every k in [2, 9], n in [1, 8] and m in [1, 4], one iteration each.
TEST(F32_IGEMM_4X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_gt_1_subtile) {
  for (size_t kk = 2; kk < 10; ++kk) {
    for (uint32_t nn = 1; nn <= 8; ++nn) {
      for (uint32_t mm = 1; mm <= 4; ++mm) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(mm).n(nn).k(kk)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_4x8__wasmrelaxedsimd_fma_loadsplat);
      }
    }
  }
}
4018
// m = 4 with n in 9..15 and k in {1, 3, 5}.
TEST(F32_IGEMM_4X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_gt_8) {
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(nc).k(kc)
        .Test(xnn_f32_igemm_ukernel_4x8__wasmrelaxedsimd_fma_loadsplat);
    }
  }
}
4034
// m = 4 with n in 9..15 and k in {1, 3, 5}, using cn_stride(11).
TEST(F32_IGEMM_4X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_gt_8_strided_cn) {
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(nc).k(kc)
        .cn_stride(11)
        .Test(xnn_f32_igemm_ukernel_4x8__wasmrelaxedsimd_fma_loadsplat);
    }
  }
}
4051
// n in 9..15 crossed with k in {1, 3, 5} and m in 1..4; iterations(1) per shape.
TEST(F32_IGEMM_4X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_gt_8_subtile) {
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_4x8__wasmrelaxedsimd_fma_loadsplat);
      }
    }
  }
}
4070
// m = 4 with n in {16, 24} and k in {1, 3, 5}.
TEST(F32_IGEMM_4X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_div_8) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(nc).k(kc)
        .Test(xnn_f32_igemm_ukernel_4x8__wasmrelaxedsimd_fma_loadsplat);
    }
  }
}
4086
// m = 4 with n in {16, 24} and k in {1, 3, 5}, using cn_stride(11).
TEST(F32_IGEMM_4X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_div_8_strided_cn) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(nc).k(kc)
        .cn_stride(11)
        .Test(xnn_f32_igemm_ukernel_4x8__wasmrelaxedsimd_fma_loadsplat);
    }
  }
}
4103
// n in {16, 24} crossed with k in {1, 3, 5} and m in 1..4; iterations(1) per shape.
TEST(F32_IGEMM_4X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_div_8_subtile) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_4x8__wasmrelaxedsimd_fma_loadsplat);
      }
    }
  }
}
4122
// m = 4, n = 8, k in {1, 3, 5}, with ks(3).
TEST(F32_IGEMM_4X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, small_kernel) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(kc)
      .ks(3)
      .Test(xnn_f32_igemm_ukernel_4x8__wasmrelaxedsimd_fma_loadsplat);
  }
}
4137
// k in {1, 3, 5} crossed with n in 1..8 and m in 1..4, with ks(3) and iterations(1).
TEST(F32_IGEMM_4X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, small_kernel_subtile) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .ks(3)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_4x8__wasmrelaxedsimd_fma_loadsplat);
      }
    }
  }
}
4157
// m = 4 with n in 9..15 and k in {1, 3, 5}, with ks(3).
TEST(F32_IGEMM_4X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_gt_8_small_kernel) {
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(nc).k(kc)
        .ks(3)
        .Test(xnn_f32_igemm_ukernel_4x8__wasmrelaxedsimd_fma_loadsplat);
    }
  }
}
4174
// m = 4 with n in {16, 24} and k in {1, 3, 5}, with ks(3).
TEST(F32_IGEMM_4X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_div_8_small_kernel) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(nc).k(kc)
        .ks(3)
        .Test(xnn_f32_igemm_ukernel_4x8__wasmrelaxedsimd_fma_loadsplat);
    }
  }
}
4191
// k in {1, 3, 5} crossed with n in 1..8 and m in 1..4, using cm_stride(11) and iterations(1).
TEST(F32_IGEMM_4X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, strided_cm_subtile) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_4x8__wasmrelaxedsimd_fma_loadsplat);
      }
    }
  }
}
4211
// m = 4, n = 8, k in {1, 3, 5}, with ks(3) and a_offset(23).
TEST(F32_IGEMM_4X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, a_offset) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(kc)
      .ks(3)
      .a_offset(23)
      .Test(xnn_f32_igemm_ukernel_4x8__wasmrelaxedsimd_fma_loadsplat);
  }
}
4227
// m = 4, n = 8, k in {1, 3, 5}, with ks(3), a_offset(23) and zero_index swept over 0..3.
TEST(F32_IGEMM_4X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, zero) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t zi = 0; zi < 4; ++zi) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(kc)
        .ks(3)
        .a_offset(23)
        .zero_index(zi)
        .Test(xnn_f32_igemm_ukernel_4x8__wasmrelaxedsimd_fma_loadsplat);
    }
  }
}
4246
// Single m = 4, n = 8, k = 1 case using cm_stride(11).
TEST(F32_IGEMM_4X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, strided_cm) {
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(1)
    .cm_stride(11)
    .Test(xnn_f32_igemm_ukernel_4x8__wasmrelaxedsimd_fma_loadsplat);
}
4259 #endif // XNN_ARCH_WASMRELAXEDSIMD
4260
4261
4262 #if XNN_ARCH_WASMRELAXEDSIMD
// Single m = 4, n = 8, k = 4 case.
TEST(F32_IGEMM_4X8S4__WASMRELAXEDSIMD_FMA, k_eq_4) {
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(4)
    .m(4).n(8).k(4)
    .Test(xnn_f32_igemm_ukernel_4x8s4__wasmrelaxedsimd_fma);
}
4274
// Single m = 4, n = 8, k = 4 case using cn_stride(11).
TEST(F32_IGEMM_4X8S4__WASMRELAXEDSIMD_FMA, strided_cn) {
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(4)
    .m(4).n(8).k(4)
    .cn_stride(11)
    .Test(xnn_f32_igemm_ukernel_4x8s4__wasmrelaxedsimd_fma);
}
4287
// k = 4 with every (m, n) pair for n in 1..8 and m in 1..4; iterations(1) per shape.
TEST(F32_IGEMM_4X8S4__WASMRELAXEDSIMD_FMA, k_eq_4_subtile) {
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 4; ++mc) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(4)
        .m(mc).n(nc).k(4)
        .iterations(1)
        .Test(xnn_f32_igemm_ukernel_4x8s4__wasmrelaxedsimd_fma);
    }
  }
}
4304
// k = 4, n fixed at 8, m swept over 1..4; iterations(1) per shape.
TEST(F32_IGEMM_4X8S4__WASMRELAXEDSIMD_FMA, k_eq_4_subtile_m) {
  for (uint32_t mc = 1; mc <= 4; ++mc) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(4)
      .m(mc).n(8).k(4)
      .iterations(1)
      .Test(xnn_f32_igemm_ukernel_4x8s4__wasmrelaxedsimd_fma);
  }
}
4319
// k = 4, m fixed at 4, n swept over 1..8; iterations(1) per shape.
TEST(F32_IGEMM_4X8S4__WASMRELAXEDSIMD_FMA, k_eq_4_subtile_n) {
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(4)
      .m(4).n(nc).k(4)
      .iterations(1)
      .Test(xnn_f32_igemm_ukernel_4x8s4__wasmrelaxedsimd_fma);
  }
}
4334
// m = 4, n = 8, with k swept over 1..3.
TEST(F32_IGEMM_4X8S4__WASMRELAXEDSIMD_FMA, k_lt_4) {
  for (size_t kc = 1; kc < 4; ++kc) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(4)
      .m(4).n(8).k(kc)
      .Test(xnn_f32_igemm_ukernel_4x8s4__wasmrelaxedsimd_fma);
  }
}
4348
// k in 1..3 crossed with n in 1..8 and m in 1..4; iterations(1) per shape.
TEST(F32_IGEMM_4X8S4__WASMRELAXEDSIMD_FMA, k_lt_4_subtile) {
  for (size_t kc = 1; kc < 4; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(4)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_4x8s4__wasmrelaxedsimd_fma);
      }
    }
  }
}
4367
// m = 4, n = 8, with k swept over 5..7.
TEST(F32_IGEMM_4X8S4__WASMRELAXEDSIMD_FMA, k_gt_4) {
  for (size_t kc = 5; kc < 8; ++kc) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(4)
      .m(4).n(8).k(kc)
      .Test(xnn_f32_igemm_ukernel_4x8s4__wasmrelaxedsimd_fma);
  }
}
4381
// k in 5..7 crossed with n in 1..8 and m in 1..4; iterations(1) per shape.
TEST(F32_IGEMM_4X8S4__WASMRELAXEDSIMD_FMA, k_gt_4_subtile) {
  for (size_t kc = 5; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(4)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_4x8s4__wasmrelaxedsimd_fma);
      }
    }
  }
}
4400
// m = 4, n = 8, with k stepping by 4 from 8 through 40.
TEST(F32_IGEMM_4X8S4__WASMRELAXEDSIMD_FMA, k_div_4) {
  for (size_t kc = 8; kc <= 40; kc += 4) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(4)
      .m(4).n(8).k(kc)
      .Test(xnn_f32_igemm_ukernel_4x8s4__wasmrelaxedsimd_fma);
  }
}
4414
// k stepping by 4 from 8 through 40, crossed with n in 1..8 and m in 1..4; iterations(1) per shape.
TEST(F32_IGEMM_4X8S4__WASMRELAXEDSIMD_FMA, k_div_4_subtile) {
  for (size_t kc = 8; kc <= 40; kc += 4) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(4)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_4x8s4__wasmrelaxedsimd_fma);
      }
    }
  }
}
4433
// m = 4 with n in 9..15 and k in {1, 6, 11, 16}.
TEST(F32_IGEMM_4X8S4__WASMRELAXEDSIMD_FMA, n_gt_8) {
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(4)
        .m(4).n(nc).k(kc)
        .Test(xnn_f32_igemm_ukernel_4x8s4__wasmrelaxedsimd_fma);
    }
  }
}
4449
// m = 4 with n in 9..15 and k in {1, 6, 11, 16}, using cn_stride(11).
TEST(F32_IGEMM_4X8S4__WASMRELAXEDSIMD_FMA, n_gt_8_strided_cn) {
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(4)
        .m(4).n(nc).k(kc)
        .cn_stride(11)
        .Test(xnn_f32_igemm_ukernel_4x8s4__wasmrelaxedsimd_fma);
    }
  }
}
4466
// n in 9..15 crossed with k in {1, 6, 11, 16} and m in 1..4; iterations(1) per shape.
TEST(F32_IGEMM_4X8S4__WASMRELAXEDSIMD_FMA, n_gt_8_subtile) {
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(4)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_4x8s4__wasmrelaxedsimd_fma);
      }
    }
  }
}
4485
// m = 4 with n in {16, 24} and k in {1, 6, 11, 16}.
TEST(F32_IGEMM_4X8S4__WASMRELAXEDSIMD_FMA, n_div_8) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(4)
        .m(4).n(nc).k(kc)
        .Test(xnn_f32_igemm_ukernel_4x8s4__wasmrelaxedsimd_fma);
    }
  }
}
4501
// m = 4 with n in {16, 24} and k in {1, 6, 11, 16}, using cn_stride(11).
TEST(F32_IGEMM_4X8S4__WASMRELAXEDSIMD_FMA, n_div_8_strided_cn) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(4)
        .m(4).n(nc).k(kc)
        .cn_stride(11)
        .Test(xnn_f32_igemm_ukernel_4x8s4__wasmrelaxedsimd_fma);
    }
  }
}
4518
// n in {16, 24} crossed with k in {1, 6, 11, 16} and m in 1..4; iterations(1) per shape.
TEST(F32_IGEMM_4X8S4__WASMRELAXEDSIMD_FMA, n_div_8_subtile) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(4)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_4x8s4__wasmrelaxedsimd_fma);
      }
    }
  }
}
4537
// m = 4, n = 8, k in {1, 6, 11, 16}, with ks(3).
TEST(F32_IGEMM_4X8S4__WASMRELAXEDSIMD_FMA, small_kernel) {
  for (size_t kc = 1; kc <= 20; kc += 5) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(4)
      .m(4).n(8).k(kc)
      .ks(3)
      .Test(xnn_f32_igemm_ukernel_4x8s4__wasmrelaxedsimd_fma);
  }
}
4552
// k in {1, 6, 11, 16} crossed with n in 1..8 and m in 1..4, with ks(3) and iterations(1).
TEST(F32_IGEMM_4X8S4__WASMRELAXEDSIMD_FMA, small_kernel_subtile) {
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(4)
          .m(mc).n(nc).k(kc)
          .ks(3)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_4x8s4__wasmrelaxedsimd_fma);
      }
    }
  }
}
4572
// m = 4 with n in 9..15 and k in {1, 6, 11, 16}, with ks(3).
TEST(F32_IGEMM_4X8S4__WASMRELAXEDSIMD_FMA, n_gt_8_small_kernel) {
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(4)
        .m(4).n(nc).k(kc)
        .ks(3)
        .Test(xnn_f32_igemm_ukernel_4x8s4__wasmrelaxedsimd_fma);
    }
  }
}
4589
// m = 4 with n in {16, 24} and k in {1, 6, 11, 16}, with ks(3).
TEST(F32_IGEMM_4X8S4__WASMRELAXEDSIMD_FMA, n_div_8_small_kernel) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(4)
        .m(4).n(nc).k(kc)
        .ks(3)
        .Test(xnn_f32_igemm_ukernel_4x8s4__wasmrelaxedsimd_fma);
    }
  }
}
4606
// k in {1, 6, 11, 16} crossed with n in 1..8 and m in 1..4, using cm_stride(11) and iterations(1).
TEST(F32_IGEMM_4X8S4__WASMRELAXEDSIMD_FMA, strided_cm_subtile) {
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(4)
          .m(mc).n(nc).k(kc)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_4x8s4__wasmrelaxedsimd_fma);
      }
    }
  }
}
4626
// m = 4, n = 8, k in {1, 6, 11, 16}, with ks(3) and a_offset(83).
TEST(F32_IGEMM_4X8S4__WASMRELAXEDSIMD_FMA, a_offset) {
  for (size_t kc = 1; kc <= 20; kc += 5) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(4)
      .m(4).n(8).k(kc)
      .ks(3)
      .a_offset(83)
      .Test(xnn_f32_igemm_ukernel_4x8s4__wasmrelaxedsimd_fma);
  }
}
4642
// m = 4, n = 8, k in {1, 6, 11, 16}, with ks(3), a_offset(83) and zero_index swept over 0..3.
TEST(F32_IGEMM_4X8S4__WASMRELAXEDSIMD_FMA, zero) {
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t zi = 0; zi < 4; ++zi) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(4)
        .m(4).n(8).k(kc)
        .ks(3)
        .a_offset(83)
        .zero_index(zi)
        .Test(xnn_f32_igemm_ukernel_4x8s4__wasmrelaxedsimd_fma);
    }
  }
}
4661
// Single m = 4, n = 8, k = 4 case using cm_stride(11).
TEST(F32_IGEMM_4X8S4__WASMRELAXEDSIMD_FMA, strided_cm) {
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(4)
    .m(4).n(8).k(4)
    .cm_stride(11)
    .Test(xnn_f32_igemm_ukernel_4x8s4__wasmrelaxedsimd_fma);
}
4674 #endif // XNN_ARCH_WASMRELAXEDSIMD
4675
4676
4677 #if XNN_ARCH_WASMRELAXEDSIMD
// Single m = 5, n = 8, k = 1 case.
TEST(F32_IGEMM_5X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_eq_1) {
  GemmMicrokernelTester()
    .mr(5).nr(8).kr(1).sr(1)
    .m(5).n(8).k(1)
    .Test(xnn_f32_igemm_ukernel_5x8__wasmrelaxedsimd_fma_loadsplat);
}
4689
// Single m = 5, n = 8, k = 1 case using cn_stride(11).
TEST(F32_IGEMM_5X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, strided_cn) {
  GemmMicrokernelTester()
    .mr(5).nr(8).kr(1).sr(1)
    .m(5).n(8).k(1)
    .cn_stride(11)
    .Test(xnn_f32_igemm_ukernel_5x8__wasmrelaxedsimd_fma_loadsplat);
}
4702
// k = 1 with every (m, n) pair for n in 1..8 and m in 1..5; iterations(1) per shape.
TEST(F32_IGEMM_5X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_eq_1_subtile) {
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 5; ++mc) {
      GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(mc).n(nc).k(1)
        .iterations(1)
        .Test(xnn_f32_igemm_ukernel_5x8__wasmrelaxedsimd_fma_loadsplat);
    }
  }
}
4719
// k = 1, n fixed at 8, m swept over 1..5; iterations(1) per shape.
TEST(F32_IGEMM_5X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_eq_1_subtile_m) {
  for (uint32_t mc = 1; mc <= 5; ++mc) {
    GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(mc).n(8).k(1)
      .iterations(1)
      .Test(xnn_f32_igemm_ukernel_5x8__wasmrelaxedsimd_fma_loadsplat);
  }
}
4734
// k = 1, m fixed at 5, n swept over 1..8; iterations(1) per shape.
TEST(F32_IGEMM_5X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_eq_1_subtile_n) {
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(nc).k(1)
      .iterations(1)
      .Test(xnn_f32_igemm_ukernel_5x8__wasmrelaxedsimd_fma_loadsplat);
  }
}
4749
// m = 5, n = 8, with k swept over 2..9.
TEST(F32_IGEMM_5X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_gt_1) {
  for (size_t kc = 2; kc < 10; ++kc) {
    GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(kc)
      .Test(xnn_f32_igemm_ukernel_5x8__wasmrelaxedsimd_fma_loadsplat);
  }
}
4763
// k in 2..9 crossed with n in 1..8 and m in 1..5; iterations(1) per shape.
TEST(F32_IGEMM_5X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_gt_1_subtile) {
  for (size_t kc = 2; kc < 10; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 5; ++mc) {
        GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_5x8__wasmrelaxedsimd_fma_loadsplat);
      }
    }
  }
}
4782
// m = 5 with n in 9..15 and k in {1, 3, 5}.
TEST(F32_IGEMM_5X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_gt_8) {
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(nc).k(kc)
        .Test(xnn_f32_igemm_ukernel_5x8__wasmrelaxedsimd_fma_loadsplat);
    }
  }
}
4798
// m = 5 with n in 9..15 and k in {1, 3, 5}, using cn_stride(11).
TEST(F32_IGEMM_5X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_gt_8_strided_cn) {
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(nc).k(kc)
        .cn_stride(11)
        .Test(xnn_f32_igemm_ukernel_5x8__wasmrelaxedsimd_fma_loadsplat);
    }
  }
}
4815
// n in 9..15 crossed with k in {1, 3, 5} and m in 1..5; iterations(1) per shape.
TEST(F32_IGEMM_5X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_gt_8_subtile) {
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      for (uint32_t mc = 1; mc <= 5; ++mc) {
        GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_5x8__wasmrelaxedsimd_fma_loadsplat);
      }
    }
  }
}
4834
// m = 5 with n in {16, 24} and k in {1, 3, 5}.
TEST(F32_IGEMM_5X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_div_8) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(nc).k(kc)
        .Test(xnn_f32_igemm_ukernel_5x8__wasmrelaxedsimd_fma_loadsplat);
    }
  }
}
4850
// m = 5 with n in {16, 24} and k in {1, 3, 5}, using cn_stride(11).
TEST(F32_IGEMM_5X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_div_8_strided_cn) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(nc).k(kc)
        .cn_stride(11)
        .Test(xnn_f32_igemm_ukernel_5x8__wasmrelaxedsimd_fma_loadsplat);
    }
  }
}
4867
// n in {16, 24} crossed with k in {1, 3, 5} and m in 1..5; iterations(1) per shape.
TEST(F32_IGEMM_5X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_div_8_subtile) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      for (uint32_t mc = 1; mc <= 5; ++mc) {
        GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_5x8__wasmrelaxedsimd_fma_loadsplat);
      }
    }
  }
}
4886
// m = 5, n = 8, k in {1, 3, 5}, with ks(3).
TEST(F32_IGEMM_5X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, small_kernel) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(kc)
      .ks(3)
      .Test(xnn_f32_igemm_ukernel_5x8__wasmrelaxedsimd_fma_loadsplat);
  }
}
4901
// k in {1, 3, 5} crossed with n in 1..8 and m in 1..5, with ks(3) and iterations(1).
TEST(F32_IGEMM_5X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, small_kernel_subtile) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 5; ++mc) {
        GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .ks(3)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_5x8__wasmrelaxedsimd_fma_loadsplat);
      }
    }
  }
}
4921
// m = 5 with n in 9..15 and k in {1, 3, 5}, with ks(3).
TEST(F32_IGEMM_5X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_gt_8_small_kernel) {
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(nc).k(kc)
        .ks(3)
        .Test(xnn_f32_igemm_ukernel_5x8__wasmrelaxedsimd_fma_loadsplat);
    }
  }
}
4938
// m = 5 with n in {16, 24} and k in {1, 3, 5}, with ks(3).
TEST(F32_IGEMM_5X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_div_8_small_kernel) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(nc).k(kc)
        .ks(3)
        .Test(xnn_f32_igemm_ukernel_5x8__wasmrelaxedsimd_fma_loadsplat);
    }
  }
}
4955
// k in {1, 3, 5} crossed with n in 1..8 and m in 1..5, using cm_stride(11) and iterations(1).
TEST(F32_IGEMM_5X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, strided_cm_subtile) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 5; ++mc) {
        GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_5x8__wasmrelaxedsimd_fma_loadsplat);
      }
    }
  }
}
4975
// m = 5, n = 8, k in {1, 3, 5}, with ks(3) and a_offset(29).
TEST(F32_IGEMM_5X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, a_offset) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(kc)
      .ks(3)
      .a_offset(29)
      .Test(xnn_f32_igemm_ukernel_5x8__wasmrelaxedsimd_fma_loadsplat);
  }
}
4991
// m = 5, n = 8, k in {1, 3, 5}, with ks(3), a_offset(29) and zero_index swept over 0..4.
TEST(F32_IGEMM_5X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, zero) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t zi = 0; zi < 5; ++zi) {
      GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(8).k(kc)
        .ks(3)
        .a_offset(29)
        .zero_index(zi)
        .Test(xnn_f32_igemm_ukernel_5x8__wasmrelaxedsimd_fma_loadsplat);
    }
  }
}
5010
// Single m = 5, n = 8, k = 1 case using cm_stride(11).
TEST(F32_IGEMM_5X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, strided_cm) {
  GemmMicrokernelTester()
    .mr(5).nr(8).kr(1).sr(1)
    .m(5).n(8).k(1)
    .cm_stride(11)
    .Test(xnn_f32_igemm_ukernel_5x8__wasmrelaxedsimd_fma_loadsplat);
}
5023 #endif // XNN_ARCH_WASMRELAXEDSIMD
5024
5025
5026 #if XNN_ARCH_WASMRELAXEDSIMD
// Single m = 6, n = 8, k = 1 case.
TEST(F32_IGEMM_6X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_eq_1) {
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(1)
    .Test(xnn_f32_igemm_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat);
}
5038
// Single m = 6, n = 8, k = 1 case using cn_stride(11).
TEST(F32_IGEMM_6X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, strided_cn) {
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(1)
    .cn_stride(11)
    .Test(xnn_f32_igemm_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat);
}
5051
// k = 1 with every (m, n) pair for n in 1..8 and m in 1..6; iterations(1) per shape.
TEST(F32_IGEMM_6X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_eq_1_subtile) {
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    for (uint32_t mc = 1; mc <= 6; ++mc) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(mc).n(nc).k(1)
        .iterations(1)
        .Test(xnn_f32_igemm_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat);
    }
  }
}
5068
// k = 1, n fixed at 8, m swept over 1..6; iterations(1) per shape.
TEST(F32_IGEMM_6X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_eq_1_subtile_m) {
  for (uint32_t mc = 1; mc <= 6; ++mc) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(mc).n(8).k(1)
      .iterations(1)
      .Test(xnn_f32_igemm_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat);
  }
}
5083
// k = 1, m fixed at 6, n swept over 1..8; iterations(1) per shape.
TEST(F32_IGEMM_6X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_eq_1_subtile_n) {
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(nc).k(1)
      .iterations(1)
      .Test(xnn_f32_igemm_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat);
  }
}
5098
// m = 6, n = 8, with k swept over 2..9.
TEST(F32_IGEMM_6X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_gt_1) {
  for (size_t kc = 2; kc < 10; ++kc) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(kc)
      .Test(xnn_f32_igemm_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat);
  }
}
5112
// k in 2..9 crossed with n in 1..8 and m in 1..6; iterations(1) per shape.
TEST(F32_IGEMM_6X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_gt_1_subtile) {
  for (size_t kc = 2; kc < 10; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 6; ++mc) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat);
      }
    }
  }
}
5131
// m = 6 with n in 9..15 and k in {1, 3, 5}.
TEST(F32_IGEMM_6X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_gt_8) {
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(nc).k(kc)
        .Test(xnn_f32_igemm_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat);
    }
  }
}
5147
// m = 6 with n in 9..15 and k in {1, 3, 5}, using cn_stride(11).
TEST(F32_IGEMM_6X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_gt_8_strided_cn) {
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(nc).k(kc)
        .cn_stride(11)
        .Test(xnn_f32_igemm_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat);
    }
  }
}
5164
// n in 9..15 crossed with k in {1, 3, 5} and m in 1..6; iterations(1) per shape.
TEST(F32_IGEMM_6X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_gt_8_subtile) {
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      for (uint32_t mc = 1; mc <= 6; ++mc) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat);
      }
    }
  }
}
5183
// m = 6 with n in {16, 24} and k in {1, 3, 5}.
TEST(F32_IGEMM_6X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_div_8) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(nc).k(kc)
        .Test(xnn_f32_igemm_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat);
    }
  }
}
5199
// m = 6 with n in {16, 24} and k in {1, 3, 5}, using cn_stride(11).
TEST(F32_IGEMM_6X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_div_8_strided_cn) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(nc).k(kc)
        .cn_stride(11)
        .Test(xnn_f32_igemm_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat);
    }
  }
}
5216
// n in {16, 24} crossed with k in {1, 3, 5} and m in 1..6; iterations(1) per shape.
TEST(F32_IGEMM_6X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_div_8_subtile) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      for (uint32_t mc = 1; mc <= 6; ++mc) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat);
      }
    }
  }
}
5235
// m = 6, n = 8, k in {1, 3, 5}, with ks(3).
TEST(F32_IGEMM_6X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, small_kernel) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(kc)
      .ks(3)
      .Test(xnn_f32_igemm_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat);
  }
}
5250
// k in {1, 3, 5} crossed with n in 1..8 and m in 1..6, with ks(3) and iterations(1).
TEST(F32_IGEMM_6X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, small_kernel_subtile) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 6; ++mc) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .ks(3)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat);
      }
    }
  }
}
5270
// ks = 3 with m = 6; n runs over 9..15 and k over {1, 3, 5}.
TEST(F32_IGEMM_6X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_gt_8_small_kernel) {
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(nc).k(kc)
          .ks(3)
          .Test(xnn_f32_igemm_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat);
    }
  }
}
5287
// ks = 3 with m = 6; n takes the values {16, 24} and k the values {1, 3, 5}.
TEST(F32_IGEMM_6X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_div_8_small_kernel) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(nc).k(kc)
          .ks(3)
          .Test(xnn_f32_igemm_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat);
    }
  }
}
5304
// cm_stride = 11 with every (m, n) pair in 1..6 x 1..8, for k in {1, 3, 5};
// one tester iteration per combination.
TEST(F32_IGEMM_6X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, strided_cm_subtile) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 6; ++mc) {
        GemmMicrokernelTester()
            .mr(6).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_igemm_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat);
      }
    }
  }
}
5324
// m = 6, n = 8, ks = 3 and a_offset = 37; k takes the values {1, 3, 5}.
TEST(F32_IGEMM_6X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, a_offset) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(8).k(kc)
        .ks(3)
        .a_offset(37)
        .Test(xnn_f32_igemm_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat);
  }
}
5340
// m = 6, n = 8, ks = 3, a_offset = 37; zero_index is swept over 0..5 for each
// k in {1, 3, 5}.
TEST(F32_IGEMM_6X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, zero) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t zi = 0; zi < 6; ++zi) {
      GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(6).n(8).k(kc)
          .ks(3)
          .a_offset(37)
          .zero_index(zi)
          .Test(xnn_f32_igemm_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat);
    }
  }
}
5359
// Single run with m = 6, n = 8, k = 1 and cm_stride set to 11.
TEST(F32_IGEMM_6X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, strided_cm) {
  GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(1)
      .cm_stride(11)
      .Test(xnn_f32_igemm_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat);
}
5372 #endif // XNN_ARCH_WASMRELAXEDSIMD
5373
5374
// Single run of the 2x4 scalar IGEMM kernel with m = 2, n = 4, k = 1.
TEST(F32_IGEMM_2X4__SCALAR, k_eq_1) {
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(1).sr(1)
      .m(2).n(4).k(1)
      .Test(xnn_f32_igemm_ukernel_2x4__scalar);
}
5386
// Single run with m = 2, n = 4, k = 1 and cn_stride set to 7.
TEST(F32_IGEMM_2X4__SCALAR, strided_cn) {
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(1).sr(1)
      .m(2).n(4).k(1)
      .cn_stride(7)
      .Test(xnn_f32_igemm_ukernel_2x4__scalar);
}
5399
// k = 1 with every (m, n) pair in 1..2 x 1..4; one tester iteration each.
TEST(F32_IGEMM_2X4__SCALAR, k_eq_1_subtile) {
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    for (uint32_t mc = 1; mc <= 2; ++mc) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(1).sr(1)
          .m(mc).n(nc).k(1)
          .iterations(1)
          .Test(xnn_f32_igemm_ukernel_2x4__scalar);
    }
  }
}
5416
// k = 1, n = 4; m is swept over 1..2 with one tester iteration per value.
TEST(F32_IGEMM_2X4__SCALAR, k_eq_1_subtile_m) {
  for (uint32_t mc = 1; mc <= 2; ++mc) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(1).sr(1)
        .m(mc).n(4).k(1)
        .iterations(1)
        .Test(xnn_f32_igemm_ukernel_2x4__scalar);
  }
}
5431
// k = 1, m = 2; n is swept over 1..4 with one tester iteration per value.
TEST(F32_IGEMM_2X4__SCALAR, k_eq_1_subtile_n) {
  for (uint32_t nc = 1; nc <= 4; ++nc) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(1).sr(1)
        .m(2).n(nc).k(1)
        .iterations(1)
        .Test(xnn_f32_igemm_ukernel_2x4__scalar);
  }
}
5446
// m = 2, n = 4; k is swept over 2..9.
TEST(F32_IGEMM_2X4__SCALAR, k_gt_1) {
  for (size_t kc = 2; kc < 10; ++kc) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(1).sr(1)
        .m(2).n(4).k(kc)
        .Test(xnn_f32_igemm_ukernel_2x4__scalar);
  }
}
5460
// k in 2..9 with every (m, n) pair in 1..2 x 1..4; one tester iteration per
// combination.
TEST(F32_IGEMM_2X4__SCALAR, k_gt_1_subtile) {
  for (size_t kc = 2; kc < 10; ++kc) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_igemm_ukernel_2x4__scalar);
      }
    }
  }
}
5479
// m = 2; n runs over 5..7 and k over {1, 3, 5}.
TEST(F32_IGEMM_2X4__SCALAR, n_gt_4) {
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(1).sr(1)
          .m(2).n(nc).k(kc)
          .Test(xnn_f32_igemm_ukernel_2x4__scalar);
    }
  }
}
5495
// m = 2 with cn_stride = 7; n runs over 5..7 and k over {1, 3, 5}.
TEST(F32_IGEMM_2X4__SCALAR, n_gt_4_strided_cn) {
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(1).sr(1)
          .m(2).n(nc).k(kc)
          .cn_stride(7)
          .Test(xnn_f32_igemm_ukernel_2x4__scalar);
    }
  }
}
5512
// n in 5..7, k in {1, 3, 5}, m in 1..2; one tester iteration per combination.
TEST(F32_IGEMM_2X4__SCALAR, n_gt_4_subtile) {
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_igemm_ukernel_2x4__scalar);
      }
    }
  }
}
5531
// m = 2; n takes the values {8, 12} and k the values {1, 3, 5}.
TEST(F32_IGEMM_2X4__SCALAR, n_div_4) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(1).sr(1)
          .m(2).n(nc).k(kc)
          .Test(xnn_f32_igemm_ukernel_2x4__scalar);
    }
  }
}
5547
// m = 2 with cn_stride = 7; n takes the values {8, 12} and k the values
// {1, 3, 5}.
TEST(F32_IGEMM_2X4__SCALAR, n_div_4_strided_cn) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(1).sr(1)
          .m(2).n(nc).k(kc)
          .cn_stride(7)
          .Test(xnn_f32_igemm_ukernel_2x4__scalar);
    }
  }
}
5564
// n in {8, 12}, k in {1, 3, 5}, m in 1..2; one tester iteration per
// combination.
TEST(F32_IGEMM_2X4__SCALAR, n_div_4_subtile) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_igemm_ukernel_2x4__scalar);
      }
    }
  }
}
5583
// m = 2, n = 4 with ks set to 3; k takes the values {1, 3, 5}.
TEST(F32_IGEMM_2X4__SCALAR, small_kernel) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(1).sr(1)
        .m(2).n(4).k(kc)
        .ks(3)
        .Test(xnn_f32_igemm_ukernel_2x4__scalar);
  }
}
5598
// ks = 3 with every (m, n) pair in 1..2 x 1..4, for k in {1, 3, 5}; one
// tester iteration per combination.
TEST(F32_IGEMM_2X4__SCALAR, small_kernel_subtile) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .ks(3)
            .iterations(1)
            .Test(xnn_f32_igemm_ukernel_2x4__scalar);
      }
    }
  }
}
5618
// ks = 3 with m = 2; n runs over 5..7 and k over {1, 3, 5}.
TEST(F32_IGEMM_2X4__SCALAR, n_gt_4_small_kernel) {
  for (uint32_t nc = 5; nc < 8; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(1).sr(1)
          .m(2).n(nc).k(kc)
          .ks(3)
          .Test(xnn_f32_igemm_ukernel_2x4__scalar);
    }
  }
}
5635
// ks = 3 with m = 2; n takes the values {8, 12} and k the values {1, 3, 5}.
TEST(F32_IGEMM_2X4__SCALAR, n_div_4_small_kernel) {
  for (uint32_t nc = 8; nc <= 12; nc += 4) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(1).sr(1)
          .m(2).n(nc).k(kc)
          .ks(3)
          .Test(xnn_f32_igemm_ukernel_2x4__scalar);
    }
  }
}
5652
// cm_stride = 7 with every (m, n) pair in 1..2 x 1..4, for k in {1, 3, 5};
// one tester iteration per combination.
TEST(F32_IGEMM_2X4__SCALAR, strided_cm_subtile) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t nc = 1; nc <= 4; ++nc) {
      for (uint32_t mc = 1; mc <= 2; ++mc) {
        GemmMicrokernelTester()
            .mr(2).nr(4).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .cm_stride(7)
            .iterations(1)
            .Test(xnn_f32_igemm_ukernel_2x4__scalar);
      }
    }
  }
}
5672
// m = 2, n = 4, ks = 3 and a_offset = 13; k takes the values {1, 3, 5}.
TEST(F32_IGEMM_2X4__SCALAR, a_offset) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    GemmMicrokernelTester()
        .mr(2).nr(4).kr(1).sr(1)
        .m(2).n(4).k(kc)
        .ks(3)
        .a_offset(13)
        .Test(xnn_f32_igemm_ukernel_2x4__scalar);
  }
}
5688
// m = 2, n = 4, ks = 3, a_offset = 13; zero_index is swept over 0..1 for each
// k in {1, 3, 5}.
TEST(F32_IGEMM_2X4__SCALAR, zero) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t zi = 0; zi < 2; ++zi) {
      GemmMicrokernelTester()
          .mr(2).nr(4).kr(1).sr(1)
          .m(2).n(4).k(kc)
          .ks(3)
          .a_offset(13)
          .zero_index(zi)
          .Test(xnn_f32_igemm_ukernel_2x4__scalar);
    }
  }
}
5707
// Single run with m = 2, n = 4, k = 1 and cm_stride set to 7.
TEST(F32_IGEMM_2X4__SCALAR, strided_cm) {
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(1).sr(1)
      .m(2).n(4).k(1)
      .cm_stride(7)
      .Test(xnn_f32_igemm_ukernel_2x4__scalar);
}
5720