// Copyright (c) Facebook, Inc. and its affiliates.
// All rights reserved.
//
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
//
// Auto-generated file. Do not edit!
//   Specification: test/f32-gemm.yaml
//   Generator: tools/generate-gemm-test.py
12
13
14 #include <gtest/gtest.h>
15
16 #include <xnnpack/allocator.h>
17 #include <xnnpack/common.h>
18 #include <xnnpack/isa-checks.h>
19 #include <xnnpack/microparams-init.h>
20
21 #include <xnnpack/gemm.h>
22 #include <xnnpack/igemm.h>
23 #include <xnnpack/ppmm.h>
24 #include "gemm-microkernel-tester.h"
25
26
27 #if XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY
// Single run at m=4, n=4, k=2 with no stride overrides.
TEST(F32_GEMM_4X4__AARCH32_VFP_LD64, k_eq_2) {
  GemmMicrokernelTester()
    .mr(4)
    .nr(4)
    .kr(1)
    .sr(1)
    .m(4)
    .n(4)
    .k(2)
    .Test(xnn_f32_gemm_ukernel_4x4__aarch32_vfp_ld64);
}
39
// Single run at m=4, n=4, k=2 with cn_stride(7).
TEST(F32_GEMM_4X4__AARCH32_VFP_LD64, strided_cn) {
  GemmMicrokernelTester()
    .mr(4)
    .nr(4)
    .kr(1)
    .sr(1)
    .m(4)
    .n(4)
    .k(2)
    .cn_stride(7)
    .Test(xnn_f32_gemm_ukernel_4x4__aarch32_vfp_ld64);
}
52
// Single run at m=4, n=4, k=2 with a_stride(5).
TEST(F32_GEMM_4X4__AARCH32_VFP_LD64, k_eq_2_strided_a) {
  GemmMicrokernelTester()
    .mr(4)
    .nr(4)
    .kr(1)
    .sr(1)
    .m(4)
    .n(4)
    .k(2)
    .a_stride(5)
    .Test(xnn_f32_gemm_ukernel_4x4__aarch32_vfp_ld64);
}
65
// Sweeps n in [1, 4] and m in [1, 4] at k=2, one iteration per shape.
TEST(F32_GEMM_4X4__AARCH32_VFP_LD64, k_eq_2_subtile) {
  for (uint32_t n = 1; n <= 4; n++) {
    for (uint32_t m = 1; m <= 4; m++) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(m)
        .n(n)
        .k(2)
        .iterations(1)
        .Test(xnn_f32_gemm_ukernel_4x4__aarch32_vfp_ld64);
    }
  }
}
82
// Sweeps m in [1, 4] at n=4, k=2, one iteration per shape.
TEST(F32_GEMM_4X4__AARCH32_VFP_LD64, k_eq_2_subtile_m) {
  for (uint32_t m = 1; m <= 4; m++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(4)
      .kr(1)
      .sr(1)
      .m(m)
      .n(4)
      .k(2)
      .iterations(1)
      .Test(xnn_f32_gemm_ukernel_4x4__aarch32_vfp_ld64);
  }
}
97
// Sweeps n in [1, 4] at m=4, k=2, one iteration per shape.
TEST(F32_GEMM_4X4__AARCH32_VFP_LD64, k_eq_2_subtile_n) {
  for (uint32_t n = 1; n <= 4; n++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(4)
      .kr(1)
      .sr(1)
      .m(4)
      .n(n)
      .k(2)
      .iterations(1)
      .Test(xnn_f32_gemm_ukernel_4x4__aarch32_vfp_ld64);
  }
}
112
// Runs m=4, n=4 for every k in [1, 2), i.e. only k=1.
TEST(F32_GEMM_4X4__AARCH32_VFP_LD64, k_lt_2) {
  for (size_t k = 1; k < 2; k++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(4)
      .kr(1)
      .sr(1)
      .m(4)
      .n(4)
      .k(k)
      .Test(xnn_f32_gemm_ukernel_4x4__aarch32_vfp_ld64);
  }
}
126
// Runs m=4, n=4 for every k in [1, 2) with a_stride(5).
TEST(F32_GEMM_4X4__AARCH32_VFP_LD64, k_lt_2_strided_a) {
  for (size_t k = 1; k < 2; k++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(4)
      .kr(1)
      .sr(1)
      .m(4)
      .n(4)
      .k(k)
      .a_stride(5)
      .Test(xnn_f32_gemm_ukernel_4x4__aarch32_vfp_ld64);
  }
}
141
// For every k in [1, 2), sweeps n in [1, 4] and m in [1, 4], one iteration per shape.
TEST(F32_GEMM_4X4__AARCH32_VFP_LD64, k_lt_2_subtile) {
  for (size_t k = 1; k < 2; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(4)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_f32_gemm_ukernel_4x4__aarch32_vfp_ld64);
      }
    }
  }
}
160
// Runs m=4, n=4 for every k in [3, 4), i.e. only k=3.
TEST(F32_GEMM_4X4__AARCH32_VFP_LD64, k_gt_2) {
  for (size_t k = 3; k < 4; k++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(4)
      .kr(1)
      .sr(1)
      .m(4)
      .n(4)
      .k(k)
      .Test(xnn_f32_gemm_ukernel_4x4__aarch32_vfp_ld64);
  }
}
174
// Runs m=4, n=4 for every k in [3, 4) with a_stride(7).
TEST(F32_GEMM_4X4__AARCH32_VFP_LD64, k_gt_2_strided_a) {
  for (size_t k = 3; k < 4; k++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(4)
      .kr(1)
      .sr(1)
      .m(4)
      .n(4)
      .k(k)
      .a_stride(7)
      .Test(xnn_f32_gemm_ukernel_4x4__aarch32_vfp_ld64);
  }
}
189
// For every k in [3, 4), sweeps n in [1, 4] and m in [1, 4], one iteration per shape.
TEST(F32_GEMM_4X4__AARCH32_VFP_LD64, k_gt_2_subtile) {
  for (size_t k = 3; k < 4; k++) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(4)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_f32_gemm_ukernel_4x4__aarch32_vfp_ld64);
      }
    }
  }
}
208
// Runs m=4, n=4 for k = 4, 6, ..., 20.
TEST(F32_GEMM_4X4__AARCH32_VFP_LD64, k_div_2) {
  for (size_t k = 4; k <= 20; k += 2) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(4)
      .kr(1)
      .sr(1)
      .m(4)
      .n(4)
      .k(k)
      .Test(xnn_f32_gemm_ukernel_4x4__aarch32_vfp_ld64);
  }
}
222
// Runs m=4, n=4 for k = 4, 6, ..., 20 with a_stride(23).
TEST(F32_GEMM_4X4__AARCH32_VFP_LD64, k_div_2_strided_a) {
  for (size_t k = 4; k <= 20; k += 2) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(4)
      .kr(1)
      .sr(1)
      .m(4)
      .n(4)
      .k(k)
      .a_stride(23)
      .Test(xnn_f32_gemm_ukernel_4x4__aarch32_vfp_ld64);
  }
}
237
// For k = 4, 6, ..., 20, sweeps n in [1, 4] and m in [1, 4], one iteration per shape.
TEST(F32_GEMM_4X4__AARCH32_VFP_LD64, k_div_2_subtile) {
  for (size_t k = 4; k <= 20; k += 2) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(4)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_f32_gemm_ukernel_4x4__aarch32_vfp_ld64);
      }
    }
  }
}
256
// Runs m=4 for n in [5, 8) and k = 1, 4, 7, 10.
TEST(F32_GEMM_4X4__AARCH32_VFP_LD64, n_gt_4) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 10; k += 3) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .Test(xnn_f32_gemm_ukernel_4x4__aarch32_vfp_ld64);
    }
  }
}
272
// Runs m=4 for n in [5, 8) and k = 1, 4, 7, 10 with cn_stride(7).
TEST(F32_GEMM_4X4__AARCH32_VFP_LD64, n_gt_4_strided_cn) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 10; k += 3) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_f32_gemm_ukernel_4x4__aarch32_vfp_ld64);
    }
  }
}
289
// Runs m=4 for n in [5, 8) and k = 1, 4, 7, 10 with a_stride(13).
TEST(F32_GEMM_4X4__AARCH32_VFP_LD64, n_gt_4_strided_a) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 10; k += 3) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .a_stride(13)
        .Test(xnn_f32_gemm_ukernel_4x4__aarch32_vfp_ld64);
    }
  }
}
306
// For n in [5, 8) and k = 1, 4, 7, 10, sweeps m in [1, 4], one iteration per shape.
TEST(F32_GEMM_4X4__AARCH32_VFP_LD64, n_gt_4_subtile) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 10; k += 3) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(4)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_f32_gemm_ukernel_4x4__aarch32_vfp_ld64);
      }
    }
  }
}
325
// Runs m=4 for n = 8, 12 and k = 1, 4, 7, 10.
TEST(F32_GEMM_4X4__AARCH32_VFP_LD64, n_div_4) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 10; k += 3) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .Test(xnn_f32_gemm_ukernel_4x4__aarch32_vfp_ld64);
    }
  }
}
341
// Runs m=4 for n = 8, 12 and k = 1, 4, 7, 10 with cn_stride(7).
TEST(F32_GEMM_4X4__AARCH32_VFP_LD64, n_div_4_strided_cn) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 10; k += 3) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_f32_gemm_ukernel_4x4__aarch32_vfp_ld64);
    }
  }
}
358
// Runs m=4 for n = 8, 12 and k = 1, 4, 7, 10 with a_stride(13).
TEST(F32_GEMM_4X4__AARCH32_VFP_LD64, n_div_4_strided_a) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 10; k += 3) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .a_stride(13)
        .Test(xnn_f32_gemm_ukernel_4x4__aarch32_vfp_ld64);
    }
  }
}
375
// For n = 8, 12 and k = 1, 4, 7, 10, sweeps m in [1, 4], one iteration per shape.
TEST(F32_GEMM_4X4__AARCH32_VFP_LD64, n_div_4_subtile) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 10; k += 3) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(4)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_f32_gemm_ukernel_4x4__aarch32_vfp_ld64);
      }
    }
  }
}
394
// For k = 1, 4, 7, 10, sweeps n in [1, 4] and m in [1, 4] with cm_stride(7),
// one iteration per shape.
TEST(F32_GEMM_4X4__AARCH32_VFP_LD64, strided_cm_subtile) {
  for (size_t k = 1; k <= 10; k += 3) {
    for (uint32_t n = 1; n <= 4; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(4)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_f32_gemm_ukernel_4x4__aarch32_vfp_ld64);
      }
    }
  }
}
414
// Single run at m=4, n=4, k=2 with cm_stride(7).
TEST(F32_GEMM_4X4__AARCH32_VFP_LD64, strided_cm) {
  GemmMicrokernelTester()
    .mr(4)
    .nr(4)
    .kr(1)
    .sr(1)
    .m(4)
    .n(4)
    .k(2)
    .cm_stride(7)
    .Test(xnn_f32_gemm_ukernel_4x4__aarch32_vfp_ld64);
}
427 #endif // XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY
428
429
430 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run at m=3, n=8, k=1 with no stride overrides.
TEST(F32_GEMM_3X8__WASMSIMD_LOADSPLAT, k_eq_1) {
  GemmMicrokernelTester()
    .mr(3)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(3)
    .n(8)
    .k(1)
    .Test(xnn_f32_gemm_ukernel_3x8__wasmsimd_loadsplat);
}
442
// Single run at m=3, n=8, k=1 with cn_stride(11).
TEST(F32_GEMM_3X8__WASMSIMD_LOADSPLAT, strided_cn) {
  GemmMicrokernelTester()
    .mr(3)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(3)
    .n(8)
    .k(1)
    .cn_stride(11)
    .Test(xnn_f32_gemm_ukernel_3x8__wasmsimd_loadsplat);
}
455
// Single run at m=3, n=8, k=1 with a_stride(3).
TEST(F32_GEMM_3X8__WASMSIMD_LOADSPLAT, k_eq_1_strided_a) {
  GemmMicrokernelTester()
    .mr(3)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(3)
    .n(8)
    .k(1)
    .a_stride(3)
    .Test(xnn_f32_gemm_ukernel_3x8__wasmsimd_loadsplat);
}
468
// Sweeps n in [1, 8] and m in [1, 3] at k=1, one iteration per shape.
TEST(F32_GEMM_3X8__WASMSIMD_LOADSPLAT, k_eq_1_subtile) {
  for (uint32_t n = 1; n <= 8; n++) {
    for (uint32_t m = 1; m <= 3; m++) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(m)
        .n(n)
        .k(1)
        .iterations(1)
        .Test(xnn_f32_gemm_ukernel_3x8__wasmsimd_loadsplat);
    }
  }
}
485
// Sweeps m in [1, 3] at n=8, k=1, one iteration per shape.
TEST(F32_GEMM_3X8__WASMSIMD_LOADSPLAT, k_eq_1_subtile_m) {
  for (uint32_t m = 1; m <= 3; m++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(m)
      .n(8)
      .k(1)
      .iterations(1)
      .Test(xnn_f32_gemm_ukernel_3x8__wasmsimd_loadsplat);
  }
}
500
// Sweeps n in [1, 8] at m=3, k=1, one iteration per shape.
TEST(F32_GEMM_3X8__WASMSIMD_LOADSPLAT, k_eq_1_subtile_n) {
  for (uint32_t n = 1; n <= 8; n++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(3)
      .n(n)
      .k(1)
      .iterations(1)
      .Test(xnn_f32_gemm_ukernel_3x8__wasmsimd_loadsplat);
  }
}
515
// Runs m=3, n=8 for every k in [2, 10).
TEST(F32_GEMM_3X8__WASMSIMD_LOADSPLAT, k_gt_1) {
  for (size_t k = 2; k < 10; k++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(3)
      .n(8)
      .k(k)
      .Test(xnn_f32_gemm_ukernel_3x8__wasmsimd_loadsplat);
  }
}
529
// Runs m=3, n=8 for every k in [2, 10) with a_stride(11).
TEST(F32_GEMM_3X8__WASMSIMD_LOADSPLAT, k_gt_1_strided_a) {
  for (size_t k = 2; k < 10; k++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(3)
      .n(8)
      .k(k)
      .a_stride(11)
      .Test(xnn_f32_gemm_ukernel_3x8__wasmsimd_loadsplat);
  }
}
544
// For every k in [2, 10), sweeps n in [1, 8] and m in [1, 3], one iteration per shape.
TEST(F32_GEMM_3X8__WASMSIMD_LOADSPLAT, k_gt_1_subtile) {
  for (size_t k = 2; k < 10; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_f32_gemm_ukernel_3x8__wasmsimd_loadsplat);
      }
    }
  }
}
563
// Runs m=3 for n in [9, 16) and k = 1, 3, 5.
TEST(F32_GEMM_3X8__WASMSIMD_LOADSPLAT, n_gt_8) {
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .Test(xnn_f32_gemm_ukernel_3x8__wasmsimd_loadsplat);
    }
  }
}
579
// Runs m=3 for n in [9, 16) and k = 1, 3, 5 with cn_stride(11).
TEST(F32_GEMM_3X8__WASMSIMD_LOADSPLAT, n_gt_8_strided_cn) {
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_f32_gemm_ukernel_3x8__wasmsimd_loadsplat);
    }
  }
}
596
// Runs m=3 for n in [9, 16) and k = 1, 3, 5 with a_stride(7).
TEST(F32_GEMM_3X8__WASMSIMD_LOADSPLAT, n_gt_8_strided_a) {
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .a_stride(7)
        .Test(xnn_f32_gemm_ukernel_3x8__wasmsimd_loadsplat);
    }
  }
}
613
// For n in [9, 16) and k = 1, 3, 5, sweeps m in [1, 3], one iteration per shape.
TEST(F32_GEMM_3X8__WASMSIMD_LOADSPLAT, n_gt_8_subtile) {
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_f32_gemm_ukernel_3x8__wasmsimd_loadsplat);
      }
    }
  }
}
632
// Runs m=3 for n = 16, 24 and k = 1, 3, 5.
TEST(F32_GEMM_3X8__WASMSIMD_LOADSPLAT, n_div_8) {
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .Test(xnn_f32_gemm_ukernel_3x8__wasmsimd_loadsplat);
    }
  }
}
648
// Runs m=3 for n = 16, 24 and k = 1, 3, 5 with cn_stride(11).
TEST(F32_GEMM_3X8__WASMSIMD_LOADSPLAT, n_div_8_strided_cn) {
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_f32_gemm_ukernel_3x8__wasmsimd_loadsplat);
    }
  }
}
665
// Runs m=3 for n = 16, 24 and k = 1, 3, 5 with a_stride(7).
TEST(F32_GEMM_3X8__WASMSIMD_LOADSPLAT, n_div_8_strided_a) {
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .a_stride(7)
        .Test(xnn_f32_gemm_ukernel_3x8__wasmsimd_loadsplat);
    }
  }
}
682
// For n = 16, 24 and k = 1, 3, 5, sweeps m in [1, 3], one iteration per shape.
TEST(F32_GEMM_3X8__WASMSIMD_LOADSPLAT, n_div_8_subtile) {
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_f32_gemm_ukernel_3x8__wasmsimd_loadsplat);
      }
    }
  }
}
701
// For k = 1, 3, 5, sweeps n in [1, 8] and m in [1, 3] with cm_stride(11),
// one iteration per shape.
TEST(F32_GEMM_3X8__WASMSIMD_LOADSPLAT, strided_cm_subtile) {
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_gemm_ukernel_3x8__wasmsimd_loadsplat);
      }
    }
  }
}
721
// Single run at m=3, n=8, k=1 with cm_stride(11).
TEST(F32_GEMM_3X8__WASMSIMD_LOADSPLAT, strided_cm) {
  GemmMicrokernelTester()
    .mr(3)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(3)
    .n(8)
    .k(1)
    .cm_stride(11)
    .Test(xnn_f32_gemm_ukernel_3x8__wasmsimd_loadsplat);
}
734 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
735
736
737 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run at m=3, n=8, k=4 with no stride overrides.
TEST(F32_GEMM_3X8__WASMSIMD_SPLAT, k_eq_4) {
  GemmMicrokernelTester()
    .mr(3)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(3)
    .n(8)
    .k(4)
    .Test(xnn_f32_gemm_ukernel_3x8__wasmsimd_splat);
}
749
// Single run at m=3, n=8, k=4 with cn_stride(11).
TEST(F32_GEMM_3X8__WASMSIMD_SPLAT, strided_cn) {
  GemmMicrokernelTester()
    .mr(3)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(3)
    .n(8)
    .k(4)
    .cn_stride(11)
    .Test(xnn_f32_gemm_ukernel_3x8__wasmsimd_splat);
}
762
// Single run at m=3, n=8, k=4 with a_stride(7).
TEST(F32_GEMM_3X8__WASMSIMD_SPLAT, k_eq_4_strided_a) {
  GemmMicrokernelTester()
    .mr(3)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(3)
    .n(8)
    .k(4)
    .a_stride(7)
    .Test(xnn_f32_gemm_ukernel_3x8__wasmsimd_splat);
}
775
// Sweeps n in [1, 8] and m in [1, 3] at k=4, one iteration per shape.
TEST(F32_GEMM_3X8__WASMSIMD_SPLAT, k_eq_4_subtile) {
  for (uint32_t n = 1; n <= 8; n++) {
    for (uint32_t m = 1; m <= 3; m++) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(m)
        .n(n)
        .k(4)
        .iterations(1)
        .Test(xnn_f32_gemm_ukernel_3x8__wasmsimd_splat);
    }
  }
}
792
// Sweeps m in [1, 3] at n=8, k=4, one iteration per shape.
TEST(F32_GEMM_3X8__WASMSIMD_SPLAT, k_eq_4_subtile_m) {
  for (uint32_t m = 1; m <= 3; m++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(m)
      .n(8)
      .k(4)
      .iterations(1)
      .Test(xnn_f32_gemm_ukernel_3x8__wasmsimd_splat);
  }
}
807
// Sweeps n in [1, 8] at m=3, k=4, one iteration per shape.
TEST(F32_GEMM_3X8__WASMSIMD_SPLAT, k_eq_4_subtile_n) {
  for (uint32_t n = 1; n <= 8; n++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(3)
      .n(n)
      .k(4)
      .iterations(1)
      .Test(xnn_f32_gemm_ukernel_3x8__wasmsimd_splat);
  }
}
822
// Runs m=3, n=8 for every k in [1, 4).
TEST(F32_GEMM_3X8__WASMSIMD_SPLAT, k_lt_4) {
  for (size_t k = 1; k < 4; k++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(3)
      .n(8)
      .k(k)
      .Test(xnn_f32_gemm_ukernel_3x8__wasmsimd_splat);
  }
}
836
// Runs m=3, n=8 for every k in [1, 4) with a_stride(7).
TEST(F32_GEMM_3X8__WASMSIMD_SPLAT, k_lt_4_strided_a) {
  for (size_t k = 1; k < 4; k++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(3)
      .n(8)
      .k(k)
      .a_stride(7)
      .Test(xnn_f32_gemm_ukernel_3x8__wasmsimd_splat);
  }
}
851
// For every k in [1, 4), sweeps n in [1, 8] and m in [1, 3], one iteration per shape.
TEST(F32_GEMM_3X8__WASMSIMD_SPLAT, k_lt_4_subtile) {
  for (size_t k = 1; k < 4; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_f32_gemm_ukernel_3x8__wasmsimd_splat);
      }
    }
  }
}
870
// Runs m=3, n=8 for every k in [5, 8).
TEST(F32_GEMM_3X8__WASMSIMD_SPLAT, k_gt_4) {
  for (size_t k = 5; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(3)
      .n(8)
      .k(k)
      .Test(xnn_f32_gemm_ukernel_3x8__wasmsimd_splat);
  }
}
884
// Runs m=3, n=8 for every k in [5, 8) with a_stride(11).
TEST(F32_GEMM_3X8__WASMSIMD_SPLAT, k_gt_4_strided_a) {
  for (size_t k = 5; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(3)
      .n(8)
      .k(k)
      .a_stride(11)
      .Test(xnn_f32_gemm_ukernel_3x8__wasmsimd_splat);
  }
}
899
// For every k in [5, 8), sweeps n in [1, 8] and m in [1, 3], one iteration per shape.
TEST(F32_GEMM_3X8__WASMSIMD_SPLAT, k_gt_4_subtile) {
  for (size_t k = 5; k < 8; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_f32_gemm_ukernel_3x8__wasmsimd_splat);
      }
    }
  }
}
918
// Runs m=3, n=8 for k = 8, 12, ..., 40.
TEST(F32_GEMM_3X8__WASMSIMD_SPLAT, k_div_4) {
  for (size_t k = 8; k <= 40; k += 4) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(3)
      .n(8)
      .k(k)
      .Test(xnn_f32_gemm_ukernel_3x8__wasmsimd_splat);
  }
}
932
// Runs m=3, n=8 for k = 8, 12, ..., 40 with a_stride(43).
TEST(F32_GEMM_3X8__WASMSIMD_SPLAT, k_div_4_strided_a) {
  for (size_t k = 8; k <= 40; k += 4) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(3)
      .n(8)
      .k(k)
      .a_stride(43)
      .Test(xnn_f32_gemm_ukernel_3x8__wasmsimd_splat);
  }
}
947
// For k = 8, 12, ..., 40, sweeps n in [1, 8] and m in [1, 3], one iteration per shape.
TEST(F32_GEMM_3X8__WASMSIMD_SPLAT, k_div_4_subtile) {
  for (size_t k = 8; k <= 40; k += 4) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_f32_gemm_ukernel_3x8__wasmsimd_splat);
      }
    }
  }
}
966
// Runs m=3 for n in [9, 16) and k = 1, 6, 11, 16.
TEST(F32_GEMM_3X8__WASMSIMD_SPLAT, n_gt_8) {
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .Test(xnn_f32_gemm_ukernel_3x8__wasmsimd_splat);
    }
  }
}
982
// Runs m=3 for n in [9, 16) and k = 1, 6, 11, 16 with cn_stride(11).
TEST(F32_GEMM_3X8__WASMSIMD_SPLAT, n_gt_8_strided_cn) {
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_f32_gemm_ukernel_3x8__wasmsimd_splat);
    }
  }
}
999
// Runs m=3 for n in [9, 16) and k = 1, 6, 11, 16 with a_stride(23).
TEST(F32_GEMM_3X8__WASMSIMD_SPLAT, n_gt_8_strided_a) {
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .a_stride(23)
        .Test(xnn_f32_gemm_ukernel_3x8__wasmsimd_splat);
    }
  }
}
1016
// For n in [9, 16) and k = 1, 6, 11, 16, sweeps m in [1, 3], one iteration per shape.
TEST(F32_GEMM_3X8__WASMSIMD_SPLAT, n_gt_8_subtile) {
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_f32_gemm_ukernel_3x8__wasmsimd_splat);
      }
    }
  }
}
1035
// Runs m=3 for n = 16, 24 and k = 1, 6, 11, 16.
TEST(F32_GEMM_3X8__WASMSIMD_SPLAT, n_div_8) {
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .Test(xnn_f32_gemm_ukernel_3x8__wasmsimd_splat);
    }
  }
}
1051
// Runs m=3 for n = 16, 24 and k = 1, 6, 11, 16 with cn_stride(11).
TEST(F32_GEMM_3X8__WASMSIMD_SPLAT, n_div_8_strided_cn) {
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_f32_gemm_ukernel_3x8__wasmsimd_splat);
    }
  }
}
1068
// Runs m=3 for n = 16, 24 and k = 1, 6, 11, 16 with a_stride(23).
TEST(F32_GEMM_3X8__WASMSIMD_SPLAT, n_div_8_strided_a) {
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .a_stride(23)
        .Test(xnn_f32_gemm_ukernel_3x8__wasmsimd_splat);
    }
  }
}
1085
// For n = 16, 24 and k = 1, 6, 11, 16, sweeps m in [1, 3], one iteration per shape.
TEST(F32_GEMM_3X8__WASMSIMD_SPLAT, n_div_8_subtile) {
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_f32_gemm_ukernel_3x8__wasmsimd_splat);
      }
    }
  }
}
1104
// For k = 1, 6, 11, 16, sweeps n in [1, 8] and m in [1, 3] with cm_stride(11),
// one iteration per shape.
TEST(F32_GEMM_3X8__WASMSIMD_SPLAT, strided_cm_subtile) {
  for (size_t k = 1; k <= 20; k += 5) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_gemm_ukernel_3x8__wasmsimd_splat);
      }
    }
  }
}
1124
// Single run at m=3, n=8, k=4 with cm_stride(11).
TEST(F32_GEMM_3X8__WASMSIMD_SPLAT, strided_cm) {
  GemmMicrokernelTester()
    .mr(3)
    .nr(8)
    .kr(1)
    .sr(1)
    .m(3)
    .n(8)
    .k(4)
    .cm_stride(11)
    .Test(xnn_f32_gemm_ukernel_3x8__wasmsimd_splat);
}
1137 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
1138
1139
1140 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run at m=3, n=8, k=4 (sr=4) with no stride overrides.
TEST(F32_GEMM_3X8S4__WASMSIMD, k_eq_4) {
  GemmMicrokernelTester()
    .mr(3)
    .nr(8)
    .kr(1)
    .sr(4)
    .m(3)
    .n(8)
    .k(4)
    .Test(xnn_f32_gemm_ukernel_3x8s4__wasmsimd);
}
1152
// Single run at m=3, n=8, k=4 (sr=4) with cn_stride(11).
TEST(F32_GEMM_3X8S4__WASMSIMD, strided_cn) {
  GemmMicrokernelTester()
    .mr(3)
    .nr(8)
    .kr(1)
    .sr(4)
    .m(3)
    .n(8)
    .k(4)
    .cn_stride(11)
    .Test(xnn_f32_gemm_ukernel_3x8s4__wasmsimd);
}
1165
// Single run at m=3, n=8, k=4 (sr=4) with a_stride(7).
TEST(F32_GEMM_3X8S4__WASMSIMD, k_eq_4_strided_a) {
  GemmMicrokernelTester()
    .mr(3)
    .nr(8)
    .kr(1)
    .sr(4)
    .m(3)
    .n(8)
    .k(4)
    .a_stride(7)
    .Test(xnn_f32_gemm_ukernel_3x8s4__wasmsimd);
}
1178
// Sweeps n in [1, 8] and m in [1, 3] at k=4 (sr=4), one iteration per shape.
TEST(F32_GEMM_3X8S4__WASMSIMD, k_eq_4_subtile) {
  for (uint32_t n = 1; n <= 8; n++) {
    for (uint32_t m = 1; m <= 3; m++) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(8)
        .kr(1)
        .sr(4)
        .m(m)
        .n(n)
        .k(4)
        .iterations(1)
        .Test(xnn_f32_gemm_ukernel_3x8s4__wasmsimd);
    }
  }
}
1195
// Sweeps m in [1, 3] at n=8, k=4 (sr=4), one iteration per shape.
TEST(F32_GEMM_3X8S4__WASMSIMD, k_eq_4_subtile_m) {
  for (uint32_t m = 1; m <= 3; m++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(8)
      .kr(1)
      .sr(4)
      .m(m)
      .n(8)
      .k(4)
      .iterations(1)
      .Test(xnn_f32_gemm_ukernel_3x8s4__wasmsimd);
  }
}
1210
// Sweeps n in [1, 8] at m=3, k=4 (sr=4), one iteration per shape.
TEST(F32_GEMM_3X8S4__WASMSIMD, k_eq_4_subtile_n) {
  for (uint32_t n = 1; n <= 8; n++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(8)
      .kr(1)
      .sr(4)
      .m(3)
      .n(n)
      .k(4)
      .iterations(1)
      .Test(xnn_f32_gemm_ukernel_3x8s4__wasmsimd);
  }
}
1225
// Runs m=3, n=8 (sr=4) for every k in [1, 4).
TEST(F32_GEMM_3X8S4__WASMSIMD, k_lt_4) {
  for (size_t k = 1; k < 4; k++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(8)
      .kr(1)
      .sr(4)
      .m(3)
      .n(8)
      .k(k)
      .Test(xnn_f32_gemm_ukernel_3x8s4__wasmsimd);
  }
}
1239
// Runs m=3, n=8 (sr=4) for every k in [1, 4) with a_stride(7).
TEST(F32_GEMM_3X8S4__WASMSIMD, k_lt_4_strided_a) {
  for (size_t k = 1; k < 4; k++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(8)
      .kr(1)
      .sr(4)
      .m(3)
      .n(8)
      .k(k)
      .a_stride(7)
      .Test(xnn_f32_gemm_ukernel_3x8s4__wasmsimd);
  }
}
1254
// For every k in [1, 4), sweeps n in [1, 8] and m in [1, 3] (sr=4),
// one iteration per shape.
TEST(F32_GEMM_3X8S4__WASMSIMD, k_lt_4_subtile) {
  for (size_t k = 1; k < 4; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(8)
          .kr(1)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_f32_gemm_ukernel_3x8s4__wasmsimd);
      }
    }
  }
}
1273
// Runs m=3, n=8 (sr=4) for every k in [5, 8).
TEST(F32_GEMM_3X8S4__WASMSIMD, k_gt_4) {
  for (size_t k = 5; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(8)
      .kr(1)
      .sr(4)
      .m(3)
      .n(8)
      .k(k)
      .Test(xnn_f32_gemm_ukernel_3x8s4__wasmsimd);
  }
}
1287
// Runs m=3, n=8 (sr=4) for every k in [5, 8) with a_stride(11).
TEST(F32_GEMM_3X8S4__WASMSIMD, k_gt_4_strided_a) {
  for (size_t k = 5; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(8)
      .kr(1)
      .sr(4)
      .m(3)
      .n(8)
      .k(k)
      .a_stride(11)
      .Test(xnn_f32_gemm_ukernel_3x8s4__wasmsimd);
  }
}
1302
// For every k in [5, 8), sweeps n in [1, 8] and m in [1, 3] (sr=4),
// one iteration per shape.
TEST(F32_GEMM_3X8S4__WASMSIMD, k_gt_4_subtile) {
  for (size_t k = 5; k < 8; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(8)
          .kr(1)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_f32_gemm_ukernel_3x8s4__wasmsimd);
      }
    }
  }
}
1321
// Runs m=3, n=8 (sr=4) for k = 8, 12, ..., 40.
TEST(F32_GEMM_3X8S4__WASMSIMD, k_div_4) {
  for (size_t k = 8; k <= 40; k += 4) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(8)
      .kr(1)
      .sr(4)
      .m(3)
      .n(8)
      .k(k)
      .Test(xnn_f32_gemm_ukernel_3x8s4__wasmsimd);
  }
}
1335
// Runs m=3, n=8 (sr=4) for k = 8, 12, ..., 40 with a_stride(43).
TEST(F32_GEMM_3X8S4__WASMSIMD, k_div_4_strided_a) {
  for (size_t k = 8; k <= 40; k += 4) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(8)
      .kr(1)
      .sr(4)
      .m(3)
      .n(8)
      .k(k)
      .a_stride(43)
      .Test(xnn_f32_gemm_ukernel_3x8s4__wasmsimd);
  }
}
1350
// For k = 8, 12, ..., 40, sweeps n in [1, 8] and m in [1, 3] (sr=4),
// one iteration per shape.
TEST(F32_GEMM_3X8S4__WASMSIMD, k_div_4_subtile) {
  for (size_t k = 8; k <= 40; k += 4) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(8)
          .kr(1)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_f32_gemm_ukernel_3x8s4__wasmsimd);
      }
    }
  }
}
1369
// Runs m=3 (sr=4) for n in [9, 16) and k = 1, 6, 11, 16.
TEST(F32_GEMM_3X8S4__WASMSIMD, n_gt_8) {
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(8)
        .kr(1)
        .sr(4)
        .m(3)
        .n(n)
        .k(k)
        .Test(xnn_f32_gemm_ukernel_3x8s4__wasmsimd);
    }
  }
}
1385
// Runs m=3 (sr=4) for n in [9, 16) and k = 1, 6, 11, 16 with cn_stride(11).
TEST(F32_GEMM_3X8S4__WASMSIMD, n_gt_8_strided_cn) {
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(8)
        .kr(1)
        .sr(4)
        .m(3)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_f32_gemm_ukernel_3x8s4__wasmsimd);
    }
  }
}
1402
// m = 3; n sweeps 9..15, k takes 1, 6, 11, 16, with a_stride(23).
TEST(F32_GEMM_3X8S4__WASMSIMD, n_gt_8_strided_a) {
  for (uint32_t nc = 9; nc < 16; nc++) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(4)
        .m(3).n(nc).k(kc)
        .a_stride(23)
        .Test(xnn_f32_gemm_ukernel_3x8s4__wasmsimd);
    }
  }
}
1419
// n sweeps 9..15, k takes 1, 6, 11, 16, and m sweeps 1..3; iterations(1) per case.
TEST(F32_GEMM_3X8S4__WASMSIMD, n_gt_8_subtile) {
  for (uint32_t nc = 9; nc < 16; nc++) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      for (uint32_t mc = 1; mc <= 3; mc++) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(4)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_gemm_ukernel_3x8s4__wasmsimd);
      }
    }
  }
}
1438
// m = 3; n takes 16 and 24 (multiples of nr = 8), k takes 1, 6, 11, 16.
TEST(F32_GEMM_3X8S4__WASMSIMD, n_div_8) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(4)
        .m(3).n(nc).k(kc)
        .Test(xnn_f32_gemm_ukernel_3x8s4__wasmsimd);
    }
  }
}
1454
// m = 3; n takes 16 and 24, k takes 1, 6, 11, 16, with cn_stride(11).
TEST(F32_GEMM_3X8S4__WASMSIMD, n_div_8_strided_cn) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(4)
        .m(3).n(nc).k(kc)
        .cn_stride(11)
        .Test(xnn_f32_gemm_ukernel_3x8s4__wasmsimd);
    }
  }
}
1471
// m = 3; n takes 16 and 24, k takes 1, 6, 11, 16, with a_stride(23).
TEST(F32_GEMM_3X8S4__WASMSIMD, n_div_8_strided_a) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(4)
        .m(3).n(nc).k(kc)
        .a_stride(23)
        .Test(xnn_f32_gemm_ukernel_3x8s4__wasmsimd);
    }
  }
}
1488
// n takes 16 and 24, k takes 1, 6, 11, 16, and m sweeps 1..3; iterations(1) per case.
TEST(F32_GEMM_3X8S4__WASMSIMD, n_div_8_subtile) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      for (uint32_t mc = 1; mc <= 3; mc++) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(4)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_gemm_ukernel_3x8s4__wasmsimd);
      }
    }
  }
}
1507
// k takes 1, 6, 11, 16; every n in 1..8 and m in 1..3 with cm_stride(11), iterations(1).
TEST(F32_GEMM_3X8S4__WASMSIMD, strided_cm_subtile) {
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t nc = 1; nc <= 8; nc++) {
      for (uint32_t mc = 1; mc <= 3; mc++) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(4)
          .m(mc).n(nc).k(kc)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_gemm_ukernel_3x8s4__wasmsimd);
      }
    }
  }
}
1527
// m = 3, n = 8, k = 4 with cm_stride(11).
TEST(F32_GEMM_3X8S4__WASMSIMD, strided_cm) {
  GemmMicrokernelTester()
    .mr(3).nr(8).kr(1).sr(4)
    .m(3).n(8).k(4)
    .cm_stride(11)
    .Test(xnn_f32_gemm_ukernel_3x8s4__wasmsimd);
}
1540 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
1541
1542
1543 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// m = mr = 4, n = nr = 2, k = 4.
TEST(F32_GEMM_4X2C4__WASMSIMD, k_eq_4) {
  GemmMicrokernelTester()
    .mr(4).nr(2).kr(4).sr(1)
    .m(4).n(2).k(4)
    .Test(xnn_f32_gemm_ukernel_4x2c4__wasmsimd);
}
1555
// m = 4, n = 2, k = 4 with cn_stride(5).
TEST(F32_GEMM_4X2C4__WASMSIMD, strided_cn) {
  GemmMicrokernelTester()
    .mr(4).nr(2).kr(4).sr(1)
    .m(4).n(2).k(4)
    .cn_stride(5)
    .Test(xnn_f32_gemm_ukernel_4x2c4__wasmsimd);
}
1568
// m = 4, n = 2, k = 4 with a_stride(7).
TEST(F32_GEMM_4X2C4__WASMSIMD, k_eq_4_strided_a) {
  GemmMicrokernelTester()
    .mr(4).nr(2).kr(4).sr(1)
    .m(4).n(2).k(4)
    .a_stride(7)
    .Test(xnn_f32_gemm_ukernel_4x2c4__wasmsimd);
}
1581
// k = 4 for every n in 1..2 and m in 1..4; iterations(1) per case.
TEST(F32_GEMM_4X2C4__WASMSIMD, k_eq_4_subtile) {
  for (uint32_t nc = 1; nc <= 2; nc++) {
    for (uint32_t mc = 1; mc <= 4; mc++) {
      GemmMicrokernelTester()
        .mr(4).nr(2).kr(4).sr(1)
        .m(mc).n(nc).k(4)
        .iterations(1)
        .Test(xnn_f32_gemm_ukernel_4x2c4__wasmsimd);
    }
  }
}
1598
// k = 4, n = 2, m swept over 1..4; iterations(1) per case.
TEST(F32_GEMM_4X2C4__WASMSIMD, k_eq_4_subtile_m) {
  for (uint32_t mc = 1; mc <= 4; mc++) {
    GemmMicrokernelTester()
      .mr(4).nr(2).kr(4).sr(1)
      .m(mc).n(2).k(4)
      .iterations(1)
      .Test(xnn_f32_gemm_ukernel_4x2c4__wasmsimd);
  }
}
1613
// k = 4, m = 4, n swept over 1..2; iterations(1) per case.
TEST(F32_GEMM_4X2C4__WASMSIMD, k_eq_4_subtile_n) {
  for (uint32_t nc = 1; nc <= 2; nc++) {
    GemmMicrokernelTester()
      .mr(4).nr(2).kr(4).sr(1)
      .m(4).n(nc).k(4)
      .iterations(1)
      .Test(xnn_f32_gemm_ukernel_4x2c4__wasmsimd);
  }
}
1628
// m = 4, n = 2; k swept over 1..3.
TEST(F32_GEMM_4X2C4__WASMSIMD, k_lt_4) {
  for (size_t kc = 1; kc < 4; kc++) {
    GemmMicrokernelTester()
      .mr(4).nr(2).kr(4).sr(1)
      .m(4).n(2).k(kc)
      .Test(xnn_f32_gemm_ukernel_4x2c4__wasmsimd);
  }
}
1642
// m = 4, n = 2; k swept over 1..3 with a_stride(7).
TEST(F32_GEMM_4X2C4__WASMSIMD, k_lt_4_strided_a) {
  for (size_t kc = 1; kc < 4; kc++) {
    GemmMicrokernelTester()
      .mr(4).nr(2).kr(4).sr(1)
      .m(4).n(2).k(kc)
      .a_stride(7)
      .Test(xnn_f32_gemm_ukernel_4x2c4__wasmsimd);
  }
}
1657
// For each k in 1..3: every n in 1..2 and m in 1..4, with iterations(1).
TEST(F32_GEMM_4X2C4__WASMSIMD, k_lt_4_subtile) {
  for (size_t kc = 1; kc < 4; kc++) {
    for (uint32_t nc = 1; nc <= 2; nc++) {
      for (uint32_t mc = 1; mc <= 4; mc++) {
        GemmMicrokernelTester()
          .mr(4).nr(2).kr(4).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_gemm_ukernel_4x2c4__wasmsimd);
      }
    }
  }
}
1676
// m = 4, n = 2; k swept over 5..7.
TEST(F32_GEMM_4X2C4__WASMSIMD, k_gt_4) {
  for (size_t kc = 5; kc < 8; kc++) {
    GemmMicrokernelTester()
      .mr(4).nr(2).kr(4).sr(1)
      .m(4).n(2).k(kc)
      .Test(xnn_f32_gemm_ukernel_4x2c4__wasmsimd);
  }
}
1690
// m = 4, n = 2; k swept over 5..7 with a_stride(11).
TEST(F32_GEMM_4X2C4__WASMSIMD, k_gt_4_strided_a) {
  for (size_t kc = 5; kc < 8; kc++) {
    GemmMicrokernelTester()
      .mr(4).nr(2).kr(4).sr(1)
      .m(4).n(2).k(kc)
      .a_stride(11)
      .Test(xnn_f32_gemm_ukernel_4x2c4__wasmsimd);
  }
}
1705
// For each k in 5..7: every n in 1..2 and m in 1..4, with iterations(1).
TEST(F32_GEMM_4X2C4__WASMSIMD, k_gt_4_subtile) {
  for (size_t kc = 5; kc < 8; kc++) {
    for (uint32_t nc = 1; nc <= 2; nc++) {
      for (uint32_t mc = 1; mc <= 4; mc++) {
        GemmMicrokernelTester()
          .mr(4).nr(2).kr(4).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_gemm_ukernel_4x2c4__wasmsimd);
      }
    }
  }
}
1724
// m = 4, n = 2; k sweeps the multiples of 4 from 8 through 40.
TEST(F32_GEMM_4X2C4__WASMSIMD, k_div_4) {
  for (size_t kc = 8; kc <= 40; kc += 4) {
    GemmMicrokernelTester()
      .mr(4).nr(2).kr(4).sr(1)
      .m(4).n(2).k(kc)
      .Test(xnn_f32_gemm_ukernel_4x2c4__wasmsimd);
  }
}
1738
// m = 4, n = 2; k sweeps 8..40 in steps of 4 with a_stride(43).
TEST(F32_GEMM_4X2C4__WASMSIMD, k_div_4_strided_a) {
  for (size_t kc = 8; kc <= 40; kc += 4) {
    GemmMicrokernelTester()
      .mr(4).nr(2).kr(4).sr(1)
      .m(4).n(2).k(kc)
      .a_stride(43)
      .Test(xnn_f32_gemm_ukernel_4x2c4__wasmsimd);
  }
}
1753
// For each k in 8..40 (step 4): every n in 1..2 and m in 1..4, with iterations(1).
TEST(F32_GEMM_4X2C4__WASMSIMD, k_div_4_subtile) {
  for (size_t kc = 8; kc <= 40; kc += 4) {
    for (uint32_t nc = 1; nc <= 2; nc++) {
      for (uint32_t mc = 1; mc <= 4; mc++) {
        GemmMicrokernelTester()
          .mr(4).nr(2).kr(4).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_gemm_ukernel_4x2c4__wasmsimd);
      }
    }
  }
}
1772
// m = 4; the n loop covers only n = 3 (above nr = 2); k takes 1, 6, 11, 16.
TEST(F32_GEMM_4X2C4__WASMSIMD, n_gt_2) {
  for (uint32_t nc = 3; nc < 4; nc++) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(2).kr(4).sr(1)
        .m(4).n(nc).k(kc)
        .Test(xnn_f32_gemm_ukernel_4x2c4__wasmsimd);
    }
  }
}
1788
// m = 4; n = 3 only, k takes 1, 6, 11, 16, with cn_stride(5).
TEST(F32_GEMM_4X2C4__WASMSIMD, n_gt_2_strided_cn) {
  for (uint32_t nc = 3; nc < 4; nc++) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(2).kr(4).sr(1)
        .m(4).n(nc).k(kc)
        .cn_stride(5)
        .Test(xnn_f32_gemm_ukernel_4x2c4__wasmsimd);
    }
  }
}
1805
// m = 4; n = 3 only, k takes 1, 6, 11, 16, with a_stride(23).
TEST(F32_GEMM_4X2C4__WASMSIMD, n_gt_2_strided_a) {
  for (uint32_t nc = 3; nc < 4; nc++) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(2).kr(4).sr(1)
        .m(4).n(nc).k(kc)
        .a_stride(23)
        .Test(xnn_f32_gemm_ukernel_4x2c4__wasmsimd);
    }
  }
}
1822
// n = 3 only, k takes 1, 6, 11, 16, and m sweeps 1..4; iterations(1) per case.
TEST(F32_GEMM_4X2C4__WASMSIMD, n_gt_2_subtile) {
  for (uint32_t nc = 3; nc < 4; nc++) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      for (uint32_t mc = 1; mc <= 4; mc++) {
        GemmMicrokernelTester()
          .mr(4).nr(2).kr(4).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_gemm_ukernel_4x2c4__wasmsimd);
      }
    }
  }
}
1841
// m = 4; n takes 4 and 6 (multiples of nr = 2), k takes 1, 6, 11, 16.
TEST(F32_GEMM_4X2C4__WASMSIMD, n_div_2) {
  for (uint32_t nc = 4; nc <= 6; nc += 2) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(2).kr(4).sr(1)
        .m(4).n(nc).k(kc)
        .Test(xnn_f32_gemm_ukernel_4x2c4__wasmsimd);
    }
  }
}
1857
// m = 4; n takes 4 and 6, k takes 1, 6, 11, 16, with cn_stride(5).
TEST(F32_GEMM_4X2C4__WASMSIMD, n_div_2_strided_cn) {
  for (uint32_t nc = 4; nc <= 6; nc += 2) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(2).kr(4).sr(1)
        .m(4).n(nc).k(kc)
        .cn_stride(5)
        .Test(xnn_f32_gemm_ukernel_4x2c4__wasmsimd);
    }
  }
}
1874
// m = 4; n takes 4 and 6, k takes 1, 6, 11, 16, with a_stride(23).
TEST(F32_GEMM_4X2C4__WASMSIMD, n_div_2_strided_a) {
  for (uint32_t nc = 4; nc <= 6; nc += 2) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(2).kr(4).sr(1)
        .m(4).n(nc).k(kc)
        .a_stride(23)
        .Test(xnn_f32_gemm_ukernel_4x2c4__wasmsimd);
    }
  }
}
1891
// n takes 4 and 6, k takes 1, 6, 11, 16, and m sweeps 1..4; iterations(1) per case.
TEST(F32_GEMM_4X2C4__WASMSIMD, n_div_2_subtile) {
  for (uint32_t nc = 4; nc <= 6; nc += 2) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      for (uint32_t mc = 1; mc <= 4; mc++) {
        GemmMicrokernelTester()
          .mr(4).nr(2).kr(4).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_gemm_ukernel_4x2c4__wasmsimd);
      }
    }
  }
}
1910
// k takes 1, 6, 11, 16; every n in 1..2 and m in 1..4 with cm_stride(5), iterations(1).
TEST(F32_GEMM_4X2C4__WASMSIMD, strided_cm_subtile) {
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t nc = 1; nc <= 2; nc++) {
      for (uint32_t mc = 1; mc <= 4; mc++) {
        GemmMicrokernelTester()
          .mr(4).nr(2).kr(4).sr(1)
          .m(mc).n(nc).k(kc)
          .cm_stride(5)
          .iterations(1)
          .Test(xnn_f32_gemm_ukernel_4x2c4__wasmsimd);
      }
    }
  }
}
1930
// m = 4, n = 2, k = 4 with cm_stride(5).
TEST(F32_GEMM_4X2C4__WASMSIMD, strided_cm) {
  GemmMicrokernelTester()
    .mr(4).nr(2).kr(4).sr(1)
    .m(4).n(2).k(4)
    .cm_stride(5)
    .Test(xnn_f32_gemm_ukernel_4x2c4__wasmsimd);
}
1943 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
1944
1945
1946 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// m = mr = 4, n = nr = 8, k = 1.
TEST(F32_GEMM_4X8__WASMSIMD_LOADSPLAT, k_eq_1) {
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(1)
    .Test(xnn_f32_gemm_ukernel_4x8__wasmsimd_loadsplat);
}
1958
// m = 4, n = 8, k = 1 with cn_stride(11).
TEST(F32_GEMM_4X8__WASMSIMD_LOADSPLAT, strided_cn) {
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(1)
    .cn_stride(11)
    .Test(xnn_f32_gemm_ukernel_4x8__wasmsimd_loadsplat);
}
1971
// m = 4, n = 8, k = 1 with a_stride(3).
TEST(F32_GEMM_4X8__WASMSIMD_LOADSPLAT, k_eq_1_strided_a) {
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(1)
    .a_stride(3)
    .Test(xnn_f32_gemm_ukernel_4x8__wasmsimd_loadsplat);
}
1984
// k = 1 for every n in 1..8 and m in 1..4; iterations(1) per case.
TEST(F32_GEMM_4X8__WASMSIMD_LOADSPLAT, k_eq_1_subtile) {
  for (uint32_t nc = 1; nc <= 8; nc++) {
    for (uint32_t mc = 1; mc <= 4; mc++) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(mc).n(nc).k(1)
        .iterations(1)
        .Test(xnn_f32_gemm_ukernel_4x8__wasmsimd_loadsplat);
    }
  }
}
2001
// k = 1, n = 8, m swept over 1..4; iterations(1) per case.
TEST(F32_GEMM_4X8__WASMSIMD_LOADSPLAT, k_eq_1_subtile_m) {
  for (uint32_t mc = 1; mc <= 4; mc++) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(mc).n(8).k(1)
      .iterations(1)
      .Test(xnn_f32_gemm_ukernel_4x8__wasmsimd_loadsplat);
  }
}
2016
// k = 1, m = 4, n swept over 1..8; iterations(1) per case.
TEST(F32_GEMM_4X8__WASMSIMD_LOADSPLAT, k_eq_1_subtile_n) {
  for (uint32_t nc = 1; nc <= 8; nc++) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(nc).k(1)
      .iterations(1)
      .Test(xnn_f32_gemm_ukernel_4x8__wasmsimd_loadsplat);
  }
}
2031
// m = 4, n = 8; k swept over 2..9.
TEST(F32_GEMM_4X8__WASMSIMD_LOADSPLAT, k_gt_1) {
  for (size_t kc = 2; kc < 10; kc++) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(kc)
      .Test(xnn_f32_gemm_ukernel_4x8__wasmsimd_loadsplat);
  }
}
2045
// m = 4, n = 8; k swept over 2..9 with a_stride(11).
TEST(F32_GEMM_4X8__WASMSIMD_LOADSPLAT, k_gt_1_strided_a) {
  for (size_t kc = 2; kc < 10; kc++) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(kc)
      .a_stride(11)
      .Test(xnn_f32_gemm_ukernel_4x8__wasmsimd_loadsplat);
  }
}
2060
// For each k in 2..9: every n in 1..8 and m in 1..4, with iterations(1).
TEST(F32_GEMM_4X8__WASMSIMD_LOADSPLAT, k_gt_1_subtile) {
  for (size_t kc = 2; kc < 10; kc++) {
    for (uint32_t nc = 1; nc <= 8; nc++) {
      for (uint32_t mc = 1; mc <= 4; mc++) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_gemm_ukernel_4x8__wasmsimd_loadsplat);
      }
    }
  }
}
2079
// m = 4; n sweeps 9..15 (above nr = 8) and k takes 1, 3, 5.
TEST(F32_GEMM_4X8__WASMSIMD_LOADSPLAT, n_gt_8) {
  for (uint32_t nc = 9; nc < 16; nc++) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(nc).k(kc)
        .Test(xnn_f32_gemm_ukernel_4x8__wasmsimd_loadsplat);
    }
  }
}
2095
// m = 4; n sweeps 9..15, k takes 1, 3, 5, with cn_stride(11).
TEST(F32_GEMM_4X8__WASMSIMD_LOADSPLAT, n_gt_8_strided_cn) {
  for (uint32_t nc = 9; nc < 16; nc++) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(nc).k(kc)
        .cn_stride(11)
        .Test(xnn_f32_gemm_ukernel_4x8__wasmsimd_loadsplat);
    }
  }
}
2112
// m = 4; n sweeps 9..15, k takes 1, 3, 5, with a_stride(7).
TEST(F32_GEMM_4X8__WASMSIMD_LOADSPLAT, n_gt_8_strided_a) {
  for (uint32_t nc = 9; nc < 16; nc++) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(nc).k(kc)
        .a_stride(7)
        .Test(xnn_f32_gemm_ukernel_4x8__wasmsimd_loadsplat);
    }
  }
}
2129
// n sweeps 9..15, k takes 1, 3, 5, and m sweeps 1..4; iterations(1) per case.
TEST(F32_GEMM_4X8__WASMSIMD_LOADSPLAT, n_gt_8_subtile) {
  for (uint32_t nc = 9; nc < 16; nc++) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      for (uint32_t mc = 1; mc <= 4; mc++) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_gemm_ukernel_4x8__wasmsimd_loadsplat);
      }
    }
  }
}
2148
// m = 4; n takes 16 and 24 (multiples of nr = 8), k takes 1, 3, 5.
TEST(F32_GEMM_4X8__WASMSIMD_LOADSPLAT, n_div_8) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(nc).k(kc)
        .Test(xnn_f32_gemm_ukernel_4x8__wasmsimd_loadsplat);
    }
  }
}
2164
// m = 4; n takes 16 and 24, k takes 1, 3, 5, with cn_stride(11).
TEST(F32_GEMM_4X8__WASMSIMD_LOADSPLAT, n_div_8_strided_cn) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(nc).k(kc)
        .cn_stride(11)
        .Test(xnn_f32_gemm_ukernel_4x8__wasmsimd_loadsplat);
    }
  }
}
2181
// m = 4; n takes 16 and 24, k takes 1, 3, 5, with a_stride(7).
TEST(F32_GEMM_4X8__WASMSIMD_LOADSPLAT, n_div_8_strided_a) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(nc).k(kc)
        .a_stride(7)
        .Test(xnn_f32_gemm_ukernel_4x8__wasmsimd_loadsplat);
    }
  }
}
2198
// n takes 16 and 24, k takes 1, 3, 5, and m sweeps 1..4; iterations(1) per case.
TEST(F32_GEMM_4X8__WASMSIMD_LOADSPLAT, n_div_8_subtile) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      for (uint32_t mc = 1; mc <= 4; mc++) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_gemm_ukernel_4x8__wasmsimd_loadsplat);
      }
    }
  }
}
2217
// k takes 1, 3, 5; every n in 1..8 and m in 1..4 with cm_stride(11), iterations(1).
TEST(F32_GEMM_4X8__WASMSIMD_LOADSPLAT, strided_cm_subtile) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t nc = 1; nc <= 8; nc++) {
      for (uint32_t mc = 1; mc <= 4; mc++) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(kc)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_gemm_ukernel_4x8__wasmsimd_loadsplat);
      }
    }
  }
}
2237
// m = 4, n = 8, k = 1 with cm_stride(11).
TEST(F32_GEMM_4X8__WASMSIMD_LOADSPLAT, strided_cm) {
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(1)
    .cm_stride(11)
    .Test(xnn_f32_gemm_ukernel_4x8__wasmsimd_loadsplat);
}
2250 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
2251
2252
2253 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// m = mr = 4, n = nr = 8, k = 4.
TEST(F32_GEMM_4X8S4__WASMSIMD, k_eq_4) {
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(4)
    .m(4).n(8).k(4)
    .Test(xnn_f32_gemm_ukernel_4x8s4__wasmsimd);
}
2265
// m = 4, n = 8, k = 4 with cn_stride(11).
TEST(F32_GEMM_4X8S4__WASMSIMD, strided_cn) {
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(4)
    .m(4).n(8).k(4)
    .cn_stride(11)
    .Test(xnn_f32_gemm_ukernel_4x8s4__wasmsimd);
}
2278
// m = 4, n = 8, k = 4 with a_stride(7).
TEST(F32_GEMM_4X8S4__WASMSIMD, k_eq_4_strided_a) {
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(4)
    .m(4).n(8).k(4)
    .a_stride(7)
    .Test(xnn_f32_gemm_ukernel_4x8s4__wasmsimd);
}
2291
// k = 4 for every n in 1..8 and m in 1..4; iterations(1) per case.
TEST(F32_GEMM_4X8S4__WASMSIMD, k_eq_4_subtile) {
  for (uint32_t nc = 1; nc <= 8; nc++) {
    for (uint32_t mc = 1; mc <= 4; mc++) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(4)
        .m(mc).n(nc).k(4)
        .iterations(1)
        .Test(xnn_f32_gemm_ukernel_4x8s4__wasmsimd);
    }
  }
}
2308
// k = 4, n = 8, m swept over 1..4; iterations(1) per case.
TEST(F32_GEMM_4X8S4__WASMSIMD, k_eq_4_subtile_m) {
  for (uint32_t mc = 1; mc <= 4; mc++) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(4)
      .m(mc).n(8).k(4)
      .iterations(1)
      .Test(xnn_f32_gemm_ukernel_4x8s4__wasmsimd);
  }
}
2323
// k = 4, m = 4, n swept over 1..8; iterations(1) per case.
TEST(F32_GEMM_4X8S4__WASMSIMD, k_eq_4_subtile_n) {
  for (uint32_t nc = 1; nc <= 8; nc++) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(4)
      .m(4).n(nc).k(4)
      .iterations(1)
      .Test(xnn_f32_gemm_ukernel_4x8s4__wasmsimd);
  }
}
2338
// m = 4, n = 8; k swept over 1..3.
TEST(F32_GEMM_4X8S4__WASMSIMD, k_lt_4) {
  for (size_t kc = 1; kc < 4; kc++) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(4)
      .m(4).n(8).k(kc)
      .Test(xnn_f32_gemm_ukernel_4x8s4__wasmsimd);
  }
}
2352
// m = 4, n = 8; k swept over 1..3 with a_stride(7).
TEST(F32_GEMM_4X8S4__WASMSIMD, k_lt_4_strided_a) {
  for (size_t kc = 1; kc < 4; kc++) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(4)
      .m(4).n(8).k(kc)
      .a_stride(7)
      .Test(xnn_f32_gemm_ukernel_4x8s4__wasmsimd);
  }
}
2367
// For each k in 1..3: every n in 1..8 and m in 1..4, with iterations(1).
TEST(F32_GEMM_4X8S4__WASMSIMD, k_lt_4_subtile) {
  for (size_t kc = 1; kc < 4; kc++) {
    for (uint32_t nc = 1; nc <= 8; nc++) {
      for (uint32_t mc = 1; mc <= 4; mc++) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(4)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_gemm_ukernel_4x8s4__wasmsimd);
      }
    }
  }
}
2386
// m = 4, n = 8; k swept over 5..7.
TEST(F32_GEMM_4X8S4__WASMSIMD, k_gt_4) {
  for (size_t kc = 5; kc < 8; kc++) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(4)
      .m(4).n(8).k(kc)
      .Test(xnn_f32_gemm_ukernel_4x8s4__wasmsimd);
  }
}
2400
// m = 4, n = 8; k swept over 5..7 with a_stride(11).
TEST(F32_GEMM_4X8S4__WASMSIMD, k_gt_4_strided_a) {
  for (size_t kc = 5; kc < 8; kc++) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(4)
      .m(4).n(8).k(kc)
      .a_stride(11)
      .Test(xnn_f32_gemm_ukernel_4x8s4__wasmsimd);
  }
}
2415
// For each k in 5..7: every n in 1..8 and m in 1..4, with iterations(1).
TEST(F32_GEMM_4X8S4__WASMSIMD, k_gt_4_subtile) {
  for (size_t kc = 5; kc < 8; kc++) {
    for (uint32_t nc = 1; nc <= 8; nc++) {
      for (uint32_t mc = 1; mc <= 4; mc++) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(4)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_gemm_ukernel_4x8s4__wasmsimd);
      }
    }
  }
}
2434
// m = 4, n = 8; k sweeps the multiples of 4 from 8 through 40.
TEST(F32_GEMM_4X8S4__WASMSIMD, k_div_4) {
  for (size_t kc = 8; kc <= 40; kc += 4) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(4)
      .m(4).n(8).k(kc)
      .Test(xnn_f32_gemm_ukernel_4x8s4__wasmsimd);
  }
}
2448
// m = 4, n = 8; k sweeps 8..40 in steps of 4 with a_stride(43).
TEST(F32_GEMM_4X8S4__WASMSIMD, k_div_4_strided_a) {
  for (size_t kc = 8; kc <= 40; kc += 4) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(4)
      .m(4).n(8).k(kc)
      .a_stride(43)
      .Test(xnn_f32_gemm_ukernel_4x8s4__wasmsimd);
  }
}
2463
// For each k in 8..40 (step 4): every n in 1..8 and m in 1..4, with iterations(1).
TEST(F32_GEMM_4X8S4__WASMSIMD, k_div_4_subtile) {
  for (size_t kc = 8; kc <= 40; kc += 4) {
    for (uint32_t nc = 1; nc <= 8; nc++) {
      for (uint32_t mc = 1; mc <= 4; mc++) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(4)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_gemm_ukernel_4x8s4__wasmsimd);
      }
    }
  }
}
2482
// m = 4; n sweeps 9..15 (above nr = 8) and k takes 1, 6, 11, 16.
TEST(F32_GEMM_4X8S4__WASMSIMD, n_gt_8) {
  for (uint32_t nc = 9; nc < 16; nc++) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(4)
        .m(4).n(nc).k(kc)
        .Test(xnn_f32_gemm_ukernel_4x8s4__wasmsimd);
    }
  }
}
2498
// m = 4; n sweeps 9..15, k takes 1, 6, 11, 16, with cn_stride(11).
TEST(F32_GEMM_4X8S4__WASMSIMD, n_gt_8_strided_cn) {
  for (uint32_t nc = 9; nc < 16; nc++) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(4)
        .m(4).n(nc).k(kc)
        .cn_stride(11)
        .Test(xnn_f32_gemm_ukernel_4x8s4__wasmsimd);
    }
  }
}
2515
// m = 4; n sweeps 9..15, k takes 1, 6, 11, 16, with a_stride(23).
TEST(F32_GEMM_4X8S4__WASMSIMD, n_gt_8_strided_a) {
  for (uint32_t nc = 9; nc < 16; nc++) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(4)
        .m(4).n(nc).k(kc)
        .a_stride(23)
        .Test(xnn_f32_gemm_ukernel_4x8s4__wasmsimd);
    }
  }
}
2532
// n sweeps 9..15, k takes 1, 6, 11, 16, and m sweeps 1..4; iterations(1) per case.
TEST(F32_GEMM_4X8S4__WASMSIMD, n_gt_8_subtile) {
  for (uint32_t nc = 9; nc < 16; nc++) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      for (uint32_t mc = 1; mc <= 4; mc++) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(4)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_gemm_ukernel_4x8s4__wasmsimd);
      }
    }
  }
}
2551
// m = 4; n takes 16 and 24 (multiples of nr = 8), k takes 1, 6, 11, 16.
TEST(F32_GEMM_4X8S4__WASMSIMD, n_div_8) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(4)
        .m(4).n(nc).k(kc)
        .Test(xnn_f32_gemm_ukernel_4x8s4__wasmsimd);
    }
  }
}
2567
// m = 4; n takes 16 and 24, k takes 1, 6, 11, 16, with cn_stride(11).
TEST(F32_GEMM_4X8S4__WASMSIMD, n_div_8_strided_cn) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(4)
        .m(4).n(nc).k(kc)
        .cn_stride(11)
        .Test(xnn_f32_gemm_ukernel_4x8s4__wasmsimd);
    }
  }
}
2584
// m = 4; n takes 16 and 24, k takes 1, 6, 11, 16, with a_stride(23).
TEST(F32_GEMM_4X8S4__WASMSIMD, n_div_8_strided_a) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(4)
        .m(4).n(nc).k(kc)
        .a_stride(23)
        .Test(xnn_f32_gemm_ukernel_4x8s4__wasmsimd);
    }
  }
}
2601
// n takes 16 and 24, k takes 1, 6, 11, 16, and m sweeps 1..4; iterations(1) per case.
TEST(F32_GEMM_4X8S4__WASMSIMD, n_div_8_subtile) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 20; kc += 5) {
      for (uint32_t mc = 1; mc <= 4; mc++) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(4)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_f32_gemm_ukernel_4x8s4__wasmsimd);
      }
    }
  }
}
2620
// k takes 1, 6, 11, 16; every n in 1..8 and m in 1..4 with cm_stride(11), iterations(1).
TEST(F32_GEMM_4X8S4__WASMSIMD, strided_cm_subtile) {
  for (size_t kc = 1; kc <= 20; kc += 5) {
    for (uint32_t nc = 1; nc <= 8; nc++) {
      for (uint32_t mc = 1; mc <= 4; mc++) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(4)
          .m(mc).n(nc).k(kc)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_gemm_ukernel_4x8s4__wasmsimd);
      }
    }
  }
}
2640
// m = 4, n = 8, k = 4 with cm_stride(11).
TEST(F32_GEMM_4X8S4__WASMSIMD, strided_cm) {
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(4)
    .m(4).n(8).k(4)
    .cm_stride(11)
    .Test(xnn_f32_gemm_ukernel_4x8s4__wasmsimd);
}
2653 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
2654
2655
2656 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// m = mr = 5, n = nr = 8, k = 1.
TEST(F32_GEMM_5X8__WASMSIMD_LOADSPLAT, k_eq_1) {
  GemmMicrokernelTester()
    .mr(5).nr(8).kr(1).sr(1)
    .m(5).n(8).k(1)
    .Test(xnn_f32_gemm_ukernel_5x8__wasmsimd_loadsplat);
}
2668
// m = 5, n = 8, k = 1 with cn_stride(11).
TEST(F32_GEMM_5X8__WASMSIMD_LOADSPLAT, strided_cn) {
  GemmMicrokernelTester()
    .mr(5).nr(8).kr(1).sr(1)
    .m(5).n(8).k(1)
    .cn_stride(11)
    .Test(xnn_f32_gemm_ukernel_5x8__wasmsimd_loadsplat);
}
2681
// m = 5, n = 8, k = 1 with a_stride(3).
TEST(F32_GEMM_5X8__WASMSIMD_LOADSPLAT, k_eq_1_strided_a) {
  GemmMicrokernelTester()
    .mr(5).nr(8).kr(1).sr(1)
    .m(5).n(8).k(1)
    .a_stride(3)
    .Test(xnn_f32_gemm_ukernel_5x8__wasmsimd_loadsplat);
}
2694
// k = 1 for every n in 1..8 and m in 1..5; iterations(1) per case.
TEST(F32_GEMM_5X8__WASMSIMD_LOADSPLAT, k_eq_1_subtile) {
  for (uint32_t nc = 1; nc <= 8; nc++) {
    for (uint32_t mc = 1; mc <= 5; mc++) {
      GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(mc).n(nc).k(1)
        .iterations(1)
        .Test(xnn_f32_gemm_ukernel_5x8__wasmsimd_loadsplat);
    }
  }
}
2711
// k = 1, n = 8, m swept over 1..5; iterations(1) per case.
TEST(F32_GEMM_5X8__WASMSIMD_LOADSPLAT, k_eq_1_subtile_m) {
  for (uint32_t mc = 1; mc <= 5; mc++) {
    GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(mc).n(8).k(1)
      .iterations(1)
      .Test(xnn_f32_gemm_ukernel_5x8__wasmsimd_loadsplat);
  }
}
2726
// k = 1, m = 5, n swept over 1..8; iterations(1) per case.
TEST(F32_GEMM_5X8__WASMSIMD_LOADSPLAT, k_eq_1_subtile_n) {
  for (uint32_t nc = 1; nc <= 8; nc++) {
    GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(nc).k(1)
      .iterations(1)
      .Test(xnn_f32_gemm_ukernel_5x8__wasmsimd_loadsplat);
  }
}
2741
// m = 5, n = 8; k swept over 2..9.
TEST(F32_GEMM_5X8__WASMSIMD_LOADSPLAT, k_gt_1) {
  for (size_t kc = 2; kc < 10; kc++) {
    GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(kc)
      .Test(xnn_f32_gemm_ukernel_5x8__wasmsimd_loadsplat);
  }
}
2755
// m = 5, n = 8; k swept over 2..9 with a_stride(11).
TEST(F32_GEMM_5X8__WASMSIMD_LOADSPLAT, k_gt_1_strided_a) {
  for (size_t kc = 2; kc < 10; kc++) {
    GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(kc)
      .a_stride(11)
      .Test(xnn_f32_gemm_ukernel_5x8__wasmsimd_loadsplat);
  }
}
2770
// 5x8 wasmsimd loadsplat kernel: k in [2, 9] x n in [1, 8] x m in [1, 5], one
// iteration per shape.
TEST(F32_GEMM_5X8__WASMSIMD_LOADSPLAT, k_gt_1_subtile) {
  for (size_t k_dim = 2; k_dim <= 9; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim < 9; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim < 6; ++m_dim) {
        GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_gemm_ukernel_5x8__wasmsimd_loadsplat);
      }
    }
  }
}
2789
// 5x8 wasmsimd loadsplat kernel with m = 5: n in [9, 15], k in {1, 3, 5}.
TEST(F32_GEMM_5X8__WASMSIMD_LOADSPLAT, n_gt_8) {
  for (uint32_t n_dim = 9; n_dim <= 15; ++n_dim) {
    for (size_t k_dim = 1; k_dim < 6; k_dim += 2) {
      GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(n_dim).k(k_dim)
        .Test(xnn_f32_gemm_ukernel_5x8__wasmsimd_loadsplat);
    }
  }
}
2805
// 5x8 wasmsimd loadsplat kernel with m = 5 and cn_stride(11): n in [9, 15],
// k in {1, 3, 5}.
TEST(F32_GEMM_5X8__WASMSIMD_LOADSPLAT, n_gt_8_strided_cn) {
  for (uint32_t n_dim = 9; n_dim <= 15; ++n_dim) {
    for (size_t k_dim = 1; k_dim < 6; k_dim += 2) {
      GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(n_dim).k(k_dim)
        .cn_stride(11)
        .Test(xnn_f32_gemm_ukernel_5x8__wasmsimd_loadsplat);
    }
  }
}
2822
// 5x8 wasmsimd loadsplat kernel with m = 5 and a_stride(7): n in [9, 15],
// k in {1, 3, 5}.
TEST(F32_GEMM_5X8__WASMSIMD_LOADSPLAT, n_gt_8_strided_a) {
  for (uint32_t n_dim = 9; n_dim <= 15; ++n_dim) {
    for (size_t k_dim = 1; k_dim < 6; k_dim += 2) {
      GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(n_dim).k(k_dim)
        .a_stride(7)
        .Test(xnn_f32_gemm_ukernel_5x8__wasmsimd_loadsplat);
    }
  }
}
2839
// 5x8 wasmsimd loadsplat kernel: n in [9, 15] x k in {1, 3, 5} x m in [1, 5],
// one iteration per shape.
TEST(F32_GEMM_5X8__WASMSIMD_LOADSPLAT, n_gt_8_subtile) {
  for (uint32_t n_dim = 9; n_dim <= 15; ++n_dim) {
    for (size_t k_dim = 1; k_dim < 6; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim < 6; ++m_dim) {
        GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_gemm_ukernel_5x8__wasmsimd_loadsplat);
      }
    }
  }
}
2858
// 5x8 wasmsimd loadsplat kernel with m = 5: n in {16, 24}, k in {1, 3, 5}.
TEST(F32_GEMM_5X8__WASMSIMD_LOADSPLAT, n_div_8) {
  for (uint32_t n_dim = 16; n_dim < 25; n_dim += 8) {
    for (size_t k_dim = 1; k_dim < 6; k_dim += 2) {
      GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(n_dim).k(k_dim)
        .Test(xnn_f32_gemm_ukernel_5x8__wasmsimd_loadsplat);
    }
  }
}
2874
// 5x8 wasmsimd loadsplat kernel with m = 5 and cn_stride(11): n in {16, 24},
// k in {1, 3, 5}.
TEST(F32_GEMM_5X8__WASMSIMD_LOADSPLAT, n_div_8_strided_cn) {
  for (uint32_t n_dim = 16; n_dim < 25; n_dim += 8) {
    for (size_t k_dim = 1; k_dim < 6; k_dim += 2) {
      GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(n_dim).k(k_dim)
        .cn_stride(11)
        .Test(xnn_f32_gemm_ukernel_5x8__wasmsimd_loadsplat);
    }
  }
}
2891
// 5x8 wasmsimd loadsplat kernel with m = 5 and a_stride(7): n in {16, 24},
// k in {1, 3, 5}.
TEST(F32_GEMM_5X8__WASMSIMD_LOADSPLAT, n_div_8_strided_a) {
  for (uint32_t n_dim = 16; n_dim < 25; n_dim += 8) {
    for (size_t k_dim = 1; k_dim < 6; k_dim += 2) {
      GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(n_dim).k(k_dim)
        .a_stride(7)
        .Test(xnn_f32_gemm_ukernel_5x8__wasmsimd_loadsplat);
    }
  }
}
2908
// 5x8 wasmsimd loadsplat kernel: n in {16, 24} x k in {1, 3, 5} x m in [1, 5],
// one iteration per shape.
TEST(F32_GEMM_5X8__WASMSIMD_LOADSPLAT, n_div_8_subtile) {
  for (uint32_t n_dim = 16; n_dim < 25; n_dim += 8) {
    for (size_t k_dim = 1; k_dim < 6; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim < 6; ++m_dim) {
        GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_gemm_ukernel_5x8__wasmsimd_loadsplat);
      }
    }
  }
}
2927
// 5x8 wasmsimd loadsplat kernel with cm_stride(11): k in {1, 3, 5} x
// n in [1, 8] x m in [1, 5], one iteration per shape.
TEST(F32_GEMM_5X8__WASMSIMD_LOADSPLAT, strided_cm_subtile) {
  for (size_t k_dim = 1; k_dim < 6; k_dim += 2) {
    for (uint32_t n_dim = 1; n_dim < 9; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim < 6; ++m_dim) {
        GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_gemm_ukernel_5x8__wasmsimd_loadsplat);
      }
    }
  }
}
2947
// 5x8 wasmsimd loadsplat kernel: single run with m = 5, n = 8, k = 1 and
// cm_stride(11).
TEST(F32_GEMM_5X8__WASMSIMD_LOADSPLAT, strided_cm) {
  GemmMicrokernelTester()
    .mr(5).nr(8).kr(1).sr(1)
    .m(5).n(8).k(1)
    .cm_stride(11)
    .Test(xnn_f32_gemm_ukernel_5x8__wasmsimd_loadsplat);
}
2960 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
2961
2962
2963 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// 5x8s4 wasmsimd kernel: single run with m = 5, n = 8, k = 4.
TEST(F32_GEMM_5X8S4__WASMSIMD, k_eq_4) {
  GemmMicrokernelTester()
    .mr(5).nr(8).kr(1).sr(4)
    .m(5).n(8).k(4)
    .Test(xnn_f32_gemm_ukernel_5x8s4__wasmsimd);
}
2975
// 5x8s4 wasmsimd kernel: single run with m = 5, n = 8, k = 4 and
// cn_stride(11).
TEST(F32_GEMM_5X8S4__WASMSIMD, strided_cn) {
  GemmMicrokernelTester()
    .mr(5).nr(8).kr(1).sr(4)
    .m(5).n(8).k(4)
    .cn_stride(11)
    .Test(xnn_f32_gemm_ukernel_5x8s4__wasmsimd);
}
2988
// 5x8s4 wasmsimd kernel: single run with m = 5, n = 8, k = 4 and a_stride(7).
TEST(F32_GEMM_5X8S4__WASMSIMD, k_eq_4_strided_a) {
  GemmMicrokernelTester()
    .mr(5).nr(8).kr(1).sr(4)
    .m(5).n(8).k(4)
    .a_stride(7)
    .Test(xnn_f32_gemm_ukernel_5x8s4__wasmsimd);
}
3001
// 5x8s4 wasmsimd kernel at k = 4: every n in [1, 8] paired with every
// m in [1, 5], one iteration per shape.
TEST(F32_GEMM_5X8S4__WASMSIMD, k_eq_4_subtile) {
  for (uint32_t n_dim = 1; n_dim < 9; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim < 6; ++m_dim) {
      GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(4)
        .m(m_dim).n(n_dim).k(4)
        .iterations(1)
        .Test(xnn_f32_gemm_ukernel_5x8s4__wasmsimd);
    }
  }
}
3018
// 5x8s4 wasmsimd kernel at k = 4, n = 8: m swept over [1, 5], one iteration
// per shape.
TEST(F32_GEMM_5X8S4__WASMSIMD, k_eq_4_subtile_m) {
  for (uint32_t m_dim = 1; m_dim < 6; ++m_dim) {
    GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(4)
      .m(m_dim).n(8).k(4)
      .iterations(1)
      .Test(xnn_f32_gemm_ukernel_5x8s4__wasmsimd);
  }
}
3033
// 5x8s4 wasmsimd kernel at k = 4, m = 5: n swept over [1, 8], one iteration
// per shape.
TEST(F32_GEMM_5X8S4__WASMSIMD, k_eq_4_subtile_n) {
  for (uint32_t n_dim = 1; n_dim < 9; ++n_dim) {
    GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(4)
      .m(5).n(n_dim).k(4)
      .iterations(1)
      .Test(xnn_f32_gemm_ukernel_5x8s4__wasmsimd);
  }
}
3048
// 5x8s4 wasmsimd kernel with m = 5, n = 8: k swept over [1, 3].
TEST(F32_GEMM_5X8S4__WASMSIMD, k_lt_4) {
  for (size_t k_dim = 1; k_dim <= 3; ++k_dim) {
    GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(4)
      .m(5).n(8).k(k_dim)
      .Test(xnn_f32_gemm_ukernel_5x8s4__wasmsimd);
  }
}
3062
// 5x8s4 wasmsimd kernel with m = 5, n = 8 and a_stride(7): k swept over
// [1, 3].
TEST(F32_GEMM_5X8S4__WASMSIMD, k_lt_4_strided_a) {
  for (size_t k_dim = 1; k_dim <= 3; ++k_dim) {
    GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(4)
      .m(5).n(8).k(k_dim)
      .a_stride(7)
      .Test(xnn_f32_gemm_ukernel_5x8s4__wasmsimd);
  }
}
3077
// 5x8s4 wasmsimd kernel: k in [1, 3] x n in [1, 8] x m in [1, 5], one
// iteration per shape.
TEST(F32_GEMM_5X8S4__WASMSIMD, k_lt_4_subtile) {
  for (size_t k_dim = 1; k_dim <= 3; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim < 9; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim < 6; ++m_dim) {
        GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(4)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_gemm_ukernel_5x8s4__wasmsimd);
      }
    }
  }
}
3096
// 5x8s4 wasmsimd kernel with m = 5, n = 8: k swept over [5, 7].
TEST(F32_GEMM_5X8S4__WASMSIMD, k_gt_4) {
  for (size_t k_dim = 5; k_dim <= 7; ++k_dim) {
    GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(4)
      .m(5).n(8).k(k_dim)
      .Test(xnn_f32_gemm_ukernel_5x8s4__wasmsimd);
  }
}
3110
// 5x8s4 wasmsimd kernel with m = 5, n = 8 and a_stride(11): k swept over
// [5, 7].
TEST(F32_GEMM_5X8S4__WASMSIMD, k_gt_4_strided_a) {
  for (size_t k_dim = 5; k_dim <= 7; ++k_dim) {
    GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(4)
      .m(5).n(8).k(k_dim)
      .a_stride(11)
      .Test(xnn_f32_gemm_ukernel_5x8s4__wasmsimd);
  }
}
3125
// 5x8s4 wasmsimd kernel: k in [5, 7] x n in [1, 8] x m in [1, 5], one
// iteration per shape.
TEST(F32_GEMM_5X8S4__WASMSIMD, k_gt_4_subtile) {
  for (size_t k_dim = 5; k_dim <= 7; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim < 9; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim < 6; ++m_dim) {
        GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(4)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_gemm_ukernel_5x8s4__wasmsimd);
      }
    }
  }
}
3144
// 5x8s4 wasmsimd kernel with m = 5, n = 8: k = 8, 12, ..., 40.
TEST(F32_GEMM_5X8S4__WASMSIMD, k_div_4) {
  for (size_t k_dim = 8; k_dim < 41; k_dim += 4) {
    GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(4)
      .m(5).n(8).k(k_dim)
      .Test(xnn_f32_gemm_ukernel_5x8s4__wasmsimd);
  }
}
3158
// 5x8s4 wasmsimd kernel with m = 5, n = 8 and a_stride(43):
// k = 8, 12, ..., 40.
TEST(F32_GEMM_5X8S4__WASMSIMD, k_div_4_strided_a) {
  for (size_t k_dim = 8; k_dim < 41; k_dim += 4) {
    GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(4)
      .m(5).n(8).k(k_dim)
      .a_stride(43)
      .Test(xnn_f32_gemm_ukernel_5x8s4__wasmsimd);
  }
}
3173
// 5x8s4 wasmsimd kernel: k = 8, 12, ..., 40 x n in [1, 8] x m in [1, 5], one
// iteration per shape.
TEST(F32_GEMM_5X8S4__WASMSIMD, k_div_4_subtile) {
  for (size_t k_dim = 8; k_dim < 41; k_dim += 4) {
    for (uint32_t n_dim = 1; n_dim < 9; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim < 6; ++m_dim) {
        GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(4)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_gemm_ukernel_5x8s4__wasmsimd);
      }
    }
  }
}
3192
// 5x8s4 wasmsimd kernel with m = 5: n in [9, 15], k in {1, 6, 11, 16}.
TEST(F32_GEMM_5X8S4__WASMSIMD, n_gt_8) {
  for (uint32_t n_dim = 9; n_dim <= 15; ++n_dim) {
    for (size_t k_dim = 1; k_dim < 21; k_dim += 5) {
      GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(4)
        .m(5).n(n_dim).k(k_dim)
        .Test(xnn_f32_gemm_ukernel_5x8s4__wasmsimd);
    }
  }
}
3208
// 5x8s4 wasmsimd kernel with m = 5 and cn_stride(11): n in [9, 15],
// k in {1, 6, 11, 16}.
TEST(F32_GEMM_5X8S4__WASMSIMD, n_gt_8_strided_cn) {
  for (uint32_t n_dim = 9; n_dim <= 15; ++n_dim) {
    for (size_t k_dim = 1; k_dim < 21; k_dim += 5) {
      GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(4)
        .m(5).n(n_dim).k(k_dim)
        .cn_stride(11)
        .Test(xnn_f32_gemm_ukernel_5x8s4__wasmsimd);
    }
  }
}
3225
// 5x8s4 wasmsimd kernel with m = 5 and a_stride(23): n in [9, 15],
// k in {1, 6, 11, 16}.
TEST(F32_GEMM_5X8S4__WASMSIMD, n_gt_8_strided_a) {
  for (uint32_t n_dim = 9; n_dim <= 15; ++n_dim) {
    for (size_t k_dim = 1; k_dim < 21; k_dim += 5) {
      GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(4)
        .m(5).n(n_dim).k(k_dim)
        .a_stride(23)
        .Test(xnn_f32_gemm_ukernel_5x8s4__wasmsimd);
    }
  }
}
3242
// 5x8s4 wasmsimd kernel: n in [9, 15] x k in {1, 6, 11, 16} x m in [1, 5],
// one iteration per shape.
TEST(F32_GEMM_5X8S4__WASMSIMD, n_gt_8_subtile) {
  for (uint32_t n_dim = 9; n_dim <= 15; ++n_dim) {
    for (size_t k_dim = 1; k_dim < 21; k_dim += 5) {
      for (uint32_t m_dim = 1; m_dim < 6; ++m_dim) {
        GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(4)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_gemm_ukernel_5x8s4__wasmsimd);
      }
    }
  }
}
3261
// 5x8s4 wasmsimd kernel with m = 5: n in {16, 24}, k in {1, 6, 11, 16}.
TEST(F32_GEMM_5X8S4__WASMSIMD, n_div_8) {
  for (uint32_t n_dim = 16; n_dim < 25; n_dim += 8) {
    for (size_t k_dim = 1; k_dim < 21; k_dim += 5) {
      GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(4)
        .m(5).n(n_dim).k(k_dim)
        .Test(xnn_f32_gemm_ukernel_5x8s4__wasmsimd);
    }
  }
}
3277
// 5x8s4 wasmsimd kernel with m = 5 and cn_stride(11): n in {16, 24},
// k in {1, 6, 11, 16}.
TEST(F32_GEMM_5X8S4__WASMSIMD, n_div_8_strided_cn) {
  for (uint32_t n_dim = 16; n_dim < 25; n_dim += 8) {
    for (size_t k_dim = 1; k_dim < 21; k_dim += 5) {
      GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(4)
        .m(5).n(n_dim).k(k_dim)
        .cn_stride(11)
        .Test(xnn_f32_gemm_ukernel_5x8s4__wasmsimd);
    }
  }
}
3294
// 5x8s4 wasmsimd kernel with m = 5 and a_stride(23): n in {16, 24},
// k in {1, 6, 11, 16}.
TEST(F32_GEMM_5X8S4__WASMSIMD, n_div_8_strided_a) {
  for (uint32_t n_dim = 16; n_dim < 25; n_dim += 8) {
    for (size_t k_dim = 1; k_dim < 21; k_dim += 5) {
      GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(4)
        .m(5).n(n_dim).k(k_dim)
        .a_stride(23)
        .Test(xnn_f32_gemm_ukernel_5x8s4__wasmsimd);
    }
  }
}
3311
// 5x8s4 wasmsimd kernel: n in {16, 24} x k in {1, 6, 11, 16} x m in [1, 5],
// one iteration per shape.
TEST(F32_GEMM_5X8S4__WASMSIMD, n_div_8_subtile) {
  for (uint32_t n_dim = 16; n_dim < 25; n_dim += 8) {
    for (size_t k_dim = 1; k_dim < 21; k_dim += 5) {
      for (uint32_t m_dim = 1; m_dim < 6; ++m_dim) {
        GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(4)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_gemm_ukernel_5x8s4__wasmsimd);
      }
    }
  }
}
3330
// 5x8s4 wasmsimd kernel with cm_stride(11): k in {1, 6, 11, 16} x n in [1, 8]
// x m in [1, 5], one iteration per shape.
TEST(F32_GEMM_5X8S4__WASMSIMD, strided_cm_subtile) {
  for (size_t k_dim = 1; k_dim < 21; k_dim += 5) {
    for (uint32_t n_dim = 1; n_dim < 9; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim < 6; ++m_dim) {
        GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(4)
          .m(m_dim).n(n_dim).k(k_dim)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_gemm_ukernel_5x8s4__wasmsimd);
      }
    }
  }
}
3350
// 5x8s4 wasmsimd kernel: single run with m = 5, n = 8, k = 4 and
// cm_stride(11).
TEST(F32_GEMM_5X8S4__WASMSIMD, strided_cm) {
  GemmMicrokernelTester()
    .mr(5).nr(8).kr(1).sr(4)
    .m(5).n(8).k(4)
    .cm_stride(11)
    .Test(xnn_f32_gemm_ukernel_5x8s4__wasmsimd);
}
3363 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
3364
3365
3366 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// 6x8 wasmsimd splat kernel: single run with m = 6, n = 8, k = 4.
TEST(F32_GEMM_6X8__WASMSIMD_SPLAT, k_eq_4) {
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(4)
    .Test(xnn_f32_gemm_ukernel_6x8__wasmsimd_splat);
}
3378
// 6x8 wasmsimd splat kernel: single run with m = 6, n = 8, k = 4 and
// cn_stride(11).
TEST(F32_GEMM_6X8__WASMSIMD_SPLAT, strided_cn) {
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(4)
    .cn_stride(11)
    .Test(xnn_f32_gemm_ukernel_6x8__wasmsimd_splat);
}
3391
// 6x8 wasmsimd splat kernel: single run with m = 6, n = 8, k = 4 and
// a_stride(7).
TEST(F32_GEMM_6X8__WASMSIMD_SPLAT, k_eq_4_strided_a) {
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(4)
    .a_stride(7)
    .Test(xnn_f32_gemm_ukernel_6x8__wasmsimd_splat);
}
3404
// 6x8 wasmsimd splat kernel at k = 4: every n in [1, 8] paired with every
// m in [1, 6], one iteration per shape.
TEST(F32_GEMM_6X8__WASMSIMD_SPLAT, k_eq_4_subtile) {
  for (uint32_t n_dim = 1; n_dim < 9; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim < 7; ++m_dim) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(m_dim).n(n_dim).k(4)
        .iterations(1)
        .Test(xnn_f32_gemm_ukernel_6x8__wasmsimd_splat);
    }
  }
}
3421
// 6x8 wasmsimd splat kernel at k = 4, n = 8: m swept over [1, 6], one
// iteration per shape.
TEST(F32_GEMM_6X8__WASMSIMD_SPLAT, k_eq_4_subtile_m) {
  for (uint32_t m_dim = 1; m_dim < 7; ++m_dim) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(m_dim).n(8).k(4)
      .iterations(1)
      .Test(xnn_f32_gemm_ukernel_6x8__wasmsimd_splat);
  }
}
3436
// 6x8 wasmsimd splat kernel at k = 4, m = 6: n swept over [1, 8], one
// iteration per shape.
TEST(F32_GEMM_6X8__WASMSIMD_SPLAT, k_eq_4_subtile_n) {
  for (uint32_t n_dim = 1; n_dim < 9; ++n_dim) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(n_dim).k(4)
      .iterations(1)
      .Test(xnn_f32_gemm_ukernel_6x8__wasmsimd_splat);
  }
}
3451
// 6x8 wasmsimd splat kernel with m = 6, n = 8: k swept over [1, 3].
TEST(F32_GEMM_6X8__WASMSIMD_SPLAT, k_lt_4) {
  for (size_t k_dim = 1; k_dim <= 3; ++k_dim) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(k_dim)
      .Test(xnn_f32_gemm_ukernel_6x8__wasmsimd_splat);
  }
}
3465
// 6x8 wasmsimd splat kernel with m = 6, n = 8 and a_stride(7): k swept over
// [1, 3].
TEST(F32_GEMM_6X8__WASMSIMD_SPLAT, k_lt_4_strided_a) {
  for (size_t k_dim = 1; k_dim <= 3; ++k_dim) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(k_dim)
      .a_stride(7)
      .Test(xnn_f32_gemm_ukernel_6x8__wasmsimd_splat);
  }
}
3480
// 6x8 wasmsimd splat kernel: k in [1, 3] x n in [1, 8] x m in [1, 6], one
// iteration per shape.
TEST(F32_GEMM_6X8__WASMSIMD_SPLAT, k_lt_4_subtile) {
  for (size_t k_dim = 1; k_dim <= 3; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim < 9; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim < 7; ++m_dim) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_gemm_ukernel_6x8__wasmsimd_splat);
      }
    }
  }
}
3499
// 6x8 wasmsimd splat kernel with m = 6, n = 8: k swept over [5, 7].
TEST(F32_GEMM_6X8__WASMSIMD_SPLAT, k_gt_4) {
  for (size_t k_dim = 5; k_dim <= 7; ++k_dim) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(k_dim)
      .Test(xnn_f32_gemm_ukernel_6x8__wasmsimd_splat);
  }
}
3513
// 6x8 wasmsimd splat kernel with m = 6, n = 8 and a_stride(11): k swept over
// [5, 7].
TEST(F32_GEMM_6X8__WASMSIMD_SPLAT, k_gt_4_strided_a) {
  for (size_t k_dim = 5; k_dim <= 7; ++k_dim) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(k_dim)
      .a_stride(11)
      .Test(xnn_f32_gemm_ukernel_6x8__wasmsimd_splat);
  }
}
3528
// 6x8 wasmsimd splat kernel: k in [5, 7] x n in [1, 8] x m in [1, 6], one
// iteration per shape.
TEST(F32_GEMM_6X8__WASMSIMD_SPLAT, k_gt_4_subtile) {
  for (size_t k_dim = 5; k_dim <= 7; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim < 9; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim < 7; ++m_dim) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_gemm_ukernel_6x8__wasmsimd_splat);
      }
    }
  }
}
3547
// 6x8 wasmsimd splat kernel with m = 6, n = 8: k = 8, 12, ..., 40.
TEST(F32_GEMM_6X8__WASMSIMD_SPLAT, k_div_4) {
  for (size_t k_dim = 8; k_dim < 41; k_dim += 4) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(k_dim)
      .Test(xnn_f32_gemm_ukernel_6x8__wasmsimd_splat);
  }
}
3561
// 6x8 wasmsimd splat kernel with m = 6, n = 8 and a_stride(43):
// k = 8, 12, ..., 40.
TEST(F32_GEMM_6X8__WASMSIMD_SPLAT, k_div_4_strided_a) {
  for (size_t k_dim = 8; k_dim < 41; k_dim += 4) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(k_dim)
      .a_stride(43)
      .Test(xnn_f32_gemm_ukernel_6x8__wasmsimd_splat);
  }
}
3576
// 6x8 wasmsimd splat kernel: k = 8, 12, ..., 40 x n in [1, 8] x m in [1, 6],
// one iteration per shape.
TEST(F32_GEMM_6X8__WASMSIMD_SPLAT, k_div_4_subtile) {
  for (size_t k_dim = 8; k_dim < 41; k_dim += 4) {
    for (uint32_t n_dim = 1; n_dim < 9; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim < 7; ++m_dim) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_gemm_ukernel_6x8__wasmsimd_splat);
      }
    }
  }
}
3595
// 6x8 wasmsimd splat kernel with m = 6: n in [9, 15], k in {1, 6, 11, 16}.
TEST(F32_GEMM_6X8__WASMSIMD_SPLAT, n_gt_8) {
  for (uint32_t n_dim = 9; n_dim <= 15; ++n_dim) {
    for (size_t k_dim = 1; k_dim < 21; k_dim += 5) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n_dim).k(k_dim)
        .Test(xnn_f32_gemm_ukernel_6x8__wasmsimd_splat);
    }
  }
}
3611
// 6x8 wasmsimd splat kernel with m = 6 and cn_stride(11): n in [9, 15],
// k in {1, 6, 11, 16}.
TEST(F32_GEMM_6X8__WASMSIMD_SPLAT, n_gt_8_strided_cn) {
  for (uint32_t n_dim = 9; n_dim <= 15; ++n_dim) {
    for (size_t k_dim = 1; k_dim < 21; k_dim += 5) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n_dim).k(k_dim)
        .cn_stride(11)
        .Test(xnn_f32_gemm_ukernel_6x8__wasmsimd_splat);
    }
  }
}
3628
// 6x8 wasmsimd splat kernel with m = 6 and a_stride(23): n in [9, 15],
// k in {1, 6, 11, 16}.
TEST(F32_GEMM_6X8__WASMSIMD_SPLAT, n_gt_8_strided_a) {
  for (uint32_t n_dim = 9; n_dim <= 15; ++n_dim) {
    for (size_t k_dim = 1; k_dim < 21; k_dim += 5) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n_dim).k(k_dim)
        .a_stride(23)
        .Test(xnn_f32_gemm_ukernel_6x8__wasmsimd_splat);
    }
  }
}
3645
// 6x8 wasmsimd splat kernel: n in [9, 15] x k in {1, 6, 11, 16} x m in [1, 6],
// one iteration per shape.
TEST(F32_GEMM_6X8__WASMSIMD_SPLAT, n_gt_8_subtile) {
  for (uint32_t n_dim = 9; n_dim <= 15; ++n_dim) {
    for (size_t k_dim = 1; k_dim < 21; k_dim += 5) {
      for (uint32_t m_dim = 1; m_dim < 7; ++m_dim) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_gemm_ukernel_6x8__wasmsimd_splat);
      }
    }
  }
}
3664
// 6x8 wasmsimd splat kernel with m = 6: n in {16, 24}, k in {1, 6, 11, 16}.
TEST(F32_GEMM_6X8__WASMSIMD_SPLAT, n_div_8) {
  for (uint32_t n_dim = 16; n_dim < 25; n_dim += 8) {
    for (size_t k_dim = 1; k_dim < 21; k_dim += 5) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n_dim).k(k_dim)
        .Test(xnn_f32_gemm_ukernel_6x8__wasmsimd_splat);
    }
  }
}
3680
// 6x8 wasmsimd splat kernel with m = 6 and cn_stride(11): n in {16, 24},
// k in {1, 6, 11, 16}.
TEST(F32_GEMM_6X8__WASMSIMD_SPLAT, n_div_8_strided_cn) {
  for (uint32_t n_dim = 16; n_dim < 25; n_dim += 8) {
    for (size_t k_dim = 1; k_dim < 21; k_dim += 5) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n_dim).k(k_dim)
        .cn_stride(11)
        .Test(xnn_f32_gemm_ukernel_6x8__wasmsimd_splat);
    }
  }
}
3697
// 6x8 wasmsimd splat kernel with m = 6 and a_stride(23): n in {16, 24},
// k in {1, 6, 11, 16}.
TEST(F32_GEMM_6X8__WASMSIMD_SPLAT, n_div_8_strided_a) {
  for (uint32_t n_dim = 16; n_dim < 25; n_dim += 8) {
    for (size_t k_dim = 1; k_dim < 21; k_dim += 5) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(n_dim).k(k_dim)
        .a_stride(23)
        .Test(xnn_f32_gemm_ukernel_6x8__wasmsimd_splat);
    }
  }
}
3714
// 6x8 wasmsimd splat kernel: n in {16, 24} x k in {1, 6, 11, 16} x
// m in [1, 6], one iteration per shape.
TEST(F32_GEMM_6X8__WASMSIMD_SPLAT, n_div_8_subtile) {
  for (uint32_t n_dim = 16; n_dim < 25; n_dim += 8) {
    for (size_t k_dim = 1; k_dim < 21; k_dim += 5) {
      for (uint32_t m_dim = 1; m_dim < 7; ++m_dim) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_gemm_ukernel_6x8__wasmsimd_splat);
      }
    }
  }
}
3733
// 6x8 wasmsimd splat kernel with cm_stride(11): k in {1, 6, 11, 16} x
// n in [1, 8] x m in [1, 6], one iteration per shape.
TEST(F32_GEMM_6X8__WASMSIMD_SPLAT, strided_cm_subtile) {
  for (size_t k_dim = 1; k_dim < 21; k_dim += 5) {
    for (uint32_t n_dim = 1; n_dim < 9; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim < 7; ++m_dim) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_gemm_ukernel_6x8__wasmsimd_splat);
      }
    }
  }
}
3753
// 6x8 wasmsimd splat kernel: single run with m = 6, n = 8, k = 4 and
// cm_stride(11).
TEST(F32_GEMM_6X8__WASMSIMD_SPLAT, strided_cm) {
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(4)
    .cm_stride(11)
    .Test(xnn_f32_gemm_ukernel_6x8__wasmsimd_splat);
}
3766 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
3767
3768
3769 #if XNN_ARCH_WASMRELAXEDSIMD
// 1x8 wasmrelaxedsimd FMA loadsplat kernel: single run with m = 1, n = 8,
// k = 1.
TEST(F32_GEMM_1X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_eq_1) {
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(1)
    .Test(xnn_f32_gemm_ukernel_1x8__wasmrelaxedsimd_fma_loadsplat);
}
3781
// 1x8 wasmrelaxedsimd FMA loadsplat kernel: single run with m = 1, n = 8,
// k = 1 and cn_stride(11).
TEST(F32_GEMM_1X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, strided_cn) {
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(1)
    .cn_stride(11)
    .Test(xnn_f32_gemm_ukernel_1x8__wasmrelaxedsimd_fma_loadsplat);
}
3794
// 1x8 wasmrelaxedsimd FMA loadsplat kernel: single run with m = 1, n = 8,
// k = 1 and a_stride(3).
TEST(F32_GEMM_1X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_eq_1_strided_a) {
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(1)
    .a_stride(3)
    .Test(xnn_f32_gemm_ukernel_1x8__wasmrelaxedsimd_fma_loadsplat);
}
3807
// 1x8 wasmrelaxedsimd FMA loadsplat kernel at k = 1: n in [1, 8] with the
// m loop covering only m = 1, one iteration per shape.
TEST(F32_GEMM_1X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_eq_1_subtile) {
  for (uint32_t n_dim = 1; n_dim < 9; ++n_dim) {
    for (uint32_t m_dim = 1; m_dim < 2; ++m_dim) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(m_dim).n(n_dim).k(1)
        .iterations(1)
        .Test(xnn_f32_gemm_ukernel_1x8__wasmrelaxedsimd_fma_loadsplat);
    }
  }
}
3824
// 1x8 wasmrelaxedsimd FMA loadsplat kernel at k = 1, n = 8: the m loop covers
// only m = 1, one iteration per shape.
TEST(F32_GEMM_1X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_eq_1_subtile_m) {
  for (uint32_t m_dim = 1; m_dim < 2; ++m_dim) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(m_dim).n(8).k(1)
      .iterations(1)
      .Test(xnn_f32_gemm_ukernel_1x8__wasmrelaxedsimd_fma_loadsplat);
  }
}
3839
// 1x8 wasmrelaxedsimd FMA loadsplat kernel at k = 1, m = 1: n swept over
// [1, 8], one iteration per shape.
TEST(F32_GEMM_1X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_eq_1_subtile_n) {
  for (uint32_t n_dim = 1; n_dim < 9; ++n_dim) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(n_dim).k(1)
      .iterations(1)
      .Test(xnn_f32_gemm_ukernel_1x8__wasmrelaxedsimd_fma_loadsplat);
  }
}
3854
// 1x8 wasmrelaxedsimd FMA loadsplat kernel with m = 1, n = 8: k swept over
// [2, 9].
TEST(F32_GEMM_1X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_gt_1) {
  for (size_t k_dim = 2; k_dim <= 9; ++k_dim) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(k_dim)
      .Test(xnn_f32_gemm_ukernel_1x8__wasmrelaxedsimd_fma_loadsplat);
  }
}
3868
// 1x8 wasmrelaxedsimd FMA loadsplat kernel with m = 1, n = 8 and
// a_stride(11): k swept over [2, 9].
TEST(F32_GEMM_1X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_gt_1_strided_a) {
  for (size_t k_dim = 2; k_dim <= 9; ++k_dim) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(k_dim)
      .a_stride(11)
      .Test(xnn_f32_gemm_ukernel_1x8__wasmrelaxedsimd_fma_loadsplat);
  }
}
3883
// 1x8 wasmrelaxedsimd FMA loadsplat kernel: k in [2, 9] x n in [1, 8] with the
// m loop covering only m = 1, one iteration per shape.
TEST(F32_GEMM_1X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_gt_1_subtile) {
  for (size_t k_dim = 2; k_dim <= 9; ++k_dim) {
    for (uint32_t n_dim = 1; n_dim < 9; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim < 2; ++m_dim) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_gemm_ukernel_1x8__wasmrelaxedsimd_fma_loadsplat);
      }
    }
  }
}
3902
// 1x8 wasmrelaxedsimd FMA loadsplat kernel with m = 1: n in [9, 15],
// k in {1, 3, 5}.
TEST(F32_GEMM_1X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_gt_8) {
  for (uint32_t n_dim = 9; n_dim <= 15; ++n_dim) {
    for (size_t k_dim = 1; k_dim < 6; k_dim += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(n_dim).k(k_dim)
        .Test(xnn_f32_gemm_ukernel_1x8__wasmrelaxedsimd_fma_loadsplat);
    }
  }
}
3918
// 1x8 wasmrelaxedsimd FMA loadsplat kernel with m = 1 and cn_stride(11):
// n in [9, 15], k in {1, 3, 5}.
TEST(F32_GEMM_1X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_gt_8_strided_cn) {
  for (uint32_t n_dim = 9; n_dim <= 15; ++n_dim) {
    for (size_t k_dim = 1; k_dim < 6; k_dim += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(n_dim).k(k_dim)
        .cn_stride(11)
        .Test(xnn_f32_gemm_ukernel_1x8__wasmrelaxedsimd_fma_loadsplat);
    }
  }
}
3935
// 1x8 wasmrelaxedsimd FMA loadsplat kernel with m = 1 and a_stride(7):
// n in [9, 15], k in {1, 3, 5}.
TEST(F32_GEMM_1X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_gt_8_strided_a) {
  for (uint32_t n_dim = 9; n_dim <= 15; ++n_dim) {
    for (size_t k_dim = 1; k_dim < 6; k_dim += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(n_dim).k(k_dim)
        .a_stride(7)
        .Test(xnn_f32_gemm_ukernel_1x8__wasmrelaxedsimd_fma_loadsplat);
    }
  }
}
3952
// 1x8 wasmrelaxedsimd FMA loadsplat kernel: n in [9, 15] x k in {1, 3, 5} with
// the m loop covering only m = 1, one iteration per shape.
TEST(F32_GEMM_1X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_gt_8_subtile) {
  for (uint32_t n_dim = 9; n_dim <= 15; ++n_dim) {
    for (size_t k_dim = 1; k_dim < 6; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim < 2; ++m_dim) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_gemm_ukernel_1x8__wasmrelaxedsimd_fma_loadsplat);
      }
    }
  }
}
3971
// 1x8 wasmrelaxedsimd FMA loadsplat kernel with m = 1: n in {16, 24},
// k in {1, 3, 5}.
TEST(F32_GEMM_1X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_div_8) {
  for (uint32_t n_dim = 16; n_dim < 25; n_dim += 8) {
    for (size_t k_dim = 1; k_dim < 6; k_dim += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(n_dim).k(k_dim)
        .Test(xnn_f32_gemm_ukernel_1x8__wasmrelaxedsimd_fma_loadsplat);
    }
  }
}
3987
// 1x8 wasmrelaxedsimd FMA loadsplat kernel with m = 1 and cn_stride(11):
// n in {16, 24}, k in {1, 3, 5}.
TEST(F32_GEMM_1X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_div_8_strided_cn) {
  for (uint32_t n_dim = 16; n_dim < 25; n_dim += 8) {
    for (size_t k_dim = 1; k_dim < 6; k_dim += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(n_dim).k(k_dim)
        .cn_stride(11)
        .Test(xnn_f32_gemm_ukernel_1x8__wasmrelaxedsimd_fma_loadsplat);
    }
  }
}
4004
// 1x8 wasmrelaxedsimd FMA loadsplat kernel with m = 1 and a_stride(7):
// n in {16, 24}, k in {1, 3, 5}.
TEST(F32_GEMM_1X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_div_8_strided_a) {
  for (uint32_t n_dim = 16; n_dim < 25; n_dim += 8) {
    for (size_t k_dim = 1; k_dim < 6; k_dim += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(1).sr(1)
        .m(1).n(n_dim).k(k_dim)
        .a_stride(7)
        .Test(xnn_f32_gemm_ukernel_1x8__wasmrelaxedsimd_fma_loadsplat);
    }
  }
}
4021
// 1x8 wasmrelaxedsimd FMA loadsplat kernel: n in {16, 24} x k in {1, 3, 5}
// with the m loop covering only m = 1, one iteration per shape.
TEST(F32_GEMM_1X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_div_8_subtile) {
  for (uint32_t n_dim = 16; n_dim < 25; n_dim += 8) {
    for (size_t k_dim = 1; k_dim < 6; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim < 2; ++m_dim) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_gemm_ukernel_1x8__wasmrelaxedsimd_fma_loadsplat);
      }
    }
  }
}
4040
// 1x8 wasmrelaxedsimd FMA loadsplat kernel with cm_stride(11):
// k in {1, 3, 5} x n in [1, 8] with the m loop covering only m = 1, one
// iteration per shape.
TEST(F32_GEMM_1X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, strided_cm_subtile) {
  for (size_t k_dim = 1; k_dim < 6; k_dim += 2) {
    for (uint32_t n_dim = 1; n_dim < 9; ++n_dim) {
      for (uint32_t m_dim = 1; m_dim < 2; ++m_dim) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_gemm_ukernel_1x8__wasmrelaxedsimd_fma_loadsplat);
      }
    }
  }
}
4060
// 1x8 wasmrelaxedsimd FMA loadsplat kernel: single run with m = 1, n = 8,
// k = 1 and cm_stride(11).
TEST(F32_GEMM_1X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, strided_cm) {
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(1).sr(1)
    .m(1).n(8).k(1)
    .cm_stride(11)
    .Test(xnn_f32_gemm_ukernel_1x8__wasmrelaxedsimd_fma_loadsplat);
}
4073 #endif // XNN_ARCH_WASMRELAXEDSIMD
4074
4075
4076 #if XNN_ARCH_WASMRELAXEDSIMD
// 3x8 wasmrelaxedsimd FMA loadsplat kernel: single run with m = 3, n = 8,
// k = 1.
TEST(F32_GEMM_3X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_eq_1) {
  GemmMicrokernelTester()
    .mr(3).nr(8).kr(1).sr(1)
    .m(3).n(8).k(1)
    .Test(xnn_f32_gemm_ukernel_3x8__wasmrelaxedsimd_fma_loadsplat);
}
4088
// 3x8 wasmrelaxedsimd FMA loadsplat kernel: single run with m = 3, n = 8,
// k = 1 and cn_stride(11).
TEST(F32_GEMM_3X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, strided_cn) {
  GemmMicrokernelTester()
    .mr(3).nr(8).kr(1).sr(1)
    .m(3).n(8).k(1)
    .cn_stride(11)
    .Test(xnn_f32_gemm_ukernel_3x8__wasmrelaxedsimd_fma_loadsplat);
}
4101
// Runs the 3x8 kernel once on a full tile with k == 1 and a_stride set to 3.
TEST(F32_GEMM_3X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_eq_1_strided_a) {
  GemmMicrokernelTester()
    .mr(3).nr(8).kr(1).sr(1)
    .m(3).n(8).k(1)
    .a_stride(3)
    .Test(xnn_f32_gemm_ukernel_3x8__wasmrelaxedsimd_fma_loadsplat);
}
4114
// With k == 1, runs the 3x8 kernel for every n in [1, 8] and m in [1, 3],
// iterations set to 1.
TEST(F32_GEMM_3X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_eq_1_subtile) {
  for (uint32_t nn = 1; nn <= 8; ++nn) {
    for (uint32_t mm = 1; mm <= 3; ++mm) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(mm).n(nn).k(1)
        .iterations(1)
        .Test(xnn_f32_gemm_ukernel_3x8__wasmrelaxedsimd_fma_loadsplat);
    }
  }
}
4131
// With n == 8 and k == 1, runs the 3x8 kernel for every m in [1, 3],
// iterations set to 1.
TEST(F32_GEMM_3X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_eq_1_subtile_m) {
  for (uint32_t mm = 1; mm <= 3; ++mm) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(mm).n(8).k(1)
      .iterations(1)
      .Test(xnn_f32_gemm_ukernel_3x8__wasmrelaxedsimd_fma_loadsplat);
  }
}
4146
// With m == 3 and k == 1, runs the 3x8 kernel for every n in [1, 8],
// iterations set to 1.
TEST(F32_GEMM_3X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_eq_1_subtile_n) {
  for (uint32_t nn = 1; nn <= 8; ++nn) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(3).n(nn).k(1)
      .iterations(1)
      .Test(xnn_f32_gemm_ukernel_3x8__wasmrelaxedsimd_fma_loadsplat);
  }
}
4161
// Runs the 3x8 kernel on a full tile for every k in [2, 9].
TEST(F32_GEMM_3X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_gt_1) {
  for (size_t kk = 2; kk < 10; ++kk) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(kk)
      .Test(xnn_f32_gemm_ukernel_3x8__wasmrelaxedsimd_fma_loadsplat);
  }
}
4175
// Runs the 3x8 kernel on a full tile for every k in [2, 9] with a_stride set
// to 11.
TEST(F32_GEMM_3X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_gt_1_strided_a) {
  for (size_t kk = 2; kk < 10; ++kk) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(1)
      .m(3).n(8).k(kk)
      .a_stride(11)
      .Test(xnn_f32_gemm_ukernel_3x8__wasmrelaxedsimd_fma_loadsplat);
  }
}
4190
// For every k in [2, 9], n in [1, 8] and m in [1, 3], runs the 3x8 kernel
// with iterations set to 1.
TEST(F32_GEMM_3X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_gt_1_subtile) {
  for (size_t kk = 2; kk < 10; ++kk) {
    for (uint32_t nn = 1; nn <= 8; ++nn) {
      for (uint32_t mm = 1; mm <= 3; ++mm) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(mm).n(nn).k(kk)
          .iterations(1)
          .Test(xnn_f32_gemm_ukernel_3x8__wasmrelaxedsimd_fma_loadsplat);
      }
    }
  }
}
4209
// For every n in [9, 15] and k in {1, 3, 5}, runs the 3x8 kernel with m == 3.
TEST(F32_GEMM_3X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_gt_8) {
  for (uint32_t nn = 9; nn < 16; ++nn) {
    for (size_t kk = 1; kk <= 5; kk += 2) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(nn).k(kk)
        .Test(xnn_f32_gemm_ukernel_3x8__wasmrelaxedsimd_fma_loadsplat);
    }
  }
}
4225
// For every n in [9, 15] and k in {1, 3, 5}, runs the 3x8 kernel with m == 3
// and cn_stride set to 11.
TEST(F32_GEMM_3X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_gt_8_strided_cn) {
  for (uint32_t nn = 9; nn < 16; ++nn) {
    for (size_t kk = 1; kk <= 5; kk += 2) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(nn).k(kk)
        .cn_stride(11)
        .Test(xnn_f32_gemm_ukernel_3x8__wasmrelaxedsimd_fma_loadsplat);
    }
  }
}
4242
// For every n in [9, 15] and k in {1, 3, 5}, runs the 3x8 kernel with m == 3
// and a_stride set to 7.
TEST(F32_GEMM_3X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_gt_8_strided_a) {
  for (uint32_t nn = 9; nn < 16; ++nn) {
    for (size_t kk = 1; kk <= 5; kk += 2) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(nn).k(kk)
        .a_stride(7)
        .Test(xnn_f32_gemm_ukernel_3x8__wasmrelaxedsimd_fma_loadsplat);
    }
  }
}
4259
// For every n in [9, 15], k in {1, 3, 5} and m in [1, 3], runs the 3x8 kernel
// with iterations set to 1.
TEST(F32_GEMM_3X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_gt_8_subtile) {
  for (uint32_t nn = 9; nn < 16; ++nn) {
    for (size_t kk = 1; kk <= 5; kk += 2) {
      for (uint32_t mm = 1; mm <= 3; ++mm) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(mm).n(nn).k(kk)
          .iterations(1)
          .Test(xnn_f32_gemm_ukernel_3x8__wasmrelaxedsimd_fma_loadsplat);
      }
    }
  }
}
4278
// For n in {16, 24} and k in {1, 3, 5}, runs the 3x8 kernel with m == 3.
TEST(F32_GEMM_3X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_div_8) {
  for (uint32_t nn = 16; nn <= 24; nn += 8) {
    for (size_t kk = 1; kk <= 5; kk += 2) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(nn).k(kk)
        .Test(xnn_f32_gemm_ukernel_3x8__wasmrelaxedsimd_fma_loadsplat);
    }
  }
}
4294
// For n in {16, 24} and k in {1, 3, 5}, runs the 3x8 kernel with m == 3 and
// cn_stride set to 11.
TEST(F32_GEMM_3X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_div_8_strided_cn) {
  for (uint32_t nn = 16; nn <= 24; nn += 8) {
    for (size_t kk = 1; kk <= 5; kk += 2) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(nn).k(kk)
        .cn_stride(11)
        .Test(xnn_f32_gemm_ukernel_3x8__wasmrelaxedsimd_fma_loadsplat);
    }
  }
}
4311
// For n in {16, 24} and k in {1, 3, 5}, runs the 3x8 kernel with m == 3 and
// a_stride set to 7.
TEST(F32_GEMM_3X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_div_8_strided_a) {
  for (uint32_t nn = 16; nn <= 24; nn += 8) {
    for (size_t kk = 1; kk <= 5; kk += 2) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(1)
        .m(3).n(nn).k(kk)
        .a_stride(7)
        .Test(xnn_f32_gemm_ukernel_3x8__wasmrelaxedsimd_fma_loadsplat);
    }
  }
}
4328
// For n in {16, 24}, k in {1, 3, 5} and m in [1, 3], runs the 3x8 kernel with
// iterations set to 1.
TEST(F32_GEMM_3X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_div_8_subtile) {
  for (uint32_t nn = 16; nn <= 24; nn += 8) {
    for (size_t kk = 1; kk <= 5; kk += 2) {
      for (uint32_t mm = 1; mm <= 3; ++mm) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(mm).n(nn).k(kk)
          .iterations(1)
          .Test(xnn_f32_gemm_ukernel_3x8__wasmrelaxedsimd_fma_loadsplat);
      }
    }
  }
}
4347
// For k in {1, 3, 5}, n in [1, 8] and m in [1, 3], runs the 3x8 kernel with
// cm_stride set to 11 and iterations set to 1.
TEST(F32_GEMM_3X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, strided_cm_subtile) {
  for (size_t kk = 1; kk <= 5; kk += 2) {
    for (uint32_t nn = 1; nn <= 8; ++nn) {
      for (uint32_t mm = 1; mm <= 3; ++mm) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(1)
          .m(mm).n(nn).k(kk)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_gemm_ukernel_3x8__wasmrelaxedsimd_fma_loadsplat);
      }
    }
  }
}
4367
// Runs the 3x8 kernel once on a full tile with k == 1 and cm_stride set to 11.
TEST(F32_GEMM_3X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, strided_cm) {
  GemmMicrokernelTester()
    .mr(3).nr(8).kr(1).sr(1)
    .m(3).n(8).k(1)
    .cm_stride(11)
    .Test(xnn_f32_gemm_ukernel_3x8__wasmrelaxedsimd_fma_loadsplat);
}
4380 #endif // XNN_ARCH_WASMRELAXEDSIMD
4381
4382
4383 #if XNN_ARCH_WASMRELAXEDSIMD
// Runs the 3x8s4 kernel once on a full tile (m == mr, n == nr) with k == 4.
TEST(F32_GEMM_3X8S4__WASMRELAXEDSIMD_FMA, k_eq_4) {
  GemmMicrokernelTester()
    .mr(3).nr(8).kr(1).sr(4)
    .m(3).n(8).k(4)
    .Test(xnn_f32_gemm_ukernel_3x8s4__wasmrelaxedsimd_fma);
}
4395
// Runs the 3x8s4 kernel once on a full tile with k == 4 and cn_stride set
// to 11.
TEST(F32_GEMM_3X8S4__WASMRELAXEDSIMD_FMA, strided_cn) {
  GemmMicrokernelTester()
    .mr(3).nr(8).kr(1).sr(4)
    .m(3).n(8).k(4)
    .cn_stride(11)
    .Test(xnn_f32_gemm_ukernel_3x8s4__wasmrelaxedsimd_fma);
}
4408
// Runs the 3x8s4 kernel once on a full tile with k == 4 and a_stride set to 7.
TEST(F32_GEMM_3X8S4__WASMRELAXEDSIMD_FMA, k_eq_4_strided_a) {
  GemmMicrokernelTester()
    .mr(3).nr(8).kr(1).sr(4)
    .m(3).n(8).k(4)
    .a_stride(7)
    .Test(xnn_f32_gemm_ukernel_3x8s4__wasmrelaxedsimd_fma);
}
4421
// With k == 4, runs the 3x8s4 kernel for every n in [1, 8] and m in [1, 3],
// iterations set to 1.
TEST(F32_GEMM_3X8S4__WASMRELAXEDSIMD_FMA, k_eq_4_subtile) {
  for (uint32_t nn = 1; nn <= 8; ++nn) {
    for (uint32_t mm = 1; mm <= 3; ++mm) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(4)
        .m(mm).n(nn).k(4)
        .iterations(1)
        .Test(xnn_f32_gemm_ukernel_3x8s4__wasmrelaxedsimd_fma);
    }
  }
}
4438
// With n == 8 and k == 4, runs the 3x8s4 kernel for every m in [1, 3],
// iterations set to 1.
TEST(F32_GEMM_3X8S4__WASMRELAXEDSIMD_FMA, k_eq_4_subtile_m) {
  for (uint32_t mm = 1; mm <= 3; ++mm) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(4)
      .m(mm).n(8).k(4)
      .iterations(1)
      .Test(xnn_f32_gemm_ukernel_3x8s4__wasmrelaxedsimd_fma);
  }
}
4453
// With m == 3 and k == 4, runs the 3x8s4 kernel for every n in [1, 8],
// iterations set to 1.
TEST(F32_GEMM_3X8S4__WASMRELAXEDSIMD_FMA, k_eq_4_subtile_n) {
  for (uint32_t nn = 1; nn <= 8; ++nn) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(4)
      .m(3).n(nn).k(4)
      .iterations(1)
      .Test(xnn_f32_gemm_ukernel_3x8s4__wasmrelaxedsimd_fma);
  }
}
4468
// Runs the 3x8s4 kernel on a full tile for every k in [1, 3].
TEST(F32_GEMM_3X8S4__WASMRELAXEDSIMD_FMA, k_lt_4) {
  for (size_t kk = 1; kk < 4; ++kk) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(4)
      .m(3).n(8).k(kk)
      .Test(xnn_f32_gemm_ukernel_3x8s4__wasmrelaxedsimd_fma);
  }
}
4482
// Runs the 3x8s4 kernel on a full tile for every k in [1, 3] with a_stride
// set to 7.
TEST(F32_GEMM_3X8S4__WASMRELAXEDSIMD_FMA, k_lt_4_strided_a) {
  for (size_t kk = 1; kk < 4; ++kk) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(4)
      .m(3).n(8).k(kk)
      .a_stride(7)
      .Test(xnn_f32_gemm_ukernel_3x8s4__wasmrelaxedsimd_fma);
  }
}
4497
// For every k in [1, 3], n in [1, 8] and m in [1, 3], runs the 3x8s4 kernel
// with iterations set to 1.
TEST(F32_GEMM_3X8S4__WASMRELAXEDSIMD_FMA, k_lt_4_subtile) {
  for (size_t kk = 1; kk < 4; ++kk) {
    for (uint32_t nn = 1; nn <= 8; ++nn) {
      for (uint32_t mm = 1; mm <= 3; ++mm) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(4)
          .m(mm).n(nn).k(kk)
          .iterations(1)
          .Test(xnn_f32_gemm_ukernel_3x8s4__wasmrelaxedsimd_fma);
      }
    }
  }
}
4516
// Runs the 3x8s4 kernel on a full tile for every k in [5, 7].
TEST(F32_GEMM_3X8S4__WASMRELAXEDSIMD_FMA, k_gt_4) {
  for (size_t kk = 5; kk < 8; ++kk) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(4)
      .m(3).n(8).k(kk)
      .Test(xnn_f32_gemm_ukernel_3x8s4__wasmrelaxedsimd_fma);
  }
}
4530
// Runs the 3x8s4 kernel on a full tile for every k in [5, 7] with a_stride
// set to 11.
TEST(F32_GEMM_3X8S4__WASMRELAXEDSIMD_FMA, k_gt_4_strided_a) {
  for (size_t kk = 5; kk < 8; ++kk) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(4)
      .m(3).n(8).k(kk)
      .a_stride(11)
      .Test(xnn_f32_gemm_ukernel_3x8s4__wasmrelaxedsimd_fma);
  }
}
4545
// For every k in [5, 7], n in [1, 8] and m in [1, 3], runs the 3x8s4 kernel
// with iterations set to 1.
TEST(F32_GEMM_3X8S4__WASMRELAXEDSIMD_FMA, k_gt_4_subtile) {
  for (size_t kk = 5; kk < 8; ++kk) {
    for (uint32_t nn = 1; nn <= 8; ++nn) {
      for (uint32_t mm = 1; mm <= 3; ++mm) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(4)
          .m(mm).n(nn).k(kk)
          .iterations(1)
          .Test(xnn_f32_gemm_ukernel_3x8s4__wasmrelaxedsimd_fma);
      }
    }
  }
}
4564
// Runs the 3x8s4 kernel on a full tile for k = 8, 12, ..., 40.
TEST(F32_GEMM_3X8S4__WASMRELAXEDSIMD_FMA, k_div_4) {
  for (size_t kk = 8; kk <= 40; kk += 4) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(4)
      .m(3).n(8).k(kk)
      .Test(xnn_f32_gemm_ukernel_3x8s4__wasmrelaxedsimd_fma);
  }
}
4578
// Runs the 3x8s4 kernel on a full tile for k = 8, 12, ..., 40 with a_stride
// set to 43.
TEST(F32_GEMM_3X8S4__WASMRELAXEDSIMD_FMA, k_div_4_strided_a) {
  for (size_t kk = 8; kk <= 40; kk += 4) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(1).sr(4)
      .m(3).n(8).k(kk)
      .a_stride(43)
      .Test(xnn_f32_gemm_ukernel_3x8s4__wasmrelaxedsimd_fma);
  }
}
4593
// For k = 8, 12, ..., 40, n in [1, 8] and m in [1, 3], runs the 3x8s4 kernel
// with iterations set to 1.
TEST(F32_GEMM_3X8S4__WASMRELAXEDSIMD_FMA, k_div_4_subtile) {
  for (size_t kk = 8; kk <= 40; kk += 4) {
    for (uint32_t nn = 1; nn <= 8; ++nn) {
      for (uint32_t mm = 1; mm <= 3; ++mm) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(4)
          .m(mm).n(nn).k(kk)
          .iterations(1)
          .Test(xnn_f32_gemm_ukernel_3x8s4__wasmrelaxedsimd_fma);
      }
    }
  }
}
4612
// For every n in [9, 15] and k in {1, 6, 11, 16}, runs the 3x8s4 kernel with
// m == 3.
TEST(F32_GEMM_3X8S4__WASMRELAXEDSIMD_FMA, n_gt_8) {
  for (uint32_t nn = 9; nn < 16; ++nn) {
    for (size_t kk = 1; kk <= 20; kk += 5) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(4)
        .m(3).n(nn).k(kk)
        .Test(xnn_f32_gemm_ukernel_3x8s4__wasmrelaxedsimd_fma);
    }
  }
}
4628
// For every n in [9, 15] and k in {1, 6, 11, 16}, runs the 3x8s4 kernel with
// m == 3 and cn_stride set to 11.
TEST(F32_GEMM_3X8S4__WASMRELAXEDSIMD_FMA, n_gt_8_strided_cn) {
  for (uint32_t nn = 9; nn < 16; ++nn) {
    for (size_t kk = 1; kk <= 20; kk += 5) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(4)
        .m(3).n(nn).k(kk)
        .cn_stride(11)
        .Test(xnn_f32_gemm_ukernel_3x8s4__wasmrelaxedsimd_fma);
    }
  }
}
4645
// For every n in [9, 15] and k in {1, 6, 11, 16}, runs the 3x8s4 kernel with
// m == 3 and a_stride set to 23.
TEST(F32_GEMM_3X8S4__WASMRELAXEDSIMD_FMA, n_gt_8_strided_a) {
  for (uint32_t nn = 9; nn < 16; ++nn) {
    for (size_t kk = 1; kk <= 20; kk += 5) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(4)
        .m(3).n(nn).k(kk)
        .a_stride(23)
        .Test(xnn_f32_gemm_ukernel_3x8s4__wasmrelaxedsimd_fma);
    }
  }
}
4662
// For every n in [9, 15], k in {1, 6, 11, 16} and m in [1, 3], runs the 3x8s4
// kernel with iterations set to 1.
TEST(F32_GEMM_3X8S4__WASMRELAXEDSIMD_FMA, n_gt_8_subtile) {
  for (uint32_t nn = 9; nn < 16; ++nn) {
    for (size_t kk = 1; kk <= 20; kk += 5) {
      for (uint32_t mm = 1; mm <= 3; ++mm) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(4)
          .m(mm).n(nn).k(kk)
          .iterations(1)
          .Test(xnn_f32_gemm_ukernel_3x8s4__wasmrelaxedsimd_fma);
      }
    }
  }
}
4681
// For n in {16, 24} and k in {1, 6, 11, 16}, runs the 3x8s4 kernel with
// m == 3.
TEST(F32_GEMM_3X8S4__WASMRELAXEDSIMD_FMA, n_div_8) {
  for (uint32_t nn = 16; nn <= 24; nn += 8) {
    for (size_t kk = 1; kk <= 20; kk += 5) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(4)
        .m(3).n(nn).k(kk)
        .Test(xnn_f32_gemm_ukernel_3x8s4__wasmrelaxedsimd_fma);
    }
  }
}
4697
// For n in {16, 24} and k in {1, 6, 11, 16}, runs the 3x8s4 kernel with
// m == 3 and cn_stride set to 11.
TEST(F32_GEMM_3X8S4__WASMRELAXEDSIMD_FMA, n_div_8_strided_cn) {
  for (uint32_t nn = 16; nn <= 24; nn += 8) {
    for (size_t kk = 1; kk <= 20; kk += 5) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(4)
        .m(3).n(nn).k(kk)
        .cn_stride(11)
        .Test(xnn_f32_gemm_ukernel_3x8s4__wasmrelaxedsimd_fma);
    }
  }
}
4714
// For n in {16, 24} and k in {1, 6, 11, 16}, runs the 3x8s4 kernel with
// m == 3 and a_stride set to 23.
TEST(F32_GEMM_3X8S4__WASMRELAXEDSIMD_FMA, n_div_8_strided_a) {
  for (uint32_t nn = 16; nn <= 24; nn += 8) {
    for (size_t kk = 1; kk <= 20; kk += 5) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(1).sr(4)
        .m(3).n(nn).k(kk)
        .a_stride(23)
        .Test(xnn_f32_gemm_ukernel_3x8s4__wasmrelaxedsimd_fma);
    }
  }
}
4731
// For n in {16, 24}, k in {1, 6, 11, 16} and m in [1, 3], runs the 3x8s4
// kernel with iterations set to 1.
TEST(F32_GEMM_3X8S4__WASMRELAXEDSIMD_FMA, n_div_8_subtile) {
  for (uint32_t nn = 16; nn <= 24; nn += 8) {
    for (size_t kk = 1; kk <= 20; kk += 5) {
      for (uint32_t mm = 1; mm <= 3; ++mm) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(4)
          .m(mm).n(nn).k(kk)
          .iterations(1)
          .Test(xnn_f32_gemm_ukernel_3x8s4__wasmrelaxedsimd_fma);
      }
    }
  }
}
4750
// For k in {1, 6, 11, 16}, n in [1, 8] and m in [1, 3], runs the 3x8s4 kernel
// with cm_stride set to 11 and iterations set to 1.
TEST(F32_GEMM_3X8S4__WASMRELAXEDSIMD_FMA, strided_cm_subtile) {
  for (size_t kk = 1; kk <= 20; kk += 5) {
    for (uint32_t nn = 1; nn <= 8; ++nn) {
      for (uint32_t mm = 1; mm <= 3; ++mm) {
        GemmMicrokernelTester()
          .mr(3).nr(8).kr(1).sr(4)
          .m(mm).n(nn).k(kk)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_gemm_ukernel_3x8s4__wasmrelaxedsimd_fma);
      }
    }
  }
}
4770
// Runs the 3x8s4 kernel once on a full tile with k == 4 and cm_stride set
// to 11.
TEST(F32_GEMM_3X8S4__WASMRELAXEDSIMD_FMA, strided_cm) {
  GemmMicrokernelTester()
    .mr(3).nr(8).kr(1).sr(4)
    .m(3).n(8).k(4)
    .cm_stride(11)
    .Test(xnn_f32_gemm_ukernel_3x8s4__wasmrelaxedsimd_fma);
}
4783 #endif // XNN_ARCH_WASMRELAXEDSIMD
4784
4785
4786 #if XNN_ARCH_WASMRELAXEDSIMD
// Runs the 4x8 kernel once on a full tile (m == mr, n == nr) with k == 1.
TEST(F32_GEMM_4X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_eq_1) {
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(1)
    .Test(xnn_f32_gemm_ukernel_4x8__wasmrelaxedsimd_fma_loadsplat);
}
4798
// Runs the 4x8 kernel once on a full tile with k == 1 and cn_stride set to 11.
TEST(F32_GEMM_4X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, strided_cn) {
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(1)
    .cn_stride(11)
    .Test(xnn_f32_gemm_ukernel_4x8__wasmrelaxedsimd_fma_loadsplat);
}
4811
// Runs the 4x8 kernel once on a full tile with k == 1 and a_stride set to 3.
TEST(F32_GEMM_4X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_eq_1_strided_a) {
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(1)
    .a_stride(3)
    .Test(xnn_f32_gemm_ukernel_4x8__wasmrelaxedsimd_fma_loadsplat);
}
4824
// With k == 1, runs the 4x8 kernel for every n in [1, 8] and m in [1, 4],
// iterations set to 1.
TEST(F32_GEMM_4X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_eq_1_subtile) {
  for (uint32_t nn = 1; nn <= 8; ++nn) {
    for (uint32_t mm = 1; mm <= 4; ++mm) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(mm).n(nn).k(1)
        .iterations(1)
        .Test(xnn_f32_gemm_ukernel_4x8__wasmrelaxedsimd_fma_loadsplat);
    }
  }
}
4841
// With n == 8 and k == 1, runs the 4x8 kernel for every m in [1, 4],
// iterations set to 1.
TEST(F32_GEMM_4X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_eq_1_subtile_m) {
  for (uint32_t mm = 1; mm <= 4; ++mm) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(mm).n(8).k(1)
      .iterations(1)
      .Test(xnn_f32_gemm_ukernel_4x8__wasmrelaxedsimd_fma_loadsplat);
  }
}
4856
// With m == 4 and k == 1, runs the 4x8 kernel for every n in [1, 8],
// iterations set to 1.
TEST(F32_GEMM_4X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_eq_1_subtile_n) {
  for (uint32_t nn = 1; nn <= 8; ++nn) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(nn).k(1)
      .iterations(1)
      .Test(xnn_f32_gemm_ukernel_4x8__wasmrelaxedsimd_fma_loadsplat);
  }
}
4871
// Runs the 4x8 kernel on a full tile for every k in [2, 9].
TEST(F32_GEMM_4X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_gt_1) {
  for (size_t kk = 2; kk < 10; ++kk) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(kk)
      .Test(xnn_f32_gemm_ukernel_4x8__wasmrelaxedsimd_fma_loadsplat);
  }
}
4885
// Runs the 4x8 kernel on a full tile for every k in [2, 9] with a_stride set
// to 11.
TEST(F32_GEMM_4X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_gt_1_strided_a) {
  for (size_t kk = 2; kk < 10; ++kk) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(kk)
      .a_stride(11)
      .Test(xnn_f32_gemm_ukernel_4x8__wasmrelaxedsimd_fma_loadsplat);
  }
}
4900
// For every k in [2, 9], n in [1, 8] and m in [1, 4], runs the 4x8 kernel
// with iterations set to 1.
TEST(F32_GEMM_4X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_gt_1_subtile) {
  for (size_t kk = 2; kk < 10; ++kk) {
    for (uint32_t nn = 1; nn <= 8; ++nn) {
      for (uint32_t mm = 1; mm <= 4; ++mm) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(mm).n(nn).k(kk)
          .iterations(1)
          .Test(xnn_f32_gemm_ukernel_4x8__wasmrelaxedsimd_fma_loadsplat);
      }
    }
  }
}
4919
// For every n in [9, 15] and k in {1, 3, 5}, runs the 4x8 kernel with m == 4.
TEST(F32_GEMM_4X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_gt_8) {
  for (uint32_t nn = 9; nn < 16; ++nn) {
    for (size_t kk = 1; kk <= 5; kk += 2) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(nn).k(kk)
        .Test(xnn_f32_gemm_ukernel_4x8__wasmrelaxedsimd_fma_loadsplat);
    }
  }
}
4935
// For every n in [9, 15] and k in {1, 3, 5}, runs the 4x8 kernel with m == 4
// and cn_stride set to 11.
TEST(F32_GEMM_4X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_gt_8_strided_cn) {
  for (uint32_t nn = 9; nn < 16; ++nn) {
    for (size_t kk = 1; kk <= 5; kk += 2) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(nn).k(kk)
        .cn_stride(11)
        .Test(xnn_f32_gemm_ukernel_4x8__wasmrelaxedsimd_fma_loadsplat);
    }
  }
}
4952
// For every n in [9, 15] and k in {1, 3, 5}, runs the 4x8 kernel with m == 4
// and a_stride set to 7.
TEST(F32_GEMM_4X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_gt_8_strided_a) {
  for (uint32_t nn = 9; nn < 16; ++nn) {
    for (size_t kk = 1; kk <= 5; kk += 2) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(nn).k(kk)
        .a_stride(7)
        .Test(xnn_f32_gemm_ukernel_4x8__wasmrelaxedsimd_fma_loadsplat);
    }
  }
}
4969
// For every n in [9, 15], k in {1, 3, 5} and m in [1, 4], runs the 4x8 kernel
// with iterations set to 1.
TEST(F32_GEMM_4X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_gt_8_subtile) {
  for (uint32_t nn = 9; nn < 16; ++nn) {
    for (size_t kk = 1; kk <= 5; kk += 2) {
      for (uint32_t mm = 1; mm <= 4; ++mm) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(mm).n(nn).k(kk)
          .iterations(1)
          .Test(xnn_f32_gemm_ukernel_4x8__wasmrelaxedsimd_fma_loadsplat);
      }
    }
  }
}
4988
// For n in {16, 24} and k in {1, 3, 5}, runs the 4x8 kernel with m == 4.
TEST(F32_GEMM_4X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_div_8) {
  for (uint32_t nn = 16; nn <= 24; nn += 8) {
    for (size_t kk = 1; kk <= 5; kk += 2) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(nn).k(kk)
        .Test(xnn_f32_gemm_ukernel_4x8__wasmrelaxedsimd_fma_loadsplat);
    }
  }
}
5004
// For n in {16, 24} and k in {1, 3, 5}, runs the 4x8 kernel with m == 4 and
// cn_stride set to 11.
TEST(F32_GEMM_4X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_div_8_strided_cn) {
  for (uint32_t nn = 16; nn <= 24; nn += 8) {
    for (size_t kk = 1; kk <= 5; kk += 2) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(nn).k(kk)
        .cn_stride(11)
        .Test(xnn_f32_gemm_ukernel_4x8__wasmrelaxedsimd_fma_loadsplat);
    }
  }
}
5021
// For n in {16, 24} and k in {1, 3, 5}, runs the 4x8 kernel with m == 4 and
// a_stride set to 7.
TEST(F32_GEMM_4X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_div_8_strided_a) {
  for (uint32_t nn = 16; nn <= 24; nn += 8) {
    for (size_t kk = 1; kk <= 5; kk += 2) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(nn).k(kk)
        .a_stride(7)
        .Test(xnn_f32_gemm_ukernel_4x8__wasmrelaxedsimd_fma_loadsplat);
    }
  }
}
5038
// For n in {16, 24}, k in {1, 3, 5} and m in [1, 4], runs the 4x8 kernel with
// iterations set to 1.
TEST(F32_GEMM_4X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_div_8_subtile) {
  for (uint32_t nn = 16; nn <= 24; nn += 8) {
    for (size_t kk = 1; kk <= 5; kk += 2) {
      for (uint32_t mm = 1; mm <= 4; ++mm) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(mm).n(nn).k(kk)
          .iterations(1)
          .Test(xnn_f32_gemm_ukernel_4x8__wasmrelaxedsimd_fma_loadsplat);
      }
    }
  }
}
5057
// For k in {1, 3, 5}, n in [1, 8] and m in [1, 4], runs the 4x8 kernel with
// cm_stride set to 11 and iterations set to 1.
TEST(F32_GEMM_4X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, strided_cm_subtile) {
  for (size_t kk = 1; kk <= 5; kk += 2) {
    for (uint32_t nn = 1; nn <= 8; ++nn) {
      for (uint32_t mm = 1; mm <= 4; ++mm) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(mm).n(nn).k(kk)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_gemm_ukernel_4x8__wasmrelaxedsimd_fma_loadsplat);
      }
    }
  }
}
5077
// Runs the 4x8 kernel once on a full tile with k == 1 and cm_stride set to 11.
TEST(F32_GEMM_4X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, strided_cm) {
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(1)
    .cm_stride(11)
    .Test(xnn_f32_gemm_ukernel_4x8__wasmrelaxedsimd_fma_loadsplat);
}
5090 #endif // XNN_ARCH_WASMRELAXEDSIMD
5091
5092
5093 #if XNN_ARCH_WASMRELAXEDSIMD
// Runs the 4x8s4 kernel once on a full tile (m == mr, n == nr) with k == 4.
TEST(F32_GEMM_4X8S4__WASMRELAXEDSIMD_FMA, k_eq_4) {
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(4)
    .m(4).n(8).k(4)
    .Test(xnn_f32_gemm_ukernel_4x8s4__wasmrelaxedsimd_fma);
}
5105
// Runs the 4x8s4 kernel once on a full tile with k == 4 and cn_stride set
// to 11.
TEST(F32_GEMM_4X8S4__WASMRELAXEDSIMD_FMA, strided_cn) {
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(4)
    .m(4).n(8).k(4)
    .cn_stride(11)
    .Test(xnn_f32_gemm_ukernel_4x8s4__wasmrelaxedsimd_fma);
}
5118
// Runs the 4x8s4 kernel once on a full tile with k == 4 and a_stride set to 7.
TEST(F32_GEMM_4X8S4__WASMRELAXEDSIMD_FMA, k_eq_4_strided_a) {
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(4)
    .m(4).n(8).k(4)
    .a_stride(7)
    .Test(xnn_f32_gemm_ukernel_4x8s4__wasmrelaxedsimd_fma);
}
5131
// With k == 4, runs the 4x8s4 kernel for every n in [1, 8] and m in [1, 4],
// iterations set to 1.
TEST(F32_GEMM_4X8S4__WASMRELAXEDSIMD_FMA, k_eq_4_subtile) {
  for (uint32_t nn = 1; nn <= 8; ++nn) {
    for (uint32_t mm = 1; mm <= 4; ++mm) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(4)
        .m(mm).n(nn).k(4)
        .iterations(1)
        .Test(xnn_f32_gemm_ukernel_4x8s4__wasmrelaxedsimd_fma);
    }
  }
}
5148
// With n == 8 and k == 4, runs the 4x8s4 kernel for every m in [1, 4],
// iterations set to 1.
TEST(F32_GEMM_4X8S4__WASMRELAXEDSIMD_FMA, k_eq_4_subtile_m) {
  for (uint32_t mm = 1; mm <= 4; ++mm) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(4)
      .m(mm).n(8).k(4)
      .iterations(1)
      .Test(xnn_f32_gemm_ukernel_4x8s4__wasmrelaxedsimd_fma);
  }
}
5163
// With m == 4 and k == 4, runs the 4x8s4 kernel for every n in [1, 8],
// iterations set to 1.
TEST(F32_GEMM_4X8S4__WASMRELAXEDSIMD_FMA, k_eq_4_subtile_n) {
  for (uint32_t nn = 1; nn <= 8; ++nn) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(4)
      .m(4).n(nn).k(4)
      .iterations(1)
      .Test(xnn_f32_gemm_ukernel_4x8s4__wasmrelaxedsimd_fma);
  }
}
5178
// Runs the 4x8s4 kernel on a full tile for every k in [1, 3].
TEST(F32_GEMM_4X8S4__WASMRELAXEDSIMD_FMA, k_lt_4) {
  for (size_t kk = 1; kk < 4; ++kk) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(4)
      .m(4).n(8).k(kk)
      .Test(xnn_f32_gemm_ukernel_4x8s4__wasmrelaxedsimd_fma);
  }
}
5192
// Runs the 4x8s4 kernel on a full tile for every k in [1, 3] with a_stride
// set to 7.
TEST(F32_GEMM_4X8S4__WASMRELAXEDSIMD_FMA, k_lt_4_strided_a) {
  for (size_t kk = 1; kk < 4; ++kk) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(4)
      .m(4).n(8).k(kk)
      .a_stride(7)
      .Test(xnn_f32_gemm_ukernel_4x8s4__wasmrelaxedsimd_fma);
  }
}
5207
// For every k in [1, 3], n in [1, 8] and m in [1, 4], runs the 4x8s4 kernel
// with iterations set to 1.
TEST(F32_GEMM_4X8S4__WASMRELAXEDSIMD_FMA, k_lt_4_subtile) {
  for (size_t kk = 1; kk < 4; ++kk) {
    for (uint32_t nn = 1; nn <= 8; ++nn) {
      for (uint32_t mm = 1; mm <= 4; ++mm) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(4)
          .m(mm).n(nn).k(kk)
          .iterations(1)
          .Test(xnn_f32_gemm_ukernel_4x8s4__wasmrelaxedsimd_fma);
      }
    }
  }
}
5226
// Runs the 4x8s4 kernel on a full tile for every k in [5, 7].
TEST(F32_GEMM_4X8S4__WASMRELAXEDSIMD_FMA, k_gt_4) {
  for (size_t kk = 5; kk < 8; ++kk) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(4)
      .m(4).n(8).k(kk)
      .Test(xnn_f32_gemm_ukernel_4x8s4__wasmrelaxedsimd_fma);
  }
}
5240
// Runs the 4x8s4 kernel on a full tile for every k in [5, 7] with a_stride
// set to 11.
TEST(F32_GEMM_4X8S4__WASMRELAXEDSIMD_FMA, k_gt_4_strided_a) {
  for (size_t kk = 5; kk < 8; ++kk) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(4)
      .m(4).n(8).k(kk)
      .a_stride(11)
      .Test(xnn_f32_gemm_ukernel_4x8s4__wasmrelaxedsimd_fma);
  }
}
5255
// For every k in [5, 7], n in [1, 8] and m in [1, 4], runs the 4x8s4 kernel
// with iterations set to 1.
TEST(F32_GEMM_4X8S4__WASMRELAXEDSIMD_FMA, k_gt_4_subtile) {
  for (size_t kk = 5; kk < 8; ++kk) {
    for (uint32_t nn = 1; nn <= 8; ++nn) {
      for (uint32_t mm = 1; mm <= 4; ++mm) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(4)
          .m(mm).n(nn).k(kk)
          .iterations(1)
          .Test(xnn_f32_gemm_ukernel_4x8s4__wasmrelaxedsimd_fma);
      }
    }
  }
}
5274
// Runs the 4x8s4 kernel on a full tile for k = 8, 12, ..., 40.
TEST(F32_GEMM_4X8S4__WASMRELAXEDSIMD_FMA, k_div_4) {
  for (size_t kk = 8; kk <= 40; kk += 4) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(4)
      .m(4).n(8).k(kk)
      .Test(xnn_f32_gemm_ukernel_4x8s4__wasmrelaxedsimd_fma);
  }
}
5288
// Runs the 4x8s4 kernel on a full tile for k = 8, 12, ..., 40 with a_stride
// set to 43.
TEST(F32_GEMM_4X8S4__WASMRELAXEDSIMD_FMA, k_div_4_strided_a) {
  for (size_t kk = 8; kk <= 40; kk += 4) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(4)
      .m(4).n(8).k(kk)
      .a_stride(43)
      .Test(xnn_f32_gemm_ukernel_4x8s4__wasmrelaxedsimd_fma);
  }
}
5303
// For k = 8, 12, ..., 40, n in [1, 8] and m in [1, 4], runs the 4x8s4 kernel
// with iterations set to 1.
TEST(F32_GEMM_4X8S4__WASMRELAXEDSIMD_FMA, k_div_4_subtile) {
  for (size_t kk = 8; kk <= 40; kk += 4) {
    for (uint32_t nn = 1; nn <= 8; ++nn) {
      for (uint32_t mm = 1; mm <= 4; ++mm) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(4)
          .m(mm).n(nn).k(kk)
          .iterations(1)
          .Test(xnn_f32_gemm_ukernel_4x8s4__wasmrelaxedsimd_fma);
      }
    }
  }
}
5322
// For every n in [9, 15] and k in {1, 6, 11, 16}, runs the 4x8s4 kernel with
// m == 4.
TEST(F32_GEMM_4X8S4__WASMRELAXEDSIMD_FMA, n_gt_8) {
  for (uint32_t nn = 9; nn < 16; ++nn) {
    for (size_t kk = 1; kk <= 20; kk += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(4)
        .m(4).n(nn).k(kk)
        .Test(xnn_f32_gemm_ukernel_4x8s4__wasmrelaxedsimd_fma);
    }
  }
}
5338
// For every n in [9, 15] and k in {1, 6, 11, 16}, runs the 4x8s4 kernel with
// m == 4 and cn_stride set to 11.
TEST(F32_GEMM_4X8S4__WASMRELAXEDSIMD_FMA, n_gt_8_strided_cn) {
  for (uint32_t nn = 9; nn < 16; ++nn) {
    for (size_t kk = 1; kk <= 20; kk += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(4)
        .m(4).n(nn).k(kk)
        .cn_stride(11)
        .Test(xnn_f32_gemm_ukernel_4x8s4__wasmrelaxedsimd_fma);
    }
  }
}
5355
// For every n in [9, 15] and k in {1, 6, 11, 16}, runs the 4x8s4 kernel with
// m == 4 and a_stride set to 23.
TEST(F32_GEMM_4X8S4__WASMRELAXEDSIMD_FMA, n_gt_8_strided_a) {
  for (uint32_t nn = 9; nn < 16; ++nn) {
    for (size_t kk = 1; kk <= 20; kk += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(4)
        .m(4).n(nn).k(kk)
        .a_stride(23)
        .Test(xnn_f32_gemm_ukernel_4x8s4__wasmrelaxedsimd_fma);
    }
  }
}
5372
// n in 9..15 combined with every m in 1..4, for k = 1, 6, 11, 16.
// Each combination is run with iterations(1).
TEST(F32_GEMM_4X8S4__WASMRELAXEDSIMD_FMA, n_gt_8_subtile) {
  for (uint32_t cols = 9; cols < 16; cols++) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      for (uint32_t rows = 1; rows <= 4; rows++) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(4)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_f32_gemm_ukernel_4x8s4__wasmrelaxedsimd_fma);
      }
    }
  }
}
5391
// n = 16 and 24 (multiples of nr = 8) with m = 4, for k = 1, 6, 11, 16.
TEST(F32_GEMM_4X8S4__WASMRELAXEDSIMD_FMA, n_div_8) {
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(4)
        .m(4).n(cols).k(depth)
        .Test(xnn_f32_gemm_ukernel_4x8s4__wasmrelaxedsimd_fma);
    }
  }
}
5407
// n = 16 and 24 with m = 4, for k = 1, 6, 11, 16, with cn_stride(11) set on
// the tester.
TEST(F32_GEMM_4X8S4__WASMRELAXEDSIMD_FMA, n_div_8_strided_cn) {
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(4)
        .m(4).n(cols).k(depth)
        .cn_stride(11)
        .Test(xnn_f32_gemm_ukernel_4x8s4__wasmrelaxedsimd_fma);
    }
  }
}
5424
// n = 16 and 24 with m = 4, for k = 1, 6, 11, 16, with a_stride(23) set on
// the tester.
TEST(F32_GEMM_4X8S4__WASMRELAXEDSIMD_FMA, n_div_8_strided_a) {
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(4)
        .m(4).n(cols).k(depth)
        .a_stride(23)
        .Test(xnn_f32_gemm_ukernel_4x8s4__wasmrelaxedsimd_fma);
    }
  }
}
5441
// n = 16 and 24 combined with every m in 1..4, for k = 1, 6, 11, 16.
// Each combination is run with iterations(1).
TEST(F32_GEMM_4X8S4__WASMRELAXEDSIMD_FMA, n_div_8_subtile) {
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      for (uint32_t rows = 1; rows <= 4; rows++) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(4)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_f32_gemm_ukernel_4x8s4__wasmrelaxedsimd_fma);
      }
    }
  }
}
5460
// Every (m, n) with m in 1..4 and n in 1..8, for k = 1, 6, 11, 16, with
// cm_stride(11) set on the tester. Each combination is run with iterations(1).
TEST(F32_GEMM_4X8S4__WASMRELAXEDSIMD_FMA, strided_cm_subtile) {
  for (size_t depth = 1; depth <= 20; depth += 5) {
    for (uint32_t cols = 1; cols <= 8; cols++) {
      for (uint32_t rows = 1; rows <= 4; rows++) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(4)
          .m(rows).n(cols).k(depth)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_gemm_ukernel_4x8s4__wasmrelaxedsimd_fma);
      }
    }
  }
}
5480
// Single run with m = 4, n = 8, k = 4 and cm_stride(11) set on the tester.
TEST(F32_GEMM_4X8S4__WASMRELAXEDSIMD_FMA, strided_cm) {
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(4)
    .m(4).n(8).k(4)
    .cm_stride(11)
    .Test(xnn_f32_gemm_ukernel_4x8s4__wasmrelaxedsimd_fma);
}
5493 #endif // XNN_ARCH_WASMRELAXEDSIMD
5494
5495
5496 #if XNN_ARCH_WASMRELAXEDSIMD
// Single run with m = mr = 5, n = nr = 8 and k = 1.
TEST(F32_GEMM_5X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_eq_1) {
  GemmMicrokernelTester()
    .mr(5).nr(8).kr(1).sr(1)
    .m(5).n(8).k(1)
    .Test(xnn_f32_gemm_ukernel_5x8__wasmrelaxedsimd_fma_loadsplat);
}
5508
// Single run with m = 5, n = 8, k = 1 and cn_stride(11) set on the tester.
TEST(F32_GEMM_5X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, strided_cn) {
  GemmMicrokernelTester()
    .mr(5).nr(8).kr(1).sr(1)
    .m(5).n(8).k(1)
    .cn_stride(11)
    .Test(xnn_f32_gemm_ukernel_5x8__wasmrelaxedsimd_fma_loadsplat);
}
5521
// Single run with m = 5, n = 8, k = 1 and a_stride(3) set on the tester.
TEST(F32_GEMM_5X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_eq_1_strided_a) {
  GemmMicrokernelTester()
    .mr(5).nr(8).kr(1).sr(1)
    .m(5).n(8).k(1)
    .a_stride(3)
    .Test(xnn_f32_gemm_ukernel_5x8__wasmrelaxedsimd_fma_loadsplat);
}
5534
// Every (m, n) with m in 1..5 and n in 1..8 at k = 1.
// Each combination is run with iterations(1).
TEST(F32_GEMM_5X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_eq_1_subtile) {
  for (uint32_t cols = 1; cols <= 8; cols++) {
    for (uint32_t rows = 1; rows <= 5; rows++) {
      GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(rows).n(cols).k(1)
        .iterations(1)
        .Test(xnn_f32_gemm_ukernel_5x8__wasmrelaxedsimd_fma_loadsplat);
    }
  }
}
5551
// m in 1..5 with n fixed at 8 and k = 1; each run uses iterations(1).
TEST(F32_GEMM_5X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_eq_1_subtile_m) {
  for (uint32_t rows = 1; rows <= 5; rows++) {
    GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(rows).n(8).k(1)
      .iterations(1)
      .Test(xnn_f32_gemm_ukernel_5x8__wasmrelaxedsimd_fma_loadsplat);
  }
}
5566
// n in 1..8 with m fixed at 5 and k = 1; each run uses iterations(1).
TEST(F32_GEMM_5X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_eq_1_subtile_n) {
  for (uint32_t cols = 1; cols <= 8; cols++) {
    GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(cols).k(1)
      .iterations(1)
      .Test(xnn_f32_gemm_ukernel_5x8__wasmrelaxedsimd_fma_loadsplat);
  }
}
5581
// m = 5 and n = 8 with k in 2..9.
TEST(F32_GEMM_5X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_gt_1) {
  for (size_t depth = 2; depth < 10; depth++) {
    GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(depth)
      .Test(xnn_f32_gemm_ukernel_5x8__wasmrelaxedsimd_fma_loadsplat);
  }
}
5595
// m = 5 and n = 8 with k in 2..9 and a_stride(11) set on the tester.
TEST(F32_GEMM_5X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_gt_1_strided_a) {
  for (size_t depth = 2; depth < 10; depth++) {
    GemmMicrokernelTester()
      .mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(depth)
      .a_stride(11)
      .Test(xnn_f32_gemm_ukernel_5x8__wasmrelaxedsimd_fma_loadsplat);
  }
}
5610
// Every (m, n) with m in 1..5 and n in 1..8, for k in 2..9.
// Each combination is run with iterations(1).
TEST(F32_GEMM_5X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_gt_1_subtile) {
  for (size_t depth = 2; depth < 10; depth++) {
    for (uint32_t cols = 1; cols <= 8; cols++) {
      for (uint32_t rows = 1; rows <= 5; rows++) {
        GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_f32_gemm_ukernel_5x8__wasmrelaxedsimd_fma_loadsplat);
      }
    }
  }
}
5629
// n in 9..15 (above nr = 8) with m = 5, for k = 1, 3, 5.
TEST(F32_GEMM_5X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_gt_8) {
  for (uint32_t cols = 9; cols < 16; cols++) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(cols).k(depth)
        .Test(xnn_f32_gemm_ukernel_5x8__wasmrelaxedsimd_fma_loadsplat);
    }
  }
}
5645
// n in 9..15 with m = 5, for k = 1, 3, 5, with cn_stride(11) set on the tester.
TEST(F32_GEMM_5X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_gt_8_strided_cn) {
  for (uint32_t cols = 9; cols < 16; cols++) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(cols).k(depth)
        .cn_stride(11)
        .Test(xnn_f32_gemm_ukernel_5x8__wasmrelaxedsimd_fma_loadsplat);
    }
  }
}
5662
// n in 9..15 with m = 5, for k = 1, 3, 5, with a_stride(7) set on the tester.
TEST(F32_GEMM_5X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_gt_8_strided_a) {
  for (uint32_t cols = 9; cols < 16; cols++) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(cols).k(depth)
        .a_stride(7)
        .Test(xnn_f32_gemm_ukernel_5x8__wasmrelaxedsimd_fma_loadsplat);
    }
  }
}
5679
// n in 9..15 combined with every m in 1..5, for k = 1, 3, 5.
// Each combination is run with iterations(1).
TEST(F32_GEMM_5X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_gt_8_subtile) {
  for (uint32_t cols = 9; cols < 16; cols++) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      for (uint32_t rows = 1; rows <= 5; rows++) {
        GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_f32_gemm_ukernel_5x8__wasmrelaxedsimd_fma_loadsplat);
      }
    }
  }
}
5698
// n = 16 and 24 (multiples of nr = 8) with m = 5, for k = 1, 3, 5.
TEST(F32_GEMM_5X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_div_8) {
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(cols).k(depth)
        .Test(xnn_f32_gemm_ukernel_5x8__wasmrelaxedsimd_fma_loadsplat);
    }
  }
}
5714
// n = 16 and 24 with m = 5, for k = 1, 3, 5, with cn_stride(11) set on the
// tester.
TEST(F32_GEMM_5X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_div_8_strided_cn) {
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(cols).k(depth)
        .cn_stride(11)
        .Test(xnn_f32_gemm_ukernel_5x8__wasmrelaxedsimd_fma_loadsplat);
    }
  }
}
5731
// n = 16 and 24 with m = 5, for k = 1, 3, 5, with a_stride(7) set on the
// tester.
TEST(F32_GEMM_5X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_div_8_strided_a) {
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
        .mr(5).nr(8).kr(1).sr(1)
        .m(5).n(cols).k(depth)
        .a_stride(7)
        .Test(xnn_f32_gemm_ukernel_5x8__wasmrelaxedsimd_fma_loadsplat);
    }
  }
}
5748
// n = 16 and 24 combined with every m in 1..5, for k = 1, 3, 5.
// Each combination is run with iterations(1).
TEST(F32_GEMM_5X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_div_8_subtile) {
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      for (uint32_t rows = 1; rows <= 5; rows++) {
        GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_f32_gemm_ukernel_5x8__wasmrelaxedsimd_fma_loadsplat);
      }
    }
  }
}
5767
// Every (m, n) with m in 1..5 and n in 1..8, for k = 1, 3, 5, with
// cm_stride(11) set on the tester. Each combination is run with iterations(1).
TEST(F32_GEMM_5X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, strided_cm_subtile) {
  for (size_t depth = 1; depth <= 5; depth += 2) {
    for (uint32_t cols = 1; cols <= 8; cols++) {
      for (uint32_t rows = 1; rows <= 5; rows++) {
        GemmMicrokernelTester()
          .mr(5).nr(8).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_gemm_ukernel_5x8__wasmrelaxedsimd_fma_loadsplat);
      }
    }
  }
}
5787
// Single run with m = 5, n = 8, k = 1 and cm_stride(11) set on the tester.
TEST(F32_GEMM_5X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, strided_cm) {
  GemmMicrokernelTester()
    .mr(5).nr(8).kr(1).sr(1)
    .m(5).n(8).k(1)
    .cm_stride(11)
    .Test(xnn_f32_gemm_ukernel_5x8__wasmrelaxedsimd_fma_loadsplat);
}
5800 #endif // XNN_ARCH_WASMRELAXEDSIMD
5801
5802
5803 #if XNN_ARCH_WASMRELAXEDSIMD
// Single run with m = mr = 6, n = nr = 8 and k = 1.
TEST(F32_GEMM_6X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_eq_1) {
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(1)
    .Test(xnn_f32_gemm_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat);
}
5815
// Single run with m = 6, n = 8, k = 1 and cn_stride(11) set on the tester.
TEST(F32_GEMM_6X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, strided_cn) {
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(1)
    .cn_stride(11)
    .Test(xnn_f32_gemm_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat);
}
5828
// Single run with m = 6, n = 8, k = 1 and a_stride(3) set on the tester.
TEST(F32_GEMM_6X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_eq_1_strided_a) {
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(1)
    .a_stride(3)
    .Test(xnn_f32_gemm_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat);
}
5841
// Every (m, n) with m in 1..6 and n in 1..8 at k = 1.
// Each combination is run with iterations(1).
TEST(F32_GEMM_6X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_eq_1_subtile) {
  for (uint32_t cols = 1; cols <= 8; cols++) {
    for (uint32_t rows = 1; rows <= 6; rows++) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(rows).n(cols).k(1)
        .iterations(1)
        .Test(xnn_f32_gemm_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat);
    }
  }
}
5858
// m in 1..6 with n fixed at 8 and k = 1; each run uses iterations(1).
TEST(F32_GEMM_6X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_eq_1_subtile_m) {
  for (uint32_t rows = 1; rows <= 6; rows++) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(rows).n(8).k(1)
      .iterations(1)
      .Test(xnn_f32_gemm_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat);
  }
}
5873
// n in 1..8 with m fixed at 6 and k = 1; each run uses iterations(1).
TEST(F32_GEMM_6X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_eq_1_subtile_n) {
  for (uint32_t cols = 1; cols <= 8; cols++) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(cols).k(1)
      .iterations(1)
      .Test(xnn_f32_gemm_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat);
  }
}
5888
// m = 6 and n = 8 with k in 2..9.
TEST(F32_GEMM_6X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_gt_1) {
  for (size_t depth = 2; depth < 10; depth++) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(depth)
      .Test(xnn_f32_gemm_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat);
  }
}
5902
// m = 6 and n = 8 with k in 2..9 and a_stride(11) set on the tester.
TEST(F32_GEMM_6X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_gt_1_strided_a) {
  for (size_t depth = 2; depth < 10; depth++) {
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(1).sr(1)
      .m(6).n(8).k(depth)
      .a_stride(11)
      .Test(xnn_f32_gemm_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat);
  }
}
5917
// Every (m, n) with m in 1..6 and n in 1..8, for k in 2..9.
// Each combination is run with iterations(1).
TEST(F32_GEMM_6X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, k_gt_1_subtile) {
  for (size_t depth = 2; depth < 10; depth++) {
    for (uint32_t cols = 1; cols <= 8; cols++) {
      for (uint32_t rows = 1; rows <= 6; rows++) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_f32_gemm_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat);
      }
    }
  }
}
5936
// n in 9..15 (above nr = 8) with m = 6, for k = 1, 3, 5.
TEST(F32_GEMM_6X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_gt_8) {
  for (uint32_t cols = 9; cols < 16; cols++) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(cols).k(depth)
        .Test(xnn_f32_gemm_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat);
    }
  }
}
5952
// n in 9..15 with m = 6, for k = 1, 3, 5, with cn_stride(11) set on the tester.
TEST(F32_GEMM_6X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_gt_8_strided_cn) {
  for (uint32_t cols = 9; cols < 16; cols++) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(cols).k(depth)
        .cn_stride(11)
        .Test(xnn_f32_gemm_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat);
    }
  }
}
5969
// n in 9..15 with m = 6, for k = 1, 3, 5, with a_stride(7) set on the tester.
TEST(F32_GEMM_6X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_gt_8_strided_a) {
  for (uint32_t cols = 9; cols < 16; cols++) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(cols).k(depth)
        .a_stride(7)
        .Test(xnn_f32_gemm_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat);
    }
  }
}
5986
// n in 9..15 combined with every m in 1..6, for k = 1, 3, 5.
// Each combination is run with iterations(1).
TEST(F32_GEMM_6X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_gt_8_subtile) {
  for (uint32_t cols = 9; cols < 16; cols++) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      for (uint32_t rows = 1; rows <= 6; rows++) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_f32_gemm_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat);
      }
    }
  }
}
6005
// n = 16 and 24 (multiples of nr = 8) with m = 6, for k = 1, 3, 5.
TEST(F32_GEMM_6X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_div_8) {
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(cols).k(depth)
        .Test(xnn_f32_gemm_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat);
    }
  }
}
6021
// n = 16 and 24 with m = 6, for k = 1, 3, 5, with cn_stride(11) set on the
// tester.
TEST(F32_GEMM_6X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_div_8_strided_cn) {
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(cols).k(depth)
        .cn_stride(11)
        .Test(xnn_f32_gemm_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat);
    }
  }
}
6038
// n = 16 and 24 with m = 6, for k = 1, 3, 5, with a_stride(7) set on the
// tester.
TEST(F32_GEMM_6X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_div_8_strided_a) {
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(1).sr(1)
        .m(6).n(cols).k(depth)
        .a_stride(7)
        .Test(xnn_f32_gemm_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat);
    }
  }
}
6055
// n = 16 and 24 combined with every m in 1..6, for k = 1, 3, 5.
// Each combination is run with iterations(1).
TEST(F32_GEMM_6X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, n_div_8_subtile) {
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      for (uint32_t rows = 1; rows <= 6; rows++) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_f32_gemm_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat);
      }
    }
  }
}
6074
// Every (m, n) with m in 1..6 and n in 1..8, for k = 1, 3, 5, with
// cm_stride(11) set on the tester. Each combination is run with iterations(1).
TEST(F32_GEMM_6X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, strided_cm_subtile) {
  for (size_t depth = 1; depth <= 5; depth += 2) {
    for (uint32_t cols = 1; cols <= 8; cols++) {
      for (uint32_t rows = 1; rows <= 6; rows++) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_gemm_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat);
      }
    }
  }
}
6094
// Single run with m = 6, n = 8, k = 1 and cm_stride(11) set on the tester.
TEST(F32_GEMM_6X8__WASMRELAXEDSIMD_FMA_LOADSPLAT, strided_cm) {
  GemmMicrokernelTester()
    .mr(6).nr(8).kr(1).sr(1)
    .m(6).n(8).k(1)
    .cm_stride(11)
    .Test(xnn_f32_gemm_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat);
}
6107 #endif // XNN_ARCH_WASMRELAXEDSIMD
6108
6109
// Single run with m = mr = 2, n = nr = 4 and k = 1.
TEST(F32_GEMM_2X4__SCALAR, k_eq_1) {
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(1).sr(1)
    .m(2).n(4).k(1)
    .Test(xnn_f32_gemm_ukernel_2x4__scalar);
}
6121
// Single run with m = 2, n = 4, k = 1 and cn_stride(7) set on the tester.
TEST(F32_GEMM_2X4__SCALAR, strided_cn) {
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(1).sr(1)
    .m(2).n(4).k(1)
    .cn_stride(7)
    .Test(xnn_f32_gemm_ukernel_2x4__scalar);
}
6134
// Single run with m = 2, n = 4, k = 1 and a_stride(3) set on the tester.
TEST(F32_GEMM_2X4__SCALAR, k_eq_1_strided_a) {
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(1).sr(1)
    .m(2).n(4).k(1)
    .a_stride(3)
    .Test(xnn_f32_gemm_ukernel_2x4__scalar);
}
6147
// Every (m, n) with m in 1..2 and n in 1..4 at k = 1.
// Each combination is run with iterations(1).
TEST(F32_GEMM_2X4__SCALAR, k_eq_1_subtile) {
  for (uint32_t cols = 1; cols <= 4; cols++) {
    for (uint32_t rows = 1; rows <= 2; rows++) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(1).sr(1)
        .m(rows).n(cols).k(1)
        .iterations(1)
        .Test(xnn_f32_gemm_ukernel_2x4__scalar);
    }
  }
}
6164
// m in 1..2 with n fixed at 4 and k = 1; each run uses iterations(1).
TEST(F32_GEMM_2X4__SCALAR, k_eq_1_subtile_m) {
  for (uint32_t rows = 1; rows <= 2; rows++) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(1).sr(1)
      .m(rows).n(4).k(1)
      .iterations(1)
      .Test(xnn_f32_gemm_ukernel_2x4__scalar);
  }
}
6179
// n in 1..4 with m fixed at 2 and k = 1; each run uses iterations(1).
TEST(F32_GEMM_2X4__SCALAR, k_eq_1_subtile_n) {
  for (uint32_t cols = 1; cols <= 4; cols++) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(1).sr(1)
      .m(2).n(cols).k(1)
      .iterations(1)
      .Test(xnn_f32_gemm_ukernel_2x4__scalar);
  }
}
6194
// m = 2 and n = 4 with k in 2..9.
TEST(F32_GEMM_2X4__SCALAR, k_gt_1) {
  for (size_t depth = 2; depth < 10; depth++) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(1).sr(1)
      .m(2).n(4).k(depth)
      .Test(xnn_f32_gemm_ukernel_2x4__scalar);
  }
}
6208
// m = 2 and n = 4 with k in 2..9 and a_stride(11) set on the tester.
TEST(F32_GEMM_2X4__SCALAR, k_gt_1_strided_a) {
  for (size_t depth = 2; depth < 10; depth++) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(1).sr(1)
      .m(2).n(4).k(depth)
      .a_stride(11)
      .Test(xnn_f32_gemm_ukernel_2x4__scalar);
  }
}
6223
// Every (m, n) with m in 1..2 and n in 1..4, for k in 2..9.
// Each combination is run with iterations(1).
TEST(F32_GEMM_2X4__SCALAR, k_gt_1_subtile) {
  for (size_t depth = 2; depth < 10; depth++) {
    for (uint32_t cols = 1; cols <= 4; cols++) {
      for (uint32_t rows = 1; rows <= 2; rows++) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_f32_gemm_ukernel_2x4__scalar);
      }
    }
  }
}
6242
// n in 5..7 (above nr = 4) with m = 2, for k = 1, 3, 5.
TEST(F32_GEMM_2X4__SCALAR, n_gt_4) {
  for (uint32_t cols = 5; cols < 8; cols++) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(1).sr(1)
        .m(2).n(cols).k(depth)
        .Test(xnn_f32_gemm_ukernel_2x4__scalar);
    }
  }
}
6258
// n in 5..7 with m = 2, for k = 1, 3, 5, with cn_stride(7) set on the tester.
TEST(F32_GEMM_2X4__SCALAR, n_gt_4_strided_cn) {
  for (uint32_t cols = 5; cols < 8; cols++) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(1).sr(1)
        .m(2).n(cols).k(depth)
        .cn_stride(7)
        .Test(xnn_f32_gemm_ukernel_2x4__scalar);
    }
  }
}
6275
// n in 5..7 with m = 2, for k = 1, 3, 5, with a_stride(7) set on the tester.
TEST(F32_GEMM_2X4__SCALAR, n_gt_4_strided_a) {
  for (uint32_t cols = 5; cols < 8; cols++) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(1).sr(1)
        .m(2).n(cols).k(depth)
        .a_stride(7)
        .Test(xnn_f32_gemm_ukernel_2x4__scalar);
    }
  }
}
6292
// n in 5..7 combined with every m in 1..2, for k = 1, 3, 5.
// Each combination is run with iterations(1).
TEST(F32_GEMM_2X4__SCALAR, n_gt_4_subtile) {
  for (uint32_t cols = 5; cols < 8; cols++) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      for (uint32_t rows = 1; rows <= 2; rows++) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_f32_gemm_ukernel_2x4__scalar);
      }
    }
  }
}
6311
// n = 8 and 12 (multiples of nr = 4) with m = 2, for k = 1, 3, 5.
TEST(F32_GEMM_2X4__SCALAR, n_div_4) {
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(1).sr(1)
        .m(2).n(cols).k(depth)
        .Test(xnn_f32_gemm_ukernel_2x4__scalar);
    }
  }
}
6327
// n = 8 and 12 with m = 2, for k = 1, 3, 5, with cn_stride(7) set on the
// tester.
TEST(F32_GEMM_2X4__SCALAR, n_div_4_strided_cn) {
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(1).sr(1)
        .m(2).n(cols).k(depth)
        .cn_stride(7)
        .Test(xnn_f32_gemm_ukernel_2x4__scalar);
    }
  }
}
6344
// n = 8 and 12 with m = 2, for k = 1, 3, 5, with a_stride(7) set on the
// tester.
TEST(F32_GEMM_2X4__SCALAR, n_div_4_strided_a) {
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(1).sr(1)
        .m(2).n(cols).k(depth)
        .a_stride(7)
        .Test(xnn_f32_gemm_ukernel_2x4__scalar);
    }
  }
}
6361
// n = 8 and 12 combined with every m in 1..2, for k = 1, 3, 5.
// Each combination is run with iterations(1).
TEST(F32_GEMM_2X4__SCALAR, n_div_4_subtile) {
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      for (uint32_t rows = 1; rows <= 2; rows++) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_f32_gemm_ukernel_2x4__scalar);
      }
    }
  }
}
6380
// Every (m, n) with m in 1..2 and n in 1..4, for k = 1, 3, 5, with
// cm_stride(7) set on the tester. Each combination is run with iterations(1).
TEST(F32_GEMM_2X4__SCALAR, strided_cm_subtile) {
  for (size_t depth = 1; depth <= 5; depth += 2) {
    for (uint32_t cols = 1; cols <= 4; cols++) {
      for (uint32_t rows = 1; rows <= 2; rows++) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_f32_gemm_ukernel_2x4__scalar);
      }
    }
  }
}
6400
// Single run with m = 2, n = 4, k = 1 and cm_stride(7) set on the tester.
TEST(F32_GEMM_2X4__SCALAR, strided_cm) {
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(1).sr(1)
    .m(2).n(4).k(1)
    .cm_stride(7)
    .Test(xnn_f32_gemm_ukernel_2x4__scalar);
}
6413