// Copyright (c) Facebook, Inc. and its affiliates.
// All rights reserved.
//
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
//
// Auto-generated file. Do not edit!
// Specification: test/f32-gemm-relu.yaml
// Generator: tools/generate-gemm-test.py
12
13
14 #include <gtest/gtest.h>
15
16 #include <xnnpack/common.h>
17 #include <xnnpack/isa-checks.h>
18
19 #include <xnnpack/gemm.h>
20 #include <xnnpack/igemm.h>
21 #include <xnnpack/ppmm.h>
22 #include "gemm-microkernel-tester.h"
23
24
25 #if XNN_ARCH_WASMSIMD
TEST(F32_GEMM_RELU_1X8__WASMSIMD_SPLAT, k_eq_4) {
  // One full 1x8 output tile with k = 4 and no explicit strides.
  GemmMicrokernelTester tester{};
  tester.mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(4)
      .Test(xnn_f32_gemm_relu_ukernel_1x8__wasmsimd_splat);
}
37
TEST(F32_GEMM_RELU_1X8__WASMSIMD_SPLAT, strided_cn) {
  // Full 1x8 tile at k = 4 with cn_stride set to 11 (wider than nr = 8).
  GemmMicrokernelTester tester{};
  tester.mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(4)
      .cn_stride(11)
      .Test(xnn_f32_gemm_relu_ukernel_1x8__wasmsimd_splat);
}
50
TEST(F32_GEMM_RELU_1X8__WASMSIMD_SPLAT, k_eq_4_strided_a) {
  // Full 1x8 tile at k = 4 with a_stride set to 7 (larger than k).
  GemmMicrokernelTester tester{};
  tester.mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(4)
      .a_stride(7)
      .Test(xnn_f32_gemm_relu_ukernel_1x8__wasmsimd_splat);
}
63
TEST(F32_GEMM_RELU_1X8__WASMSIMD_SPLAT, k_eq_4_subtile) {
  // Sweep every m x n sub-tile up to 1x8 at k = 4, with iterations set to 1.
  for (uint32_t rows = 1; rows <= 1; ++rows) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(8).kr(1).sr(1)
          .m(rows).n(cols).k(4)
          .iterations(1)
          .Test(xnn_f32_gemm_relu_ukernel_1x8__wasmsimd_splat);
    }
  }
}
80
TEST(F32_GEMM_RELU_1X8__WASMSIMD_SPLAT, k_eq_4_subtile_m) {
  // Vary m over 1..1 while n stays at 8 and k at 4; iterations set to 1.
  for (uint32_t rows = 1; rows <= 1; ++rows) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(8).kr(1).sr(1)
        .m(rows).n(8).k(4)
        .iterations(1)
        .Test(xnn_f32_gemm_relu_ukernel_1x8__wasmsimd_splat);
  }
}
95
TEST(F32_GEMM_RELU_1X8__WASMSIMD_SPLAT, k_eq_4_subtile_n) {
  // Vary n over 1..8 while m stays at 1 and k at 4; iterations set to 1.
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(8).kr(1).sr(1)
        .m(1).n(cols).k(4)
        .iterations(1)
        .Test(xnn_f32_gemm_relu_ukernel_1x8__wasmsimd_splat);
  }
}
110
TEST(F32_GEMM_RELU_1X8__WASMSIMD_SPLAT, k_lt_4) {
  // Full 1x8 tile for each k in 1..3.
  for (size_t depth = 1; depth < 4; ++depth) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(depth)
        .Test(xnn_f32_gemm_relu_ukernel_1x8__wasmsimd_splat);
  }
}
124
TEST(F32_GEMM_RELU_1X8__WASMSIMD_SPLAT, k_lt_4_strided_a) {
  // Full 1x8 tile for each k in 1..3, with a_stride set to 7.
  for (size_t depth = 1; depth < 4; ++depth) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(depth)
        .a_stride(7)
        .Test(xnn_f32_gemm_relu_ukernel_1x8__wasmsimd_splat);
  }
}
139
TEST(F32_GEMM_RELU_1X8__WASMSIMD_SPLAT, k_lt_4_subtile) {
  // Every m x n sub-tile up to 1x8 for each k in 1..3; iterations set to 1.
  for (size_t depth = 1; depth < 4; ++depth) {
    for (uint32_t rows = 1; rows <= 1; ++rows) {
      for (uint32_t cols = 1; cols <= 8; ++cols) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(8).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_f32_gemm_relu_ukernel_1x8__wasmsimd_splat);
      }
    }
  }
}
158
TEST(F32_GEMM_RELU_1X8__WASMSIMD_SPLAT, k_gt_4) {
  // Full 1x8 tile for each k in 5..7.
  for (size_t depth = 5; depth < 8; ++depth) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(depth)
        .Test(xnn_f32_gemm_relu_ukernel_1x8__wasmsimd_splat);
  }
}
172
TEST(F32_GEMM_RELU_1X8__WASMSIMD_SPLAT, k_gt_4_strided_a) {
  // Full 1x8 tile for each k in 5..7, with a_stride set to 11.
  for (size_t depth = 5; depth < 8; ++depth) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(depth)
        .a_stride(11)
        .Test(xnn_f32_gemm_relu_ukernel_1x8__wasmsimd_splat);
  }
}
187
TEST(F32_GEMM_RELU_1X8__WASMSIMD_SPLAT, k_gt_4_subtile) {
  // Every m x n sub-tile up to 1x8 for each k in 5..7; iterations set to 1.
  for (size_t depth = 5; depth < 8; ++depth) {
    for (uint32_t rows = 1; rows <= 1; ++rows) {
      for (uint32_t cols = 1; cols <= 8; ++cols) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(8).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_f32_gemm_relu_ukernel_1x8__wasmsimd_splat);
      }
    }
  }
}
206
TEST(F32_GEMM_RELU_1X8__WASMSIMD_SPLAT, k_div_4) {
  // Full 1x8 tile for k = 8, 12, ..., 40 (multiples of 4).
  for (size_t depth = 8; depth <= 40; depth += 4) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(depth)
        .Test(xnn_f32_gemm_relu_ukernel_1x8__wasmsimd_splat);
  }
}
220
TEST(F32_GEMM_RELU_1X8__WASMSIMD_SPLAT, k_div_4_strided_a) {
  // Full 1x8 tile for k = 8, 12, ..., 40, with a_stride set to 43.
  for (size_t depth = 8; depth <= 40; depth += 4) {
    GemmMicrokernelTester tester{};
    tester.mr(1).nr(8).kr(1).sr(1)
        .m(1).n(8).k(depth)
        .a_stride(43)
        .Test(xnn_f32_gemm_relu_ukernel_1x8__wasmsimd_splat);
  }
}
235
TEST(F32_GEMM_RELU_1X8__WASMSIMD_SPLAT, k_div_4_subtile) {
  // Every m x n sub-tile up to 1x8 for k = 8, 12, ..., 40; iterations set to 1.
  for (size_t depth = 8; depth <= 40; depth += 4) {
    for (uint32_t rows = 1; rows <= 1; ++rows) {
      for (uint32_t cols = 1; cols <= 8; ++cols) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(8).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_f32_gemm_relu_ukernel_1x8__wasmsimd_splat);
      }
    }
  }
}
254
// Runs the 1x8 kernel with n in 9..15 (more columns than nr = 8) for
// k = 1, 6, 11, 16.
TEST(F32_GEMM_RELU_1X8__WASMSIMD_SPLAT, n_gt_8) {
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(1)
        // Pass the loop's n rather than the fixed tile width 8; otherwise the
        // n > 8 cases this test is named for are never exercised.
        .n(n)
        .k(k)
        .Test(xnn_f32_gemm_relu_ukernel_1x8__wasmsimd_splat);
    }
  }
}
270
// Runs the 1x8 kernel with n in 9..15 and cn_stride set to 11 for
// k = 1, 6, 11, 16.
TEST(F32_GEMM_RELU_1X8__WASMSIMD_SPLAT, n_gt_8_strided_cn) {
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(1)
        // Pass the loop's n rather than the fixed tile width 8; otherwise the
        // n > 8 cases this test is named for are never exercised.
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_f32_gemm_relu_ukernel_1x8__wasmsimd_splat);
    }
  }
}
287
TEST(F32_GEMM_RELU_1X8__WASMSIMD_SPLAT, n_gt_8_strided_a) {
  // n in 9..15 and k = 1, 6, 11, 16, with a_stride set to 23.
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(8).kr(1).sr(1)
          .m(1).n(cols).k(depth)
          .a_stride(23)
          .Test(xnn_f32_gemm_relu_ukernel_1x8__wasmsimd_splat);
    }
  }
}
304
TEST(F32_GEMM_RELU_1X8__WASMSIMD_SPLAT, n_gt_8_subtile) {
  // n in 9..15, k = 1, 6, 11, 16 and m in 1..1; iterations set to 1.
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(8).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_f32_gemm_relu_ukernel_1x8__wasmsimd_splat);
      }
    }
  }
}
323
// Runs the 1x8 kernel with n = 16 and 24 (multiples of nr = 8) for
// k = 1, 6, 11, 16.
TEST(F32_GEMM_RELU_1X8__WASMSIMD_SPLAT, n_div_8) {
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(1)
        // Pass the loop's n rather than the fixed tile width 8; otherwise the
        // multi-tile column counts are never exercised.
        .n(n)
        .k(k)
        .Test(xnn_f32_gemm_relu_ukernel_1x8__wasmsimd_splat);
    }
  }
}
339
TEST(F32_GEMM_RELU_1X8__WASMSIMD_SPLAT, n_div_8_strided_cn) {
  // n = 16 and 24, k = 1, 6, 11, 16, with cn_stride set to 11.
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(8).kr(1).sr(1)
          .m(1).n(cols).k(depth)
          .cn_stride(11)
          .Test(xnn_f32_gemm_relu_ukernel_1x8__wasmsimd_splat);
    }
  }
}
356
TEST(F32_GEMM_RELU_1X8__WASMSIMD_SPLAT, n_div_8_strided_a) {
  // n = 16 and 24, k = 1, 6, 11, 16, with a_stride set to 23.
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      GemmMicrokernelTester tester{};
      tester.mr(1).nr(8).kr(1).sr(1)
          .m(1).n(cols).k(depth)
          .a_stride(23)
          .Test(xnn_f32_gemm_relu_ukernel_1x8__wasmsimd_splat);
    }
  }
}
373
TEST(F32_GEMM_RELU_1X8__WASMSIMD_SPLAT, n_div_8_subtile) {
  // n = 16 and 24, k = 1, 6, 11, 16 and m in 1..1; iterations set to 1.
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      for (uint32_t rows = 1; rows <= 1; ++rows) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(8).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_f32_gemm_relu_ukernel_1x8__wasmsimd_splat);
      }
    }
  }
}
392
TEST(F32_GEMM_RELU_1X8__WASMSIMD_SPLAT, strided_cm_subtile) {
  // Every m x n sub-tile up to 1x8 for k = 1, 6, 11, 16, with cm_stride set
  // to 11 and iterations set to 1.
  for (size_t depth = 1; depth <= 20; depth += 5) {
    for (uint32_t rows = 1; rows <= 1; ++rows) {
      for (uint32_t cols = 1; cols <= 8; ++cols) {
        GemmMicrokernelTester tester{};
        tester.mr(1).nr(8).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_gemm_relu_ukernel_1x8__wasmsimd_splat);
      }
    }
  }
}
412
TEST(F32_GEMM_RELU_1X8__WASMSIMD_SPLAT, strided_cm) {
  // Full 1x8 tile at k = 4 with cm_stride set to 11.
  GemmMicrokernelTester tester{};
  tester.mr(1).nr(8).kr(1).sr(1)
      .m(1).n(8).k(4)
      .cm_stride(11)
      .Test(xnn_f32_gemm_relu_ukernel_1x8__wasmsimd_splat);
}
425 #endif // XNN_ARCH_WASMSIMD
426
427
428 #if XNN_ARCH_WASMSIMD
TEST(F32_GEMM_RELU_4X8__WASMSIMD_SPLAT, k_eq_4) {
  // One full 4x8 output tile with k = 4 and no explicit strides.
  GemmMicrokernelTester tester{};
  tester.mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(4)
      .Test(xnn_f32_gemm_relu_ukernel_4x8__wasmsimd_splat);
}
440
TEST(F32_GEMM_RELU_4X8__WASMSIMD_SPLAT, strided_cn) {
  // Full 4x8 tile at k = 4 with cn_stride set to 11 (wider than nr = 8).
  GemmMicrokernelTester tester{};
  tester.mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(4)
      .cn_stride(11)
      .Test(xnn_f32_gemm_relu_ukernel_4x8__wasmsimd_splat);
}
453
TEST(F32_GEMM_RELU_4X8__WASMSIMD_SPLAT, k_eq_4_strided_a) {
  // Full 4x8 tile at k = 4 with a_stride set to 7 (larger than k).
  GemmMicrokernelTester tester{};
  tester.mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(4)
      .a_stride(7)
      .Test(xnn_f32_gemm_relu_ukernel_4x8__wasmsimd_splat);
}
466
TEST(F32_GEMM_RELU_4X8__WASMSIMD_SPLAT, k_eq_4_subtile) {
  // Sweep every m x n sub-tile up to 4x8 at k = 4, with iterations set to 1.
  for (uint32_t rows = 1; rows <= 4; ++rows) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(8).kr(1).sr(1)
          .m(rows).n(cols).k(4)
          .iterations(1)
          .Test(xnn_f32_gemm_relu_ukernel_4x8__wasmsimd_splat);
    }
  }
}
483
TEST(F32_GEMM_RELU_4X8__WASMSIMD_SPLAT, k_eq_4_subtile_m) {
  // Vary m over 1..4 while n stays at 8 and k at 4; iterations set to 1.
  for (uint32_t rows = 1; rows <= 4; ++rows) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(8).kr(1).sr(1)
        .m(rows).n(8).k(4)
        .iterations(1)
        .Test(xnn_f32_gemm_relu_ukernel_4x8__wasmsimd_splat);
  }
}
498
TEST(F32_GEMM_RELU_4X8__WASMSIMD_SPLAT, k_eq_4_subtile_n) {
  // Vary n over 1..8 while m stays at 4 and k at 4; iterations set to 1.
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(8).kr(1).sr(1)
        .m(4).n(cols).k(4)
        .iterations(1)
        .Test(xnn_f32_gemm_relu_ukernel_4x8__wasmsimd_splat);
  }
}
513
TEST(F32_GEMM_RELU_4X8__WASMSIMD_SPLAT, k_lt_4) {
  // Full 4x8 tile for each k in 1..3.
  for (size_t depth = 1; depth < 4; ++depth) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(depth)
        .Test(xnn_f32_gemm_relu_ukernel_4x8__wasmsimd_splat);
  }
}
527
TEST(F32_GEMM_RELU_4X8__WASMSIMD_SPLAT, k_lt_4_strided_a) {
  // Full 4x8 tile for each k in 1..3, with a_stride set to 7.
  for (size_t depth = 1; depth < 4; ++depth) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(depth)
        .a_stride(7)
        .Test(xnn_f32_gemm_relu_ukernel_4x8__wasmsimd_splat);
  }
}
542
TEST(F32_GEMM_RELU_4X8__WASMSIMD_SPLAT, k_lt_4_subtile) {
  // Every m x n sub-tile up to 4x8 for each k in 1..3; iterations set to 1.
  for (size_t depth = 1; depth < 4; ++depth) {
    for (uint32_t rows = 1; rows <= 4; ++rows) {
      for (uint32_t cols = 1; cols <= 8; ++cols) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(8).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_f32_gemm_relu_ukernel_4x8__wasmsimd_splat);
      }
    }
  }
}
561
TEST(F32_GEMM_RELU_4X8__WASMSIMD_SPLAT, k_gt_4) {
  // Full 4x8 tile for each k in 5..7.
  for (size_t depth = 5; depth < 8; ++depth) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(depth)
        .Test(xnn_f32_gemm_relu_ukernel_4x8__wasmsimd_splat);
  }
}
575
TEST(F32_GEMM_RELU_4X8__WASMSIMD_SPLAT, k_gt_4_strided_a) {
  // Full 4x8 tile for each k in 5..7, with a_stride set to 11.
  for (size_t depth = 5; depth < 8; ++depth) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(depth)
        .a_stride(11)
        .Test(xnn_f32_gemm_relu_ukernel_4x8__wasmsimd_splat);
  }
}
590
TEST(F32_GEMM_RELU_4X8__WASMSIMD_SPLAT, k_gt_4_subtile) {
  // Every m x n sub-tile up to 4x8 for each k in 5..7; iterations set to 1.
  for (size_t depth = 5; depth < 8; ++depth) {
    for (uint32_t rows = 1; rows <= 4; ++rows) {
      for (uint32_t cols = 1; cols <= 8; ++cols) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(8).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_f32_gemm_relu_ukernel_4x8__wasmsimd_splat);
      }
    }
  }
}
609
TEST(F32_GEMM_RELU_4X8__WASMSIMD_SPLAT, k_div_4) {
  // Full 4x8 tile for k = 8, 12, ..., 40 (multiples of 4).
  for (size_t depth = 8; depth <= 40; depth += 4) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(depth)
        .Test(xnn_f32_gemm_relu_ukernel_4x8__wasmsimd_splat);
  }
}
623
TEST(F32_GEMM_RELU_4X8__WASMSIMD_SPLAT, k_div_4_strided_a) {
  // Full 4x8 tile for k = 8, 12, ..., 40, with a_stride set to 43.
  for (size_t depth = 8; depth <= 40; depth += 4) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(depth)
        .a_stride(43)
        .Test(xnn_f32_gemm_relu_ukernel_4x8__wasmsimd_splat);
  }
}
638
TEST(F32_GEMM_RELU_4X8__WASMSIMD_SPLAT, k_div_4_subtile) {
  // Every m x n sub-tile up to 4x8 for k = 8, 12, ..., 40; iterations set to 1.
  for (size_t depth = 8; depth <= 40; depth += 4) {
    for (uint32_t rows = 1; rows <= 4; ++rows) {
      for (uint32_t cols = 1; cols <= 8; ++cols) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(8).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_f32_gemm_relu_ukernel_4x8__wasmsimd_splat);
      }
    }
  }
}
657
// Runs the 4x8 kernel with n in 9..15 (more columns than nr = 8) for
// k = 1, 6, 11, 16.
TEST(F32_GEMM_RELU_4X8__WASMSIMD_SPLAT, n_gt_8) {
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(4)
        // Pass the loop's n rather than the fixed tile width 8; otherwise the
        // n > 8 cases this test is named for are never exercised.
        .n(n)
        .k(k)
        .Test(xnn_f32_gemm_relu_ukernel_4x8__wasmsimd_splat);
    }
  }
}
673
// Runs the 4x8 kernel with n in 9..15 and cn_stride set to 11 for
// k = 1, 6, 11, 16.
TEST(F32_GEMM_RELU_4X8__WASMSIMD_SPLAT, n_gt_8_strided_cn) {
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(4)
        // Pass the loop's n rather than the fixed tile width 8; otherwise the
        // n > 8 cases this test is named for are never exercised.
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_f32_gemm_relu_ukernel_4x8__wasmsimd_splat);
    }
  }
}
690
TEST(F32_GEMM_RELU_4X8__WASMSIMD_SPLAT, n_gt_8_strided_a) {
  // n in 9..15 and k = 1, 6, 11, 16, with a_stride set to 23.
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(8).kr(1).sr(1)
          .m(4).n(cols).k(depth)
          .a_stride(23)
          .Test(xnn_f32_gemm_relu_ukernel_4x8__wasmsimd_splat);
    }
  }
}
707
TEST(F32_GEMM_RELU_4X8__WASMSIMD_SPLAT, n_gt_8_subtile) {
  // n in 9..15, k = 1, 6, 11, 16 and m in 1..4; iterations set to 1.
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(8).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_f32_gemm_relu_ukernel_4x8__wasmsimd_splat);
      }
    }
  }
}
726
// Runs the 4x8 kernel with n = 16 and 24 (multiples of nr = 8) for
// k = 1, 6, 11, 16.
TEST(F32_GEMM_RELU_4X8__WASMSIMD_SPLAT, n_div_8) {
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(4)
        // Pass the loop's n rather than the fixed tile width 8; otherwise the
        // multi-tile column counts are never exercised.
        .n(n)
        .k(k)
        .Test(xnn_f32_gemm_relu_ukernel_4x8__wasmsimd_splat);
    }
  }
}
742
TEST(F32_GEMM_RELU_4X8__WASMSIMD_SPLAT, n_div_8_strided_cn) {
  // n = 16 and 24, k = 1, 6, 11, 16, with cn_stride set to 11.
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(8).kr(1).sr(1)
          .m(4).n(cols).k(depth)
          .cn_stride(11)
          .Test(xnn_f32_gemm_relu_ukernel_4x8__wasmsimd_splat);
    }
  }
}
759
TEST(F32_GEMM_RELU_4X8__WASMSIMD_SPLAT, n_div_8_strided_a) {
  // n = 16 and 24, k = 1, 6, 11, 16, with a_stride set to 23.
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(8).kr(1).sr(1)
          .m(4).n(cols).k(depth)
          .a_stride(23)
          .Test(xnn_f32_gemm_relu_ukernel_4x8__wasmsimd_splat);
    }
  }
}
776
TEST(F32_GEMM_RELU_4X8__WASMSIMD_SPLAT, n_div_8_subtile) {
  // n = 16 and 24, k = 1, 6, 11, 16 and m in 1..4; iterations set to 1.
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(8).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_f32_gemm_relu_ukernel_4x8__wasmsimd_splat);
      }
    }
  }
}
795
TEST(F32_GEMM_RELU_4X8__WASMSIMD_SPLAT, strided_cm_subtile) {
  // Every m x n sub-tile up to 4x8 for k = 1, 6, 11, 16, with cm_stride set
  // to 11 and iterations set to 1.
  for (size_t depth = 1; depth <= 20; depth += 5) {
    for (uint32_t rows = 1; rows <= 4; ++rows) {
      for (uint32_t cols = 1; cols <= 8; ++cols) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(8).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_gemm_relu_ukernel_4x8__wasmsimd_splat);
      }
    }
  }
}
815
TEST(F32_GEMM_RELU_4X8__WASMSIMD_SPLAT, strided_cm) {
  // Full 4x8 tile at k = 4 with cm_stride set to 11.
  GemmMicrokernelTester tester{};
  tester.mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(4)
      .cm_stride(11)
      .Test(xnn_f32_gemm_relu_ukernel_4x8__wasmsimd_splat);
}
828 #endif // XNN_ARCH_WASMSIMD
829
830
831 #if XNN_ARCH_WASMSIMD
TEST(F32_GEMM_RELU_5X8__WASMSIMD_SPLAT, k_eq_4) {
  // One full 5x8 output tile with k = 4 and no explicit strides.
  GemmMicrokernelTester tester{};
  tester.mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(4)
      .Test(xnn_f32_gemm_relu_ukernel_5x8__wasmsimd_splat);
}
843
TEST(F32_GEMM_RELU_5X8__WASMSIMD_SPLAT, strided_cn) {
  // Full 5x8 tile at k = 4 with cn_stride set to 11 (wider than nr = 8).
  GemmMicrokernelTester tester{};
  tester.mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(4)
      .cn_stride(11)
      .Test(xnn_f32_gemm_relu_ukernel_5x8__wasmsimd_splat);
}
856
TEST(F32_GEMM_RELU_5X8__WASMSIMD_SPLAT, k_eq_4_strided_a) {
  // Full 5x8 tile at k = 4 with a_stride set to 7 (larger than k).
  GemmMicrokernelTester tester{};
  tester.mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(4)
      .a_stride(7)
      .Test(xnn_f32_gemm_relu_ukernel_5x8__wasmsimd_splat);
}
869
TEST(F32_GEMM_RELU_5X8__WASMSIMD_SPLAT, k_eq_4_subtile) {
  // Sweep every m x n sub-tile up to 5x8 at k = 4, with iterations set to 1.
  for (uint32_t rows = 1; rows <= 5; ++rows) {
    for (uint32_t cols = 1; cols <= 8; ++cols) {
      GemmMicrokernelTester tester{};
      tester.mr(5).nr(8).kr(1).sr(1)
          .m(rows).n(cols).k(4)
          .iterations(1)
          .Test(xnn_f32_gemm_relu_ukernel_5x8__wasmsimd_splat);
    }
  }
}
886
TEST(F32_GEMM_RELU_5X8__WASMSIMD_SPLAT, k_eq_4_subtile_m) {
  // Vary m over 1..5 while n stays at 8 and k at 4; iterations set to 1.
  for (uint32_t rows = 1; rows <= 5; ++rows) {
    GemmMicrokernelTester tester{};
    tester.mr(5).nr(8).kr(1).sr(1)
        .m(rows).n(8).k(4)
        .iterations(1)
        .Test(xnn_f32_gemm_relu_ukernel_5x8__wasmsimd_splat);
  }
}
901
TEST(F32_GEMM_RELU_5X8__WASMSIMD_SPLAT, k_eq_4_subtile_n) {
  // Vary n over 1..8 while m stays at 5 and k at 4; iterations set to 1.
  for (uint32_t cols = 1; cols <= 8; ++cols) {
    GemmMicrokernelTester tester{};
    tester.mr(5).nr(8).kr(1).sr(1)
        .m(5).n(cols).k(4)
        .iterations(1)
        .Test(xnn_f32_gemm_relu_ukernel_5x8__wasmsimd_splat);
  }
}
916
TEST(F32_GEMM_RELU_5X8__WASMSIMD_SPLAT, k_lt_4) {
  // Full 5x8 tile for each k in 1..3.
  for (size_t depth = 1; depth < 4; ++depth) {
    GemmMicrokernelTester tester{};
    tester.mr(5).nr(8).kr(1).sr(1)
        .m(5).n(8).k(depth)
        .Test(xnn_f32_gemm_relu_ukernel_5x8__wasmsimd_splat);
  }
}
930
TEST(F32_GEMM_RELU_5X8__WASMSIMD_SPLAT, k_lt_4_strided_a) {
  // Full 5x8 tile for each k in 1..3, with a_stride set to 7.
  for (size_t depth = 1; depth < 4; ++depth) {
    GemmMicrokernelTester tester{};
    tester.mr(5).nr(8).kr(1).sr(1)
        .m(5).n(8).k(depth)
        .a_stride(7)
        .Test(xnn_f32_gemm_relu_ukernel_5x8__wasmsimd_splat);
  }
}
945
TEST(F32_GEMM_RELU_5X8__WASMSIMD_SPLAT, k_lt_4_subtile) {
  // Every m x n sub-tile up to 5x8 for each k in 1..3; iterations set to 1.
  for (size_t depth = 1; depth < 4; ++depth) {
    for (uint32_t rows = 1; rows <= 5; ++rows) {
      for (uint32_t cols = 1; cols <= 8; ++cols) {
        GemmMicrokernelTester tester{};
        tester.mr(5).nr(8).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_f32_gemm_relu_ukernel_5x8__wasmsimd_splat);
      }
    }
  }
}
964
TEST(F32_GEMM_RELU_5X8__WASMSIMD_SPLAT, k_gt_4) {
  // Full 5x8 tile for each k in 5..7.
  for (size_t depth = 5; depth < 8; ++depth) {
    GemmMicrokernelTester tester{};
    tester.mr(5).nr(8).kr(1).sr(1)
        .m(5).n(8).k(depth)
        .Test(xnn_f32_gemm_relu_ukernel_5x8__wasmsimd_splat);
  }
}
978
TEST(F32_GEMM_RELU_5X8__WASMSIMD_SPLAT, k_gt_4_strided_a) {
  // Full 5x8 tile for each k in 5..7, with a_stride set to 11.
  for (size_t depth = 5; depth < 8; ++depth) {
    GemmMicrokernelTester tester{};
    tester.mr(5).nr(8).kr(1).sr(1)
        .m(5).n(8).k(depth)
        .a_stride(11)
        .Test(xnn_f32_gemm_relu_ukernel_5x8__wasmsimd_splat);
  }
}
993
TEST(F32_GEMM_RELU_5X8__WASMSIMD_SPLAT, k_gt_4_subtile) {
  // Every m x n sub-tile up to 5x8 for each k in 5..7; iterations set to 1.
  for (size_t depth = 5; depth < 8; ++depth) {
    for (uint32_t rows = 1; rows <= 5; ++rows) {
      for (uint32_t cols = 1; cols <= 8; ++cols) {
        GemmMicrokernelTester tester{};
        tester.mr(5).nr(8).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_f32_gemm_relu_ukernel_5x8__wasmsimd_splat);
      }
    }
  }
}
1012
TEST(F32_GEMM_RELU_5X8__WASMSIMD_SPLAT, k_div_4) {
  // Full 5x8 tile for k = 8, 12, ..., 40 (multiples of 4).
  for (size_t depth = 8; depth <= 40; depth += 4) {
    GemmMicrokernelTester tester{};
    tester.mr(5).nr(8).kr(1).sr(1)
        .m(5).n(8).k(depth)
        .Test(xnn_f32_gemm_relu_ukernel_5x8__wasmsimd_splat);
  }
}
1026
TEST(F32_GEMM_RELU_5X8__WASMSIMD_SPLAT, k_div_4_strided_a) {
  // Full 5x8 tile for k = 8, 12, ..., 40, with a_stride set to 43.
  for (size_t depth = 8; depth <= 40; depth += 4) {
    GemmMicrokernelTester tester{};
    tester.mr(5).nr(8).kr(1).sr(1)
        .m(5).n(8).k(depth)
        .a_stride(43)
        .Test(xnn_f32_gemm_relu_ukernel_5x8__wasmsimd_splat);
  }
}
1041
TEST(F32_GEMM_RELU_5X8__WASMSIMD_SPLAT, k_div_4_subtile) {
  // Every m x n sub-tile up to 5x8 for k = 8, 12, ..., 40; iterations set to 1.
  for (size_t depth = 8; depth <= 40; depth += 4) {
    for (uint32_t rows = 1; rows <= 5; ++rows) {
      for (uint32_t cols = 1; cols <= 8; ++cols) {
        GemmMicrokernelTester tester{};
        tester.mr(5).nr(8).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_f32_gemm_relu_ukernel_5x8__wasmsimd_splat);
      }
    }
  }
}
1060
// Runs the 5x8 kernel with n in 9..15 (more columns than nr = 8) for
// k = 1, 6, 11, 16.
TEST(F32_GEMM_RELU_5X8__WASMSIMD_SPLAT, n_gt_8) {
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(5)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(5)
        // Pass the loop's n rather than the fixed tile width 8; otherwise the
        // n > 8 cases this test is named for are never exercised.
        .n(n)
        .k(k)
        .Test(xnn_f32_gemm_relu_ukernel_5x8__wasmsimd_splat);
    }
  }
}
1076
// Runs the 5x8 kernel with n in 9..15 and cn_stride set to 11 for
// k = 1, 6, 11, 16.
TEST(F32_GEMM_RELU_5X8__WASMSIMD_SPLAT, n_gt_8_strided_cn) {
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(5)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(5)
        // Pass the loop's n rather than the fixed tile width 8; otherwise the
        // n > 8 cases this test is named for are never exercised.
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_f32_gemm_relu_ukernel_5x8__wasmsimd_splat);
    }
  }
}
1093
TEST(F32_GEMM_RELU_5X8__WASMSIMD_SPLAT, n_gt_8_strided_a) {
  // n in 9..15 and k = 1, 6, 11, 16, with a_stride set to 23.
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      GemmMicrokernelTester tester{};
      tester.mr(5).nr(8).kr(1).sr(1)
          .m(5).n(cols).k(depth)
          .a_stride(23)
          .Test(xnn_f32_gemm_relu_ukernel_5x8__wasmsimd_splat);
    }
  }
}
1110
TEST(F32_GEMM_RELU_5X8__WASMSIMD_SPLAT, n_gt_8_subtile) {
  // n in 9..15, k = 1, 6, 11, 16 and m in 1..5; iterations set to 1.
  for (uint32_t cols = 9; cols < 16; ++cols) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      for (uint32_t rows = 1; rows <= 5; ++rows) {
        GemmMicrokernelTester tester{};
        tester.mr(5).nr(8).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_f32_gemm_relu_ukernel_5x8__wasmsimd_splat);
      }
    }
  }
}
1129
// Runs the 5x8 kernel with n = 16 and 24 (multiples of nr = 8) for
// k = 1, 6, 11, 16.
TEST(F32_GEMM_RELU_5X8__WASMSIMD_SPLAT, n_div_8) {
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(5)
        .nr(8)
        .kr(1)
        .sr(1)
        .m(5)
        // Pass the loop's n rather than the fixed tile width 8; otherwise the
        // multi-tile column counts are never exercised.
        .n(n)
        .k(k)
        .Test(xnn_f32_gemm_relu_ukernel_5x8__wasmsimd_splat);
    }
  }
}
1145
TEST(F32_GEMM_RELU_5X8__WASMSIMD_SPLAT, n_div_8_strided_cn) {
  // n = 16 and 24, k = 1, 6, 11, 16, with cn_stride set to 11.
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      GemmMicrokernelTester tester{};
      tester.mr(5).nr(8).kr(1).sr(1)
          .m(5).n(cols).k(depth)
          .cn_stride(11)
          .Test(xnn_f32_gemm_relu_ukernel_5x8__wasmsimd_splat);
    }
  }
}
1162
TEST(F32_GEMM_RELU_5X8__WASMSIMD_SPLAT, n_div_8_strided_a) {
  // n = 16 and 24, k = 1, 6, 11, 16, with a_stride set to 23.
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      GemmMicrokernelTester tester{};
      tester.mr(5).nr(8).kr(1).sr(1)
          .m(5).n(cols).k(depth)
          .a_stride(23)
          .Test(xnn_f32_gemm_relu_ukernel_5x8__wasmsimd_splat);
    }
  }
}
1179
TEST(F32_GEMM_RELU_5X8__WASMSIMD_SPLAT, n_div_8_subtile) {
  // n = 16 and 24, k = 1, 6, 11, 16 and m in 1..5; iterations set to 1.
  for (uint32_t cols = 16; cols <= 24; cols += 8) {
    for (size_t depth = 1; depth <= 20; depth += 5) {
      for (uint32_t rows = 1; rows <= 5; ++rows) {
        GemmMicrokernelTester tester{};
        tester.mr(5).nr(8).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_f32_gemm_relu_ukernel_5x8__wasmsimd_splat);
      }
    }
  }
}
1198
TEST(F32_GEMM_RELU_5X8__WASMSIMD_SPLAT, strided_cm_subtile) {
  // Every m x n sub-tile up to 5x8 for k = 1, 6, 11, 16, with cm_stride set
  // to 11 and iterations set to 1.
  for (size_t depth = 1; depth <= 20; depth += 5) {
    for (uint32_t rows = 1; rows <= 5; ++rows) {
      for (uint32_t cols = 1; cols <= 8; ++cols) {
        GemmMicrokernelTester tester{};
        tester.mr(5).nr(8).kr(1).sr(1)
            .m(rows).n(cols).k(depth)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_gemm_relu_ukernel_5x8__wasmsimd_splat);
      }
    }
  }
}
1218
TEST(F32_GEMM_RELU_5X8__WASMSIMD_SPLAT, strided_cm) {
  // Full 5x8 tile at k = 4 with cm_stride set to 11.
  GemmMicrokernelTester tester{};
  tester.mr(5).nr(8).kr(1).sr(1)
      .m(5).n(8).k(4)
      .cm_stride(11)
      .Test(xnn_f32_gemm_relu_ukernel_5x8__wasmsimd_splat);
}
1231 #endif // XNN_ARCH_WASMSIMD
1232
1233
1234 #if XNN_ARCH_WASMSIMD
TEST(F32_GEMM_RELU_4X2C4__WASMSIMD, k_eq_4) {
  // One full 4x2 output tile (kr = 4) with k = 4 and no explicit strides.
  GemmMicrokernelTester tester{};
  tester.mr(4).nr(2).kr(4).sr(1)
      .m(4).n(2).k(4)
      .Test(xnn_f32_gemm_relu_ukernel_4x2c4__wasmsimd);
}
1246
TEST(F32_GEMM_RELU_4X2C4__WASMSIMD, strided_cn) {
  // Full 4x2 tile at k = 4 with cn_stride set to 5 (wider than nr = 2).
  GemmMicrokernelTester tester{};
  tester.mr(4).nr(2).kr(4).sr(1)
      .m(4).n(2).k(4)
      .cn_stride(5)
      .Test(xnn_f32_gemm_relu_ukernel_4x2c4__wasmsimd);
}
1259
TEST(F32_GEMM_RELU_4X2C4__WASMSIMD, k_eq_4_strided_a) {
  // Full 4x2 tile at k = 4 with a_stride set to 7 (larger than k).
  GemmMicrokernelTester tester{};
  tester.mr(4).nr(2).kr(4).sr(1)
      .m(4).n(2).k(4)
      .a_stride(7)
      .Test(xnn_f32_gemm_relu_ukernel_4x2c4__wasmsimd);
}
1272
TEST(F32_GEMM_RELU_4X2C4__WASMSIMD, k_eq_4_subtile) {
  // Sweep every m x n sub-tile up to 4x2 at k = 4, with iterations set to 1.
  for (uint32_t rows = 1; rows <= 4; ++rows) {
    for (uint32_t cols = 1; cols <= 2; ++cols) {
      GemmMicrokernelTester tester{};
      tester.mr(4).nr(2).kr(4).sr(1)
          .m(rows).n(cols).k(4)
          .iterations(1)
          .Test(xnn_f32_gemm_relu_ukernel_4x2c4__wasmsimd);
    }
  }
}
1289
TEST(F32_GEMM_RELU_4X2C4__WASMSIMD, k_eq_4_subtile_m) {
  // Vary m over 1..4 while n stays at 2 and k at 4; iterations set to 1.
  for (uint32_t rows = 1; rows <= 4; ++rows) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(2).kr(4).sr(1)
        .m(rows).n(2).k(4)
        .iterations(1)
        .Test(xnn_f32_gemm_relu_ukernel_4x2c4__wasmsimd);
  }
}
1304
TEST(F32_GEMM_RELU_4X2C4__WASMSIMD, k_eq_4_subtile_n) {
  // Vary n over 1..2 while m stays at 4 and k at 4; iterations set to 1.
  for (uint32_t cols = 1; cols <= 2; ++cols) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(2).kr(4).sr(1)
        .m(4).n(cols).k(4)
        .iterations(1)
        .Test(xnn_f32_gemm_relu_ukernel_4x2c4__wasmsimd);
  }
}
1319
TEST(F32_GEMM_RELU_4X2C4__WASMSIMD, k_lt_4) {
  // Full 4x2 tile for each k in 1..3.
  for (size_t depth = 1; depth < 4; ++depth) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(2).kr(4).sr(1)
        .m(4).n(2).k(depth)
        .Test(xnn_f32_gemm_relu_ukernel_4x2c4__wasmsimd);
  }
}
1333
TEST(F32_GEMM_RELU_4X2C4__WASMSIMD, k_lt_4_strided_a) {
  // Full 4x2 tile for each k in 1..3, with a_stride set to 7.
  for (size_t depth = 1; depth < 4; ++depth) {
    GemmMicrokernelTester tester{};
    tester.mr(4).nr(2).kr(4).sr(1)
        .m(4).n(2).k(depth)
        .a_stride(7)
        .Test(xnn_f32_gemm_relu_ukernel_4x2c4__wasmsimd);
  }
}
1348
TEST(F32_GEMM_RELU_4X2C4__WASMSIMD, k_lt_4_subtile) {
  // Every m x n sub-tile up to 4x2 for each k in 1..3; iterations set to 1.
  for (size_t depth = 1; depth < 4; ++depth) {
    for (uint32_t rows = 1; rows <= 4; ++rows) {
      for (uint32_t cols = 1; cols <= 2; ++cols) {
        GemmMicrokernelTester tester{};
        tester.mr(4).nr(2).kr(4).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_f32_gemm_relu_ukernel_4x2c4__wasmsimd);
      }
    }
  }
}
1367
// Runs m = 4, n = 2 for each k in {5, 6, 7}.
TEST(F32_GEMM_RELU_4X2C4__WASMSIMD, k_gt_4) {
  for (size_t k_dim = 5; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
      .mr(4).nr(2).kr(4).sr(1)
      .m(4).n(2).k(k_dim)
      .Test(xnn_f32_gemm_relu_ukernel_4x2c4__wasmsimd);
  }
}
1381
// Runs m = 4, n = 2 for each k in {5, 6, 7} with a_stride set to 11.
TEST(F32_GEMM_RELU_4X2C4__WASMSIMD, k_gt_4_strided_a) {
  for (size_t k_dim = 5; k_dim < 8; ++k_dim) {
    GemmMicrokernelTester()
      .mr(4).nr(2).kr(4).sr(1)
      .m(4).n(2).k(k_dim)
      .a_stride(11)
      .Test(xnn_f32_gemm_relu_ukernel_4x2c4__wasmsimd);
  }
}
1396
// For each k in {5, 6, 7}, sweeps m over [1, 4] and n over [1, 2],
// one iteration per configuration.
TEST(F32_GEMM_RELU_4X2C4__WASMSIMD, k_gt_4_subtile) {
  for (size_t k_dim = 5; k_dim < 8; ++k_dim) {
    for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
      for (uint32_t n_dim = 1; n_dim <= 2; ++n_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(2).kr(4).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_gemm_relu_ukernel_4x2c4__wasmsimd);
      }
    }
  }
}
1415
// Runs m = 4, n = 2 for k = 8, 12, ..., 40 (multiples of 4).
TEST(F32_GEMM_RELU_4X2C4__WASMSIMD, k_div_4) {
  for (size_t k_dim = 8; k_dim <= 40; k_dim += 4) {
    GemmMicrokernelTester()
      .mr(4).nr(2).kr(4).sr(1)
      .m(4).n(2).k(k_dim)
      .Test(xnn_f32_gemm_relu_ukernel_4x2c4__wasmsimd);
  }
}
1429
// Runs m = 4, n = 2 for k = 8, 12, ..., 40 with a_stride set to 43.
TEST(F32_GEMM_RELU_4X2C4__WASMSIMD, k_div_4_strided_a) {
  for (size_t k_dim = 8; k_dim <= 40; k_dim += 4) {
    GemmMicrokernelTester()
      .mr(4).nr(2).kr(4).sr(1)
      .m(4).n(2).k(k_dim)
      .a_stride(43)
      .Test(xnn_f32_gemm_relu_ukernel_4x2c4__wasmsimd);
  }
}
1444
// For k = 8, 12, ..., 40, sweeps m over [1, 4] and n over [1, 2],
// one iteration per configuration.
TEST(F32_GEMM_RELU_4X2C4__WASMSIMD, k_div_4_subtile) {
  for (size_t k_dim = 8; k_dim <= 40; k_dim += 4) {
    for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
      for (uint32_t n_dim = 1; n_dim <= 2; ++n_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(2).kr(4).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_gemm_relu_ukernel_4x2c4__wasmsimd);
      }
    }
  }
}
1463
// Runs m = 4 with n = 3 (the only value in [3, 4)) for k in {1, 6, 11, 16}.
// The loop value of n is forwarded to the tester so that the n > nr case
// named by this test is the one actually exercised.
TEST(F32_GEMM_RELU_4X2C4__WASMSIMD, n_gt_2) {
  for (uint32_t n = 3; n < 4; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(2)
        .kr(4)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .Test(xnn_f32_gemm_relu_ukernel_4x2c4__wasmsimd);
    }
  }
}
1479
// Runs m = 4 with n = 3 for k in {1, 6, 11, 16} and cn_stride set to 5.
// The loop value of n is forwarded to the tester so that the n > nr case
// named by this test is the one actually exercised.
TEST(F32_GEMM_RELU_4X2C4__WASMSIMD, n_gt_2_strided_cn) {
  for (uint32_t n = 3; n < 4; n++) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(2)
        .kr(4)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .cn_stride(5)
        .Test(xnn_f32_gemm_relu_ukernel_4x2c4__wasmsimd);
    }
  }
}
1496
// Runs m = 4 with n = 3 for k in {1, 6, 11, 16} and a_stride set to 23.
TEST(F32_GEMM_RELU_4X2C4__WASMSIMD, n_gt_2_strided_a) {
  for (uint32_t n_dim = 3; n_dim < 4; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(2).kr(4).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .a_stride(23)
        .Test(xnn_f32_gemm_relu_ukernel_4x2c4__wasmsimd);
    }
  }
}
1513
// With n = 3 and k in {1, 6, 11, 16}, sweeps m over [1, 4];
// one iteration per configuration.
TEST(F32_GEMM_RELU_4X2C4__WASMSIMD, n_gt_2_subtile) {
  for (uint32_t n_dim = 3; n_dim < 4; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(2).kr(4).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_gemm_relu_ukernel_4x2c4__wasmsimd);
      }
    }
  }
}
1532
// Runs m = 4 with n in {4, 6} for k in {1, 6, 11, 16}.
// The loop value of n is forwarded to the tester so that the larger
// multiples of nr named by this test are the ones actually exercised.
TEST(F32_GEMM_RELU_4X2C4__WASMSIMD, n_div_2) {
  for (uint32_t n = 4; n <= 6; n += 2) {
    for (size_t k = 1; k <= 20; k += 5) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(2)
        .kr(4)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .Test(xnn_f32_gemm_relu_ukernel_4x2c4__wasmsimd);
    }
  }
}
1548
// Runs m = 4 with n in {4, 6} for k in {1, 6, 11, 16} and cn_stride set to 5.
TEST(F32_GEMM_RELU_4X2C4__WASMSIMD, n_div_2_strided_cn) {
  for (uint32_t n_dim = 4; n_dim <= 6; n_dim += 2) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(2).kr(4).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .cn_stride(5)
        .Test(xnn_f32_gemm_relu_ukernel_4x2c4__wasmsimd);
    }
  }
}
1565
// Runs m = 4 with n in {4, 6} for k in {1, 6, 11, 16} and a_stride set to 23.
TEST(F32_GEMM_RELU_4X2C4__WASMSIMD, n_div_2_strided_a) {
  for (uint32_t n_dim = 4; n_dim <= 6; n_dim += 2) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      GemmMicrokernelTester()
        .mr(4).nr(2).kr(4).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .a_stride(23)
        .Test(xnn_f32_gemm_relu_ukernel_4x2c4__wasmsimd);
    }
  }
}
1582
// With n in {4, 6} and k in {1, 6, 11, 16}, sweeps m over [1, 4];
// one iteration per configuration.
TEST(F32_GEMM_RELU_4X2C4__WASMSIMD, n_div_2_subtile) {
  for (uint32_t n_dim = 4; n_dim <= 6; n_dim += 2) {
    for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(2).kr(4).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_gemm_relu_ukernel_4x2c4__wasmsimd);
      }
    }
  }
}
1601
// For k in {1, 6, 11, 16}, sweeps m over [1, 4] and n over [1, 2] with
// cm_stride set to 5; one iteration per configuration.
TEST(F32_GEMM_RELU_4X2C4__WASMSIMD, strided_cm_subtile) {
  for (size_t k_dim = 1; k_dim <= 20; k_dim += 5) {
    for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
      for (uint32_t n_dim = 1; n_dim <= 2; ++n_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(2).kr(4).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .cm_stride(5)
          .iterations(1)
          .Test(xnn_f32_gemm_relu_ukernel_4x2c4__wasmsimd);
      }
    }
  }
}
1621
// Single run at m = 4, n = 2, k = 4 with cm_stride set to 5.
TEST(F32_GEMM_RELU_4X2C4__WASMSIMD, strided_cm) {
  GemmMicrokernelTester()
    .mr(4).nr(2).kr(4).sr(1)
    .m(4).n(2).k(4)
    .cm_stride(5)
    .Test(xnn_f32_gemm_relu_ukernel_4x2c4__wasmsimd);
}
1634 #endif // XNN_ARCH_WASMSIMD
1635
1636
1637 #if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD
// Single run at m = 1, n = 4, k = 1 without setting any stride.
TEST(F32_GEMM_RELU_1X4__WASM, k_eq_1) {
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(1).sr(1)
    .m(1).n(4).k(1)
    .Test(xnn_f32_gemm_relu_ukernel_1x4__wasm);
}
1649
// Single run at m = 1, n = 4, k = 1 with cn_stride set to 7.
TEST(F32_GEMM_RELU_1X4__WASM, strided_cn) {
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(1).sr(1)
    .m(1).n(4).k(1)
    .cn_stride(7)
    .Test(xnn_f32_gemm_relu_ukernel_1x4__wasm);
}
1662
// Single run at m = 1, n = 4, k = 1 with a_stride set to 3.
TEST(F32_GEMM_RELU_1X4__WASM, k_eq_1_strided_a) {
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(1).sr(1)
    .m(1).n(4).k(1)
    .a_stride(3)
    .Test(xnn_f32_gemm_relu_ukernel_1x4__wasm);
}
1675
// Sweeps every (m, n) pair with m in [1, 1] and n in [1, 4] at k = 1,
// running a single iteration per configuration.
TEST(F32_GEMM_RELU_1X4__WASM, k_eq_1_subtile) {
  for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(1).sr(1)
        .m(m_dim).n(n_dim).k(1)
        .iterations(1)
        .Test(xnn_f32_gemm_relu_ukernel_1x4__wasm);
    }
  }
}
1692
// Varies m over [1, 1] while n stays at 4 and k at 1; one iteration per m.
TEST(F32_GEMM_RELU_1X4__WASM, k_eq_1_subtile_m) {
  for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(1).sr(1)
      .m(m_dim).n(4).k(1)
      .iterations(1)
      .Test(xnn_f32_gemm_relu_ukernel_1x4__wasm);
  }
}
1707
// Varies n over [1, 4] while m stays at 1 and k at 1; one iteration per n.
TEST(F32_GEMM_RELU_1X4__WASM, k_eq_1_subtile_n) {
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(1).sr(1)
      .m(1).n(n_dim).k(1)
      .iterations(1)
      .Test(xnn_f32_gemm_relu_ukernel_1x4__wasm);
  }
}
1722
// Runs m = 1, n = 4 for each k in [2, 9].
TEST(F32_GEMM_RELU_1X4__WASM, k_gt_1) {
  for (size_t k_dim = 2; k_dim < 10; ++k_dim) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(1).sr(1)
      .m(1).n(4).k(k_dim)
      .Test(xnn_f32_gemm_relu_ukernel_1x4__wasm);
  }
}
1736
// Runs m = 1, n = 4 for each k in [2, 9] with a_stride set to 11.
TEST(F32_GEMM_RELU_1X4__WASM, k_gt_1_strided_a) {
  for (size_t k_dim = 2; k_dim < 10; ++k_dim) {
    GemmMicrokernelTester()
      .mr(1).nr(4).kr(1).sr(1)
      .m(1).n(4).k(k_dim)
      .a_stride(11)
      .Test(xnn_f32_gemm_relu_ukernel_1x4__wasm);
  }
}
1751
// For each k in [2, 9], sweeps m over [1, 1] and n over [1, 4],
// one iteration per configuration.
TEST(F32_GEMM_RELU_1X4__WASM, k_gt_1_subtile) {
  for (size_t k_dim = 2; k_dim < 10; ++k_dim) {
    for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
      for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_gemm_relu_ukernel_1x4__wasm);
      }
    }
  }
}
1770
// Runs m = 1 with n in {5, 6, 7} for k in {1, 3, 5}.
// The loop value of n is forwarded to the tester so that the n > nr case
// named by this test is the one actually exercised.
TEST(F32_GEMM_RELU_1X4__WASM, n_gt_4) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .Test(xnn_f32_gemm_relu_ukernel_1x4__wasm);
    }
  }
}
1786
// Runs m = 1 with n in {5, 6, 7} for k in {1, 3, 5} and cn_stride set to 7.
// The loop value of n is forwarded to the tester so that the n > nr case
// named by this test is the one actually exercised.
TEST(F32_GEMM_RELU_1X4__WASM, n_gt_4_strided_cn) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_f32_gemm_relu_ukernel_1x4__wasm);
    }
  }
}
1803
// Runs m = 1 with n in {5, 6, 7} for k in {1, 3, 5} and a_stride set to 7.
TEST(F32_GEMM_RELU_1X4__WASM, n_gt_4_strided_a) {
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(1).sr(1)
        .m(1).n(n_dim).k(k_dim)
        .a_stride(7)
        .Test(xnn_f32_gemm_relu_ukernel_1x4__wasm);
    }
  }
}
1820
// With n in {5, 6, 7} and k in {1, 3, 5}, sweeps m over [1, 1];
// one iteration per configuration.
TEST(F32_GEMM_RELU_1X4__WASM, n_gt_4_subtile) {
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_gemm_relu_ukernel_1x4__wasm);
      }
    }
  }
}
1839
// Runs m = 1 with n in {8, 12} for k in {1, 3, 5}.
// The loop value of n is forwarded to the tester so that the larger
// multiples of nr named by this test are the ones actually exercised.
TEST(F32_GEMM_RELU_1X4__WASM, n_div_4) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .Test(xnn_f32_gemm_relu_ukernel_1x4__wasm);
    }
  }
}
1855
// Runs m = 1 with n in {8, 12} for k in {1, 3, 5} and cn_stride set to 7.
TEST(F32_GEMM_RELU_1X4__WASM, n_div_4_strided_cn) {
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(1).sr(1)
        .m(1).n(n_dim).k(k_dim)
        .cn_stride(7)
        .Test(xnn_f32_gemm_relu_ukernel_1x4__wasm);
    }
  }
}
1872
// Runs m = 1 with n in {8, 12} for k in {1, 3, 5} and a_stride set to 7.
TEST(F32_GEMM_RELU_1X4__WASM, n_div_4_strided_a) {
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
        .mr(1).nr(4).kr(1).sr(1)
        .m(1).n(n_dim).k(k_dim)
        .a_stride(7)
        .Test(xnn_f32_gemm_relu_ukernel_1x4__wasm);
    }
  }
}
1889
// With n in {8, 12} and k in {1, 3, 5}, sweeps m over [1, 1];
// one iteration per configuration.
TEST(F32_GEMM_RELU_1X4__WASM, n_div_4_subtile) {
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_gemm_relu_ukernel_1x4__wasm);
      }
    }
  }
}
1908
// For k in {1, 3, 5}, sweeps m over [1, 1] and n over [1, 4] with
// cm_stride set to 7; one iteration per configuration.
TEST(F32_GEMM_RELU_1X4__WASM, strided_cm_subtile) {
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    for (uint32_t m_dim = 1; m_dim <= 1; ++m_dim) {
      for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
        GemmMicrokernelTester()
          .mr(1).nr(4).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_f32_gemm_relu_ukernel_1x4__wasm);
      }
    }
  }
}
1928
// Single run at m = 1, n = 4, k = 1 with cm_stride set to 7.
TEST(F32_GEMM_RELU_1X4__WASM, strided_cm) {
  GemmMicrokernelTester()
    .mr(1).nr(4).kr(1).sr(1)
    .m(1).n(4).k(1)
    .cm_stride(7)
    .Test(xnn_f32_gemm_relu_ukernel_1x4__wasm);
}
1941 #endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD
1942
1943
1944 #if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD
// Single run at m = 2, n = 4, k = 1 without setting any stride.
TEST(F32_GEMM_RELU_2X4__WASM, k_eq_1) {
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(1).sr(1)
    .m(2).n(4).k(1)
    .Test(xnn_f32_gemm_relu_ukernel_2x4__wasm);
}
1956
// Single run at m = 2, n = 4, k = 1 with cn_stride set to 7.
TEST(F32_GEMM_RELU_2X4__WASM, strided_cn) {
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(1).sr(1)
    .m(2).n(4).k(1)
    .cn_stride(7)
    .Test(xnn_f32_gemm_relu_ukernel_2x4__wasm);
}
1969
// Single run at m = 2, n = 4, k = 1 with a_stride set to 3.
TEST(F32_GEMM_RELU_2X4__WASM, k_eq_1_strided_a) {
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(1).sr(1)
    .m(2).n(4).k(1)
    .a_stride(3)
    .Test(xnn_f32_gemm_relu_ukernel_2x4__wasm);
}
1982
// Sweeps every (m, n) pair with m in [1, 2] and n in [1, 4] at k = 1,
// running a single iteration per configuration.
TEST(F32_GEMM_RELU_2X4__WASM, k_eq_1_subtile) {
  for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(1).sr(1)
        .m(m_dim).n(n_dim).k(1)
        .iterations(1)
        .Test(xnn_f32_gemm_relu_ukernel_2x4__wasm);
    }
  }
}
1999
// Varies m over [1, 2] while n stays at 4 and k at 1; one iteration per m.
TEST(F32_GEMM_RELU_2X4__WASM, k_eq_1_subtile_m) {
  for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(1).sr(1)
      .m(m_dim).n(4).k(1)
      .iterations(1)
      .Test(xnn_f32_gemm_relu_ukernel_2x4__wasm);
  }
}
2014
// Varies n over [1, 4] while m stays at 2 and k at 1; one iteration per n.
TEST(F32_GEMM_RELU_2X4__WASM, k_eq_1_subtile_n) {
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(1).sr(1)
      .m(2).n(n_dim).k(1)
      .iterations(1)
      .Test(xnn_f32_gemm_relu_ukernel_2x4__wasm);
  }
}
2029
// Runs m = 2, n = 4 for each k in [2, 9].
TEST(F32_GEMM_RELU_2X4__WASM, k_gt_1) {
  for (size_t k_dim = 2; k_dim < 10; ++k_dim) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(1).sr(1)
      .m(2).n(4).k(k_dim)
      .Test(xnn_f32_gemm_relu_ukernel_2x4__wasm);
  }
}
2043
// Runs m = 2, n = 4 for each k in [2, 9] with a_stride set to 11.
TEST(F32_GEMM_RELU_2X4__WASM, k_gt_1_strided_a) {
  for (size_t k_dim = 2; k_dim < 10; ++k_dim) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(1).sr(1)
      .m(2).n(4).k(k_dim)
      .a_stride(11)
      .Test(xnn_f32_gemm_relu_ukernel_2x4__wasm);
  }
}
2058
// For each k in [2, 9], sweeps m over [1, 2] and n over [1, 4],
// one iteration per configuration.
TEST(F32_GEMM_RELU_2X4__WASM, k_gt_1_subtile) {
  for (size_t k_dim = 2; k_dim < 10; ++k_dim) {
    for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
      for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_gemm_relu_ukernel_2x4__wasm);
      }
    }
  }
}
2077
// Runs m = 2 with n in {5, 6, 7} for k in {1, 3, 5}.
// The loop value of n is forwarded to the tester so that the n > nr case
// named by this test is the one actually exercised.
TEST(F32_GEMM_RELU_2X4__WASM, n_gt_4) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .Test(xnn_f32_gemm_relu_ukernel_2x4__wasm);
    }
  }
}
2093
// Runs m = 2 with n in {5, 6, 7} for k in {1, 3, 5} and cn_stride set to 7.
// The loop value of n is forwarded to the tester so that the n > nr case
// named by this test is the one actually exercised.
TEST(F32_GEMM_RELU_2X4__WASM, n_gt_4_strided_cn) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_f32_gemm_relu_ukernel_2x4__wasm);
    }
  }
}
2110
// Runs m = 2 with n in {5, 6, 7} for k in {1, 3, 5} and a_stride set to 7.
TEST(F32_GEMM_RELU_2X4__WASM, n_gt_4_strided_a) {
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(1).sr(1)
        .m(2).n(n_dim).k(k_dim)
        .a_stride(7)
        .Test(xnn_f32_gemm_relu_ukernel_2x4__wasm);
    }
  }
}
2127
// With n in {5, 6, 7} and k in {1, 3, 5}, sweeps m over [1, 2];
// one iteration per configuration.
TEST(F32_GEMM_RELU_2X4__WASM, n_gt_4_subtile) {
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_gemm_relu_ukernel_2x4__wasm);
      }
    }
  }
}
2146
// Runs m = 2 with n in {8, 12} for k in {1, 3, 5}.
// The loop value of n is forwarded to the tester so that the larger
// multiples of nr named by this test are the ones actually exercised.
TEST(F32_GEMM_RELU_2X4__WASM, n_div_4) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .Test(xnn_f32_gemm_relu_ukernel_2x4__wasm);
    }
  }
}
2162
// Runs m = 2 with n in {8, 12} for k in {1, 3, 5} and cn_stride set to 7.
TEST(F32_GEMM_RELU_2X4__WASM, n_div_4_strided_cn) {
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(1).sr(1)
        .m(2).n(n_dim).k(k_dim)
        .cn_stride(7)
        .Test(xnn_f32_gemm_relu_ukernel_2x4__wasm);
    }
  }
}
2179
// Runs m = 2 with n in {8, 12} for k in {1, 3, 5} and a_stride set to 7.
TEST(F32_GEMM_RELU_2X4__WASM, n_div_4_strided_a) {
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(1).sr(1)
        .m(2).n(n_dim).k(k_dim)
        .a_stride(7)
        .Test(xnn_f32_gemm_relu_ukernel_2x4__wasm);
    }
  }
}
2196
// With n in {8, 12} and k in {1, 3, 5}, sweeps m over [1, 2];
// one iteration per configuration.
TEST(F32_GEMM_RELU_2X4__WASM, n_div_4_subtile) {
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_gemm_relu_ukernel_2x4__wasm);
      }
    }
  }
}
2215
// For k in {1, 3, 5}, sweeps m over [1, 2] and n over [1, 4] with
// cm_stride set to 7; one iteration per configuration.
TEST(F32_GEMM_RELU_2X4__WASM, strided_cm_subtile) {
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    for (uint32_t m_dim = 1; m_dim <= 2; ++m_dim) {
      for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_f32_gemm_relu_ukernel_2x4__wasm);
      }
    }
  }
}
2235
// Single run at m = 2, n = 4, k = 1 with cm_stride set to 7.
TEST(F32_GEMM_RELU_2X4__WASM, strided_cm) {
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(1).sr(1)
    .m(2).n(4).k(1)
    .cm_stride(7)
    .Test(xnn_f32_gemm_relu_ukernel_2x4__wasm);
}
2248 #endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD
2249
2250
2251 #if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD
// Single run at m = 4, n = 4, k = 1 without setting any stride.
TEST(F32_GEMM_RELU_4X4__WASM, k_eq_1) {
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(1).sr(1)
    .m(4).n(4).k(1)
    .Test(xnn_f32_gemm_relu_ukernel_4x4__wasm);
}
2263
// Single run at m = 4, n = 4, k = 1 with cn_stride set to 7.
TEST(F32_GEMM_RELU_4X4__WASM, strided_cn) {
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(1).sr(1)
    .m(4).n(4).k(1)
    .cn_stride(7)
    .Test(xnn_f32_gemm_relu_ukernel_4x4__wasm);
}
2276
// Single run at m = 4, n = 4, k = 1 with a_stride set to 3.
TEST(F32_GEMM_RELU_4X4__WASM, k_eq_1_strided_a) {
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(1).sr(1)
    .m(4).n(4).k(1)
    .a_stride(3)
    .Test(xnn_f32_gemm_relu_ukernel_4x4__wasm);
}
2289
// Sweeps every (m, n) pair with m in [1, 4] and n in [1, 4] at k = 1,
// running a single iteration per configuration.
TEST(F32_GEMM_RELU_4X4__WASM, k_eq_1_subtile) {
  for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
    for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(1).sr(1)
        .m(m_dim).n(n_dim).k(1)
        .iterations(1)
        .Test(xnn_f32_gemm_relu_ukernel_4x4__wasm);
    }
  }
}
2306
// Varies m over [1, 4] while n stays at 4 and k at 1; one iteration per m.
TEST(F32_GEMM_RELU_4X4__WASM, k_eq_1_subtile_m) {
  for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(1).sr(1)
      .m(m_dim).n(4).k(1)
      .iterations(1)
      .Test(xnn_f32_gemm_relu_ukernel_4x4__wasm);
  }
}
2321
// Varies n over [1, 4] while m stays at 4 and k at 1; one iteration per n.
TEST(F32_GEMM_RELU_4X4__WASM, k_eq_1_subtile_n) {
  for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(1).sr(1)
      .m(4).n(n_dim).k(1)
      .iterations(1)
      .Test(xnn_f32_gemm_relu_ukernel_4x4__wasm);
  }
}
2336
// Runs m = 4, n = 4 for each k in [2, 9].
TEST(F32_GEMM_RELU_4X4__WASM, k_gt_1) {
  for (size_t k_dim = 2; k_dim < 10; ++k_dim) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(1).sr(1)
      .m(4).n(4).k(k_dim)
      .Test(xnn_f32_gemm_relu_ukernel_4x4__wasm);
  }
}
2350
// Runs m = 4, n = 4 for each k in [2, 9] with a_stride set to 11.
TEST(F32_GEMM_RELU_4X4__WASM, k_gt_1_strided_a) {
  for (size_t k_dim = 2; k_dim < 10; ++k_dim) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(1).sr(1)
      .m(4).n(4).k(k_dim)
      .a_stride(11)
      .Test(xnn_f32_gemm_relu_ukernel_4x4__wasm);
  }
}
2365
// For each k in [2, 9], sweeps m over [1, 4] and n over [1, 4],
// one iteration per configuration.
TEST(F32_GEMM_RELU_4X4__WASM, k_gt_1_subtile) {
  for (size_t k_dim = 2; k_dim < 10; ++k_dim) {
    for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
      for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_gemm_relu_ukernel_4x4__wasm);
      }
    }
  }
}
2384
// Runs m = 4 with n in {5, 6, 7} for k in {1, 3, 5}.
// The loop value of n is forwarded to the tester so that the n > nr case
// named by this test is the one actually exercised.
TEST(F32_GEMM_RELU_4X4__WASM, n_gt_4) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .Test(xnn_f32_gemm_relu_ukernel_4x4__wasm);
    }
  }
}
2400
// Runs m = 4 with n in {5, 6, 7} for k in {1, 3, 5} and cn_stride set to 7.
// The loop value of n is forwarded to the tester so that the n > nr case
// named by this test is the one actually exercised.
TEST(F32_GEMM_RELU_4X4__WASM, n_gt_4_strided_cn) {
  for (uint32_t n = 5; n < 8; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .cn_stride(7)
        .Test(xnn_f32_gemm_relu_ukernel_4x4__wasm);
    }
  }
}
2417
// Runs m = 4 with n in {5, 6, 7} for k in {1, 3, 5} and a_stride set to 7.
TEST(F32_GEMM_RELU_4X4__WASM, n_gt_4_strided_a) {
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(1).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .a_stride(7)
        .Test(xnn_f32_gemm_relu_ukernel_4x4__wasm);
    }
  }
}
2434
// With n in {5, 6, 7} and k in {1, 3, 5}, sweeps m over [1, 4];
// one iteration per configuration.
TEST(F32_GEMM_RELU_4X4__WASM, n_gt_4_subtile) {
  for (uint32_t n_dim = 5; n_dim < 8; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_gemm_relu_ukernel_4x4__wasm);
      }
    }
  }
}
2453
// Runs m = 4 with n in {8, 12} for k in {1, 3, 5}.
// The loop value of n is forwarded to the tester so that the larger
// multiples of nr named by this test are the ones actually exercised.
TEST(F32_GEMM_RELU_4X4__WASM, n_div_4) {
  for (uint32_t n = 8; n <= 12; n += 4) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(4)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .Test(xnn_f32_gemm_relu_ukernel_4x4__wasm);
    }
  }
}
2469
// Runs m = 4 with n in {8, 12} for k in {1, 3, 5} and cn_stride set to 7.
TEST(F32_GEMM_RELU_4X4__WASM, n_div_4_strided_cn) {
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(1).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .cn_stride(7)
        .Test(xnn_f32_gemm_relu_ukernel_4x4__wasm);
    }
  }
}
2486
// Runs m = 4 with n in {8, 12} for k in {1, 3, 5} and a_stride set to 7.
TEST(F32_GEMM_RELU_4X4__WASM, n_div_4_strided_a) {
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(1).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .a_stride(7)
        .Test(xnn_f32_gemm_relu_ukernel_4x4__wasm);
    }
  }
}
2503
// With n in {8, 12} and k in {1, 3, 5}, sweeps m over [1, 4];
// one iteration per configuration.
TEST(F32_GEMM_RELU_4X4__WASM, n_div_4_subtile) {
  for (uint32_t n_dim = 8; n_dim <= 12; n_dim += 4) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_gemm_relu_ukernel_4x4__wasm);
      }
    }
  }
}
2522
// For k in {1, 3, 5}, sweeps m over [1, 4] and n over [1, 4] with
// cm_stride set to 7; one iteration per configuration.
TEST(F32_GEMM_RELU_4X4__WASM, strided_cm_subtile) {
  for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
    for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
      for (uint32_t n_dim = 1; n_dim <= 4; ++n_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_f32_gemm_relu_ukernel_4x4__wasm);
      }
    }
  }
}
2542
// Single run at m = 4, n = 4, k = 1 with cm_stride set to 7.
TEST(F32_GEMM_RELU_4X4__WASM, strided_cm) {
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(1).sr(1)
    .m(4).n(4).k(1)
    .cm_stride(7)
    .Test(xnn_f32_gemm_relu_ukernel_4x4__wasm);
}
2555 #endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD
2556
2557
2558 #if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD
// Single run at m = 4, n = 2, k = 1 without setting any stride.
TEST(F32_GEMM_RELU_4X2__WASM, k_eq_1) {
  GemmMicrokernelTester()
    .mr(4).nr(2).kr(1).sr(1)
    .m(4).n(2).k(1)
    .Test(xnn_f32_gemm_relu_ukernel_4x2__wasm);
}
2570
// Single run at m = 4, n = 2, k = 1 with cn_stride set to 5.
TEST(F32_GEMM_RELU_4X2__WASM, strided_cn) {
  GemmMicrokernelTester()
    .mr(4).nr(2).kr(1).sr(1)
    .m(4).n(2).k(1)
    .cn_stride(5)
    .Test(xnn_f32_gemm_relu_ukernel_4x2__wasm);
}
2583
// Single run at m = 4, n = 2, k = 1 with a_stride set to 3.
TEST(F32_GEMM_RELU_4X2__WASM, k_eq_1_strided_a) {
  GemmMicrokernelTester()
    .mr(4).nr(2).kr(1).sr(1)
    .m(4).n(2).k(1)
    .a_stride(3)
    .Test(xnn_f32_gemm_relu_ukernel_4x2__wasm);
}
2596
// Sweeps every (m, n) pair with m in [1, 4] and n in [1, 2] at k = 1,
// running a single iteration per configuration.
TEST(F32_GEMM_RELU_4X2__WASM, k_eq_1_subtile) {
  for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
    for (uint32_t n_dim = 1; n_dim <= 2; ++n_dim) {
      GemmMicrokernelTester()
        .mr(4).nr(2).kr(1).sr(1)
        .m(m_dim).n(n_dim).k(1)
        .iterations(1)
        .Test(xnn_f32_gemm_relu_ukernel_4x2__wasm);
    }
  }
}
2613
// Varies m over [1, 4] while n stays at 2 and k at 1; one iteration per m.
TEST(F32_GEMM_RELU_4X2__WASM, k_eq_1_subtile_m) {
  for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
    GemmMicrokernelTester()
      .mr(4).nr(2).kr(1).sr(1)
      .m(m_dim).n(2).k(1)
      .iterations(1)
      .Test(xnn_f32_gemm_relu_ukernel_4x2__wasm);
  }
}
2628
// Varies n over [1, 2] while m stays at 4 and k at 1; one iteration per n.
TEST(F32_GEMM_RELU_4X2__WASM, k_eq_1_subtile_n) {
  for (uint32_t n_dim = 1; n_dim <= 2; ++n_dim) {
    GemmMicrokernelTester()
      .mr(4).nr(2).kr(1).sr(1)
      .m(4).n(n_dim).k(1)
      .iterations(1)
      .Test(xnn_f32_gemm_relu_ukernel_4x2__wasm);
  }
}
2643
// Runs m = 4, n = 2 for each k in [2, 9].
TEST(F32_GEMM_RELU_4X2__WASM, k_gt_1) {
  for (size_t k_dim = 2; k_dim < 10; ++k_dim) {
    GemmMicrokernelTester()
      .mr(4).nr(2).kr(1).sr(1)
      .m(4).n(2).k(k_dim)
      .Test(xnn_f32_gemm_relu_ukernel_4x2__wasm);
  }
}
2657
// Runs m = 4, n = 2 for each k in [2, 9] with a_stride set to 11.
TEST(F32_GEMM_RELU_4X2__WASM, k_gt_1_strided_a) {
  for (size_t k_dim = 2; k_dim < 10; ++k_dim) {
    GemmMicrokernelTester()
      .mr(4).nr(2).kr(1).sr(1)
      .m(4).n(2).k(k_dim)
      .a_stride(11)
      .Test(xnn_f32_gemm_relu_ukernel_4x2__wasm);
  }
}
2672
// For each k in [2, 9], sweeps m over [1, 4] and n over [1, 2],
// one iteration per configuration.
TEST(F32_GEMM_RELU_4X2__WASM, k_gt_1_subtile) {
  for (size_t k_dim = 2; k_dim < 10; ++k_dim) {
    for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
      for (uint32_t n_dim = 1; n_dim <= 2; ++n_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(2).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_gemm_relu_ukernel_4x2__wasm);
      }
    }
  }
}
2691
// Runs m = 4 with n = 3 (the only value in [3, 4)) for k in {1, 3, 5}.
// The loop value of n is forwarded to the tester so that the n > nr case
// named by this test is the one actually exercised.
TEST(F32_GEMM_RELU_4X2__WASM, n_gt_2) {
  for (uint32_t n = 3; n < 4; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(2)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .Test(xnn_f32_gemm_relu_ukernel_4x2__wasm);
    }
  }
}
2707
// Runs m = 4 with n = 3 for k in {1, 3, 5} and cn_stride set to 5.
// The loop value of n is forwarded to the tester so that the n > nr case
// named by this test is the one actually exercised.
TEST(F32_GEMM_RELU_4X2__WASM, n_gt_2_strided_cn) {
  for (uint32_t n = 3; n < 4; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(2)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .cn_stride(5)
        .Test(xnn_f32_gemm_relu_ukernel_4x2__wasm);
    }
  }
}
2724
// Runs m = 4 with n = 3 for k in {1, 3, 5} and a_stride set to 7.
TEST(F32_GEMM_RELU_4X2__WASM, n_gt_2_strided_a) {
  for (uint32_t n_dim = 3; n_dim < 4; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      GemmMicrokernelTester()
        .mr(4).nr(2).kr(1).sr(1)
        .m(4).n(n_dim).k(k_dim)
        .a_stride(7)
        .Test(xnn_f32_gemm_relu_ukernel_4x2__wasm);
    }
  }
}
2741
// With n = 3 and k in {1, 3, 5}, sweeps m over [1, 4];
// one iteration per configuration.
TEST(F32_GEMM_RELU_4X2__WASM, n_gt_2_subtile) {
  for (uint32_t n_dim = 3; n_dim < 4; ++n_dim) {
    for (size_t k_dim = 1; k_dim <= 5; k_dim += 2) {
      for (uint32_t m_dim = 1; m_dim <= 4; ++m_dim) {
        GemmMicrokernelTester()
          .mr(4).nr(2).kr(1).sr(1)
          .m(m_dim).n(n_dim).k(k_dim)
          .iterations(1)
          .Test(xnn_f32_gemm_relu_ukernel_4x2__wasm);
      }
    }
  }
}
2760
// Runs m = 4 with n in {4, 6} for k in {1, 3, 5}.
// The loop value of n is forwarded to the tester so that the larger
// multiples of nr named by this test are the ones actually exercised.
TEST(F32_GEMM_RELU_4X2__WASM, n_div_2) {
  for (uint32_t n = 4; n <= 6; n += 2) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(2)
        .kr(1)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .Test(xnn_f32_gemm_relu_ukernel_4x2__wasm);
    }
  }
}
2776
// Runs the 4x2 WASM ReLU GEMM microkernel with n in {4, 6}, k in {1, 3, 5},
// m fixed at 4 and cn_stride set to 5.
TEST(F32_GEMM_RELU_4X2__WASM, n_div_2_strided_cn) {
  for (uint32_t n_size = 4; n_size <= 6; n_size += 2) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(2).kr(1).sr(1);
      tester.m(4).n(n_size).k(k_size).cn_stride(5);
      tester.Test(xnn_f32_gemm_relu_ukernel_4x2__wasm);
    }
  }
}
2793
// Runs the 4x2 WASM ReLU GEMM microkernel with n in {4, 6}, k in {1, 3, 5},
// m fixed at 4 and a_stride set to 7.
TEST(F32_GEMM_RELU_4X2__WASM, n_div_2_strided_a) {
  for (uint32_t n_size = 4; n_size <= 6; n_size += 2) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(2).kr(1).sr(1);
      tester.m(4).n(n_size).k(k_size).a_stride(7);
      tester.Test(xnn_f32_gemm_relu_ukernel_4x2__wasm);
    }
  }
}
2810
// Runs the 4x2 WASM ReLU GEMM microkernel with n in {4, 6}, k in {1, 3, 5}
// and m in [1, 4], with iterations set to 1.
TEST(F32_GEMM_RELU_4X2__WASM, n_div_2_subtile) {
  for (uint32_t n_size = 4; n_size <= 6; n_size += 2) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(2).kr(1).sr(1);
        tester.m(m_size).n(n_size).k(k_size).iterations(1);
        tester.Test(xnn_f32_gemm_relu_ukernel_4x2__wasm);
      }
    }
  }
}
2829
// Runs the 4x2 WASM ReLU GEMM microkernel for k in {1, 3, 5}, m in [1, 4]
// and n in [1, 2], with cm_stride set to 5 and iterations set to 1.
TEST(F32_GEMM_RELU_4X2__WASM, strided_cm_subtile) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
      for (uint32_t n_size = 1; n_size <= 2; ++n_size) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(2).kr(1).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.cm_stride(5).iterations(1);
        tester.Test(xnn_f32_gemm_relu_ukernel_4x2__wasm);
      }
    }
  }
}
2849
// Runs the 4x2 WASM ReLU GEMM microkernel once with m=4, n=2, k=1 and
// cm_stride set to 5.
TEST(F32_GEMM_RELU_4X2__WASM, strided_cm) {
  GemmMicrokernelTester tester;
  tester.mr(4).nr(2).kr(1).sr(1);
  tester.m(4).n(2).k(1).cm_stride(5);
  tester.Test(xnn_f32_gemm_relu_ukernel_4x2__wasm);
}
2862 #endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD
2863
2864
// Runs the 1x4 scalar ReLU GEMM microkernel once with m=1, n=4, k=1.
TEST(F32_GEMM_RELU_1X4__SCALAR, k_eq_1) {
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(1).sr(1);
  tester.m(1).n(4).k(1);
  tester.Test(xnn_f32_gemm_relu_ukernel_1x4__scalar);
}
2876
// Runs the 1x4 scalar ReLU GEMM microkernel once with m=1, n=4, k=1 and
// cn_stride set to 7.
TEST(F32_GEMM_RELU_1X4__SCALAR, strided_cn) {
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(1).sr(1);
  tester.m(1).n(4).k(1).cn_stride(7);
  tester.Test(xnn_f32_gemm_relu_ukernel_1x4__scalar);
}
2889
// Runs the 1x4 scalar ReLU GEMM microkernel once with m=1, n=4, k=1 and
// a_stride set to 3.
TEST(F32_GEMM_RELU_1X4__SCALAR, k_eq_1_strided_a) {
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(1).sr(1);
  tester.m(1).n(4).k(1).a_stride(3);
  tester.Test(xnn_f32_gemm_relu_ukernel_1x4__scalar);
}
2902
// Runs the 1x4 scalar ReLU GEMM microkernel at k=1 for m in [1, 1] and
// n in [1, 4], with iterations set to 1.
TEST(F32_GEMM_RELU_1X4__SCALAR, k_eq_1_subtile) {
  for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(1).sr(1);
      tester.m(m_size).n(n_size).k(1).iterations(1);
      tester.Test(xnn_f32_gemm_relu_ukernel_1x4__scalar);
    }
  }
}
2919
// Runs the 1x4 scalar ReLU GEMM microkernel at k=1, n=4 for m in [1, 1],
// with iterations set to 1.
TEST(F32_GEMM_RELU_1X4__SCALAR, k_eq_1_subtile_m) {
  for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(1).sr(1);
    tester.m(m_size).n(4).k(1).iterations(1);
    tester.Test(xnn_f32_gemm_relu_ukernel_1x4__scalar);
  }
}
2934
// Runs the 1x4 scalar ReLU GEMM microkernel at k=1, m=1 for n in [1, 4],
// with iterations set to 1.
TEST(F32_GEMM_RELU_1X4__SCALAR, k_eq_1_subtile_n) {
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(1).sr(1);
    tester.m(1).n(n_size).k(1).iterations(1);
    tester.Test(xnn_f32_gemm_relu_ukernel_1x4__scalar);
  }
}
2949
// Runs the 1x4 scalar ReLU GEMM microkernel with m=1, n=4 for k in [2, 9].
TEST(F32_GEMM_RELU_1X4__SCALAR, k_gt_1) {
  for (size_t k_size = 2; k_size < 10; ++k_size) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(1).sr(1);
    tester.m(1).n(4).k(k_size);
    tester.Test(xnn_f32_gemm_relu_ukernel_1x4__scalar);
  }
}
2963
// Runs the 1x4 scalar ReLU GEMM microkernel with m=1, n=4 for k in [2, 9],
// with a_stride set to 11.
TEST(F32_GEMM_RELU_1X4__SCALAR, k_gt_1_strided_a) {
  for (size_t k_size = 2; k_size < 10; ++k_size) {
    GemmMicrokernelTester tester;
    tester.mr(1).nr(4).kr(1).sr(1);
    tester.m(1).n(4).k(k_size).a_stride(11);
    tester.Test(xnn_f32_gemm_relu_ukernel_1x4__scalar);
  }
}
2978
// Runs the 1x4 scalar ReLU GEMM microkernel for k in [2, 9], m in [1, 1]
// and n in [1, 4], with iterations set to 1.
TEST(F32_GEMM_RELU_1X4__SCALAR, k_gt_1_subtile) {
  for (size_t k_size = 2; k_size < 10; ++k_size) {
    for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
      for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(1).sr(1);
        tester.m(m_size).n(n_size).k(k_size).iterations(1);
        tester.Test(xnn_f32_gemm_relu_ukernel_1x4__scalar);
      }
    }
  }
}
2997
// Runs the 1x4 scalar ReLU GEMM microkernel with n in [5, 7] and
// k in {1, 3, 5}, m fixed at 1.
// n is taken from the loop variable; a constant .n(4) would leave the loop
// variable unused and only repeat the n == nr case.
TEST(F32_GEMM_RELU_1X4__SCALAR, n_gt_4) {
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(1).sr(1);
      tester.m(1).n(n_size).k(k_size);
      tester.Test(xnn_f32_gemm_relu_ukernel_1x4__scalar);
    }
  }
}
3013
// Runs the 1x4 scalar ReLU GEMM microkernel with n in [5, 7], k in
// {1, 3, 5}, m fixed at 1 and cn_stride set to 7.
// n is taken from the loop variable; a constant .n(4) would leave the loop
// variable unused and only repeat the n == nr case.
TEST(F32_GEMM_RELU_1X4__SCALAR, n_gt_4_strided_cn) {
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(1).sr(1);
      tester.m(1).n(n_size).k(k_size).cn_stride(7);
      tester.Test(xnn_f32_gemm_relu_ukernel_1x4__scalar);
    }
  }
}
3030
// Runs the 1x4 scalar ReLU GEMM microkernel with n in [5, 7], k in
// {1, 3, 5}, m fixed at 1 and a_stride set to 7.
TEST(F32_GEMM_RELU_1X4__SCALAR, n_gt_4_strided_a) {
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(1).sr(1);
      tester.m(1).n(n_size).k(k_size).a_stride(7);
      tester.Test(xnn_f32_gemm_relu_ukernel_1x4__scalar);
    }
  }
}
3047
// Runs the 1x4 scalar ReLU GEMM microkernel with n in [5, 7], k in
// {1, 3, 5} and m in [1, 1], with iterations set to 1.
TEST(F32_GEMM_RELU_1X4__SCALAR, n_gt_4_subtile) {
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(1).sr(1);
        tester.m(m_size).n(n_size).k(k_size).iterations(1);
        tester.Test(xnn_f32_gemm_relu_ukernel_1x4__scalar);
      }
    }
  }
}
3066
// Runs the 1x4 scalar ReLU GEMM microkernel with n in {8, 12} (multiples
// of nr = 4) and k in {1, 3, 5}, m fixed at 1.
// n is taken from the loop variable; a constant .n(4) would leave the loop
// variable unused and only repeat the n == nr case.
TEST(F32_GEMM_RELU_1X4__SCALAR, n_div_4) {
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(1).sr(1);
      tester.m(1).n(n_size).k(k_size);
      tester.Test(xnn_f32_gemm_relu_ukernel_1x4__scalar);
    }
  }
}
3082
// Runs the 1x4 scalar ReLU GEMM microkernel with n in {8, 12}, k in
// {1, 3, 5}, m fixed at 1 and cn_stride set to 7.
TEST(F32_GEMM_RELU_1X4__SCALAR, n_div_4_strided_cn) {
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(1).sr(1);
      tester.m(1).n(n_size).k(k_size).cn_stride(7);
      tester.Test(xnn_f32_gemm_relu_ukernel_1x4__scalar);
    }
  }
}
3099
// Runs the 1x4 scalar ReLU GEMM microkernel with n in {8, 12}, k in
// {1, 3, 5}, m fixed at 1 and a_stride set to 7.
TEST(F32_GEMM_RELU_1X4__SCALAR, n_div_4_strided_a) {
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester tester;
      tester.mr(1).nr(4).kr(1).sr(1);
      tester.m(1).n(n_size).k(k_size).a_stride(7);
      tester.Test(xnn_f32_gemm_relu_ukernel_1x4__scalar);
    }
  }
}
3116
// Runs the 1x4 scalar ReLU GEMM microkernel with n in {8, 12}, k in
// {1, 3, 5} and m in [1, 1], with iterations set to 1.
TEST(F32_GEMM_RELU_1X4__SCALAR, n_div_4_subtile) {
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(1).sr(1);
        tester.m(m_size).n(n_size).k(k_size).iterations(1);
        tester.Test(xnn_f32_gemm_relu_ukernel_1x4__scalar);
      }
    }
  }
}
3135
// Runs the 1x4 scalar ReLU GEMM microkernel for k in {1, 3, 5}, m in [1, 1]
// and n in [1, 4], with cm_stride set to 7 and iterations set to 1.
TEST(F32_GEMM_RELU_1X4__SCALAR, strided_cm_subtile) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
      for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
        GemmMicrokernelTester tester;
        tester.mr(1).nr(4).kr(1).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.cm_stride(7).iterations(1);
        tester.Test(xnn_f32_gemm_relu_ukernel_1x4__scalar);
      }
    }
  }
}
3155
// Runs the 1x4 scalar ReLU GEMM microkernel once with m=1, n=4, k=1 and
// cm_stride set to 7.
TEST(F32_GEMM_RELU_1X4__SCALAR, strided_cm) {
  GemmMicrokernelTester tester;
  tester.mr(1).nr(4).kr(1).sr(1);
  tester.m(1).n(4).k(1).cm_stride(7);
  tester.Test(xnn_f32_gemm_relu_ukernel_1x4__scalar);
}
3168
3169
// Runs the 2x4 scalar ReLU GEMM microkernel once with m=2, n=4, k=1.
TEST(F32_GEMM_RELU_2X4__SCALAR, k_eq_1) {
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(1).sr(1);
  tester.m(2).n(4).k(1);
  tester.Test(xnn_f32_gemm_relu_ukernel_2x4__scalar);
}
3181
// Runs the 2x4 scalar ReLU GEMM microkernel once with m=2, n=4, k=1 and
// cn_stride set to 7.
TEST(F32_GEMM_RELU_2X4__SCALAR, strided_cn) {
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(1).sr(1);
  tester.m(2).n(4).k(1).cn_stride(7);
  tester.Test(xnn_f32_gemm_relu_ukernel_2x4__scalar);
}
3194
// Runs the 2x4 scalar ReLU GEMM microkernel once with m=2, n=4, k=1 and
// a_stride set to 3.
TEST(F32_GEMM_RELU_2X4__SCALAR, k_eq_1_strided_a) {
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(1).sr(1);
  tester.m(2).n(4).k(1).a_stride(3);
  tester.Test(xnn_f32_gemm_relu_ukernel_2x4__scalar);
}
3207
// Runs the 2x4 scalar ReLU GEMM microkernel at k=1 for m in [1, 2] and
// n in [1, 4], with iterations set to 1.
TEST(F32_GEMM_RELU_2X4__SCALAR, k_eq_1_subtile) {
  for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(1).sr(1);
      tester.m(m_size).n(n_size).k(1).iterations(1);
      tester.Test(xnn_f32_gemm_relu_ukernel_2x4__scalar);
    }
  }
}
3224
// Runs the 2x4 scalar ReLU GEMM microkernel at k=1, n=4 for m in [1, 2],
// with iterations set to 1.
TEST(F32_GEMM_RELU_2X4__SCALAR, k_eq_1_subtile_m) {
  for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(1).sr(1);
    tester.m(m_size).n(4).k(1).iterations(1);
    tester.Test(xnn_f32_gemm_relu_ukernel_2x4__scalar);
  }
}
3239
// Runs the 2x4 scalar ReLU GEMM microkernel at k=1, m=2 for n in [1, 4],
// with iterations set to 1.
TEST(F32_GEMM_RELU_2X4__SCALAR, k_eq_1_subtile_n) {
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(1).sr(1);
    tester.m(2).n(n_size).k(1).iterations(1);
    tester.Test(xnn_f32_gemm_relu_ukernel_2x4__scalar);
  }
}
3254
// Runs the 2x4 scalar ReLU GEMM microkernel with m=2, n=4 for k in [2, 9].
TEST(F32_GEMM_RELU_2X4__SCALAR, k_gt_1) {
  for (size_t k_size = 2; k_size < 10; ++k_size) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(1).sr(1);
    tester.m(2).n(4).k(k_size);
    tester.Test(xnn_f32_gemm_relu_ukernel_2x4__scalar);
  }
}
3268
// Runs the 2x4 scalar ReLU GEMM microkernel with m=2, n=4 for k in [2, 9],
// with a_stride set to 11.
TEST(F32_GEMM_RELU_2X4__SCALAR, k_gt_1_strided_a) {
  for (size_t k_size = 2; k_size < 10; ++k_size) {
    GemmMicrokernelTester tester;
    tester.mr(2).nr(4).kr(1).sr(1);
    tester.m(2).n(4).k(k_size).a_stride(11);
    tester.Test(xnn_f32_gemm_relu_ukernel_2x4__scalar);
  }
}
3283
// Runs the 2x4 scalar ReLU GEMM microkernel for k in [2, 9], m in [1, 2]
// and n in [1, 4], with iterations set to 1.
TEST(F32_GEMM_RELU_2X4__SCALAR, k_gt_1_subtile) {
  for (size_t k_size = 2; k_size < 10; ++k_size) {
    for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
      for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(1).sr(1);
        tester.m(m_size).n(n_size).k(k_size).iterations(1);
        tester.Test(xnn_f32_gemm_relu_ukernel_2x4__scalar);
      }
    }
  }
}
3302
// Runs the 2x4 scalar ReLU GEMM microkernel with n in [5, 7] and
// k in {1, 3, 5}, m fixed at 2.
// n is taken from the loop variable; a constant .n(4) would leave the loop
// variable unused and only repeat the n == nr case.
TEST(F32_GEMM_RELU_2X4__SCALAR, n_gt_4) {
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(1).sr(1);
      tester.m(2).n(n_size).k(k_size);
      tester.Test(xnn_f32_gemm_relu_ukernel_2x4__scalar);
    }
  }
}
3318
// Runs the 2x4 scalar ReLU GEMM microkernel with n in [5, 7], k in
// {1, 3, 5}, m fixed at 2 and cn_stride set to 7.
// n is taken from the loop variable; a constant .n(4) would leave the loop
// variable unused and only repeat the n == nr case.
TEST(F32_GEMM_RELU_2X4__SCALAR, n_gt_4_strided_cn) {
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(1).sr(1);
      tester.m(2).n(n_size).k(k_size).cn_stride(7);
      tester.Test(xnn_f32_gemm_relu_ukernel_2x4__scalar);
    }
  }
}
3335
// Runs the 2x4 scalar ReLU GEMM microkernel with n in [5, 7], k in
// {1, 3, 5}, m fixed at 2 and a_stride set to 7.
TEST(F32_GEMM_RELU_2X4__SCALAR, n_gt_4_strided_a) {
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(1).sr(1);
      tester.m(2).n(n_size).k(k_size).a_stride(7);
      tester.Test(xnn_f32_gemm_relu_ukernel_2x4__scalar);
    }
  }
}
3352
// Runs the 2x4 scalar ReLU GEMM microkernel with n in [5, 7], k in
// {1, 3, 5} and m in [1, 2], with iterations set to 1.
TEST(F32_GEMM_RELU_2X4__SCALAR, n_gt_4_subtile) {
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(1).sr(1);
        tester.m(m_size).n(n_size).k(k_size).iterations(1);
        tester.Test(xnn_f32_gemm_relu_ukernel_2x4__scalar);
      }
    }
  }
}
3371
// Runs the 2x4 scalar ReLU GEMM microkernel with n in {8, 12} (multiples
// of nr = 4) and k in {1, 3, 5}, m fixed at 2.
// n is taken from the loop variable; a constant .n(4) would leave the loop
// variable unused and only repeat the n == nr case.
TEST(F32_GEMM_RELU_2X4__SCALAR, n_div_4) {
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(1).sr(1);
      tester.m(2).n(n_size).k(k_size);
      tester.Test(xnn_f32_gemm_relu_ukernel_2x4__scalar);
    }
  }
}
3387
// Runs the 2x4 scalar ReLU GEMM microkernel with n in {8, 12}, k in
// {1, 3, 5}, m fixed at 2 and cn_stride set to 7.
TEST(F32_GEMM_RELU_2X4__SCALAR, n_div_4_strided_cn) {
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(1).sr(1);
      tester.m(2).n(n_size).k(k_size).cn_stride(7);
      tester.Test(xnn_f32_gemm_relu_ukernel_2x4__scalar);
    }
  }
}
3404
// Runs the 2x4 scalar ReLU GEMM microkernel with n in {8, 12}, k in
// {1, 3, 5}, m fixed at 2 and a_stride set to 7.
TEST(F32_GEMM_RELU_2X4__SCALAR, n_div_4_strided_a) {
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester tester;
      tester.mr(2).nr(4).kr(1).sr(1);
      tester.m(2).n(n_size).k(k_size).a_stride(7);
      tester.Test(xnn_f32_gemm_relu_ukernel_2x4__scalar);
    }
  }
}
3421
// Runs the 2x4 scalar ReLU GEMM microkernel with n in {8, 12}, k in
// {1, 3, 5} and m in [1, 2], with iterations set to 1.
TEST(F32_GEMM_RELU_2X4__SCALAR, n_div_4_subtile) {
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(1).sr(1);
        tester.m(m_size).n(n_size).k(k_size).iterations(1);
        tester.Test(xnn_f32_gemm_relu_ukernel_2x4__scalar);
      }
    }
  }
}
3440
// Runs the 2x4 scalar ReLU GEMM microkernel for k in {1, 3, 5}, m in [1, 2]
// and n in [1, 4], with cm_stride set to 7 and iterations set to 1.
TEST(F32_GEMM_RELU_2X4__SCALAR, strided_cm_subtile) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    for (uint32_t m_size = 1; m_size <= 2; ++m_size) {
      for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
        GemmMicrokernelTester tester;
        tester.mr(2).nr(4).kr(1).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.cm_stride(7).iterations(1);
        tester.Test(xnn_f32_gemm_relu_ukernel_2x4__scalar);
      }
    }
  }
}
3460
// Runs the 2x4 scalar ReLU GEMM microkernel once with m=2, n=4, k=1 and
// cm_stride set to 7.
TEST(F32_GEMM_RELU_2X4__SCALAR, strided_cm) {
  GemmMicrokernelTester tester;
  tester.mr(2).nr(4).kr(1).sr(1);
  tester.m(2).n(4).k(1).cm_stride(7);
  tester.Test(xnn_f32_gemm_relu_ukernel_2x4__scalar);
}
3473
3474
// Runs the 4x4 scalar ReLU GEMM microkernel once with m=4, n=4, k=1.
TEST(F32_GEMM_RELU_4X4__SCALAR, k_eq_1) {
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(1).sr(1);
  tester.m(4).n(4).k(1);
  tester.Test(xnn_f32_gemm_relu_ukernel_4x4__scalar);
}
3486
// Runs the 4x4 scalar ReLU GEMM microkernel once with m=4, n=4, k=1 and
// cn_stride set to 7.
TEST(F32_GEMM_RELU_4X4__SCALAR, strided_cn) {
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(1).sr(1);
  tester.m(4).n(4).k(1).cn_stride(7);
  tester.Test(xnn_f32_gemm_relu_ukernel_4x4__scalar);
}
3499
// Runs the 4x4 scalar ReLU GEMM microkernel once with m=4, n=4, k=1 and
// a_stride set to 3.
TEST(F32_GEMM_RELU_4X4__SCALAR, k_eq_1_strided_a) {
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(1).sr(1);
  tester.m(4).n(4).k(1).a_stride(3);
  tester.Test(xnn_f32_gemm_relu_ukernel_4x4__scalar);
}
3512
// Runs the 4x4 scalar ReLU GEMM microkernel at k=1 for m in [1, 4] and
// n in [1, 4], with iterations set to 1.
TEST(F32_GEMM_RELU_4X4__SCALAR, k_eq_1_subtile) {
  for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
    for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(1).sr(1);
      tester.m(m_size).n(n_size).k(1).iterations(1);
      tester.Test(xnn_f32_gemm_relu_ukernel_4x4__scalar);
    }
  }
}
3529
// Runs the 4x4 scalar ReLU GEMM microkernel at k=1, n=4 for m in [1, 4],
// with iterations set to 1.
TEST(F32_GEMM_RELU_4X4__SCALAR, k_eq_1_subtile_m) {
  for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(1).sr(1);
    tester.m(m_size).n(4).k(1).iterations(1);
    tester.Test(xnn_f32_gemm_relu_ukernel_4x4__scalar);
  }
}
3544
// Runs the 4x4 scalar ReLU GEMM microkernel at k=1, m=4 for n in [1, 4],
// with iterations set to 1.
TEST(F32_GEMM_RELU_4X4__SCALAR, k_eq_1_subtile_n) {
  for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(1).sr(1);
    tester.m(4).n(n_size).k(1).iterations(1);
    tester.Test(xnn_f32_gemm_relu_ukernel_4x4__scalar);
  }
}
3559
// Runs the 4x4 scalar ReLU GEMM microkernel with m=4, n=4 for k in [2, 9].
TEST(F32_GEMM_RELU_4X4__SCALAR, k_gt_1) {
  for (size_t k_size = 2; k_size < 10; ++k_size) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(1).sr(1);
    tester.m(4).n(4).k(k_size);
    tester.Test(xnn_f32_gemm_relu_ukernel_4x4__scalar);
  }
}
3573
// Runs the 4x4 scalar ReLU GEMM microkernel with m=4, n=4 for k in [2, 9],
// with a_stride set to 11.
TEST(F32_GEMM_RELU_4X4__SCALAR, k_gt_1_strided_a) {
  for (size_t k_size = 2; k_size < 10; ++k_size) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(4).kr(1).sr(1);
    tester.m(4).n(4).k(k_size).a_stride(11);
    tester.Test(xnn_f32_gemm_relu_ukernel_4x4__scalar);
  }
}
3588
// Runs the 4x4 scalar ReLU GEMM microkernel for k in [2, 9], m in [1, 4]
// and n in [1, 4], with iterations set to 1.
TEST(F32_GEMM_RELU_4X4__SCALAR, k_gt_1_subtile) {
  for (size_t k_size = 2; k_size < 10; ++k_size) {
    for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
      for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(1).sr(1);
        tester.m(m_size).n(n_size).k(k_size).iterations(1);
        tester.Test(xnn_f32_gemm_relu_ukernel_4x4__scalar);
      }
    }
  }
}
3607
// Runs the 4x4 scalar ReLU GEMM microkernel with n in [5, 7] and
// k in {1, 3, 5}, m fixed at 4.
// n is taken from the loop variable; a constant .n(4) would leave the loop
// variable unused and only repeat the n == nr case.
TEST(F32_GEMM_RELU_4X4__SCALAR, n_gt_4) {
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(1).sr(1);
      tester.m(4).n(n_size).k(k_size);
      tester.Test(xnn_f32_gemm_relu_ukernel_4x4__scalar);
    }
  }
}
3623
// Runs the 4x4 scalar ReLU GEMM microkernel with n in [5, 7], k in
// {1, 3, 5}, m fixed at 4 and cn_stride set to 7.
// n is taken from the loop variable; a constant .n(4) would leave the loop
// variable unused and only repeat the n == nr case.
TEST(F32_GEMM_RELU_4X4__SCALAR, n_gt_4_strided_cn) {
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(1).sr(1);
      tester.m(4).n(n_size).k(k_size).cn_stride(7);
      tester.Test(xnn_f32_gemm_relu_ukernel_4x4__scalar);
    }
  }
}
3640
// Runs the 4x4 scalar ReLU GEMM microkernel with n in [5, 7], k in
// {1, 3, 5}, m fixed at 4 and a_stride set to 7.
TEST(F32_GEMM_RELU_4X4__SCALAR, n_gt_4_strided_a) {
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(1).sr(1);
      tester.m(4).n(n_size).k(k_size).a_stride(7);
      tester.Test(xnn_f32_gemm_relu_ukernel_4x4__scalar);
    }
  }
}
3657
// Runs the 4x4 scalar ReLU GEMM microkernel with n in [5, 7], k in
// {1, 3, 5} and m in [1, 4], with iterations set to 1.
TEST(F32_GEMM_RELU_4X4__SCALAR, n_gt_4_subtile) {
  for (uint32_t n_size = 5; n_size < 8; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(1).sr(1);
        tester.m(m_size).n(n_size).k(k_size).iterations(1);
        tester.Test(xnn_f32_gemm_relu_ukernel_4x4__scalar);
      }
    }
  }
}
3676
// Runs the 4x4 scalar ReLU GEMM microkernel with n in {8, 12} (multiples
// of nr = 4) and k in {1, 3, 5}, m fixed at 4.
// n is taken from the loop variable; a constant .n(4) would leave the loop
// variable unused and only repeat the n == nr case.
TEST(F32_GEMM_RELU_4X4__SCALAR, n_div_4) {
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(1).sr(1);
      tester.m(4).n(n_size).k(k_size);
      tester.Test(xnn_f32_gemm_relu_ukernel_4x4__scalar);
    }
  }
}
3692
// Runs the 4x4 scalar ReLU GEMM microkernel with n in {8, 12}, k in
// {1, 3, 5}, m fixed at 4 and cn_stride set to 7.
TEST(F32_GEMM_RELU_4X4__SCALAR, n_div_4_strided_cn) {
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(1).sr(1);
      tester.m(4).n(n_size).k(k_size).cn_stride(7);
      tester.Test(xnn_f32_gemm_relu_ukernel_4x4__scalar);
    }
  }
}
3709
// Runs the 4x4 scalar ReLU GEMM microkernel with n in {8, 12}, k in
// {1, 3, 5}, m fixed at 4 and a_stride set to 7.
TEST(F32_GEMM_RELU_4X4__SCALAR, n_div_4_strided_a) {
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(4).kr(1).sr(1);
      tester.m(4).n(n_size).k(k_size).a_stride(7);
      tester.Test(xnn_f32_gemm_relu_ukernel_4x4__scalar);
    }
  }
}
3726
// Runs the 4x4 scalar ReLU GEMM microkernel with n in {8, 12}, k in
// {1, 3, 5} and m in [1, 4], with iterations set to 1.
TEST(F32_GEMM_RELU_4X4__SCALAR, n_div_4_subtile) {
  for (uint32_t n_size = 8; n_size <= 12; n_size += 4) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(1).sr(1);
        tester.m(m_size).n(n_size).k(k_size).iterations(1);
        tester.Test(xnn_f32_gemm_relu_ukernel_4x4__scalar);
      }
    }
  }
}
3745
// Runs the 4x4 scalar ReLU GEMM microkernel for k in {1, 3, 5}, m in [1, 4]
// and n in [1, 4], with cm_stride set to 7 and iterations set to 1.
TEST(F32_GEMM_RELU_4X4__SCALAR, strided_cm_subtile) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
      for (uint32_t n_size = 1; n_size <= 4; ++n_size) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(4).kr(1).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.cm_stride(7).iterations(1);
        tester.Test(xnn_f32_gemm_relu_ukernel_4x4__scalar);
      }
    }
  }
}
3765
// Runs the 4x4 scalar ReLU GEMM microkernel once with m=4, n=4, k=1 and
// cm_stride set to 7.
TEST(F32_GEMM_RELU_4X4__SCALAR, strided_cm) {
  GemmMicrokernelTester tester;
  tester.mr(4).nr(4).kr(1).sr(1);
  tester.m(4).n(4).k(1).cm_stride(7);
  tester.Test(xnn_f32_gemm_relu_ukernel_4x4__scalar);
}
3778
3779
// Runs the 4x2 scalar ReLU GEMM microkernel once with m=4, n=2, k=1.
TEST(F32_GEMM_RELU_4X2__SCALAR, k_eq_1) {
  GemmMicrokernelTester tester;
  tester.mr(4).nr(2).kr(1).sr(1);
  tester.m(4).n(2).k(1);
  tester.Test(xnn_f32_gemm_relu_ukernel_4x2__scalar);
}
3791
// Runs the 4x2 scalar ReLU GEMM microkernel once with m=4, n=2, k=1 and
// cn_stride set to 5.
TEST(F32_GEMM_RELU_4X2__SCALAR, strided_cn) {
  GemmMicrokernelTester tester;
  tester.mr(4).nr(2).kr(1).sr(1);
  tester.m(4).n(2).k(1).cn_stride(5);
  tester.Test(xnn_f32_gemm_relu_ukernel_4x2__scalar);
}
3804
// Runs the 4x2 scalar ReLU GEMM microkernel once with m=4, n=2, k=1 and
// a_stride set to 3.
TEST(F32_GEMM_RELU_4X2__SCALAR, k_eq_1_strided_a) {
  GemmMicrokernelTester tester;
  tester.mr(4).nr(2).kr(1).sr(1);
  tester.m(4).n(2).k(1).a_stride(3);
  tester.Test(xnn_f32_gemm_relu_ukernel_4x2__scalar);
}
3817
// Runs the 4x2 scalar ReLU GEMM microkernel at k=1 for m in [1, 4] and
// n in [1, 2], with iterations set to 1.
TEST(F32_GEMM_RELU_4X2__SCALAR, k_eq_1_subtile) {
  for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
    for (uint32_t n_size = 1; n_size <= 2; ++n_size) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(2).kr(1).sr(1);
      tester.m(m_size).n(n_size).k(1).iterations(1);
      tester.Test(xnn_f32_gemm_relu_ukernel_4x2__scalar);
    }
  }
}
3834
// Runs the 4x2 scalar ReLU GEMM microkernel at k=1, n=2 for m in [1, 4],
// with iterations set to 1.
TEST(F32_GEMM_RELU_4X2__SCALAR, k_eq_1_subtile_m) {
  for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(2).kr(1).sr(1);
    tester.m(m_size).n(2).k(1).iterations(1);
    tester.Test(xnn_f32_gemm_relu_ukernel_4x2__scalar);
  }
}
3849
// Runs the 4x2 scalar ReLU GEMM microkernel at k=1, m=4 for n in [1, 2],
// with iterations set to 1.
TEST(F32_GEMM_RELU_4X2__SCALAR, k_eq_1_subtile_n) {
  for (uint32_t n_size = 1; n_size <= 2; ++n_size) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(2).kr(1).sr(1);
    tester.m(4).n(n_size).k(1).iterations(1);
    tester.Test(xnn_f32_gemm_relu_ukernel_4x2__scalar);
  }
}
3864
// Runs the 4x2 scalar ReLU GEMM microkernel with m=4, n=2 for k in [2, 9].
TEST(F32_GEMM_RELU_4X2__SCALAR, k_gt_1) {
  for (size_t k_size = 2; k_size < 10; ++k_size) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(2).kr(1).sr(1);
    tester.m(4).n(2).k(k_size);
    tester.Test(xnn_f32_gemm_relu_ukernel_4x2__scalar);
  }
}
3878
// Runs the 4x2 scalar ReLU GEMM microkernel with m=4, n=2 for k in [2, 9],
// with a_stride set to 11.
TEST(F32_GEMM_RELU_4X2__SCALAR, k_gt_1_strided_a) {
  for (size_t k_size = 2; k_size < 10; ++k_size) {
    GemmMicrokernelTester tester;
    tester.mr(4).nr(2).kr(1).sr(1);
    tester.m(4).n(2).k(k_size).a_stride(11);
    tester.Test(xnn_f32_gemm_relu_ukernel_4x2__scalar);
  }
}
3893
// Runs the 4x2 scalar ReLU GEMM microkernel for k in [2, 9], m in [1, 4]
// and n in [1, 2], with iterations set to 1.
TEST(F32_GEMM_RELU_4X2__SCALAR, k_gt_1_subtile) {
  for (size_t k_size = 2; k_size < 10; ++k_size) {
    for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
      for (uint32_t n_size = 1; n_size <= 2; ++n_size) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(2).kr(1).sr(1);
        tester.m(m_size).n(n_size).k(k_size).iterations(1);
        tester.Test(xnn_f32_gemm_relu_ukernel_4x2__scalar);
      }
    }
  }
}
3912
// Runs the 4x2 scalar ReLU GEMM microkernel with n = 3 (the only value in
// (nr, 2*nr)) and k in {1, 3, 5}, m fixed at 4.
// n is taken from the loop variable; a constant .n(2) would leave the loop
// variable unused and only repeat the n == nr case.
TEST(F32_GEMM_RELU_4X2__SCALAR, n_gt_2) {
  for (uint32_t n_size = 3; n_size < 4; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(2).kr(1).sr(1);
      tester.m(4).n(n_size).k(k_size);
      tester.Test(xnn_f32_gemm_relu_ukernel_4x2__scalar);
    }
  }
}
3928
// Runs the 4x2 scalar ReLU GEMM microkernel with n = 3, k in {1, 3, 5},
// m fixed at 4 and cn_stride set to 5.
// n is taken from the loop variable; a constant .n(2) would leave the loop
// variable unused and only repeat the n == nr case.
TEST(F32_GEMM_RELU_4X2__SCALAR, n_gt_2_strided_cn) {
  for (uint32_t n_size = 3; n_size < 4; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(2).kr(1).sr(1);
      tester.m(4).n(n_size).k(k_size).cn_stride(5);
      tester.Test(xnn_f32_gemm_relu_ukernel_4x2__scalar);
    }
  }
}
3945
// Runs the 4x2 scalar ReLU GEMM microkernel with n = 3, k in {1, 3, 5},
// m fixed at 4 and a_stride set to 7.
TEST(F32_GEMM_RELU_4X2__SCALAR, n_gt_2_strided_a) {
  for (uint32_t n_size = 3; n_size < 4; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(2).kr(1).sr(1);
      tester.m(4).n(n_size).k(k_size).a_stride(7);
      tester.Test(xnn_f32_gemm_relu_ukernel_4x2__scalar);
    }
  }
}
3962
// Runs the 4x2 scalar ReLU GEMM microkernel with n = 3, k in {1, 3, 5} and
// m in [1, 4], with iterations set to 1.
TEST(F32_GEMM_RELU_4X2__SCALAR, n_gt_2_subtile) {
  for (uint32_t n_size = 3; n_size < 4; ++n_size) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(2).kr(1).sr(1);
        tester.m(m_size).n(n_size).k(k_size).iterations(1);
        tester.Test(xnn_f32_gemm_relu_ukernel_4x2__scalar);
      }
    }
  }
}
3981
// Runs the 4x2 scalar ReLU GEMM microkernel with n in {4, 6} (multiples of
// nr = 2) and k in {1, 3, 5}, m fixed at 4.
// n is taken from the loop variable; a constant .n(2) would leave the loop
// variable unused and only repeat the n == nr case.
TEST(F32_GEMM_RELU_4X2__SCALAR, n_div_2) {
  for (uint32_t n_size = 4; n_size <= 6; n_size += 2) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(2).kr(1).sr(1);
      tester.m(4).n(n_size).k(k_size);
      tester.Test(xnn_f32_gemm_relu_ukernel_4x2__scalar);
    }
  }
}
3997
// Runs the 4x2 scalar ReLU GEMM microkernel with n in {4, 6}, k in
// {1, 3, 5}, m fixed at 4 and cn_stride set to 5.
TEST(F32_GEMM_RELU_4X2__SCALAR, n_div_2_strided_cn) {
  for (uint32_t n_size = 4; n_size <= 6; n_size += 2) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(2).kr(1).sr(1);
      tester.m(4).n(n_size).k(k_size).cn_stride(5);
      tester.Test(xnn_f32_gemm_relu_ukernel_4x2__scalar);
    }
  }
}
4014
// Runs the 4x2 scalar ReLU GEMM microkernel with n in {4, 6}, k in
// {1, 3, 5}, m fixed at 4 and a_stride set to 7.
TEST(F32_GEMM_RELU_4X2__SCALAR, n_div_2_strided_a) {
  for (uint32_t n_size = 4; n_size <= 6; n_size += 2) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      GemmMicrokernelTester tester;
      tester.mr(4).nr(2).kr(1).sr(1);
      tester.m(4).n(n_size).k(k_size).a_stride(7);
      tester.Test(xnn_f32_gemm_relu_ukernel_4x2__scalar);
    }
  }
}
4031
// Runs the 4x2 scalar ReLU GEMM microkernel with n in {4, 6}, k in
// {1, 3, 5} and m in [1, 4], with iterations set to 1.
TEST(F32_GEMM_RELU_4X2__SCALAR, n_div_2_subtile) {
  for (uint32_t n_size = 4; n_size <= 6; n_size += 2) {
    for (size_t k_size = 1; k_size <= 5; k_size += 2) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(2).kr(1).sr(1);
        tester.m(m_size).n(n_size).k(k_size).iterations(1);
        tester.Test(xnn_f32_gemm_relu_ukernel_4x2__scalar);
      }
    }
  }
}
4050
// Runs the 4x2 scalar ReLU GEMM microkernel for k in {1, 3, 5}, m in [1, 4]
// and n in [1, 2], with cm_stride set to 5 and iterations set to 1.
TEST(F32_GEMM_RELU_4X2__SCALAR, strided_cm_subtile) {
  for (size_t k_size = 1; k_size <= 5; k_size += 2) {
    for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
      for (uint32_t n_size = 1; n_size <= 2; ++n_size) {
        GemmMicrokernelTester tester;
        tester.mr(4).nr(2).kr(1).sr(1);
        tester.m(m_size).n(n_size).k(k_size);
        tester.cm_stride(5).iterations(1);
        tester.Test(xnn_f32_gemm_relu_ukernel_4x2__scalar);
      }
    }
  }
}
4070
// Runs the 4x2 scalar ReLU GEMM microkernel once with m=4, n=2, k=1 and
// cm_stride set to 5.
TEST(F32_GEMM_RELU_4X2__SCALAR, strided_cm) {
  GemmMicrokernelTester tester;
  tester.mr(4).nr(2).kr(1).sr(1);
  tester.m(4).n(2).k(1).cm_stride(5);
  tester.Test(xnn_f32_gemm_relu_ukernel_4x2__scalar);
}
4083