1 // Copyright (c) Facebook, Inc. and its affiliates.
2 // All rights reserved.
3 //
4 // Copyright 2019 Google LLC
5 //
6 // This source code is licensed under the BSD-style license found in the
7 // LICENSE file in the root directory of this source tree.
8 //
9 // Auto-generated file. Do not edit!
10 // Specification: test/f32-ppmm-minmax.yaml
11 // Generator: tools/generate-gemm-test.py
12
13
14 #include <gtest/gtest.h>
15
16 #include <xnnpack/common.h>
17 #include <xnnpack/isa-checks.h>
18
19 #include <xnnpack/gemm.h>
20 #include <xnnpack/igemm.h>
21 #include <xnnpack/ppmm.h>
22 #include "gemm-microkernel-tester.h"
23
24
25 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run with m == mr (4), n == nr (8) and k == 1.
TEST(F32_PPMM_MINMAX_4X8__NEON, k_eq_1) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(1)
    .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neon);
}
38
// m == 4, n == 8, k == 1, with cn_stride set to 11.
TEST(F32_PPMM_MINMAX_4X8__NEON, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(1)
    .cn_stride(11)
    .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neon);
}
52
// m == 4, n == 8, k == 1, with a_stride set to 3.
TEST(F32_PPMM_MINMAX_4X8__NEON, k_eq_1_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(1)
    .a_stride(3)
    .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neon);
}
66
// Sweeps every (m, n) with 1 <= m <= 4 and 1 <= n <= 8 at k == 1,
// one tester iteration per combination.
TEST(F32_PPMM_MINMAX_4X8__NEON, k_eq_1_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m = 1; m <= 4; ++m) {
    for (uint32_t n = 1; n <= 8; ++n) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(m).n(n).k(1)
        .iterations(1)
        .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neon);
    }
  }
}
84
// Sweeps m over 1..4 with n == 8 and k == 1, one tester iteration each.
TEST(F32_PPMM_MINMAX_4X8__NEON, k_eq_1_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m = 1; m <= 4; ++m) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(m).n(8).k(1)
      .iterations(1)
      .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neon);
  }
}
100
// Sweeps n over 1..8 with m == 4 and k == 1, one tester iteration each.
TEST(F32_PPMM_MINMAX_4X8__NEON, k_eq_1_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 8; ++n) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(n).k(1)
      .iterations(1)
      .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neon);
  }
}
116
// Sweeps k over 2..9 with m == 4 and n == 8.
TEST(F32_PPMM_MINMAX_4X8__NEON, k_gt_1) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 2; k < 10; ++k) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(k)
      .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neon);
  }
}
131
// Sweeps k over 2..9 and every (m, n) with 1 <= m <= 4, 1 <= n <= 8,
// one tester iteration per combination.
TEST(F32_PPMM_MINMAX_4X8__NEON, k_gt_1_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 2; k < 10; ++k) {
    for (uint32_t m = 1; m <= 4; ++m) {
      for (uint32_t n = 1; n <= 8; ++n) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neon);
      }
    }
  }
}
151
// Sweeps n over 9..15 and k over {1, 3, 5} with m == 4.
TEST(F32_PPMM_MINMAX_4X8__NEON, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4)
        // Pass the loop value: a fixed n(8) would leave every n > 8 case untested.
        .n(n)
        .k(k)
        .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neon);
    }
  }
}
168
// Sweeps n over 9..15 and k over {1, 3, 5} with m == 4 and cn_stride == 11.
TEST(F32_PPMM_MINMAX_4X8__NEON, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4)
        // Pass the loop value: a fixed n(8) would leave every n > 8 case untested.
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neon);
    }
  }
}
186
// Sweeps n over 9..15 and k over {1, 3, 5} with m == 4 and a_stride == 7.
TEST(F32_PPMM_MINMAX_4X8__NEON, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n).k(k)
        .a_stride(7)
        .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neon);
    }
  }
}
204
// Sweeps n over 9..15, k over {1, 3, 5} and m over 1..4,
// one tester iteration per combination.
TEST(F32_PPMM_MINMAX_4X8__NEON, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neon);
      }
    }
  }
}
224
// Sweeps n over {16, 24} and k over {1, 3, 5} with m == 4.
TEST(F32_PPMM_MINMAX_4X8__NEON, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4)
        // Pass the loop value: a fixed n(8) would leave the larger multiples of 8 untested.
        .n(n)
        .k(k)
        .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neon);
    }
  }
}
241
// Sweeps n over {16, 24} and k over {1, 3, 5} with m == 4 and cn_stride == 11.
TEST(F32_PPMM_MINMAX_4X8__NEON, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n).k(k)
        .cn_stride(11)
        .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neon);
    }
  }
}
259
// Sweeps n over {16, 24} and k over {1, 3, 5} with m == 4 and a_stride == 7.
TEST(F32_PPMM_MINMAX_4X8__NEON, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n).k(k)
        .a_stride(7)
        .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neon);
    }
  }
}
277
// Sweeps n over {16, 24}, k over {1, 3, 5} and m over 1..4,
// one tester iteration per combination.
TEST(F32_PPMM_MINMAX_4X8__NEON, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neon);
      }
    }
  }
}
297
// Sweeps k over {1, 3, 5} and every (m, n) with 1 <= m <= 4, 1 <= n <= 8,
// with cm_stride == 11 and one tester iteration per combination.
TEST(F32_PPMM_MINMAX_4X8__NEON, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t m = 1; m <= 4; ++m) {
      for (uint32_t n = 1; n <= 8; ++n) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neon);
      }
    }
  }
}
318
// m == 4, n == 8, k == 1, with qmin set to 128.
TEST(F32_PPMM_MINMAX_4X8__NEON, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(1)
    .qmin(128)
    .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neon);
}
332
// m == 4, n == 8, k == 1, with qmax set to 128.
TEST(F32_PPMM_MINMAX_4X8__NEON, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(1)
    .qmax(128)
    .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neon);
}
346
// m == 4, n == 8, k == 1, with cm_stride set to 11.
TEST(F32_PPMM_MINMAX_4X8__NEON, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(1)
    .cm_stride(11)
    .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neon);
}
360 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
361
362
363 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run with m == mr (4), n == nr (8) and k == 1.
TEST(F32_PPMM_MINMAX_4X8__NEONFMA, k_eq_1) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(1)
    .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neonfma);
}
376
// m == 4, n == 8, k == 1, with cn_stride set to 11.
TEST(F32_PPMM_MINMAX_4X8__NEONFMA, strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(1)
    .cn_stride(11)
    .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neonfma);
}
390
// m == 4, n == 8, k == 1, with a_stride set to 3.
TEST(F32_PPMM_MINMAX_4X8__NEONFMA, k_eq_1_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(1)
    .a_stride(3)
    .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neonfma);
}
404
// Sweeps every (m, n) with 1 <= m <= 4 and 1 <= n <= 8 at k == 1,
// one tester iteration per combination.
TEST(F32_PPMM_MINMAX_4X8__NEONFMA, k_eq_1_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t m = 1; m <= 4; ++m) {
    for (uint32_t n = 1; n <= 8; ++n) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(m).n(n).k(1)
        .iterations(1)
        .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neonfma);
    }
  }
}
422
// Sweeps m over 1..4 with n == 8 and k == 1, one tester iteration each.
TEST(F32_PPMM_MINMAX_4X8__NEONFMA, k_eq_1_subtile_m) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t m = 1; m <= 4; ++m) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(m).n(8).k(1)
      .iterations(1)
      .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neonfma);
  }
}
438
// Sweeps n over 1..8 with m == 4 and k == 1, one tester iteration each.
TEST(F32_PPMM_MINMAX_4X8__NEONFMA, k_eq_1_subtile_n) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n = 1; n <= 8; ++n) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(n).k(1)
      .iterations(1)
      .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neonfma);
  }
}
454
// Sweeps k over 2..9 with m == 4 and n == 8.
TEST(F32_PPMM_MINMAX_4X8__NEONFMA, k_gt_1) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k = 2; k < 10; ++k) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(k)
      .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neonfma);
  }
}
469
// Sweeps k over 2..9 and every (m, n) with 1 <= m <= 4, 1 <= n <= 8,
// one tester iteration per combination.
TEST(F32_PPMM_MINMAX_4X8__NEONFMA, k_gt_1_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k = 2; k < 10; ++k) {
    for (uint32_t m = 1; m <= 4; ++m) {
      for (uint32_t n = 1; n <= 8; ++n) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neonfma);
      }
    }
  }
}
489
// Sweeps n over 9..15 and k over {1, 3, 5} with m == 4.
TEST(F32_PPMM_MINMAX_4X8__NEONFMA, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4)
        // Pass the loop value: a fixed n(8) would leave every n > 8 case untested.
        .n(n)
        .k(k)
        .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neonfma);
    }
  }
}
506
// Sweeps n over 9..15 and k over {1, 3, 5} with m == 4 and cn_stride == 11.
TEST(F32_PPMM_MINMAX_4X8__NEONFMA, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4)
        // Pass the loop value: a fixed n(8) would leave every n > 8 case untested.
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neonfma);
    }
  }
}
524
// Sweeps n over 9..15 and k over {1, 3, 5} with m == 4 and a_stride == 7.
TEST(F32_PPMM_MINMAX_4X8__NEONFMA, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n).k(k)
        .a_stride(7)
        .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neonfma);
    }
  }
}
542
// Sweeps n over 9..15, k over {1, 3, 5} and m over 1..4,
// one tester iteration per combination.
TEST(F32_PPMM_MINMAX_4X8__NEONFMA, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neonfma);
      }
    }
  }
}
562
// Sweeps n over {16, 24} and k over {1, 3, 5} with m == 4.
TEST(F32_PPMM_MINMAX_4X8__NEONFMA, n_div_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4)
        // Pass the loop value: a fixed n(8) would leave the larger multiples of 8 untested.
        .n(n)
        .k(k)
        .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neonfma);
    }
  }
}
579
// Sweeps n over {16, 24} and k over {1, 3, 5} with m == 4 and cn_stride == 11.
TEST(F32_PPMM_MINMAX_4X8__NEONFMA, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n).k(k)
        .cn_stride(11)
        .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neonfma);
    }
  }
}
597
// Sweeps n over {16, 24} and k over {1, 3, 5} with m == 4 and a_stride == 7.
TEST(F32_PPMM_MINMAX_4X8__NEONFMA, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n).k(k)
        .a_stride(7)
        .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neonfma);
    }
  }
}
615
// Sweeps n over {16, 24}, k over {1, 3, 5} and m over 1..4,
// one tester iteration per combination.
TEST(F32_PPMM_MINMAX_4X8__NEONFMA, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 4; ++m) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neonfma);
      }
    }
  }
}
635
// Sweeps k over {1, 3, 5} and every (m, n) with 1 <= m <= 4, 1 <= n <= 8,
// with cm_stride == 11 and one tester iteration per combination.
TEST(F32_PPMM_MINMAX_4X8__NEONFMA, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t m = 1; m <= 4; ++m) {
      for (uint32_t n = 1; n <= 8; ++n) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neonfma);
      }
    }
  }
}
656
// m == 4, n == 8, k == 1, with qmin set to 128.
TEST(F32_PPMM_MINMAX_4X8__NEONFMA, qmin) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(1)
    .qmin(128)
    .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neonfma);
}
670
// m == 4, n == 8, k == 1, with qmax set to 128.
TEST(F32_PPMM_MINMAX_4X8__NEONFMA, qmax) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(1)
    .qmax(128)
    .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neonfma);
}
684
// m == 4, n == 8, k == 1, with cm_stride set to 11.
TEST(F32_PPMM_MINMAX_4X8__NEONFMA, strided_cm) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(1)
    .cm_stride(11)
    .Test(xnn_f32_ppmm_minmax_ukernel_4x8__neonfma);
}
698 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
699
700
701 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run with m == mr (8), n == nr (8) and k == 1.
TEST(F32_PPMM_MINMAX_8X8__NEON, k_eq_1) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(8).nr(8).kr(1).sr(1)
    .m(8).n(8).k(1)
    .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neon);
}
714
// m == 8, n == 8, k == 1, with cn_stride set to 11.
TEST(F32_PPMM_MINMAX_8X8__NEON, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(8).nr(8).kr(1).sr(1)
    .m(8).n(8).k(1)
    .cn_stride(11)
    .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neon);
}
728
// m == 8, n == 8, k == 1, with a_stride set to 3.
TEST(F32_PPMM_MINMAX_8X8__NEON, k_eq_1_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(8).nr(8).kr(1).sr(1)
    .m(8).n(8).k(1)
    .a_stride(3)
    .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neon);
}
742
// Sweeps every (m, n) with 1 <= m <= 8 and 1 <= n <= 8 at k == 1,
// one tester iteration per combination.
TEST(F32_PPMM_MINMAX_8X8__NEON, k_eq_1_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m = 1; m <= 8; ++m) {
    for (uint32_t n = 1; n <= 8; ++n) {
      GemmMicrokernelTester()
        .mr(8).nr(8).kr(1).sr(1)
        .m(m).n(n).k(1)
        .iterations(1)
        .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neon);
    }
  }
}
760
// Sweeps m over 1..8 with n == 8 and k == 1, one tester iteration each.
TEST(F32_PPMM_MINMAX_8X8__NEON, k_eq_1_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m = 1; m <= 8; ++m) {
    GemmMicrokernelTester()
      .mr(8).nr(8).kr(1).sr(1)
      .m(m).n(8).k(1)
      .iterations(1)
      .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neon);
  }
}
776
// Sweeps n over 1..8 with m == 8 and k == 1, one tester iteration each.
TEST(F32_PPMM_MINMAX_8X8__NEON, k_eq_1_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 8; ++n) {
    GemmMicrokernelTester()
      .mr(8).nr(8).kr(1).sr(1)
      .m(8).n(n).k(1)
      .iterations(1)
      .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neon);
  }
}
792
// Sweeps k over 2..9 with m == 8 and n == 8.
TEST(F32_PPMM_MINMAX_8X8__NEON, k_gt_1) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 2; k < 10; ++k) {
    GemmMicrokernelTester()
      .mr(8).nr(8).kr(1).sr(1)
      .m(8).n(8).k(k)
      .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neon);
  }
}
807
// Sweeps k over 2..9 and every (m, n) with 1 <= m <= 8, 1 <= n <= 8,
// one tester iteration per combination.
TEST(F32_PPMM_MINMAX_8X8__NEON, k_gt_1_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 2; k < 10; ++k) {
    for (uint32_t m = 1; m <= 8; ++m) {
      for (uint32_t n = 1; n <= 8; ++n) {
        GemmMicrokernelTester()
          .mr(8).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neon);
      }
    }
  }
}
827
// Sweeps n over 9..15 and k over {1, 3, 5} with m == 8.
TEST(F32_PPMM_MINMAX_8X8__NEON, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(8).nr(8).kr(1).sr(1)
        .m(8)
        // Pass the loop value: a fixed n(8) would leave every n > 8 case untested.
        .n(n)
        .k(k)
        .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neon);
    }
  }
}
844
// Sweeps n over 9..15 and k over {1, 3, 5} with m == 8 and cn_stride == 11.
TEST(F32_PPMM_MINMAX_8X8__NEON, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(8).nr(8).kr(1).sr(1)
        .m(8)
        // Pass the loop value: a fixed n(8) would leave every n > 8 case untested.
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neon);
    }
  }
}
862
// Sweeps n over 9..15 and k over {1, 3, 5} with m == 8 and a_stride == 7.
TEST(F32_PPMM_MINMAX_8X8__NEON, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(8).nr(8).kr(1).sr(1)
        .m(8).n(n).k(k)
        .a_stride(7)
        .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neon);
    }
  }
}
880
// Sweeps n over 9..15, k over {1, 3, 5} and m over 1..8,
// one tester iteration per combination.
TEST(F32_PPMM_MINMAX_8X8__NEON, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 8; ++m) {
        GemmMicrokernelTester()
          .mr(8).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neon);
      }
    }
  }
}
900
// Sweeps n over {16, 24} and k over {1, 3, 5} with m == 8.
TEST(F32_PPMM_MINMAX_8X8__NEON, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(8).nr(8).kr(1).sr(1)
        .m(8)
        // Pass the loop value: a fixed n(8) would leave the larger multiples of 8 untested.
        .n(n)
        .k(k)
        .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neon);
    }
  }
}
917
// Sweeps n over {16, 24} and k over {1, 3, 5} with m == 8 and cn_stride == 11.
TEST(F32_PPMM_MINMAX_8X8__NEON, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(8).nr(8).kr(1).sr(1)
        .m(8).n(n).k(k)
        .cn_stride(11)
        .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neon);
    }
  }
}
935
// Sweeps n over {16, 24} and k over {1, 3, 5} with m == 8 and a_stride == 7.
TEST(F32_PPMM_MINMAX_8X8__NEON, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(8).nr(8).kr(1).sr(1)
        .m(8).n(n).k(k)
        .a_stride(7)
        .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neon);
    }
  }
}
953
// Sweeps n over {16, 24}, k over {1, 3, 5} and m over 1..8,
// one tester iteration per combination.
TEST(F32_PPMM_MINMAX_8X8__NEON, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 8; ++m) {
        GemmMicrokernelTester()
          .mr(8).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neon);
      }
    }
  }
}
973
// Sweeps k over {1, 3, 5} and every (m, n) with 1 <= m <= 8, 1 <= n <= 8,
// with cm_stride == 11 and one tester iteration per combination.
TEST(F32_PPMM_MINMAX_8X8__NEON, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t m = 1; m <= 8; ++m) {
      for (uint32_t n = 1; n <= 8; ++n) {
        GemmMicrokernelTester()
          .mr(8).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neon);
      }
    }
  }
}
994
// m == 8, n == 8, k == 1, with qmin set to 128.
TEST(F32_PPMM_MINMAX_8X8__NEON, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(8).nr(8).kr(1).sr(1)
    .m(8).n(8).k(1)
    .qmin(128)
    .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neon);
}
1008
// m == 8, n == 8, k == 1, with qmax set to 128.
TEST(F32_PPMM_MINMAX_8X8__NEON, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(8).nr(8).kr(1).sr(1)
    .m(8).n(8).k(1)
    .qmax(128)
    .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neon);
}
1022
// m == 8, n == 8, k == 1, with cm_stride set to 11.
TEST(F32_PPMM_MINMAX_8X8__NEON, strided_cm) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(8).nr(8).kr(1).sr(1)
    .m(8).n(8).k(1)
    .cm_stride(11)
    .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neon);
}
1036 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
1037
1038
1039 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run with m == mr (8), n == nr (8) and k == 1.
TEST(F32_PPMM_MINMAX_8X8__NEONFMA, k_eq_1) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(8).nr(8).kr(1).sr(1)
    .m(8).n(8).k(1)
    .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neonfma);
}
1052
// m == 8, n == 8, k == 1, with cn_stride set to 11.
TEST(F32_PPMM_MINMAX_8X8__NEONFMA, strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(8).nr(8).kr(1).sr(1)
    .m(8).n(8).k(1)
    .cn_stride(11)
    .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neonfma);
}
1066
// m == 8, n == 8, k == 1, with a_stride set to 3.
TEST(F32_PPMM_MINMAX_8X8__NEONFMA, k_eq_1_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(8).nr(8).kr(1).sr(1)
    .m(8).n(8).k(1)
    .a_stride(3)
    .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neonfma);
}
1080
// Sweeps every (m, n) with 1 <= m <= 8 and 1 <= n <= 8 at k == 1,
// one tester iteration per combination.
TEST(F32_PPMM_MINMAX_8X8__NEONFMA, k_eq_1_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t m = 1; m <= 8; ++m) {
    for (uint32_t n = 1; n <= 8; ++n) {
      GemmMicrokernelTester()
        .mr(8).nr(8).kr(1).sr(1)
        .m(m).n(n).k(1)
        .iterations(1)
        .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neonfma);
    }
  }
}
1098
// Sweeps m over 1..8 with n == 8 and k == 1, one tester iteration each.
TEST(F32_PPMM_MINMAX_8X8__NEONFMA, k_eq_1_subtile_m) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t m = 1; m <= 8; ++m) {
    GemmMicrokernelTester()
      .mr(8).nr(8).kr(1).sr(1)
      .m(m).n(8).k(1)
      .iterations(1)
      .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neonfma);
  }
}
1114
// Sweeps n over 1..8 with m == 8 and k == 1, one tester iteration each.
TEST(F32_PPMM_MINMAX_8X8__NEONFMA, k_eq_1_subtile_n) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n = 1; n <= 8; ++n) {
    GemmMicrokernelTester()
      .mr(8).nr(8).kr(1).sr(1)
      .m(8).n(n).k(1)
      .iterations(1)
      .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neonfma);
  }
}
1130
// Sweeps k over 2..9 with m == 8 and n == 8.
TEST(F32_PPMM_MINMAX_8X8__NEONFMA, k_gt_1) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k = 2; k < 10; ++k) {
    GemmMicrokernelTester()
      .mr(8).nr(8).kr(1).sr(1)
      .m(8).n(8).k(k)
      .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neonfma);
  }
}
1145
// Sweeps k over 2..9 and every (m, n) with 1 <= m <= 8, 1 <= n <= 8,
// one tester iteration per combination.
TEST(F32_PPMM_MINMAX_8X8__NEONFMA, k_gt_1_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k = 2; k < 10; ++k) {
    for (uint32_t m = 1; m <= 8; ++m) {
      for (uint32_t n = 1; n <= 8; ++n) {
        GemmMicrokernelTester()
          .mr(8).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neonfma);
      }
    }
  }
}
1165
// Sweeps n over 9..15 and k over {1, 3, 5} with m == 8.
TEST(F32_PPMM_MINMAX_8X8__NEONFMA, n_gt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(8).nr(8).kr(1).sr(1)
        .m(8)
        // Pass the loop value: a fixed n(8) would leave every n > 8 case untested.
        .n(n)
        .k(k)
        .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neonfma);
    }
  }
}
1182
// Sweeps n over 9..15 and k over {1, 3, 5} with m == 8 and cn_stride == 11.
TEST(F32_PPMM_MINMAX_8X8__NEONFMA, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(8).nr(8).kr(1).sr(1)
        .m(8)
        // Pass the loop value: a fixed n(8) would leave every n > 8 case untested.
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neonfma);
    }
  }
}
1200
// Sweeps n over 9..15 and k over {1, 3, 5} with m == 8 and a_stride == 7.
TEST(F32_PPMM_MINMAX_8X8__NEONFMA, n_gt_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(8).nr(8).kr(1).sr(1)
        .m(8).n(n).k(k)
        .a_stride(7)
        .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neonfma);
    }
  }
}
1218
// Sweeps n over 9..15, k over {1, 3, 5} and m over 1..8,
// one tester iteration per combination.
TEST(F32_PPMM_MINMAX_8X8__NEONFMA, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n = 9; n < 16; ++n) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 8; ++m) {
        GemmMicrokernelTester()
          .mr(8).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neonfma);
      }
    }
  }
}
1238
// Sweeps n over {16, 24} and k over {1, 3, 5} with m == 8.
TEST(F32_PPMM_MINMAX_8X8__NEONFMA, n_div_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(8).nr(8).kr(1).sr(1)
        .m(8)
        // Pass the loop value: a fixed n(8) would leave the larger multiples of 8 untested.
        .n(n)
        .k(k)
        .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neonfma);
    }
  }
}
1255
// Sweeps n over {16, 24} and k over {1, 3, 5} with m == 8 and cn_stride == 11.
TEST(F32_PPMM_MINMAX_8X8__NEONFMA, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(8).nr(8).kr(1).sr(1)
        .m(8).n(n).k(k)
        .cn_stride(11)
        .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neonfma);
    }
  }
}
1273
// Sweeps n over {16, 24} and k over {1, 3, 5} with m == 8 and a_stride == 7.
TEST(F32_PPMM_MINMAX_8X8__NEONFMA, n_div_8_strided_a) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
        .mr(8).nr(8).kr(1).sr(1)
        .m(8).n(n).k(k)
        .a_stride(7)
        .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neonfma);
    }
  }
}
1291
// Sweeps n over {16, 24}, k over {1, 3, 5} and m over 1..8,
// one tester iteration per combination.
TEST(F32_PPMM_MINMAX_8X8__NEONFMA, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 5; k += 2) {
      for (uint32_t m = 1; m <= 8; ++m) {
        GemmMicrokernelTester()
          .mr(8).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neonfma);
      }
    }
  }
}
1311
// Sweeps k over {1, 3, 5} and every (m, n) with 1 <= m <= 8, 1 <= n <= 8,
// with cm_stride == 11 and one tester iteration per combination.
TEST(F32_PPMM_MINMAX_8X8__NEONFMA, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t k = 1; k <= 5; k += 2) {
    for (uint32_t m = 1; m <= 8; ++m) {
      for (uint32_t n = 1; n <= 8; ++n) {
        GemmMicrokernelTester()
          .mr(8).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neonfma);
      }
    }
  }
}
1332
// m == 8, n == 8, k == 1, with qmin set to 128.
TEST(F32_PPMM_MINMAX_8X8__NEONFMA, qmin) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(8).nr(8).kr(1).sr(1)
    .m(8).n(8).k(1)
    .qmin(128)
    .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neonfma);
}
1346
// m == 8, n == 8, k == 1, with qmax set to 128.
TEST(F32_PPMM_MINMAX_8X8__NEONFMA, qmax) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(8).nr(8).kr(1).sr(1)
    .m(8).n(8).k(1)
    .qmax(128)
    .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neonfma);
}
1360
// m == 8, n == 8, k == 1, with cm_stride set to 11.
TEST(F32_PPMM_MINMAX_8X8__NEONFMA, strided_cm) {
  TEST_REQUIRES_ARM_NEON_FMA;
  GemmMicrokernelTester()
    .mr(8).nr(8).kr(1).sr(1)
    .m(8).n(8).k(1)
    .cm_stride(11)
    .Test(xnn_f32_ppmm_minmax_ukernel_8x8__neonfma);
}
1374 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
1375
1376
1377 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with m == mr (4), n == nr (8) and k == 1.
TEST(F32_PPMM_MINMAX_4X8__SSE, k_eq_1) {
  TEST_REQUIRES_X86_SSE;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(1)
    .Test(xnn_f32_ppmm_minmax_ukernel_4x8__sse);
}
1390
// m == 4, n == 8, k == 1, with cn_stride set to 11.
TEST(F32_PPMM_MINMAX_4X8__SSE, strided_cn) {
  TEST_REQUIRES_X86_SSE;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(1)
    .cn_stride(11)
    .Test(xnn_f32_ppmm_minmax_ukernel_4x8__sse);
}
1404
// Full 4x8 tile at k = 1 with a_stride(3), i.e. an A stride larger than k.
TEST(F32_PPMM_MINMAX_4X8__SSE, k_eq_1_strided_a) {
  TEST_REQUIRES_X86_SSE;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(1)
      .a_stride(3)
      .Test(xnn_f32_ppmm_minmax_ukernel_4x8__sse);
}
1418
// Every sub-tile shape (1 <= m <= 4, 1 <= n <= 8) at k = 1, one iteration per shape.
TEST(F32_PPMM_MINMAX_4X8__SSE, k_eq_1_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t mc = 1; mc <= 4; ++mc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(1)
          .iterations(1)
          .Test(xnn_f32_ppmm_minmax_ukernel_4x8__sse);
    }
  }
}
1436
// Varies only m (1..4) with n fixed at 8 and k = 1, one iteration per value.
TEST(F32_PPMM_MINMAX_4X8__SSE, k_eq_1_subtile_m) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t mc = 1; mc <= 4; ++mc) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(mc).n(8).k(1)
        .iterations(1)
        .Test(xnn_f32_ppmm_minmax_ukernel_4x8__sse);
  }
}
1452
// Varies only n (1..8) with m fixed at 4 and k = 1, one iteration per value.
TEST(F32_PPMM_MINMAX_4X8__SSE, k_eq_1_subtile_n) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(nc).k(1)
        .iterations(1)
        .Test(xnn_f32_ppmm_minmax_ukernel_4x8__sse);
  }
}
1468
// Full 4x8 tile for every k in 2..9.
TEST(F32_PPMM_MINMAX_4X8__SSE, k_gt_1) {
  TEST_REQUIRES_X86_SSE;
  for (size_t kc = 2; kc <= 9; ++kc) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(kc)
        .Test(xnn_f32_ppmm_minmax_ukernel_4x8__sse);
  }
}
1483
// Every sub-tile shape (m in 1..4, n in 1..8) for every k in 2..9, one iteration each.
TEST(F32_PPMM_MINMAX_4X8__SSE, k_gt_1_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (size_t kc = 2; kc <= 9; ++kc) {
    for (uint32_t mc = 1; mc <= 4; ++mc) {
      for (uint32_t nc = 1; nc <= 8; ++nc) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_ppmm_minmax_ukernel_4x8__sse);
      }
    }
  }
}
1503
// Sweeps n over 9..15 (more than one nr = 8 block) for k in {1, 3, 5} at m = 4.
// The tester must receive the loop's n: passing the constant 8 would re-run the
// single-block case on every iteration and never exercise n > 8.
// NOTE(review): this file is auto-generated (see file header); the generator
// template needs the same fix or it will regress on regeneration.
TEST(F32_PPMM_MINMAX_4X8__SSE, n_gt_8) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n).k(k)
          .Test(xnn_f32_ppmm_minmax_ukernel_4x8__sse);
    }
  }
}
1520
// Sweeps n over 9..15 for k in {1, 3, 5} at m = 4, with cn_stride(11).
// n is taken from the loop variable; a constant n of 8 would make all
// iterations identical and leave the n > 8 strided case untested.
// NOTE(review): this file is auto-generated (see file header); the generator
// template needs the same fix or it will regress on regeneration.
TEST(F32_PPMM_MINMAX_4X8__SSE, n_gt_8_strided_cn) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n).k(k)
          .cn_stride(11)
          .Test(xnn_f32_ppmm_minmax_ukernel_4x8__sse);
    }
  }
}
1538
// n in 9..15 and k in {1, 3, 5} at m = 4, with a_stride(7).
TEST(F32_PPMM_MINMAX_4X8__SSE, n_gt_8_strided_a) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t nc = 9; nc <= 15; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(nc).k(kc)
          .a_stride(7)
          .Test(xnn_f32_ppmm_minmax_ukernel_4x8__sse);
    }
  }
}
1556
// n in 9..15, k in {1, 3, 5} and every m in 1..4, one iteration per combination.
TEST(F32_PPMM_MINMAX_4X8__SSE, n_gt_8_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t nc = 9; nc <= 15; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_ppmm_minmax_ukernel_4x8__sse);
      }
    }
  }
}
1576
// Sweeps n over the multiples of nr = 8 in 16..24 for k in {1, 3, 5} at m = 4.
// The loop's n is forwarded to the tester; a hard-coded n of 8 would only
// repeat the single-block case and never cover multi-block widths.
// NOTE(review): this file is auto-generated (see file header); the generator
// template needs the same fix or it will regress on regeneration.
TEST(F32_PPMM_MINMAX_4X8__SSE, n_div_8) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n).k(k)
          .Test(xnn_f32_ppmm_minmax_ukernel_4x8__sse);
    }
  }
}
1593
// n in {16, 24} and k in {1, 3, 5} at m = 4, with cn_stride(11).
TEST(F32_PPMM_MINMAX_4X8__SSE, n_div_8_strided_cn) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(nc).k(kc)
          .cn_stride(11)
          .Test(xnn_f32_ppmm_minmax_ukernel_4x8__sse);
    }
  }
}
1611
// n in {16, 24} and k in {1, 3, 5} at m = 4, with a_stride(7).
TEST(F32_PPMM_MINMAX_4X8__SSE, n_div_8_strided_a) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(nc).k(kc)
          .a_stride(7)
          .Test(xnn_f32_ppmm_minmax_ukernel_4x8__sse);
    }
  }
}
1629
// n in {16, 24}, k in {1, 3, 5} and every m in 1..4, one iteration per combination.
TEST(F32_PPMM_MINMAX_4X8__SSE, n_div_8_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_ppmm_minmax_ukernel_4x8__sse);
      }
    }
  }
}
1649
// Every sub-tile shape (m in 1..4, n in 1..8) for k in {1, 3, 5} with cm_stride(11),
// one iteration per combination.
TEST(F32_PPMM_MINMAX_4X8__SSE, strided_cm_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t mc = 1; mc <= 4; ++mc) {
      for (uint32_t nc = 1; nc <= 8; ++nc) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_ppmm_minmax_ukernel_4x8__sse);
      }
    }
  }
}
1670
// Full 4x8 tile at k = 1 with qmin(128) set on the tester.
// NOTE(review): presumably this raises the lower output clamp - confirm in GemmMicrokernelTester.
TEST(F32_PPMM_MINMAX_4X8__SSE, qmin) {
  TEST_REQUIRES_X86_SSE;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(1)
      .qmin(128)
      .Test(xnn_f32_ppmm_minmax_ukernel_4x8__sse);
}
1684
// Full 4x8 tile at k = 1 with qmax(128) set on the tester.
// NOTE(review): presumably this lowers the upper output clamp - confirm in GemmMicrokernelTester.
TEST(F32_PPMM_MINMAX_4X8__SSE, qmax) {
  TEST_REQUIRES_X86_SSE;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(1)
      .qmax(128)
      .Test(xnn_f32_ppmm_minmax_ukernel_4x8__sse);
}
1698
// Full 4x8 tile at k = 1 with cm_stride(11), i.e. a stride larger than n = 8.
TEST(F32_PPMM_MINMAX_4X8__SSE, strided_cm) {
  TEST_REQUIRES_X86_SSE;
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(1)
      .cm_stride(11)
      .Test(xnn_f32_ppmm_minmax_ukernel_4x8__sse);
}
1712 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1713
1714
1715 #if XNN_ARCH_WASMSIMD
// Baseline case: one full 4x8 tile (m == mr, n == nr) with k = 1.
TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_ARM_SPLAT, k_eq_1) {
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(1)
      .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_arm_splat);
}
1727
// Full 4x8 tile at k = 1 with cn_stride(11) instead of the tester default.
TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_ARM_SPLAT, strided_cn) {
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(1)
      .cn_stride(11)
      .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_arm_splat);
}
1740
// Full 4x8 tile at k = 1 with a_stride(3), i.e. an A stride larger than k.
TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_ARM_SPLAT, k_eq_1_strided_a) {
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(1)
      .a_stride(3)
      .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_arm_splat);
}
1753
// Every sub-tile shape (1 <= m <= 4, 1 <= n <= 8) at k = 1, one iteration per shape.
TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_ARM_SPLAT, k_eq_1_subtile) {
  for (uint32_t mc = 1; mc <= 4; ++mc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(1)
          .iterations(1)
          .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_arm_splat);
    }
  }
}
1770
// Varies only m (1..4) with n fixed at 8 and k = 1, one iteration per value.
TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_ARM_SPLAT, k_eq_1_subtile_m) {
  for (uint32_t mc = 1; mc <= 4; ++mc) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(mc).n(8).k(1)
        .iterations(1)
        .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_arm_splat);
  }
}
1785
// Varies only n (1..8) with m fixed at 4 and k = 1, one iteration per value.
TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_ARM_SPLAT, k_eq_1_subtile_n) {
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(nc).k(1)
        .iterations(1)
        .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_arm_splat);
  }
}
1800
// Full 4x8 tile for every k in 2..9.
TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_ARM_SPLAT, k_gt_1) {
  for (size_t kc = 2; kc <= 9; ++kc) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(kc)
        .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_arm_splat);
  }
}
1814
// Every sub-tile shape (m in 1..4, n in 1..8) for every k in 2..9, one iteration each.
TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_ARM_SPLAT, k_gt_1_subtile) {
  for (size_t kc = 2; kc <= 9; ++kc) {
    for (uint32_t mc = 1; mc <= 4; ++mc) {
      for (uint32_t nc = 1; nc <= 8; ++nc) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_arm_splat);
      }
    }
  }
}
1833
// Sweeps n over 9..15 (more than one nr = 8 block) for k in {1, 3, 5} at m = 4.
// The tester must receive the loop's n: passing the constant 8 would re-run the
// single-block case on every iteration and never exercise n > 8.
// NOTE(review): this file is auto-generated (see file header); the generator
// template needs the same fix or it will regress on regeneration.
TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_ARM_SPLAT, n_gt_8) {
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n).k(k)
          .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_arm_splat);
    }
  }
}
1849
// Sweeps n over 9..15 for k in {1, 3, 5} at m = 4, with cn_stride(11).
// n is taken from the loop variable; a constant n of 8 would make all
// iterations identical and leave the n > 8 strided case untested.
// NOTE(review): this file is auto-generated (see file header); the generator
// template needs the same fix or it will regress on regeneration.
TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_ARM_SPLAT, n_gt_8_strided_cn) {
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n).k(k)
          .cn_stride(11)
          .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_arm_splat);
    }
  }
}
1866
// n in 9..15 and k in {1, 3, 5} at m = 4, with a_stride(7).
TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_ARM_SPLAT, n_gt_8_strided_a) {
  for (uint32_t nc = 9; nc <= 15; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(nc).k(kc)
          .a_stride(7)
          .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_arm_splat);
    }
  }
}
1883
// n in 9..15, k in {1, 3, 5} and every m in 1..4, one iteration per combination.
TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_ARM_SPLAT, n_gt_8_subtile) {
  for (uint32_t nc = 9; nc <= 15; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_arm_splat);
      }
    }
  }
}
1902
// Sweeps n over the multiples of nr = 8 in 16..24 for k in {1, 3, 5} at m = 4.
// The loop's n is forwarded to the tester; a hard-coded n of 8 would only
// repeat the single-block case and never cover multi-block widths.
// NOTE(review): this file is auto-generated (see file header); the generator
// template needs the same fix or it will regress on regeneration.
TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_ARM_SPLAT, n_div_8) {
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n).k(k)
          .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_arm_splat);
    }
  }
}
1918
// n in {16, 24} and k in {1, 3, 5} at m = 4, with cn_stride(11).
TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_ARM_SPLAT, n_div_8_strided_cn) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(nc).k(kc)
          .cn_stride(11)
          .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_arm_splat);
    }
  }
}
1935
// n in {16, 24} and k in {1, 3, 5} at m = 4, with a_stride(7).
TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_ARM_SPLAT, n_div_8_strided_a) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(nc).k(kc)
          .a_stride(7)
          .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_arm_splat);
    }
  }
}
1952
// n in {16, 24}, k in {1, 3, 5} and every m in 1..4, one iteration per combination.
TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_ARM_SPLAT, n_div_8_subtile) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_arm_splat);
      }
    }
  }
}
1971
// Every sub-tile shape (m in 1..4, n in 1..8) for k in {1, 3, 5} with cm_stride(11),
// one iteration per combination.
TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_ARM_SPLAT, strided_cm_subtile) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t mc = 1; mc <= 4; ++mc) {
      for (uint32_t nc = 1; nc <= 8; ++nc) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_arm_splat);
      }
    }
  }
}
1991
// Full 4x8 tile at k = 1 with qmin(128) set on the tester.
// NOTE(review): presumably this raises the lower output clamp - confirm in GemmMicrokernelTester.
TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_ARM_SPLAT, qmin) {
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(1)
      .qmin(128)
      .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_arm_splat);
}
2004
// Full 4x8 tile at k = 1 with qmax(128) set on the tester.
// NOTE(review): presumably this lowers the upper output clamp - confirm in GemmMicrokernelTester.
TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_ARM_SPLAT, qmax) {
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(1)
      .qmax(128)
      .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_arm_splat);
}
2017
// Full 4x8 tile at k = 1 with cm_stride(11), i.e. a stride larger than n = 8.
TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_ARM_SPLAT, strided_cm) {
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(1)
      .cm_stride(11)
      .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_arm_splat);
}
2030 #endif // XNN_ARCH_WASMSIMD
2031
2032
2033 #if XNN_ARCH_WASMSIMD
// Baseline case: one full 4x8 tile (m == mr, n == nr) with k = 1.
TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_X86_SPLAT, k_eq_1) {
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(1)
      .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_x86_splat);
}
2045
// Full 4x8 tile at k = 1 with cn_stride(11) instead of the tester default.
TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_X86_SPLAT, strided_cn) {
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(1)
      .cn_stride(11)
      .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_x86_splat);
}
2058
// Full 4x8 tile at k = 1 with a_stride(3), i.e. an A stride larger than k.
TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_X86_SPLAT, k_eq_1_strided_a) {
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(1)
      .a_stride(3)
      .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_x86_splat);
}
2071
// Every sub-tile shape (1 <= m <= 4, 1 <= n <= 8) at k = 1, one iteration per shape.
TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_X86_SPLAT, k_eq_1_subtile) {
  for (uint32_t mc = 1; mc <= 4; ++mc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(mc).n(nc).k(1)
          .iterations(1)
          .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_x86_splat);
    }
  }
}
2088
// Varies only m (1..4) with n fixed at 8 and k = 1, one iteration per value.
TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_X86_SPLAT, k_eq_1_subtile_m) {
  for (uint32_t mc = 1; mc <= 4; ++mc) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(mc).n(8).k(1)
        .iterations(1)
        .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_x86_splat);
  }
}
2103
// Varies only n (1..8) with m fixed at 4 and k = 1, one iteration per value.
TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_X86_SPLAT, k_eq_1_subtile_n) {
  for (uint32_t nc = 1; nc <= 8; ++nc) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(nc).k(1)
        .iterations(1)
        .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_x86_splat);
  }
}
2118
// Full 4x8 tile for every k in 2..9.
TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_X86_SPLAT, k_gt_1) {
  for (size_t kc = 2; kc <= 9; ++kc) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(kc)
        .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_x86_splat);
  }
}
2132
// Every sub-tile shape (m in 1..4, n in 1..8) for every k in 2..9, one iteration each.
TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_X86_SPLAT, k_gt_1_subtile) {
  for (size_t kc = 2; kc <= 9; ++kc) {
    for (uint32_t mc = 1; mc <= 4; ++mc) {
      for (uint32_t nc = 1; nc <= 8; ++nc) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_x86_splat);
      }
    }
  }
}
2151
// Sweeps n over 9..15 (more than one nr = 8 block) for k in {1, 3, 5} at m = 4.
// The tester must receive the loop's n: passing the constant 8 would re-run the
// single-block case on every iteration and never exercise n > 8.
// NOTE(review): this file is auto-generated (see file header); the generator
// template needs the same fix or it will regress on regeneration.
TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_X86_SPLAT, n_gt_8) {
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n).k(k)
          .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_x86_splat);
    }
  }
}
2167
// Sweeps n over 9..15 for k in {1, 3, 5} at m = 4, with cn_stride(11).
// n is taken from the loop variable; a constant n of 8 would make all
// iterations identical and leave the n > 8 strided case untested.
// NOTE(review): this file is auto-generated (see file header); the generator
// template needs the same fix or it will regress on regeneration.
TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_X86_SPLAT, n_gt_8_strided_cn) {
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n).k(k)
          .cn_stride(11)
          .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_x86_splat);
    }
  }
}
2184
// n in 9..15 and k in {1, 3, 5} at m = 4, with a_stride(7).
TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_X86_SPLAT, n_gt_8_strided_a) {
  for (uint32_t nc = 9; nc <= 15; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(nc).k(kc)
          .a_stride(7)
          .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_x86_splat);
    }
  }
}
2201
// n in 9..15, k in {1, 3, 5} and every m in 1..4, one iteration per combination.
TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_X86_SPLAT, n_gt_8_subtile) {
  for (uint32_t nc = 9; nc <= 15; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_x86_splat);
      }
    }
  }
}
2220
// Sweeps n over the multiples of nr = 8 in 16..24 for k in {1, 3, 5} at m = 4.
// The loop's n is forwarded to the tester; a hard-coded n of 8 would only
// repeat the single-block case and never cover multi-block widths.
// NOTE(review): this file is auto-generated (see file header); the generator
// template needs the same fix or it will regress on regeneration.
TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_X86_SPLAT, n_div_8) {
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(n).k(k)
          .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_x86_splat);
    }
  }
}
2236
// n in {16, 24} and k in {1, 3, 5} at m = 4, with cn_stride(11).
TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_X86_SPLAT, n_div_8_strided_cn) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(nc).k(kc)
          .cn_stride(11)
          .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_x86_splat);
    }
  }
}
2253
// n in {16, 24} and k in {1, 3, 5} at m = 4, with a_stride(7).
TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_X86_SPLAT, n_div_8_strided_a) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(nc).k(kc)
          .a_stride(7)
          .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_x86_splat);
    }
  }
}
2270
// n in {16, 24}, k in {1, 3, 5} and every m in 1..4, one iteration per combination.
TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_X86_SPLAT, n_div_8_subtile) {
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_x86_splat);
      }
    }
  }
}
2289
// Every sub-tile shape (m in 1..4, n in 1..8) for k in {1, 3, 5} with cm_stride(11),
// one iteration per combination.
TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_X86_SPLAT, strided_cm_subtile) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t mc = 1; mc <= 4; ++mc) {
      for (uint32_t nc = 1; nc <= 8; ++nc) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_x86_splat);
      }
    }
  }
}
2309
// Full 4x8 tile at k = 1 with qmin(128) set on the tester.
// NOTE(review): presumably this raises the lower output clamp - confirm in GemmMicrokernelTester.
TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_X86_SPLAT, qmin) {
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(1)
      .qmin(128)
      .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_x86_splat);
}
2322
// Full 4x8 tile at k = 1 with qmax(128) set on the tester.
// NOTE(review): presumably this lowers the upper output clamp - confirm in GemmMicrokernelTester.
TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_X86_SPLAT, qmax) {
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(1)
      .qmax(128)
      .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_x86_splat);
}
2335
// Full 4x8 tile at k = 1 with cm_stride(11), i.e. a stride larger than n = 8.
TEST(F32_PPMM_MINMAX_4X8__WASMSIMD_X86_SPLAT, strided_cm) {
  GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(1)
      .cm_stride(11)
      .Test(xnn_f32_ppmm_minmax_ukernel_4x8__wasmsimd_x86_splat);
}
2348 #endif // XNN_ARCH_WASMSIMD
2349
2350
// Baseline case: one full 4x2 tile (m == mr, n == nr) with k = 1, Scalar variant.
TEST(F32_PPMM_MINMAX_4X2__SCALAR, k_eq_1) {
  GemmMicrokernelTester()
      .mr(4).nr(2).kr(1).sr(1)
      .m(4).n(2).k(1)
      .Test(xnn_f32_ppmm_minmax_ukernel_4x2__scalar,
            GemmMicrokernelTester::Variant::Scalar);
}
2362
// Full 4x2 tile at k = 1 with cn_stride(5) instead of the tester default.
TEST(F32_PPMM_MINMAX_4X2__SCALAR, strided_cn) {
  GemmMicrokernelTester()
      .mr(4).nr(2).kr(1).sr(1)
      .m(4).n(2).k(1)
      .cn_stride(5)
      .Test(xnn_f32_ppmm_minmax_ukernel_4x2__scalar,
            GemmMicrokernelTester::Variant::Scalar);
}
2375
// Full 4x2 tile at k = 1 with a_stride(3), i.e. an A stride larger than k.
TEST(F32_PPMM_MINMAX_4X2__SCALAR, k_eq_1_strided_a) {
  GemmMicrokernelTester()
      .mr(4).nr(2).kr(1).sr(1)
      .m(4).n(2).k(1)
      .a_stride(3)
      .Test(xnn_f32_ppmm_minmax_ukernel_4x2__scalar,
            GemmMicrokernelTester::Variant::Scalar);
}
2388
// Every sub-tile shape (1 <= m <= 4, 1 <= n <= 2) at k = 1, one iteration per shape.
TEST(F32_PPMM_MINMAX_4X2__SCALAR, k_eq_1_subtile) {
  for (uint32_t mc = 1; mc <= 4; ++mc) {
    for (uint32_t nc = 1; nc <= 2; ++nc) {
      GemmMicrokernelTester()
          .mr(4).nr(2).kr(1).sr(1)
          .m(mc).n(nc).k(1)
          .iterations(1)
          .Test(xnn_f32_ppmm_minmax_ukernel_4x2__scalar,
                GemmMicrokernelTester::Variant::Scalar);
    }
  }
}
2405
// Varies only m (1..4) with n fixed at 2 and k = 1, one iteration per value.
TEST(F32_PPMM_MINMAX_4X2__SCALAR, k_eq_1_subtile_m) {
  for (uint32_t mc = 1; mc <= 4; ++mc) {
    GemmMicrokernelTester()
        .mr(4).nr(2).kr(1).sr(1)
        .m(mc).n(2).k(1)
        .iterations(1)
        .Test(xnn_f32_ppmm_minmax_ukernel_4x2__scalar,
              GemmMicrokernelTester::Variant::Scalar);
  }
}
2420
// Varies only n (1..2) with m fixed at 4 and k = 1, one iteration per value.
TEST(F32_PPMM_MINMAX_4X2__SCALAR, k_eq_1_subtile_n) {
  for (uint32_t nc = 1; nc <= 2; ++nc) {
    GemmMicrokernelTester()
        .mr(4).nr(2).kr(1).sr(1)
        .m(4).n(nc).k(1)
        .iterations(1)
        .Test(xnn_f32_ppmm_minmax_ukernel_4x2__scalar,
              GemmMicrokernelTester::Variant::Scalar);
  }
}
2435
// Full 4x2 tile for every k in 2..9.
TEST(F32_PPMM_MINMAX_4X2__SCALAR, k_gt_1) {
  for (size_t kc = 2; kc <= 9; ++kc) {
    GemmMicrokernelTester()
        .mr(4).nr(2).kr(1).sr(1)
        .m(4).n(2).k(kc)
        .Test(xnn_f32_ppmm_minmax_ukernel_4x2__scalar,
              GemmMicrokernelTester::Variant::Scalar);
  }
}
2449
// Every sub-tile shape (m in 1..4, n in 1..2) for every k in 2..9, one iteration each.
TEST(F32_PPMM_MINMAX_4X2__SCALAR, k_gt_1_subtile) {
  for (size_t kc = 2; kc <= 9; ++kc) {
    for (uint32_t mc = 1; mc <= 4; ++mc) {
      for (uint32_t nc = 1; nc <= 2; ++nc) {
        GemmMicrokernelTester()
            .mr(4).nr(2).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_ppmm_minmax_ukernel_4x2__scalar,
                  GemmMicrokernelTester::Variant::Scalar);
      }
    }
  }
}
2468
// Runs n = 3 (the only value in [3, 4), i.e. more than one nr = 2 block) for
// k in {1, 3, 5} at m = 4. The tester must receive the loop's n: passing the
// constant 2 would re-run the single-block case and never exercise n > 2.
// NOTE(review): this file is auto-generated (see file header); the generator
// template needs the same fix or it will regress on regeneration.
TEST(F32_PPMM_MINMAX_4X2__SCALAR, n_gt_2) {
  for (uint32_t n = 3; n < 4; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(2).kr(1).sr(1)
          .m(4).n(n).k(k)
          .Test(xnn_f32_ppmm_minmax_ukernel_4x2__scalar,
                GemmMicrokernelTester::Variant::Scalar);
    }
  }
}
2484
// Runs n = 3 (the only value in [3, 4)) for k in {1, 3, 5} at m = 4, with
// cn_stride(5). n is taken from the loop variable; a constant n of 2 would
// leave the n > 2 strided case untested.
// NOTE(review): this file is auto-generated (see file header); the generator
// template needs the same fix or it will regress on regeneration.
TEST(F32_PPMM_MINMAX_4X2__SCALAR, n_gt_2_strided_cn) {
  for (uint32_t n = 3; n < 4; n++) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(2).kr(1).sr(1)
          .m(4).n(n).k(k)
          .cn_stride(5)
          .Test(xnn_f32_ppmm_minmax_ukernel_4x2__scalar,
                GemmMicrokernelTester::Variant::Scalar);
    }
  }
}
2501
// n = 3 (the only value the loop visits) and k in {1, 3, 5} at m = 4, with a_stride(7).
TEST(F32_PPMM_MINMAX_4X2__SCALAR, n_gt_2_strided_a) {
  for (uint32_t nc = 3; nc <= 3; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(2).kr(1).sr(1)
          .m(4).n(nc).k(kc)
          .a_stride(7)
          .Test(xnn_f32_ppmm_minmax_ukernel_4x2__scalar,
                GemmMicrokernelTester::Variant::Scalar);
    }
  }
}
2518
// n = 3 (the only value the loop visits), k in {1, 3, 5} and every m in 1..4,
// one iteration per combination.
TEST(F32_PPMM_MINMAX_4X2__SCALAR, n_gt_2_subtile) {
  for (uint32_t nc = 3; nc <= 3; ++nc) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(2).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_ppmm_minmax_ukernel_4x2__scalar,
                  GemmMicrokernelTester::Variant::Scalar);
      }
    }
  }
}
2537
// Sweeps n over the multiples of nr = 2 in 4..6 for k in {1, 3, 5} at m = 4.
// The loop's n is forwarded to the tester; a hard-coded n of 2 would only
// repeat the single-block case and never cover multi-block widths.
// NOTE(review): this file is auto-generated (see file header); the generator
// template needs the same fix or it will regress on regeneration.
TEST(F32_PPMM_MINMAX_4X2__SCALAR, n_div_2) {
  for (uint32_t n = 4; n <= 6; n += 2) {
    for (size_t k = 1; k <= 5; k += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(2).kr(1).sr(1)
          .m(4).n(n).k(k)
          .Test(xnn_f32_ppmm_minmax_ukernel_4x2__scalar,
                GemmMicrokernelTester::Variant::Scalar);
    }
  }
}
2553
// n in {4, 6} and k in {1, 3, 5} at m = 4, with cn_stride(5).
TEST(F32_PPMM_MINMAX_4X2__SCALAR, n_div_2_strided_cn) {
  for (uint32_t nc = 4; nc <= 6; nc += 2) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(2).kr(1).sr(1)
          .m(4).n(nc).k(kc)
          .cn_stride(5)
          .Test(xnn_f32_ppmm_minmax_ukernel_4x2__scalar,
                GemmMicrokernelTester::Variant::Scalar);
    }
  }
}
2570
// n in {4, 6} and k in {1, 3, 5} at m = 4, with a_stride(7).
TEST(F32_PPMM_MINMAX_4X2__SCALAR, n_div_2_strided_a) {
  for (uint32_t nc = 4; nc <= 6; nc += 2) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      GemmMicrokernelTester()
          .mr(4).nr(2).kr(1).sr(1)
          .m(4).n(nc).k(kc)
          .a_stride(7)
          .Test(xnn_f32_ppmm_minmax_ukernel_4x2__scalar,
                GemmMicrokernelTester::Variant::Scalar);
    }
  }
}
2587
// n in {4, 6}, k in {1, 3, 5} and every m in 1..4, one iteration per combination.
TEST(F32_PPMM_MINMAX_4X2__SCALAR, n_div_2_subtile) {
  for (uint32_t nc = 4; nc <= 6; nc += 2) {
    for (size_t kc = 1; kc <= 5; kc += 2) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(2).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_f32_ppmm_minmax_ukernel_4x2__scalar,
                  GemmMicrokernelTester::Variant::Scalar);
      }
    }
  }
}
2606
// Every sub-tile shape (m in 1..4, n in 1..2) for k in {1, 3, 5} with cm_stride(5),
// one iteration per combination.
TEST(F32_PPMM_MINMAX_4X2__SCALAR, strided_cm_subtile) {
  for (size_t kc = 1; kc <= 5; kc += 2) {
    for (uint32_t mc = 1; mc <= 4; ++mc) {
      for (uint32_t nc = 1; nc <= 2; ++nc) {
        GemmMicrokernelTester()
            .mr(4).nr(2).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .cm_stride(5)
            .iterations(1)
            .Test(xnn_f32_ppmm_minmax_ukernel_4x2__scalar,
                  GemmMicrokernelTester::Variant::Scalar);
      }
    }
  }
}
2626
// Full 4x2 tile at k = 1 with qmin(128) set on the tester.
// NOTE(review): presumably this raises the lower output clamp - confirm in GemmMicrokernelTester.
TEST(F32_PPMM_MINMAX_4X2__SCALAR, qmin) {
  GemmMicrokernelTester()
      .mr(4).nr(2).kr(1).sr(1)
      .m(4).n(2).k(1)
      .qmin(128)
      .Test(xnn_f32_ppmm_minmax_ukernel_4x2__scalar,
            GemmMicrokernelTester::Variant::Scalar);
}
2639
// Full 4x2 tile at k = 1 with qmax(128) set on the tester.
// NOTE(review): presumably this lowers the upper output clamp - confirm in GemmMicrokernelTester.
TEST(F32_PPMM_MINMAX_4X2__SCALAR, qmax) {
  GemmMicrokernelTester()
      .mr(4).nr(2).kr(1).sr(1)
      .m(4).n(2).k(1)
      .qmax(128)
      .Test(xnn_f32_ppmm_minmax_ukernel_4x2__scalar,
            GemmMicrokernelTester::Variant::Scalar);
}
2652
// Full 4x2 tile at k = 1 with cm_stride(5), i.e. a stride larger than n = 2.
TEST(F32_PPMM_MINMAX_4X2__SCALAR, strided_cm) {
  GemmMicrokernelTester()
      .mr(4).nr(2).kr(1).sr(1)
      .m(4).n(2).k(1)
      .cm_stride(5)
      .Test(xnn_f32_ppmm_minmax_ukernel_4x2__scalar,
            GemmMicrokernelTester::Variant::Scalar);
}
2665
2666
// Baseline case: one full 2x4 tile (m == mr, n == nr) with k = 1, Scalar variant.
TEST(F32_PPMM_MINMAX_2X4__SCALAR, k_eq_1) {
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(1).sr(1)
      .m(2).n(4).k(1)
      .Test(xnn_f32_ppmm_minmax_ukernel_2x4__scalar,
            GemmMicrokernelTester::Variant::Scalar);
}
2678
// Full 2x4 tile at k = 1 with cn_stride(7) instead of the tester default.
TEST(F32_PPMM_MINMAX_2X4__SCALAR, strided_cn) {
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(1).sr(1)
      .m(2).n(4).k(1)
      .cn_stride(7)
      .Test(xnn_f32_ppmm_minmax_ukernel_2x4__scalar,
            GemmMicrokernelTester::Variant::Scalar);
}
2691
// Full 2x4 tile at k = 1 with a_stride(3), i.e. an A stride larger than k.
TEST(F32_PPMM_MINMAX_2X4__SCALAR, k_eq_1_strided_a) {
  GemmMicrokernelTester()
      .mr(2).nr(4).kr(1).sr(1)
      .m(2).n(4).k(1)
      .a_stride(3)
      .Test(xnn_f32_ppmm_minmax_ukernel_2x4__scalar,
            GemmMicrokernelTester::Variant::Scalar);
}
2704
// Every output shape m in [1, 2] x n in [1, 4] with k = 1; one iteration per shape.
TEST(F32_PPMM_MINMAX_2X4__SCALAR, k_eq_1_subtile) {
  for (uint32_t rows = 1; rows <= 2; ++rows) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(1).sr(1)
        .m(rows).n(cols).k(1)
        .iterations(1)
        .Test(xnn_f32_ppmm_minmax_ukernel_2x4__scalar, GemmMicrokernelTester::Variant::Scalar);
    }
  }
}
2721
// Varies only m over [1, 2] at full width (n = 4) with k = 1; one iteration per m.
TEST(F32_PPMM_MINMAX_2X4__SCALAR, k_eq_1_subtile_m) {
  for (uint32_t rows = 1; rows <= 2; ++rows) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(1).sr(1)
      .m(rows).n(4).k(1)
      .iterations(1)
      .Test(xnn_f32_ppmm_minmax_ukernel_2x4__scalar, GemmMicrokernelTester::Variant::Scalar);
  }
}
2736
// Varies only n over [1, 4] at full height (m = 2) with k = 1; one iteration per n.
TEST(F32_PPMM_MINMAX_2X4__SCALAR, k_eq_1_subtile_n) {
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(1).sr(1)
      .m(2).n(cols).k(1)
      .iterations(1)
      .Test(xnn_f32_ppmm_minmax_ukernel_2x4__scalar, GemmMicrokernelTester::Variant::Scalar);
  }
}
2751
// Full 2x4 tile for every k in [2, 9].
TEST(F32_PPMM_MINMAX_2X4__SCALAR, k_gt_1) {
  for (size_t depth = 2; depth < 10; ++depth) {
    GemmMicrokernelTester()
      .mr(2).nr(4).kr(1).sr(1)
      .m(2).n(4).k(depth)
      .Test(xnn_f32_ppmm_minmax_ukernel_2x4__scalar, GemmMicrokernelTester::Variant::Scalar);
  }
}
2765
// Every shape m in [1, 2] x n in [1, 4] for every k in [2, 9]; one iteration each.
TEST(F32_PPMM_MINMAX_2X4__SCALAR, k_gt_1_subtile) {
  for (size_t depth = 2; depth < 10; ++depth) {
    for (uint32_t rows = 1; rows <= 2; ++rows) {
      for (uint32_t cols = 1; cols <= 4; ++cols) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_f32_ppmm_minmax_ukernel_2x4__scalar, GemmMicrokernelTester::Variant::Scalar);
      }
    }
  }
}
2784
// Sweeps n over 5..7 (beyond nr = 4) and k over {1, 3, 5} at full height (m = 2).
// n is taken from the loop variable: with a fixed n(4) every pass would repeat
// the same full-tile shape and the n > nr case named by this test would never run.
// NOTE: the file header states this file is generated by
// tools/generate-gemm-test.py; the same correction belongs in that generator.
TEST(F32_PPMM_MINMAX_2X4__SCALAR, n_gt_4) {
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(1).sr(1)
        .m(2).n(cols).k(depth)
        .Test(xnn_f32_ppmm_minmax_ukernel_2x4__scalar, GemmMicrokernelTester::Variant::Scalar);
    }
  }
}
2800
// Sweeps n over 5..7 (beyond nr = 4) and k over {1, 3, 5} at full height (m = 2)
// with cn_stride set to 7.
// n is taken from the loop variable: with a fixed n(4) every pass would repeat
// the same full-tile shape and the n > nr case named by this test would never run.
// NOTE: the file header states this file is generated by
// tools/generate-gemm-test.py; the same correction belongs in that generator.
TEST(F32_PPMM_MINMAX_2X4__SCALAR, n_gt_4_strided_cn) {
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(1).sr(1)
        .m(2).n(cols).k(depth)
        .cn_stride(7)
        .Test(xnn_f32_ppmm_minmax_ukernel_2x4__scalar, GemmMicrokernelTester::Variant::Scalar);
    }
  }
}
2817
// Sweeps n over 5..7 and k over {1, 3, 5} at full height (m = 2) with a_stride set to 7.
TEST(F32_PPMM_MINMAX_2X4__SCALAR, n_gt_4_strided_a) {
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(1).sr(1)
        .m(2).n(cols).k(depth)
        .a_stride(7)
        .Test(xnn_f32_ppmm_minmax_ukernel_2x4__scalar, GemmMicrokernelTester::Variant::Scalar);
    }
  }
}
2834
// Sweeps n over 5..7, k over {1, 3, 5} and m over [1, 2]; one iteration per combination.
TEST(F32_PPMM_MINMAX_2X4__SCALAR, n_gt_4_subtile) {
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_f32_ppmm_minmax_ukernel_2x4__scalar, GemmMicrokernelTester::Variant::Scalar);
      }
    }
  }
}
2853
// Sweeps n over the multiples of nr = 4 in {8, 12} and k over {1, 3, 5} at full
// height (m = 2).
// n is taken from the loop variable: with a fixed n(4) every pass would repeat
// the single-tile shape and the multi-tile widths named by this test would never run.
// NOTE: the file header states this file is generated by
// tools/generate-gemm-test.py; the same correction belongs in that generator.
TEST(F32_PPMM_MINMAX_2X4__SCALAR, n_div_4) {
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(1).sr(1)
        .m(2).n(cols).k(depth)
        .Test(xnn_f32_ppmm_minmax_ukernel_2x4__scalar, GemmMicrokernelTester::Variant::Scalar);
    }
  }
}
2869
// Sweeps n over {8, 12} and k over {1, 3, 5} at full height (m = 2) with cn_stride set to 7.
TEST(F32_PPMM_MINMAX_2X4__SCALAR, n_div_4_strided_cn) {
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(1).sr(1)
        .m(2).n(cols).k(depth)
        .cn_stride(7)
        .Test(xnn_f32_ppmm_minmax_ukernel_2x4__scalar, GemmMicrokernelTester::Variant::Scalar);
    }
  }
}
2886
// Sweeps n over {8, 12} and k over {1, 3, 5} at full height (m = 2) with a_stride set to 7.
TEST(F32_PPMM_MINMAX_2X4__SCALAR, n_div_4_strided_a) {
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
        .mr(2).nr(4).kr(1).sr(1)
        .m(2).n(cols).k(depth)
        .a_stride(7)
        .Test(xnn_f32_ppmm_minmax_ukernel_2x4__scalar, GemmMicrokernelTester::Variant::Scalar);
    }
  }
}
2903
// Sweeps n over {8, 12}, k over {1, 3, 5} and m over [1, 2]; one iteration per combination.
TEST(F32_PPMM_MINMAX_2X4__SCALAR, n_div_4_subtile) {
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_f32_ppmm_minmax_ukernel_2x4__scalar, GemmMicrokernelTester::Variant::Scalar);
      }
    }
  }
}
2922
// Every shape m in [1, 2] x n in [1, 4] for k in {1, 3, 5} with cm_stride set to 7;
// one iteration per combination.
TEST(F32_PPMM_MINMAX_2X4__SCALAR, strided_cm_subtile) {
  for (size_t depth = 1; depth <= 5; depth += 2) {
    for (uint32_t rows = 1; rows <= 2; ++rows) {
      for (uint32_t cols = 1; cols <= 4; ++cols) {
        GemmMicrokernelTester()
          .mr(2).nr(4).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_f32_ppmm_minmax_ukernel_2x4__scalar, GemmMicrokernelTester::Variant::Scalar);
      }
    }
  }
}
2942
// Full 2x4 tile (m = mr, n = nr) with k = 1 and qmin set to 128.
// NOTE(review): qmin presumably controls the lower output clamp; its exact
// meaning is defined by GemmMicrokernelTester, not visible here.
TEST(F32_PPMM_MINMAX_2X4__SCALAR, qmin) {
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(1).sr(1)
    .m(2).n(4).k(1)
    .qmin(128)
    .Test(xnn_f32_ppmm_minmax_ukernel_2x4__scalar, GemmMicrokernelTester::Variant::Scalar);
}
2955
// Full 2x4 tile (m = mr, n = nr) with k = 1 and qmax set to 128.
// NOTE(review): qmax presumably controls the upper output clamp; its exact
// meaning is defined by GemmMicrokernelTester, not visible here.
TEST(F32_PPMM_MINMAX_2X4__SCALAR, qmax) {
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(1).sr(1)
    .m(2).n(4).k(1)
    .qmax(128)
    .Test(xnn_f32_ppmm_minmax_ukernel_2x4__scalar, GemmMicrokernelTester::Variant::Scalar);
}
2968
// Full 2x4 tile (m = mr, n = nr) with k = 1 and cm_stride set to 7.
TEST(F32_PPMM_MINMAX_2X4__SCALAR, strided_cm) {
  GemmMicrokernelTester()
    .mr(2).nr(4).kr(1).sr(1)
    .m(2).n(4).k(1)
    .cm_stride(7)
    .Test(xnn_f32_ppmm_minmax_ukernel_2x4__scalar, GemmMicrokernelTester::Variant::Scalar);
}
2981
2982
// Full 4x4 tile (m = mr, n = nr) with k = 1 and default strides/clamping.
TEST(F32_PPMM_MINMAX_4X4__SCALAR, k_eq_1) {
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(1).sr(1)
    .m(4).n(4).k(1)
    .Test(xnn_f32_ppmm_minmax_ukernel_4x4__scalar, GemmMicrokernelTester::Variant::Scalar);
}
2994
// Full 4x4 tile (m = mr, n = nr) with k = 1 and cn_stride set to 7.
TEST(F32_PPMM_MINMAX_4X4__SCALAR, strided_cn) {
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(1).sr(1)
    .m(4).n(4).k(1)
    .cn_stride(7)
    .Test(xnn_f32_ppmm_minmax_ukernel_4x4__scalar, GemmMicrokernelTester::Variant::Scalar);
}
3007
// Full 4x4 tile (m = mr, n = nr) with k = 1 and a_stride set to 3.
TEST(F32_PPMM_MINMAX_4X4__SCALAR, k_eq_1_strided_a) {
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(1).sr(1)
    .m(4).n(4).k(1)
    .a_stride(3)
    .Test(xnn_f32_ppmm_minmax_ukernel_4x4__scalar, GemmMicrokernelTester::Variant::Scalar);
}
3020
// Every output shape m in [1, 4] x n in [1, 4] with k = 1; one iteration per shape.
TEST(F32_PPMM_MINMAX_4X4__SCALAR, k_eq_1_subtile) {
  for (uint32_t rows = 1; rows <= 4; ++rows) {
    for (uint32_t cols = 1; cols <= 4; ++cols) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(1).sr(1)
        .m(rows).n(cols).k(1)
        .iterations(1)
        .Test(xnn_f32_ppmm_minmax_ukernel_4x4__scalar, GemmMicrokernelTester::Variant::Scalar);
    }
  }
}
3037
// Varies only m over [1, 4] at full width (n = 4) with k = 1; one iteration per m.
TEST(F32_PPMM_MINMAX_4X4__SCALAR, k_eq_1_subtile_m) {
  for (uint32_t rows = 1; rows <= 4; ++rows) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(1).sr(1)
      .m(rows).n(4).k(1)
      .iterations(1)
      .Test(xnn_f32_ppmm_minmax_ukernel_4x4__scalar, GemmMicrokernelTester::Variant::Scalar);
  }
}
3052
// Varies only n over [1, 4] at full height (m = 4) with k = 1; one iteration per n.
TEST(F32_PPMM_MINMAX_4X4__SCALAR, k_eq_1_subtile_n) {
  for (uint32_t cols = 1; cols <= 4; ++cols) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(1).sr(1)
      .m(4).n(cols).k(1)
      .iterations(1)
      .Test(xnn_f32_ppmm_minmax_ukernel_4x4__scalar, GemmMicrokernelTester::Variant::Scalar);
  }
}
3067
// Full 4x4 tile for every k in [2, 9].
TEST(F32_PPMM_MINMAX_4X4__SCALAR, k_gt_1) {
  for (size_t depth = 2; depth < 10; ++depth) {
    GemmMicrokernelTester()
      .mr(4).nr(4).kr(1).sr(1)
      .m(4).n(4).k(depth)
      .Test(xnn_f32_ppmm_minmax_ukernel_4x4__scalar, GemmMicrokernelTester::Variant::Scalar);
  }
}
3081
// Every shape m in [1, 4] x n in [1, 4] for every k in [2, 9]; one iteration each.
TEST(F32_PPMM_MINMAX_4X4__SCALAR, k_gt_1_subtile) {
  for (size_t depth = 2; depth < 10; ++depth) {
    for (uint32_t rows = 1; rows <= 4; ++rows) {
      for (uint32_t cols = 1; cols <= 4; ++cols) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_f32_ppmm_minmax_ukernel_4x4__scalar, GemmMicrokernelTester::Variant::Scalar);
      }
    }
  }
}
3100
// Sweeps n over 5..7 (beyond nr = 4) and k over {1, 3, 5} at full height (m = 4).
// n is taken from the loop variable: with a fixed n(4) every pass would repeat
// the same full-tile shape and the n > nr case named by this test would never run.
// NOTE: the file header states this file is generated by
// tools/generate-gemm-test.py; the same correction belongs in that generator.
TEST(F32_PPMM_MINMAX_4X4__SCALAR, n_gt_4) {
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(1).sr(1)
        .m(4).n(cols).k(depth)
        .Test(xnn_f32_ppmm_minmax_ukernel_4x4__scalar, GemmMicrokernelTester::Variant::Scalar);
    }
  }
}
3116
// Sweeps n over 5..7 (beyond nr = 4) and k over {1, 3, 5} at full height (m = 4)
// with cn_stride set to 7.
// n is taken from the loop variable: with a fixed n(4) every pass would repeat
// the same full-tile shape and the n > nr case named by this test would never run.
// NOTE: the file header states this file is generated by
// tools/generate-gemm-test.py; the same correction belongs in that generator.
TEST(F32_PPMM_MINMAX_4X4__SCALAR, n_gt_4_strided_cn) {
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(1).sr(1)
        .m(4).n(cols).k(depth)
        .cn_stride(7)
        .Test(xnn_f32_ppmm_minmax_ukernel_4x4__scalar, GemmMicrokernelTester::Variant::Scalar);
    }
  }
}
3133
// Sweeps n over 5..7 and k over {1, 3, 5} at full height (m = 4) with a_stride set to 7.
TEST(F32_PPMM_MINMAX_4X4__SCALAR, n_gt_4_strided_a) {
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(1).sr(1)
        .m(4).n(cols).k(depth)
        .a_stride(7)
        .Test(xnn_f32_ppmm_minmax_ukernel_4x4__scalar, GemmMicrokernelTester::Variant::Scalar);
    }
  }
}
3150
// Sweeps n over 5..7, k over {1, 3, 5} and m over [1, 4]; one iteration per combination.
TEST(F32_PPMM_MINMAX_4X4__SCALAR, n_gt_4_subtile) {
  for (uint32_t cols = 5; cols < 8; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_f32_ppmm_minmax_ukernel_4x4__scalar, GemmMicrokernelTester::Variant::Scalar);
      }
    }
  }
}
3169
// Sweeps n over the multiples of nr = 4 in {8, 12} and k over {1, 3, 5} at full
// height (m = 4).
// n is taken from the loop variable: with a fixed n(4) every pass would repeat
// the single-tile shape and the multi-tile widths named by this test would never run.
// NOTE: the file header states this file is generated by
// tools/generate-gemm-test.py; the same correction belongs in that generator.
TEST(F32_PPMM_MINMAX_4X4__SCALAR, n_div_4) {
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(1).sr(1)
        .m(4).n(cols).k(depth)
        .Test(xnn_f32_ppmm_minmax_ukernel_4x4__scalar, GemmMicrokernelTester::Variant::Scalar);
    }
  }
}
3185
// Sweeps n over {8, 12} and k over {1, 3, 5} at full height (m = 4) with cn_stride set to 7.
TEST(F32_PPMM_MINMAX_4X4__SCALAR, n_div_4_strided_cn) {
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(1).sr(1)
        .m(4).n(cols).k(depth)
        .cn_stride(7)
        .Test(xnn_f32_ppmm_minmax_ukernel_4x4__scalar, GemmMicrokernelTester::Variant::Scalar);
    }
  }
}
3202
// Sweeps n over {8, 12} and k over {1, 3, 5} at full height (m = 4) with a_stride set to 7.
TEST(F32_PPMM_MINMAX_4X4__SCALAR, n_div_4_strided_a) {
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
        .mr(4).nr(4).kr(1).sr(1)
        .m(4).n(cols).k(depth)
        .a_stride(7)
        .Test(xnn_f32_ppmm_minmax_ukernel_4x4__scalar, GemmMicrokernelTester::Variant::Scalar);
    }
  }
}
3219
// Sweeps n over {8, 12}, k over {1, 3, 5} and m over [1, 4]; one iteration per combination.
TEST(F32_PPMM_MINMAX_4X4__SCALAR, n_div_4_subtile) {
  for (uint32_t cols = 8; cols <= 12; cols += 4) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      for (uint32_t rows = 1; rows <= 4; ++rows) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_f32_ppmm_minmax_ukernel_4x4__scalar, GemmMicrokernelTester::Variant::Scalar);
      }
    }
  }
}
3238
// Every shape m in [1, 4] x n in [1, 4] for k in {1, 3, 5} with cm_stride set to 7;
// one iteration per combination.
TEST(F32_PPMM_MINMAX_4X4__SCALAR, strided_cm_subtile) {
  for (size_t depth = 1; depth <= 5; depth += 2) {
    for (uint32_t rows = 1; rows <= 4; ++rows) {
      for (uint32_t cols = 1; cols <= 4; ++cols) {
        GemmMicrokernelTester()
          .mr(4).nr(4).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .cm_stride(7)
          .iterations(1)
          .Test(xnn_f32_ppmm_minmax_ukernel_4x4__scalar, GemmMicrokernelTester::Variant::Scalar);
      }
    }
  }
}
3258
// Full 4x4 tile (m = mr, n = nr) with k = 1 and qmin set to 128.
// NOTE(review): qmin presumably controls the lower output clamp; its exact
// meaning is defined by GemmMicrokernelTester, not visible here.
TEST(F32_PPMM_MINMAX_4X4__SCALAR, qmin) {
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(1).sr(1)
    .m(4).n(4).k(1)
    .qmin(128)
    .Test(xnn_f32_ppmm_minmax_ukernel_4x4__scalar, GemmMicrokernelTester::Variant::Scalar);
}
3271
// Full 4x4 tile (m = mr, n = nr) with k = 1 and qmax set to 128.
// NOTE(review): qmax presumably controls the upper output clamp; its exact
// meaning is defined by GemmMicrokernelTester, not visible here.
TEST(F32_PPMM_MINMAX_4X4__SCALAR, qmax) {
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(1).sr(1)
    .m(4).n(4).k(1)
    .qmax(128)
    .Test(xnn_f32_ppmm_minmax_ukernel_4x4__scalar, GemmMicrokernelTester::Variant::Scalar);
}
3284
// Full 4x4 tile (m = mr, n = nr) with k = 1 and cm_stride set to 7.
TEST(F32_PPMM_MINMAX_4X4__SCALAR, strided_cm) {
  GemmMicrokernelTester()
    .mr(4).nr(4).kr(1).sr(1)
    .m(4).n(4).k(1)
    .cm_stride(7)
    .Test(xnn_f32_ppmm_minmax_ukernel_4x4__scalar, GemmMicrokernelTester::Variant::Scalar);
}
3297
3298
// Full 3x3 tile (m = mr, n = nr) with k = 1 and default strides/clamping.
TEST(F32_PPMM_MINMAX_3X3__SCALAR, k_eq_1) {
  GemmMicrokernelTester()
    .mr(3).nr(3).kr(1).sr(1)
    .m(3).n(3).k(1)
    .Test(xnn_f32_ppmm_minmax_ukernel_3x3__scalar, GemmMicrokernelTester::Variant::Scalar);
}
3310
// Full 3x3 tile (m = mr, n = nr) with k = 1 and cn_stride set to 5.
TEST(F32_PPMM_MINMAX_3X3__SCALAR, strided_cn) {
  GemmMicrokernelTester()
    .mr(3).nr(3).kr(1).sr(1)
    .m(3).n(3).k(1)
    .cn_stride(5)
    .Test(xnn_f32_ppmm_minmax_ukernel_3x3__scalar, GemmMicrokernelTester::Variant::Scalar);
}
3323
// Full 3x3 tile (m = mr, n = nr) with k = 1 and a_stride set to 3.
TEST(F32_PPMM_MINMAX_3X3__SCALAR, k_eq_1_strided_a) {
  GemmMicrokernelTester()
    .mr(3).nr(3).kr(1).sr(1)
    .m(3).n(3).k(1)
    .a_stride(3)
    .Test(xnn_f32_ppmm_minmax_ukernel_3x3__scalar, GemmMicrokernelTester::Variant::Scalar);
}
3336
// Every output shape m in [1, 3] x n in [1, 3] with k = 1; one iteration per shape.
TEST(F32_PPMM_MINMAX_3X3__SCALAR, k_eq_1_subtile) {
  for (uint32_t rows = 1; rows <= 3; ++rows) {
    for (uint32_t cols = 1; cols <= 3; ++cols) {
      GemmMicrokernelTester()
        .mr(3).nr(3).kr(1).sr(1)
        .m(rows).n(cols).k(1)
        .iterations(1)
        .Test(xnn_f32_ppmm_minmax_ukernel_3x3__scalar, GemmMicrokernelTester::Variant::Scalar);
    }
  }
}
3353
// Varies only m over [1, 3] at full width (n = 3) with k = 1; one iteration per m.
TEST(F32_PPMM_MINMAX_3X3__SCALAR, k_eq_1_subtile_m) {
  for (uint32_t rows = 1; rows <= 3; ++rows) {
    GemmMicrokernelTester()
      .mr(3).nr(3).kr(1).sr(1)
      .m(rows).n(3).k(1)
      .iterations(1)
      .Test(xnn_f32_ppmm_minmax_ukernel_3x3__scalar, GemmMicrokernelTester::Variant::Scalar);
  }
}
3368
// Varies only n over [1, 3] at full height (m = 3) with k = 1; one iteration per n.
TEST(F32_PPMM_MINMAX_3X3__SCALAR, k_eq_1_subtile_n) {
  for (uint32_t cols = 1; cols <= 3; ++cols) {
    GemmMicrokernelTester()
      .mr(3).nr(3).kr(1).sr(1)
      .m(3).n(cols).k(1)
      .iterations(1)
      .Test(xnn_f32_ppmm_minmax_ukernel_3x3__scalar, GemmMicrokernelTester::Variant::Scalar);
  }
}
3383
// Full 3x3 tile for every k in [2, 9].
TEST(F32_PPMM_MINMAX_3X3__SCALAR, k_gt_1) {
  for (size_t depth = 2; depth < 10; ++depth) {
    GemmMicrokernelTester()
      .mr(3).nr(3).kr(1).sr(1)
      .m(3).n(3).k(depth)
      .Test(xnn_f32_ppmm_minmax_ukernel_3x3__scalar, GemmMicrokernelTester::Variant::Scalar);
  }
}
3397
// Every shape m in [1, 3] x n in [1, 3] for every k in [2, 9]; one iteration each.
TEST(F32_PPMM_MINMAX_3X3__SCALAR, k_gt_1_subtile) {
  for (size_t depth = 2; depth < 10; ++depth) {
    for (uint32_t rows = 1; rows <= 3; ++rows) {
      for (uint32_t cols = 1; cols <= 3; ++cols) {
        GemmMicrokernelTester()
          .mr(3).nr(3).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_f32_ppmm_minmax_ukernel_3x3__scalar, GemmMicrokernelTester::Variant::Scalar);
      }
    }
  }
}
3416
// Sweeps n over 4..5 (beyond nr = 3) and k over {1, 3, 5} at full height (m = 3).
// n is taken from the loop variable: with a fixed n(3) every pass would repeat
// the same full-tile shape and the n > nr case named by this test would never run.
// NOTE: the file header states this file is generated by
// tools/generate-gemm-test.py; the same correction belongs in that generator.
TEST(F32_PPMM_MINMAX_3X3__SCALAR, n_gt_3) {
  for (uint32_t cols = 4; cols < 6; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
        .mr(3).nr(3).kr(1).sr(1)
        .m(3).n(cols).k(depth)
        .Test(xnn_f32_ppmm_minmax_ukernel_3x3__scalar, GemmMicrokernelTester::Variant::Scalar);
    }
  }
}
3432
// Sweeps n over 4..5 (beyond nr = 3) and k over {1, 3, 5} at full height (m = 3)
// with cn_stride set to 5.
// n is taken from the loop variable: with a fixed n(3) every pass would repeat
// the same full-tile shape and the n > nr case named by this test would never run.
// NOTE: the file header states this file is generated by
// tools/generate-gemm-test.py; the same correction belongs in that generator.
TEST(F32_PPMM_MINMAX_3X3__SCALAR, n_gt_3_strided_cn) {
  for (uint32_t cols = 4; cols < 6; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
        .mr(3).nr(3).kr(1).sr(1)
        .m(3).n(cols).k(depth)
        .cn_stride(5)
        .Test(xnn_f32_ppmm_minmax_ukernel_3x3__scalar, GemmMicrokernelTester::Variant::Scalar);
    }
  }
}
3449
// Sweeps n over 4..5 and k over {1, 3, 5} at full height (m = 3) with a_stride set to 7.
TEST(F32_PPMM_MINMAX_3X3__SCALAR, n_gt_3_strided_a) {
  for (uint32_t cols = 4; cols < 6; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
        .mr(3).nr(3).kr(1).sr(1)
        .m(3).n(cols).k(depth)
        .a_stride(7)
        .Test(xnn_f32_ppmm_minmax_ukernel_3x3__scalar, GemmMicrokernelTester::Variant::Scalar);
    }
  }
}
3466
// Sweeps n over 4..5, k over {1, 3, 5} and m over [1, 3]; one iteration per combination.
TEST(F32_PPMM_MINMAX_3X3__SCALAR, n_gt_3_subtile) {
  for (uint32_t cols = 4; cols < 6; ++cols) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester()
          .mr(3).nr(3).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_f32_ppmm_minmax_ukernel_3x3__scalar, GemmMicrokernelTester::Variant::Scalar);
      }
    }
  }
}
3485
// Sweeps n over the multiples of nr = 3 in {6, 9} and k over {1, 3, 5} at full
// height (m = 3).
// n is taken from the loop variable: with a fixed n(3) every pass would repeat
// the single-tile shape and the multi-tile widths named by this test would never run.
// NOTE: the file header states this file is generated by
// tools/generate-gemm-test.py; the same correction belongs in that generator.
TEST(F32_PPMM_MINMAX_3X3__SCALAR, n_div_3) {
  for (uint32_t cols = 6; cols <= 9; cols += 3) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
        .mr(3).nr(3).kr(1).sr(1)
        .m(3).n(cols).k(depth)
        .Test(xnn_f32_ppmm_minmax_ukernel_3x3__scalar, GemmMicrokernelTester::Variant::Scalar);
    }
  }
}
3501
// Sweeps n over {6, 9} and k over {1, 3, 5} at full height (m = 3) with cn_stride set to 5.
TEST(F32_PPMM_MINMAX_3X3__SCALAR, n_div_3_strided_cn) {
  for (uint32_t cols = 6; cols <= 9; cols += 3) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
        .mr(3).nr(3).kr(1).sr(1)
        .m(3).n(cols).k(depth)
        .cn_stride(5)
        .Test(xnn_f32_ppmm_minmax_ukernel_3x3__scalar, GemmMicrokernelTester::Variant::Scalar);
    }
  }
}
3518
// Sweeps n over {6, 9} and k over {1, 3, 5} at full height (m = 3) with a_stride set to 7.
TEST(F32_PPMM_MINMAX_3X3__SCALAR, n_div_3_strided_a) {
  for (uint32_t cols = 6; cols <= 9; cols += 3) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      GemmMicrokernelTester()
        .mr(3).nr(3).kr(1).sr(1)
        .m(3).n(cols).k(depth)
        .a_stride(7)
        .Test(xnn_f32_ppmm_minmax_ukernel_3x3__scalar, GemmMicrokernelTester::Variant::Scalar);
    }
  }
}
3535
// Sweeps n over {6, 9}, k over {1, 3, 5} and m over [1, 3]; one iteration per combination.
TEST(F32_PPMM_MINMAX_3X3__SCALAR, n_div_3_subtile) {
  for (uint32_t cols = 6; cols <= 9; cols += 3) {
    for (size_t depth = 1; depth <= 5; depth += 2) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        GemmMicrokernelTester()
          .mr(3).nr(3).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .iterations(1)
          .Test(xnn_f32_ppmm_minmax_ukernel_3x3__scalar, GemmMicrokernelTester::Variant::Scalar);
      }
    }
  }
}
3554
// Every shape m in [1, 3] x n in [1, 3] for k in {1, 3, 5} with cm_stride set to 5;
// one iteration per combination.
TEST(F32_PPMM_MINMAX_3X3__SCALAR, strided_cm_subtile) {
  for (size_t depth = 1; depth <= 5; depth += 2) {
    for (uint32_t rows = 1; rows <= 3; ++rows) {
      for (uint32_t cols = 1; cols <= 3; ++cols) {
        GemmMicrokernelTester()
          .mr(3).nr(3).kr(1).sr(1)
          .m(rows).n(cols).k(depth)
          .cm_stride(5)
          .iterations(1)
          .Test(xnn_f32_ppmm_minmax_ukernel_3x3__scalar, GemmMicrokernelTester::Variant::Scalar);
      }
    }
  }
}
3574
// Full 3x3 tile (m = mr, n = nr) with k = 1 and qmin set to 128.
// NOTE(review): qmin presumably controls the lower output clamp; its exact
// meaning is defined by GemmMicrokernelTester, not visible here.
TEST(F32_PPMM_MINMAX_3X3__SCALAR, qmin) {
  GemmMicrokernelTester()
    .mr(3).nr(3).kr(1).sr(1)
    .m(3).n(3).k(1)
    .qmin(128)
    .Test(xnn_f32_ppmm_minmax_ukernel_3x3__scalar, GemmMicrokernelTester::Variant::Scalar);
}
3587
// Full 3x3 tile (m = mr, n = nr) with k = 1 and qmax set to 128.
// NOTE(review): qmax presumably controls the upper output clamp; its exact
// meaning is defined by GemmMicrokernelTester, not visible here.
TEST(F32_PPMM_MINMAX_3X3__SCALAR, qmax) {
  GemmMicrokernelTester()
    .mr(3).nr(3).kr(1).sr(1)
    .m(3).n(3).k(1)
    .qmax(128)
    .Test(xnn_f32_ppmm_minmax_ukernel_3x3__scalar, GemmMicrokernelTester::Variant::Scalar);
}
3600
// Full 3x3 tile (m = mr, n = nr) with k = 1 and cm_stride set to 5.
TEST(F32_PPMM_MINMAX_3X3__SCALAR, strided_cm) {
  GemmMicrokernelTester()
    .mr(3).nr(3).kr(1).sr(1)
    .m(3).n(3).k(1)
    .cm_stride(5)
    .Test(xnn_f32_ppmm_minmax_ukernel_3x3__scalar, GemmMicrokernelTester::Variant::Scalar);
}
3613