// Copyright 2021 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
//
// Auto-generated file. Do not edit!
//   Specification: test/x32-transpose.yaml
//   Generator: tools/generate-transpose-test.py
9
10
11 #include <gtest/gtest.h>
12
13 #include <xnnpack/common.h>
14 #include <xnnpack/isa-checks.h>
15
16 #include <xnnpack/transpose.h>
17 #include "transpose-microkernel-tester.h"
18
19
// Single case: block_height=1, block_width=2, input_stride=2, output_stride=1.
TEST(X32_TRANSPOSE__1X2_SCALAR_INT, bh_1_bw_2) {
  auto tester = TransposeMicrokernelTester()
                    .input_stride(2)
                    .output_stride(1)
                    .block_width(2)
                    .block_height(1)
                    .iterations(1);
  tester.Test(xnn_x32_transpose_ukernel__1x2_scalar_int);
}
29
// Full grid: block_height in [1, 2] and block_width in [1, 4], with
// output_stride == block_height and input_stride == block_width.
TEST(X32_TRANSPOSE__1X2_SCALAR_INT, bh_1_2_bw_1_4) {
  for (size_t bh = 1; bh <= 2; ++bh) {
    for (size_t bw = 1; bw <= 4; ++bw) {
      auto tester = TransposeMicrokernelTester()
                        .input_stride(bw)
                        .output_stride(bh)
                        .block_width(bw)
                        .block_height(bh)
                        .iterations(1);
      tester.Test(xnn_x32_transpose_ukernel__1x2_scalar_int);
    }
  }
}
43
// Single case: block_height=1, block_width=4, input_stride=4, output_stride=1.
TEST(X32_TRANSPOSE__1X2_SCALAR_INT, bh_1_bw_4) {
  auto tester = TransposeMicrokernelTester()
                    .input_stride(4)
                    .output_stride(1)
                    .block_width(4)
                    .block_height(1)
                    .iterations(1);
  tester.Test(xnn_x32_transpose_ukernel__1x2_scalar_int);
}
53
// Sweeps block_width over [3, 4) with input_stride == block_width;
// block_height and output_stride are fixed at 1.
TEST(X32_TRANSPOSE__1X2_SCALAR_INT, bh_1_bw_3_4) {
  for (size_t bw = 3; bw < 4; ++bw) {
    auto tester = TransposeMicrokernelTester()
                      .input_stride(bw)
                      .output_stride(1)
                      .block_width(bw)
                      .block_height(1)
                      .iterations(1);
    tester.Test(xnn_x32_transpose_ukernel__1x2_scalar_int);
  }
}
65
// Sweeps block_width over [3, 4) with input_stride == block_width;
// block_height and output_stride are fixed at 2.
TEST(X32_TRANSPOSE__1X2_SCALAR_INT, bh_2_bw_3_4) {
  for (size_t bw = 3; bw < 4; ++bw) {
    auto tester = TransposeMicrokernelTester()
                      .input_stride(bw)
                      .output_stride(2)
                      .block_width(bw)
                      .block_height(2)
                      .iterations(1);
    tester.Test(xnn_x32_transpose_ukernel__1x2_scalar_int);
  }
}
77
// Single case: block_height=2, block_width=2, input_stride=2, output_stride=2.
TEST(X32_TRANSPOSE__1X2_SCALAR_INT, bh_2_bw_2) {
  auto tester = TransposeMicrokernelTester()
                    .input_stride(2)
                    .output_stride(2)
                    .block_width(2)
                    .block_height(2)
                    .iterations(1);
  tester.Test(xnn_x32_transpose_ukernel__1x2_scalar_int);
}
87
// Sweeps block_height over [2, 2] with output_stride == block_height;
// block_width and input_stride are fixed at 2.
// The upper bound is inclusive: an exclusive bound of 2 leaves the range empty,
// so the test would pass without ever calling the kernel.
TEST(X32_TRANSPOSE__1X2_SCALAR_INT, bh_2_2_bw_2) {
  for (size_t bh = 2; bh <= 2; ++bh) {
    TransposeMicrokernelTester()
      .input_stride(2)
      .output_stride(bh)
      .block_width(2)
      .block_height(bh)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__1x2_scalar_int);
  }
}
99
// Sweeps block_height over [2, 2] with output_stride == block_height;
// block_width and input_stride are fixed at 4.
// The upper bound is inclusive: an exclusive bound of 2 leaves the range empty,
// so the test would pass without ever calling the kernel.
TEST(X32_TRANSPOSE__1X2_SCALAR_INT, bh_2_2_bw_4) {
  for (size_t bh = 2; bh <= 2; ++bh) {
    TransposeMicrokernelTester()
      .input_stride(4)
      .output_stride(bh)
      .block_width(4)
      .block_height(bh)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__1x2_scalar_int);
  }
}
111
// Sweeps block_height over [2, 2] and block_width over [3, 4), with
// output_stride == block_height and input_stride == block_width.
// The height bound is inclusive: an exclusive bound of 2 leaves the outer range
// empty, so the test would pass without ever calling the kernel.
TEST(X32_TRANSPOSE__1X2_SCALAR_INT, bh_2_2_bw_3_4) {
  for (size_t bh = 2; bh <= 2; ++bh) {
    for (size_t bw = 3; bw < 4; ++bw) {
      TransposeMicrokernelTester()
        .input_stride(bw)
        .output_stride(bh)
        .block_width(bw)
        .block_height(bh)
        .iterations(1)
        .Test(xnn_x32_transpose_ukernel__1x2_scalar_int);
    }
  }
}
125
// Single case: block_height=1, block_width=2, input_stride=4 (wider than the block), output_stride=1.
TEST(X32_TRANSPOSE__1X2_SCALAR_INT, bh_1_bw_2_is_4) {
  auto tester = TransposeMicrokernelTester()
                    .input_stride(4)
                    .output_stride(1)
                    .block_width(2)
                    .block_height(1)
                    .iterations(1);
  tester.Test(xnn_x32_transpose_ukernel__1x2_scalar_int);
}
135
// Single case: block_height=1, block_width=2, input_stride=2, output_stride=2 (larger than block_height).
TEST(X32_TRANSPOSE__1X2_SCALAR_INT, bh_1_bw_2_os_2) {
  auto tester = TransposeMicrokernelTester()
                    .input_stride(2)
                    .output_stride(2)
                    .block_width(2)
                    .block_height(1)
                    .iterations(1);
  tester.Test(xnn_x32_transpose_ukernel__1x2_scalar_int);
}
145
// Single case: block_height=1, block_width=2, input_stride=4, output_stride=2 (both larger than the block).
TEST(X32_TRANSPOSE__1X2_SCALAR_INT, bh_1_bw_2_is_4_os_2) {
  auto tester = TransposeMicrokernelTester()
                    .input_stride(4)
                    .output_stride(2)
                    .block_width(2)
                    .block_height(1)
                    .iterations(1);
  tester.Test(xnn_x32_transpose_ukernel__1x2_scalar_int);
}
155
// Single case: block_height=1, block_width=4, input_stride=4, output_stride=1.
TEST(X32_TRANSPOSE__1X4_SCALAR_INT, bh_1_bw_4) {
  auto tester = TransposeMicrokernelTester()
                    .input_stride(4)
                    .output_stride(1)
                    .block_width(4)
                    .block_height(1)
                    .iterations(1);
  tester.Test(xnn_x32_transpose_ukernel__1x4_scalar_int);
}
165
// Full grid: block_height in [1, 2] and block_width in [1, 8], with
// output_stride == block_height and input_stride == block_width.
TEST(X32_TRANSPOSE__1X4_SCALAR_INT, bh_1_2_bw_1_8) {
  for (size_t bh = 1; bh <= 2; ++bh) {
    for (size_t bw = 1; bw <= 8; ++bw) {
      auto tester = TransposeMicrokernelTester()
                        .input_stride(bw)
                        .output_stride(bh)
                        .block_width(bw)
                        .block_height(bh)
                        .iterations(1);
      tester.Test(xnn_x32_transpose_ukernel__1x4_scalar_int);
    }
  }
}
179
// Single case: block_height=1, block_width=8, input_stride=8, output_stride=1.
TEST(X32_TRANSPOSE__1X4_SCALAR_INT, bh_1_bw_8) {
  auto tester = TransposeMicrokernelTester()
                    .input_stride(8)
                    .output_stride(1)
                    .block_width(8)
                    .block_height(1)
                    .iterations(1);
  tester.Test(xnn_x32_transpose_ukernel__1x4_scalar_int);
}
189
// Sweeps block_width over [5, 8) with input_stride == block_width;
// block_height and output_stride are fixed at 1.
TEST(X32_TRANSPOSE__1X4_SCALAR_INT, bh_1_bw_5_8) {
  for (size_t bw = 5; bw < 8; ++bw) {
    auto tester = TransposeMicrokernelTester()
                      .input_stride(bw)
                      .output_stride(1)
                      .block_width(bw)
                      .block_height(1)
                      .iterations(1);
    tester.Test(xnn_x32_transpose_ukernel__1x4_scalar_int);
  }
}
201
// Sweeps block_width over [5, 8) with input_stride == block_width;
// block_height and output_stride are fixed at 2.
TEST(X32_TRANSPOSE__1X4_SCALAR_INT, bh_2_bw_5_8) {
  for (size_t bw = 5; bw < 8; ++bw) {
    auto tester = TransposeMicrokernelTester()
                      .input_stride(bw)
                      .output_stride(2)
                      .block_width(bw)
                      .block_height(2)
                      .iterations(1);
    tester.Test(xnn_x32_transpose_ukernel__1x4_scalar_int);
  }
}
213
// Single case: block_height=2, block_width=4, input_stride=4, output_stride=2.
TEST(X32_TRANSPOSE__1X4_SCALAR_INT, bh_2_bw_4) {
  auto tester = TransposeMicrokernelTester()
                    .input_stride(4)
                    .output_stride(2)
                    .block_width(4)
                    .block_height(2)
                    .iterations(1);
  tester.Test(xnn_x32_transpose_ukernel__1x4_scalar_int);
}
223
// Sweeps block_height over [2, 2] with output_stride == block_height;
// block_width and input_stride are fixed at 4.
// The upper bound is inclusive: an exclusive bound of 2 leaves the range empty,
// so the test would pass without ever calling the kernel.
TEST(X32_TRANSPOSE__1X4_SCALAR_INT, bh_2_2_bw_4) {
  for (size_t bh = 2; bh <= 2; ++bh) {
    TransposeMicrokernelTester()
      .input_stride(4)
      .output_stride(bh)
      .block_width(4)
      .block_height(bh)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__1x4_scalar_int);
  }
}
235
// Sweeps block_height over [2, 2] with output_stride == block_height;
// block_width and input_stride are fixed at 8.
// The upper bound is inclusive: an exclusive bound of 2 leaves the range empty,
// so the test would pass without ever calling the kernel.
TEST(X32_TRANSPOSE__1X4_SCALAR_INT, bh_2_2_bw_8) {
  for (size_t bh = 2; bh <= 2; ++bh) {
    TransposeMicrokernelTester()
      .input_stride(8)
      .output_stride(bh)
      .block_width(8)
      .block_height(bh)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__1x4_scalar_int);
  }
}
247
// Sweeps block_height over [2, 2] and block_width over [5, 8), with
// output_stride == block_height and input_stride == block_width.
// The height bound is inclusive: an exclusive bound of 2 leaves the outer range
// empty, so the test would pass without ever calling the kernel.
TEST(X32_TRANSPOSE__1X4_SCALAR_INT, bh_2_2_bw_5_8) {
  for (size_t bh = 2; bh <= 2; ++bh) {
    for (size_t bw = 5; bw < 8; ++bw) {
      TransposeMicrokernelTester()
        .input_stride(bw)
        .output_stride(bh)
        .block_width(bw)
        .block_height(bh)
        .iterations(1)
        .Test(xnn_x32_transpose_ukernel__1x4_scalar_int);
    }
  }
}
261
// Single case: block_height=1, block_width=4, input_stride=8 (wider than the block), output_stride=1.
TEST(X32_TRANSPOSE__1X4_SCALAR_INT, bh_1_bw_4_is_8) {
  auto tester = TransposeMicrokernelTester()
                    .input_stride(8)
                    .output_stride(1)
                    .block_width(4)
                    .block_height(1)
                    .iterations(1);
  tester.Test(xnn_x32_transpose_ukernel__1x4_scalar_int);
}
271
// Single case: block_height=1, block_width=4, input_stride=4, output_stride=2 (larger than block_height).
TEST(X32_TRANSPOSE__1X4_SCALAR_INT, bh_1_bw_4_os_2) {
  auto tester = TransposeMicrokernelTester()
                    .input_stride(4)
                    .output_stride(2)
                    .block_width(4)
                    .block_height(1)
                    .iterations(1);
  tester.Test(xnn_x32_transpose_ukernel__1x4_scalar_int);
}
281
// Single case: block_height=1, block_width=4, input_stride=8, output_stride=2 (both larger than the block).
TEST(X32_TRANSPOSE__1X4_SCALAR_INT, bh_1_bw_4_is_8_os_2) {
  auto tester = TransposeMicrokernelTester()
                    .input_stride(8)
                    .output_stride(2)
                    .block_width(4)
                    .block_height(1)
                    .iterations(1);
  tester.Test(xnn_x32_transpose_ukernel__1x4_scalar_int);
}
291
// Single case: block_height=2, block_width=1, input_stride=1, output_stride=2.
TEST(X32_TRANSPOSE__2X1_SCALAR_INT, bh_2_bw_1) {
  auto tester = TransposeMicrokernelTester()
                    .input_stride(1)
                    .output_stride(2)
                    .block_width(1)
                    .block_height(2)
                    .iterations(1);
  tester.Test(xnn_x32_transpose_ukernel__2x1_scalar_int);
}
301
// Full grid: block_height in [1, 4] and block_width in [1, 2], with
// output_stride == block_height and input_stride == block_width.
TEST(X32_TRANSPOSE__2X1_SCALAR_INT, bh_1_4_bw_1_2) {
  for (size_t bh = 1; bh <= 4; ++bh) {
    for (size_t bw = 1; bw <= 2; ++bw) {
      auto tester = TransposeMicrokernelTester()
                        .input_stride(bw)
                        .output_stride(bh)
                        .block_width(bw)
                        .block_height(bh)
                        .iterations(1);
      tester.Test(xnn_x32_transpose_ukernel__2x1_scalar_int);
    }
  }
}
315
// Single case: block_height=2, block_width=2, input_stride=2, output_stride=2.
TEST(X32_TRANSPOSE__2X1_SCALAR_INT, bh_2_bw_2) {
  auto tester = TransposeMicrokernelTester()
                    .input_stride(2)
                    .output_stride(2)
                    .block_width(2)
                    .block_height(2)
                    .iterations(1);
  tester.Test(xnn_x32_transpose_ukernel__2x1_scalar_int);
}
325
// Sweeps block_width over [2, 2] with input_stride == block_width;
// block_height and output_stride are fixed at 2.
// The upper bound is inclusive: an exclusive bound of 2 leaves the range empty,
// so the test would pass without ever calling the kernel.
TEST(X32_TRANSPOSE__2X1_SCALAR_INT, bh_2_bw_2_2) {
  for (size_t bw = 2; bw <= 2; ++bw) {
    TransposeMicrokernelTester()
      .input_stride(bw)
      .output_stride(2)
      .block_width(bw)
      .block_height(2)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__2x1_scalar_int);
  }
}
337
// Sweeps block_width over [2, 2] with input_stride == block_width;
// block_height and output_stride are fixed at 4.
// The upper bound is inclusive: an exclusive bound of 2 leaves the range empty,
// so the test would pass without ever calling the kernel.
TEST(X32_TRANSPOSE__2X1_SCALAR_INT, bh_4_bw_2_2) {
  for (size_t bw = 2; bw <= 2; ++bw) {
    TransposeMicrokernelTester()
      .input_stride(bw)
      .output_stride(4)
      .block_width(bw)
      .block_height(4)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__2x1_scalar_int);
  }
}
349
// Single case: block_height=4, block_width=1, input_stride=1, output_stride=4.
TEST(X32_TRANSPOSE__2X1_SCALAR_INT, bh_4_bw_1) {
  auto tester = TransposeMicrokernelTester()
                    .input_stride(1)
                    .output_stride(4)
                    .block_width(1)
                    .block_height(4)
                    .iterations(1);
  tester.Test(xnn_x32_transpose_ukernel__2x1_scalar_int);
}
359
// Sweeps block_height over [3, 4) with output_stride == block_height;
// block_width and input_stride are fixed at 1.
TEST(X32_TRANSPOSE__2X1_SCALAR_INT, bh_3_4_bw_1) {
  for (size_t bh = 3; bh < 4; ++bh) {
    auto tester = TransposeMicrokernelTester()
                      .input_stride(1)
                      .output_stride(bh)
                      .block_width(1)
                      .block_height(bh)
                      .iterations(1);
    tester.Test(xnn_x32_transpose_ukernel__2x1_scalar_int);
  }
}
371
// Sweeps block_height over [3, 4) with output_stride == block_height;
// block_width and input_stride are fixed at 2.
TEST(X32_TRANSPOSE__2X1_SCALAR_INT, bh_3_4_bw_2) {
  for (size_t bh = 3; bh < 4; ++bh) {
    auto tester = TransposeMicrokernelTester()
                      .input_stride(2)
                      .output_stride(bh)
                      .block_width(2)
                      .block_height(bh)
                      .iterations(1);
    tester.Test(xnn_x32_transpose_ukernel__2x1_scalar_int);
  }
}
383
// Sweeps block_height over [3, 4) and block_width over [2, 2], with
// output_stride == block_height and input_stride == block_width.
// The width bound is inclusive: an exclusive bound of 2 leaves the inner range
// empty, so the test would pass without ever calling the kernel.
TEST(X32_TRANSPOSE__2X1_SCALAR_INT, bh_3_4_bw_2_2) {
  for (size_t bh = 3; bh < 4; ++bh) {
    for (size_t bw = 2; bw <= 2; ++bw) {
      TransposeMicrokernelTester()
        .input_stride(bw)
        .output_stride(bh)
        .block_width(bw)
        .block_height(bh)
        .iterations(1)
        .Test(xnn_x32_transpose_ukernel__2x1_scalar_int);
    }
  }
}
397
// Single case: block_height=2, block_width=1, input_stride=2 (wider than the block), output_stride=2.
TEST(X32_TRANSPOSE__2X1_SCALAR_INT, bh_2_bw_1_is_2) {
  auto tester = TransposeMicrokernelTester()
                    .input_stride(2)
                    .output_stride(2)
                    .block_width(1)
                    .block_height(2)
                    .iterations(1);
  tester.Test(xnn_x32_transpose_ukernel__2x1_scalar_int);
}
407
// Single case: block_height=2, block_width=1, input_stride=1, output_stride=4 (larger than block_height).
TEST(X32_TRANSPOSE__2X1_SCALAR_INT, bh_2_bw_1_os_4) {
  auto tester = TransposeMicrokernelTester()
                    .input_stride(1)
                    .output_stride(4)
                    .block_width(1)
                    .block_height(2)
                    .iterations(1);
  tester.Test(xnn_x32_transpose_ukernel__2x1_scalar_int);
}
417
// Single case: block_height=2, block_width=1, input_stride=2, output_stride=4 (both larger than the block).
TEST(X32_TRANSPOSE__2X1_SCALAR_INT, bh_2_bw_1_is_2_os_4) {
  auto tester = TransposeMicrokernelTester()
                    .input_stride(2)
                    .output_stride(4)
                    .block_width(1)
                    .block_height(2)
                    .iterations(1);
  tester.Test(xnn_x32_transpose_ukernel__2x1_scalar_int);
}
427
// Single case: block_height=2, block_width=2, input_stride=2, output_stride=2.
TEST(X32_TRANSPOSE__2X2_SCALAR_INT, bh_2_bw_2) {
  auto tester = TransposeMicrokernelTester()
                    .input_stride(2)
                    .output_stride(2)
                    .block_width(2)
                    .block_height(2)
                    .iterations(1);
  tester.Test(xnn_x32_transpose_ukernel__2x2_scalar_int);
}
437
// Full grid: block_height in [1, 4] and block_width in [1, 4], with
// output_stride == block_height and input_stride == block_width.
TEST(X32_TRANSPOSE__2X2_SCALAR_INT, bh_1_4_bw_1_4) {
  for (size_t bh = 1; bh <= 4; ++bh) {
    for (size_t bw = 1; bw <= 4; ++bw) {
      auto tester = TransposeMicrokernelTester()
                        .input_stride(bw)
                        .output_stride(bh)
                        .block_width(bw)
                        .block_height(bh)
                        .iterations(1);
      tester.Test(xnn_x32_transpose_ukernel__2x2_scalar_int);
    }
  }
}
451
// Single case: block_height=2, block_width=4, input_stride=4, output_stride=2.
TEST(X32_TRANSPOSE__2X2_SCALAR_INT, bh_2_bw_4) {
  auto tester = TransposeMicrokernelTester()
                    .input_stride(4)
                    .output_stride(2)
                    .block_width(4)
                    .block_height(2)
                    .iterations(1);
  tester.Test(xnn_x32_transpose_ukernel__2x2_scalar_int);
}
461
// Sweeps block_width over [3, 4) with input_stride == block_width;
// block_height and output_stride are fixed at 2.
TEST(X32_TRANSPOSE__2X2_SCALAR_INT, bh_2_bw_3_4) {
  for (size_t bw = 3; bw < 4; ++bw) {
    auto tester = TransposeMicrokernelTester()
                      .input_stride(bw)
                      .output_stride(2)
                      .block_width(bw)
                      .block_height(2)
                      .iterations(1);
    tester.Test(xnn_x32_transpose_ukernel__2x2_scalar_int);
  }
}
473
// Sweeps block_width over [3, 4) with input_stride == block_width;
// block_height and output_stride are fixed at 4.
TEST(X32_TRANSPOSE__2X2_SCALAR_INT, bh_4_bw_3_4) {
  for (size_t bw = 3; bw < 4; ++bw) {
    auto tester = TransposeMicrokernelTester()
                      .input_stride(bw)
                      .output_stride(4)
                      .block_width(bw)
                      .block_height(4)
                      .iterations(1);
    tester.Test(xnn_x32_transpose_ukernel__2x2_scalar_int);
  }
}
485
// Single case: block_height=4, block_width=2, input_stride=2, output_stride=4.
TEST(X32_TRANSPOSE__2X2_SCALAR_INT, bh_4_bw_2) {
  auto tester = TransposeMicrokernelTester()
                    .input_stride(2)
                    .output_stride(4)
                    .block_width(2)
                    .block_height(4)
                    .iterations(1);
  tester.Test(xnn_x32_transpose_ukernel__2x2_scalar_int);
}
495
// Sweeps block_height over [3, 4) with output_stride == block_height;
// block_width and input_stride are fixed at 2.
TEST(X32_TRANSPOSE__2X2_SCALAR_INT, bh_3_4_bw_2) {
  for (size_t bh = 3; bh < 4; ++bh) {
    auto tester = TransposeMicrokernelTester()
                      .input_stride(2)
                      .output_stride(bh)
                      .block_width(2)
                      .block_height(bh)
                      .iterations(1);
    tester.Test(xnn_x32_transpose_ukernel__2x2_scalar_int);
  }
}
507
// Sweeps block_height over [3, 4) with output_stride == block_height;
// block_width and input_stride are fixed at 4.
TEST(X32_TRANSPOSE__2X2_SCALAR_INT, bh_3_4_bw_4) {
  for (size_t bh = 3; bh < 4; ++bh) {
    auto tester = TransposeMicrokernelTester()
                      .input_stride(4)
                      .output_stride(bh)
                      .block_width(4)
                      .block_height(bh)
                      .iterations(1);
    tester.Test(xnn_x32_transpose_ukernel__2x2_scalar_int);
  }
}
519
// Sweeps block_height over [3, 4) and block_width over [3, 4), with
// output_stride == block_height and input_stride == block_width.
TEST(X32_TRANSPOSE__2X2_SCALAR_INT, bh_3_4_bw_3_4) {
  for (size_t bh = 3; bh < 4; ++bh) {
    for (size_t bw = 3; bw < 4; ++bw) {
      auto tester = TransposeMicrokernelTester()
                        .input_stride(bw)
                        .output_stride(bh)
                        .block_width(bw)
                        .block_height(bh)
                        .iterations(1);
      tester.Test(xnn_x32_transpose_ukernel__2x2_scalar_int);
    }
  }
}
533
// Single case: block_height=2, block_width=2, input_stride=4 (wider than the block), output_stride=2.
TEST(X32_TRANSPOSE__2X2_SCALAR_INT, bh_2_bw_2_is_4) {
  auto tester = TransposeMicrokernelTester()
                    .input_stride(4)
                    .output_stride(2)
                    .block_width(2)
                    .block_height(2)
                    .iterations(1);
  tester.Test(xnn_x32_transpose_ukernel__2x2_scalar_int);
}
543
// Single case: block_height=2, block_width=2, input_stride=2, output_stride=4 (larger than block_height).
TEST(X32_TRANSPOSE__2X2_SCALAR_INT, bh_2_bw_2_os_4) {
  auto tester = TransposeMicrokernelTester()
                    .input_stride(2)
                    .output_stride(4)
                    .block_width(2)
                    .block_height(2)
                    .iterations(1);
  tester.Test(xnn_x32_transpose_ukernel__2x2_scalar_int);
}
553
// Single case: block_height=2, block_width=2, input_stride=4, output_stride=4 (both larger than the block).
TEST(X32_TRANSPOSE__2X2_SCALAR_INT, bh_2_bw_2_is_4_os_4) {
  auto tester = TransposeMicrokernelTester()
                    .input_stride(4)
                    .output_stride(4)
                    .block_width(2)
                    .block_height(2)
                    .iterations(1);
  tester.Test(xnn_x32_transpose_ukernel__2x2_scalar_int);
}
563
// Single case: block_height=2, block_width=4, input_stride=4, output_stride=2.
TEST(X32_TRANSPOSE__2X4_SCALAR_INT, bh_2_bw_4) {
  auto tester = TransposeMicrokernelTester()
                    .input_stride(4)
                    .output_stride(2)
                    .block_width(4)
                    .block_height(2)
                    .iterations(1);
  tester.Test(xnn_x32_transpose_ukernel__2x4_scalar_int);
}
573
// Full grid: block_height in [1, 4] and block_width in [1, 8], with
// output_stride == block_height and input_stride == block_width.
TEST(X32_TRANSPOSE__2X4_SCALAR_INT, bh_1_4_bw_1_8) {
  for (size_t bh = 1; bh <= 4; ++bh) {
    for (size_t bw = 1; bw <= 8; ++bw) {
      auto tester = TransposeMicrokernelTester()
                        .input_stride(bw)
                        .output_stride(bh)
                        .block_width(bw)
                        .block_height(bh)
                        .iterations(1);
      tester.Test(xnn_x32_transpose_ukernel__2x4_scalar_int);
    }
  }
}
587
// Single case: block_height=2, block_width=8, input_stride=8, output_stride=2.
TEST(X32_TRANSPOSE__2X4_SCALAR_INT, bh_2_bw_8) {
  auto tester = TransposeMicrokernelTester()
                    .input_stride(8)
                    .output_stride(2)
                    .block_width(8)
                    .block_height(2)
                    .iterations(1);
  tester.Test(xnn_x32_transpose_ukernel__2x4_scalar_int);
}
597
// Sweeps block_width over [5, 8) with input_stride == block_width;
// block_height and output_stride are fixed at 2.
TEST(X32_TRANSPOSE__2X4_SCALAR_INT, bh_2_bw_5_8) {
  for (size_t bw = 5; bw < 8; ++bw) {
    auto tester = TransposeMicrokernelTester()
                      .input_stride(bw)
                      .output_stride(2)
                      .block_width(bw)
                      .block_height(2)
                      .iterations(1);
    tester.Test(xnn_x32_transpose_ukernel__2x4_scalar_int);
  }
}
609
// Sweeps block_width over [5, 8) with input_stride == block_width;
// block_height and output_stride are fixed at 4.
TEST(X32_TRANSPOSE__2X4_SCALAR_INT, bh_4_bw_5_8) {
  for (size_t bw = 5; bw < 8; ++bw) {
    auto tester = TransposeMicrokernelTester()
                      .input_stride(bw)
                      .output_stride(4)
                      .block_width(bw)
                      .block_height(4)
                      .iterations(1);
    tester.Test(xnn_x32_transpose_ukernel__2x4_scalar_int);
  }
}
621
// Single case: block_height=4, block_width=4, input_stride=4, output_stride=4.
TEST(X32_TRANSPOSE__2X4_SCALAR_INT, bh_4_bw_4) {
  auto tester = TransposeMicrokernelTester()
                    .input_stride(4)
                    .output_stride(4)
                    .block_width(4)
                    .block_height(4)
                    .iterations(1);
  tester.Test(xnn_x32_transpose_ukernel__2x4_scalar_int);
}
631
// Sweeps block_height over [3, 4) with output_stride == block_height;
// block_width and input_stride are fixed at 4.
TEST(X32_TRANSPOSE__2X4_SCALAR_INT, bh_3_4_bw_4) {
  for (size_t bh = 3; bh < 4; ++bh) {
    auto tester = TransposeMicrokernelTester()
                      .input_stride(4)
                      .output_stride(bh)
                      .block_width(4)
                      .block_height(bh)
                      .iterations(1);
    tester.Test(xnn_x32_transpose_ukernel__2x4_scalar_int);
  }
}
643
// Sweeps block_height over [3, 4) with output_stride == block_height;
// block_width and input_stride are fixed at 8.
TEST(X32_TRANSPOSE__2X4_SCALAR_INT, bh_3_4_bw_8) {
  for (size_t bh = 3; bh < 4; ++bh) {
    auto tester = TransposeMicrokernelTester()
                      .input_stride(8)
                      .output_stride(bh)
                      .block_width(8)
                      .block_height(bh)
                      .iterations(1);
    tester.Test(xnn_x32_transpose_ukernel__2x4_scalar_int);
  }
}
655
// Sweeps block_height over [3, 4) and block_width over [5, 8), with
// output_stride == block_height and input_stride == block_width.
TEST(X32_TRANSPOSE__2X4_SCALAR_INT, bh_3_4_bw_5_8) {
  for (size_t bh = 3; bh < 4; ++bh) {
    for (size_t bw = 5; bw < 8; ++bw) {
      auto tester = TransposeMicrokernelTester()
                        .input_stride(bw)
                        .output_stride(bh)
                        .block_width(bw)
                        .block_height(bh)
                        .iterations(1);
      tester.Test(xnn_x32_transpose_ukernel__2x4_scalar_int);
    }
  }
}
669
// Single case: block_height=2, block_width=4, input_stride=8 (wider than the block), output_stride=2.
TEST(X32_TRANSPOSE__2X4_SCALAR_INT, bh_2_bw_4_is_8) {
  auto tester = TransposeMicrokernelTester()
                    .input_stride(8)
                    .output_stride(2)
                    .block_width(4)
                    .block_height(2)
                    .iterations(1);
  tester.Test(xnn_x32_transpose_ukernel__2x4_scalar_int);
}
679
// Single case: block_height=2, block_width=4, input_stride=4, output_stride=4 (larger than block_height).
TEST(X32_TRANSPOSE__2X4_SCALAR_INT, bh_2_bw_4_os_4) {
  auto tester = TransposeMicrokernelTester()
                    .input_stride(4)
                    .output_stride(4)
                    .block_width(4)
                    .block_height(2)
                    .iterations(1);
  tester.Test(xnn_x32_transpose_ukernel__2x4_scalar_int);
}
689
// Single case: block_height=2, block_width=4, input_stride=8, output_stride=4 (both larger than the block).
TEST(X32_TRANSPOSE__2X4_SCALAR_INT, bh_2_bw_4_is_8_os_4) {
  auto tester = TransposeMicrokernelTester()
                    .input_stride(8)
                    .output_stride(4)
                    .block_width(4)
                    .block_height(2)
                    .iterations(1);
  tester.Test(xnn_x32_transpose_ukernel__2x4_scalar_int);
}
699
// Single case: block_height=4, block_width=1, input_stride=1, output_stride=4.
TEST(X32_TRANSPOSE__4X1_SCALAR_INT, bh_4_bw_1) {
  auto tester = TransposeMicrokernelTester()
                    .input_stride(1)
                    .output_stride(4)
                    .block_width(1)
                    .block_height(4)
                    .iterations(1);
  tester.Test(xnn_x32_transpose_ukernel__4x1_scalar_int);
}
709
// Full grid: block_height in [1, 8] and block_width in [1, 2], with
// output_stride == block_height and input_stride == block_width.
TEST(X32_TRANSPOSE__4X1_SCALAR_INT, bh_1_8_bw_1_2) {
  for (size_t bh = 1; bh <= 8; ++bh) {
    for (size_t bw = 1; bw <= 2; ++bw) {
      auto tester = TransposeMicrokernelTester()
                        .input_stride(bw)
                        .output_stride(bh)
                        .block_width(bw)
                        .block_height(bh)
                        .iterations(1);
      tester.Test(xnn_x32_transpose_ukernel__4x1_scalar_int);
    }
  }
}
723
// Single case: block_height=4, block_width=2, input_stride=2, output_stride=4.
TEST(X32_TRANSPOSE__4X1_SCALAR_INT, bh_4_bw_2) {
  auto tester = TransposeMicrokernelTester()
                    .input_stride(2)
                    .output_stride(4)
                    .block_width(2)
                    .block_height(4)
                    .iterations(1);
  tester.Test(xnn_x32_transpose_ukernel__4x1_scalar_int);
}
733
// Sweeps block_width over [2, 2] with input_stride == block_width;
// block_height and output_stride are fixed at 4.
// The upper bound is inclusive: an exclusive bound of 2 leaves the range empty,
// so the test would pass without ever calling the kernel.
TEST(X32_TRANSPOSE__4X1_SCALAR_INT, bh_4_bw_2_2) {
  for (size_t bw = 2; bw <= 2; ++bw) {
    TransposeMicrokernelTester()
      .input_stride(bw)
      .output_stride(4)
      .block_width(bw)
      .block_height(4)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__4x1_scalar_int);
  }
}
745
// Sweeps block_width over [2, 2] with input_stride == block_width;
// block_height and output_stride are fixed at 8.
// The upper bound is inclusive: an exclusive bound of 2 leaves the range empty,
// so the test would pass without ever calling the kernel.
TEST(X32_TRANSPOSE__4X1_SCALAR_INT, bh_8_bw_2_2) {
  for (size_t bw = 2; bw <= 2; ++bw) {
    TransposeMicrokernelTester()
      .input_stride(bw)
      .output_stride(8)
      .block_width(bw)
      .block_height(8)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__4x1_scalar_int);
  }
}
757
// Single case: block_height=8, block_width=1, input_stride=1, output_stride=8.
TEST(X32_TRANSPOSE__4X1_SCALAR_INT, bh_8_bw_1) {
  auto tester = TransposeMicrokernelTester()
                    .input_stride(1)
                    .output_stride(8)
                    .block_width(1)
                    .block_height(8)
                    .iterations(1);
  tester.Test(xnn_x32_transpose_ukernel__4x1_scalar_int);
}
767
// Sweeps block_height over [5, 8) with output_stride == block_height;
// block_width and input_stride are fixed at 1.
TEST(X32_TRANSPOSE__4X1_SCALAR_INT, bh_5_8_bw_1) {
  for (size_t bh = 5; bh < 8; ++bh) {
    auto tester = TransposeMicrokernelTester()
                      .input_stride(1)
                      .output_stride(bh)
                      .block_width(1)
                      .block_height(bh)
                      .iterations(1);
    tester.Test(xnn_x32_transpose_ukernel__4x1_scalar_int);
  }
}
779
// Sweeps block_height over [5, 8) with output_stride == block_height;
// block_width and input_stride are fixed at 2.
TEST(X32_TRANSPOSE__4X1_SCALAR_INT, bh_5_8_bw_2) {
  for (size_t bh = 5; bh < 8; ++bh) {
    auto tester = TransposeMicrokernelTester()
                      .input_stride(2)
                      .output_stride(bh)
                      .block_width(2)
                      .block_height(bh)
                      .iterations(1);
    tester.Test(xnn_x32_transpose_ukernel__4x1_scalar_int);
  }
}
791
// Sweeps block_height over [5, 8) and block_width over [2, 2], with
// output_stride == block_height and input_stride == block_width.
// The width bound is inclusive: an exclusive bound of 2 leaves the inner range
// empty, so the test would pass without ever calling the kernel.
TEST(X32_TRANSPOSE__4X1_SCALAR_INT, bh_5_8_bw_2_2) {
  for (size_t bh = 5; bh < 8; ++bh) {
    for (size_t bw = 2; bw <= 2; ++bw) {
      TransposeMicrokernelTester()
        .input_stride(bw)
        .output_stride(bh)
        .block_width(bw)
        .block_height(bh)
        .iterations(1)
        .Test(xnn_x32_transpose_ukernel__4x1_scalar_int);
    }
  }
}
805
// Single case: block_height=4, block_width=1, input_stride=2 (wider than the block), output_stride=4.
TEST(X32_TRANSPOSE__4X1_SCALAR_INT, bh_4_bw_1_is_2) {
  auto tester = TransposeMicrokernelTester()
                    .input_stride(2)
                    .output_stride(4)
                    .block_width(1)
                    .block_height(4)
                    .iterations(1);
  tester.Test(xnn_x32_transpose_ukernel__4x1_scalar_int);
}
815
// Single case: block_height=4, block_width=1, input_stride=1, output_stride=8 (larger than block_height).
TEST(X32_TRANSPOSE__4X1_SCALAR_INT, bh_4_bw_1_os_8) {
  auto tester = TransposeMicrokernelTester()
                    .input_stride(1)
                    .output_stride(8)
                    .block_width(1)
                    .block_height(4)
                    .iterations(1);
  tester.Test(xnn_x32_transpose_ukernel__4x1_scalar_int);
}
825
// Single case: block_height=4, block_width=1, input_stride=2, output_stride=8 (both larger than the block).
TEST(X32_TRANSPOSE__4X1_SCALAR_INT, bh_4_bw_1_is_2_os_8) {
  auto tester = TransposeMicrokernelTester()
                    .input_stride(2)
                    .output_stride(8)
                    .block_width(1)
                    .block_height(4)
                    .iterations(1);
  tester.Test(xnn_x32_transpose_ukernel__4x1_scalar_int);
}
835
// Single case: block_height=4, block_width=2, input_stride=2, output_stride=4.
TEST(X32_TRANSPOSE__4X2_SCALAR_INT, bh_4_bw_2) {
  auto tester = TransposeMicrokernelTester()
                    .input_stride(2)
                    .output_stride(4)
                    .block_width(2)
                    .block_height(4)
                    .iterations(1);
  tester.Test(xnn_x32_transpose_ukernel__4x2_scalar_int);
}
845
// Full grid: block_height in [1, 8] and block_width in [1, 4], with
// output_stride == block_height and input_stride == block_width.
TEST(X32_TRANSPOSE__4X2_SCALAR_INT, bh_1_8_bw_1_4) {
  for (size_t bh = 1; bh <= 8; ++bh) {
    for (size_t bw = 1; bw <= 4; ++bw) {
      auto tester = TransposeMicrokernelTester()
                        .input_stride(bw)
                        .output_stride(bh)
                        .block_width(bw)
                        .block_height(bh)
                        .iterations(1);
      tester.Test(xnn_x32_transpose_ukernel__4x2_scalar_int);
    }
  }
}
859
// Single case: block_height=4, block_width=4, input_stride=4, output_stride=4.
TEST(X32_TRANSPOSE__4X2_SCALAR_INT, bh_4_bw_4) {
  auto tester = TransposeMicrokernelTester()
                    .input_stride(4)
                    .output_stride(4)
                    .block_width(4)
                    .block_height(4)
                    .iterations(1);
  tester.Test(xnn_x32_transpose_ukernel__4x2_scalar_int);
}
869
// Sweeps block_width over [3, 4) with input_stride == block_width;
// block_height and output_stride are fixed at 4.
TEST(X32_TRANSPOSE__4X2_SCALAR_INT, bh_4_bw_3_4) {
  for (size_t bw = 3; bw < 4; ++bw) {
    auto tester = TransposeMicrokernelTester()
                      .input_stride(bw)
                      .output_stride(4)
                      .block_width(bw)
                      .block_height(4)
                      .iterations(1);
    tester.Test(xnn_x32_transpose_ukernel__4x2_scalar_int);
  }
}
881
// Sweeps block_width over [3, 4) with input_stride == block_width;
// block_height and output_stride are fixed at 8.
TEST(X32_TRANSPOSE__4X2_SCALAR_INT, bh_8_bw_3_4) {
  for (size_t bw = 3; bw < 4; ++bw) {
    auto tester = TransposeMicrokernelTester()
                      .input_stride(bw)
                      .output_stride(8)
                      .block_width(bw)
                      .block_height(8)
                      .iterations(1);
    tester.Test(xnn_x32_transpose_ukernel__4x2_scalar_int);
  }
}
893
// Single case: block_height=8, block_width=2, input_stride=2, output_stride=8.
TEST(X32_TRANSPOSE__4X2_SCALAR_INT, bh_8_bw_2) {
  auto tester = TransposeMicrokernelTester()
                    .input_stride(2)
                    .output_stride(8)
                    .block_width(2)
                    .block_height(8)
                    .iterations(1);
  tester.Test(xnn_x32_transpose_ukernel__4x2_scalar_int);
}
903
// Sweeps block_height over [5, 8) with output_stride == block_height;
// block_width and input_stride are fixed at 2.
TEST(X32_TRANSPOSE__4X2_SCALAR_INT, bh_5_8_bw_2) {
  for (size_t bh = 5; bh < 8; ++bh) {
    auto tester = TransposeMicrokernelTester()
                      .input_stride(2)
                      .output_stride(bh)
                      .block_width(2)
                      .block_height(bh)
                      .iterations(1);
    tester.Test(xnn_x32_transpose_ukernel__4x2_scalar_int);
  }
}
915
// Sweeps block_height over [5, 8) with output_stride == block_height;
// block_width and input_stride are fixed at 4.
TEST(X32_TRANSPOSE__4X2_SCALAR_INT, bh_5_8_bw_4) {
  for (size_t bh = 5; bh < 8; ++bh) {
    auto tester = TransposeMicrokernelTester()
                      .input_stride(4)
                      .output_stride(bh)
                      .block_width(4)
                      .block_height(bh)
                      .iterations(1);
    tester.Test(xnn_x32_transpose_ukernel__4x2_scalar_int);
  }
}
927
// Sweeps block_height over [5, 8) and block_width over [3, 4), with
// output_stride == block_height and input_stride == block_width.
TEST(X32_TRANSPOSE__4X2_SCALAR_INT, bh_5_8_bw_3_4) {
  for (size_t bh = 5; bh < 8; ++bh) {
    for (size_t bw = 3; bw < 4; ++bw) {
      auto tester = TransposeMicrokernelTester()
                        .input_stride(bw)
                        .output_stride(bh)
                        .block_width(bw)
                        .block_height(bh)
                        .iterations(1);
      tester.Test(xnn_x32_transpose_ukernel__4x2_scalar_int);
    }
  }
}
941
// Single case: block_height=4, block_width=2, input_stride=4 (wider than the block), output_stride=4.
TEST(X32_TRANSPOSE__4X2_SCALAR_INT, bh_4_bw_2_is_4) {
  auto tester = TransposeMicrokernelTester()
                    .input_stride(4)
                    .output_stride(4)
                    .block_width(2)
                    .block_height(4)
                    .iterations(1);
  tester.Test(xnn_x32_transpose_ukernel__4x2_scalar_int);
}
951
// Single case: block_height=4, block_width=2, input_stride=2, output_stride=8 (larger than block_height).
TEST(X32_TRANSPOSE__4X2_SCALAR_INT, bh_4_bw_2_os_8) {
  auto tester = TransposeMicrokernelTester()
                    .input_stride(2)
                    .output_stride(8)
                    .block_width(2)
                    .block_height(4)
                    .iterations(1);
  tester.Test(xnn_x32_transpose_ukernel__4x2_scalar_int);
}
961
// Single case: block_height=4, block_width=2, input_stride=4, output_stride=8 (both larger than the block).
TEST(X32_TRANSPOSE__4X2_SCALAR_INT, bh_4_bw_2_is_4_os_8) {
  auto tester = TransposeMicrokernelTester()
                    .input_stride(4)
                    .output_stride(8)
                    .block_width(2)
                    .block_height(4)
                    .iterations(1);
  tester.Test(xnn_x32_transpose_ukernel__4x2_scalar_int);
}
971
// Single case: block_height=4, block_width=4, input_stride=4, output_stride=4.
TEST(X32_TRANSPOSE__4X4_SCALAR_INT, bh_4_bw_4) {
  auto tester = TransposeMicrokernelTester()
                    .input_stride(4)
                    .output_stride(4)
                    .block_width(4)
                    .block_height(4)
                    .iterations(1);
  tester.Test(xnn_x32_transpose_ukernel__4x4_scalar_int);
}
981
// Full grid: block_height in [1, 8] and block_width in [1, 8], with
// output_stride == block_height and input_stride == block_width.
TEST(X32_TRANSPOSE__4X4_SCALAR_INT, bh_1_8_bw_1_8) {
  for (size_t bh = 1; bh <= 8; ++bh) {
    for (size_t bw = 1; bw <= 8; ++bw) {
      auto tester = TransposeMicrokernelTester()
                        .input_stride(bw)
                        .output_stride(bh)
                        .block_width(bw)
                        .block_height(bh)
                        .iterations(1);
      tester.Test(xnn_x32_transpose_ukernel__4x4_scalar_int);
    }
  }
}
995
// Single case: block_height=4, block_width=8, input_stride=8, output_stride=4.
TEST(X32_TRANSPOSE__4X4_SCALAR_INT, bh_4_bw_8) {
  auto tester = TransposeMicrokernelTester()
                    .input_stride(8)
                    .output_stride(4)
                    .block_width(8)
                    .block_height(4)
                    .iterations(1);
  tester.Test(xnn_x32_transpose_ukernel__4x4_scalar_int);
}
1005
// Sweeps block_width over [5, 8) with input_stride == block_width;
// block_height and output_stride are fixed at 4.
TEST(X32_TRANSPOSE__4X4_SCALAR_INT, bh_4_bw_5_8) {
  for (size_t bw = 5; bw < 8; ++bw) {
    auto tester = TransposeMicrokernelTester()
                      .input_stride(bw)
                      .output_stride(4)
                      .block_width(bw)
                      .block_height(4)
                      .iterations(1);
    tester.Test(xnn_x32_transpose_ukernel__4x4_scalar_int);
  }
}
1017
// Sweeps block_width over [5, 8) with input_stride == block_width;
// block_height and output_stride are fixed at 8.
TEST(X32_TRANSPOSE__4X4_SCALAR_INT, bh_8_bw_5_8) {
  for (size_t bw = 5; bw < 8; ++bw) {
    auto tester = TransposeMicrokernelTester()
                      .input_stride(bw)
                      .output_stride(8)
                      .block_width(bw)
                      .block_height(8)
                      .iterations(1);
    tester.Test(xnn_x32_transpose_ukernel__4x4_scalar_int);
  }
}
1029
// Single case: block_height=8, block_width=4, input_stride=4, output_stride=8.
TEST(X32_TRANSPOSE__4X4_SCALAR_INT, bh_8_bw_4) {
  auto tester = TransposeMicrokernelTester()
                    .input_stride(4)
                    .output_stride(8)
                    .block_width(4)
                    .block_height(8)
                    .iterations(1);
  tester.Test(xnn_x32_transpose_ukernel__4x4_scalar_int);
}
1039
// Sweeps block_height over [5, 8) with output_stride == block_height;
// block_width and input_stride are fixed at 4.
TEST(X32_TRANSPOSE__4X4_SCALAR_INT, bh_5_8_bw_4) {
  for (size_t bh = 5; bh < 8; ++bh) {
    auto tester = TransposeMicrokernelTester()
                      .input_stride(4)
                      .output_stride(bh)
                      .block_width(4)
                      .block_height(bh)
                      .iterations(1);
    tester.Test(xnn_x32_transpose_ukernel__4x4_scalar_int);
  }
}
1051
// Sweeps block_height over [5, 8) with output_stride == block_height;
// block_width and input_stride are fixed at 8.
TEST(X32_TRANSPOSE__4X4_SCALAR_INT, bh_5_8_bw_8) {
  for (size_t bh = 5; bh < 8; ++bh) {
    auto tester = TransposeMicrokernelTester()
                      .input_stride(8)
                      .output_stride(bh)
                      .block_width(8)
                      .block_height(bh)
                      .iterations(1);
    tester.Test(xnn_x32_transpose_ukernel__4x4_scalar_int);
  }
}
1063
// Sweeps block_height over [5, 8) and block_width over [5, 8), with
// output_stride == block_height and input_stride == block_width.
TEST(X32_TRANSPOSE__4X4_SCALAR_INT, bh_5_8_bw_5_8) {
  for (size_t bh = 5; bh < 8; ++bh) {
    for (size_t bw = 5; bw < 8; ++bw) {
      auto tester = TransposeMicrokernelTester()
                        .input_stride(bw)
                        .output_stride(bh)
                        .block_width(bw)
                        .block_height(bh)
                        .iterations(1);
      tester.Test(xnn_x32_transpose_ukernel__4x4_scalar_int);
    }
  }
}
1077
// Single case: block_height=4, block_width=4, input_stride=8 (wider than the block), output_stride=4.
TEST(X32_TRANSPOSE__4X4_SCALAR_INT, bh_4_bw_4_is_8) {
  auto tester = TransposeMicrokernelTester()
                    .input_stride(8)
                    .output_stride(4)
                    .block_width(4)
                    .block_height(4)
                    .iterations(1);
  tester.Test(xnn_x32_transpose_ukernel__4x4_scalar_int);
}
1087
// Single case: block_height=4, block_width=4, input_stride=4, output_stride=8 (larger than block_height).
TEST(X32_TRANSPOSE__4X4_SCALAR_INT, bh_4_bw_4_os_8) {
  auto tester = TransposeMicrokernelTester()
                    .input_stride(4)
                    .output_stride(8)
                    .block_width(4)
                    .block_height(4)
                    .iterations(1);
  tester.Test(xnn_x32_transpose_ukernel__4x4_scalar_int);
}
1097
// 4x4 block where both strides (8) differ from the block dimensions (4).
TEST(X32_TRANSPOSE__4X4_SCALAR_INT, bh_4_bw_4_is_8_os_8) {
  const size_t width = 4;
  const size_t height = 4;
  const size_t in_stride = 8;
  const size_t out_stride = 8;
  TransposeMicrokernelTester()
      .input_stride(in_stride)
      .output_stride(out_stride)
      .block_width(width)
      .block_height(height)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__4x4_scalar_int);
}
1107
// Single configuration: block_height=1, block_width=2, input_stride=2, output_stride=1.
TEST(X32_TRANSPOSE__1X2_SCALAR_FLOAT, bh_1_bw_2) {
  const size_t width = 2;
  const size_t height = 1;
  TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__1x2_scalar_float);
}
1117
// Sweeps block_height over [1, 2] and block_width over [1, 4]; strides equal the block dimensions.
TEST(X32_TRANSPOSE__1X2_SCALAR_FLOAT, bh_1_2_bw_1_4) {
  for (size_t height = 1; height <= 2; ++height) {
    for (size_t width = 1; width <= 4; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width)
          .output_stride(height)
          .block_width(width)
          .block_height(height)
          .iterations(1)
          .Test(xnn_x32_transpose_ukernel__1x2_scalar_float);
    }
  }
}
1131
// Single configuration: block_height=1, block_width=4, input_stride=4, output_stride=1.
TEST(X32_TRANSPOSE__1X2_SCALAR_FLOAT, bh_1_bw_4) {
  const size_t width = 4;
  const size_t height = 1;
  TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__1x2_scalar_float);
}
1141
// Sweeps block_width over [3, 4) (the single value 3) with block_height fixed at 1.
TEST(X32_TRANSPOSE__1X2_SCALAR_FLOAT, bh_1_bw_3_4) {
  const size_t height = 1;
  for (size_t width = 3; width < 4; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .iterations(1)
        .Test(xnn_x32_transpose_ukernel__1x2_scalar_float);
  }
}
1153
// Sweeps block_width over [3, 4) (the single value 3) with block_height fixed at 2.
TEST(X32_TRANSPOSE__1X2_SCALAR_FLOAT, bh_2_bw_3_4) {
  const size_t height = 2;
  for (size_t width = 3; width < 4; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .iterations(1)
        .Test(xnn_x32_transpose_ukernel__1x2_scalar_float);
  }
}
1165
// Single configuration: block_height=2, block_width=2, input_stride=2, output_stride=2.
TEST(X32_TRANSPOSE__1X2_SCALAR_FLOAT, bh_2_bw_2) {
  const size_t width = 2;
  const size_t height = 2;
  TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__1x2_scalar_float);
}
1175
// Covers block_height 2 with block_width 2. The height bound is inclusive: a half-open
// range [2, 2) would be empty, leaving the body unexecuted and the test vacuously passing.
TEST(X32_TRANSPOSE__1X2_SCALAR_FLOAT, bh_2_2_bw_2) {
  const size_t width = 2;
  for (size_t height = 2; height <= 2; ++height) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .iterations(1)
        .Test(xnn_x32_transpose_ukernel__1x2_scalar_float);
  }
}
1187
// Covers block_height 2 with block_width 4. The height bound is inclusive: a half-open
// range [2, 2) would be empty, leaving the body unexecuted and the test vacuously passing.
TEST(X32_TRANSPOSE__1X2_SCALAR_FLOAT, bh_2_2_bw_4) {
  const size_t width = 4;
  for (size_t height = 2; height <= 2; ++height) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .iterations(1)
        .Test(xnn_x32_transpose_ukernel__1x2_scalar_float);
  }
}
1199
// Covers block_height 2 against block_width in [3, 4). The height bound is inclusive: a
// half-open range [2, 2) would be empty, so the inner loop and the kernel call would never run.
TEST(X32_TRANSPOSE__1X2_SCALAR_FLOAT, bh_2_2_bw_3_4) {
  for (size_t height = 2; height <= 2; ++height) {
    for (size_t width = 3; width < 4; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width)
          .output_stride(height)
          .block_width(width)
          .block_height(height)
          .iterations(1)
          .Test(xnn_x32_transpose_ukernel__1x2_scalar_float);
    }
  }
}
1213
// 1x2 block where input_stride (4) differs from block_width (2); output_stride equals block_height.
TEST(X32_TRANSPOSE__1X2_SCALAR_FLOAT, bh_1_bw_2_is_4) {
  const size_t width = 2;
  const size_t height = 1;
  const size_t in_stride = 4;
  TransposeMicrokernelTester()
      .input_stride(in_stride)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__1x2_scalar_float);
}
1223
// 1x2 block where output_stride (2) differs from block_height (1); input_stride equals block_width.
TEST(X32_TRANSPOSE__1X2_SCALAR_FLOAT, bh_1_bw_2_os_2) {
  const size_t width = 2;
  const size_t height = 1;
  const size_t out_stride = 2;
  TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(out_stride)
      .block_width(width)
      .block_height(height)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__1x2_scalar_float);
}
1233
// 1x2 block where input_stride (4) and output_stride (2) both differ from the block dimensions.
TEST(X32_TRANSPOSE__1X2_SCALAR_FLOAT, bh_1_bw_2_is_4_os_2) {
  const size_t width = 2;
  const size_t height = 1;
  const size_t in_stride = 4;
  const size_t out_stride = 2;
  TransposeMicrokernelTester()
      .input_stride(in_stride)
      .output_stride(out_stride)
      .block_width(width)
      .block_height(height)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__1x2_scalar_float);
}
1243
// Single configuration: block_height=1, block_width=4, input_stride=4, output_stride=1.
TEST(X32_TRANSPOSE__1X4_SCALAR_FLOAT, bh_1_bw_4) {
  const size_t width = 4;
  const size_t height = 1;
  TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__1x4_scalar_float);
}
1253
// Sweeps block_height over [1, 2] and block_width over [1, 8]; strides equal the block dimensions.
TEST(X32_TRANSPOSE__1X4_SCALAR_FLOAT, bh_1_2_bw_1_8) {
  for (size_t height = 1; height <= 2; ++height) {
    for (size_t width = 1; width <= 8; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width)
          .output_stride(height)
          .block_width(width)
          .block_height(height)
          .iterations(1)
          .Test(xnn_x32_transpose_ukernel__1x4_scalar_float);
    }
  }
}
1267
// Single configuration: block_height=1, block_width=8, input_stride=8, output_stride=1.
TEST(X32_TRANSPOSE__1X4_SCALAR_FLOAT, bh_1_bw_8) {
  const size_t width = 8;
  const size_t height = 1;
  TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__1x4_scalar_float);
}
1277
// Sweeps block_width over [5, 8) with block_height fixed at 1; input_stride follows the width.
TEST(X32_TRANSPOSE__1X4_SCALAR_FLOAT, bh_1_bw_5_8) {
  const size_t height = 1;
  for (size_t width = 5; width < 8; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .iterations(1)
        .Test(xnn_x32_transpose_ukernel__1x4_scalar_float);
  }
}
1289
// Sweeps block_width over [5, 8) with block_height fixed at 2; input_stride follows the width.
TEST(X32_TRANSPOSE__1X4_SCALAR_FLOAT, bh_2_bw_5_8) {
  const size_t height = 2;
  for (size_t width = 5; width < 8; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .iterations(1)
        .Test(xnn_x32_transpose_ukernel__1x4_scalar_float);
  }
}
1301
// Single configuration: block_height=2, block_width=4, input_stride=4, output_stride=2.
TEST(X32_TRANSPOSE__1X4_SCALAR_FLOAT, bh_2_bw_4) {
  const size_t width = 4;
  const size_t height = 2;
  TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__1x4_scalar_float);
}
1311
// Covers block_height 2 with block_width 4. The height bound is inclusive: a half-open
// range [2, 2) would be empty, leaving the body unexecuted and the test vacuously passing.
TEST(X32_TRANSPOSE__1X4_SCALAR_FLOAT, bh_2_2_bw_4) {
  const size_t width = 4;
  for (size_t height = 2; height <= 2; ++height) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .iterations(1)
        .Test(xnn_x32_transpose_ukernel__1x4_scalar_float);
  }
}
1323
// Covers block_height 2 with block_width 8. The height bound is inclusive: a half-open
// range [2, 2) would be empty, leaving the body unexecuted and the test vacuously passing.
TEST(X32_TRANSPOSE__1X4_SCALAR_FLOAT, bh_2_2_bw_8) {
  const size_t width = 8;
  for (size_t height = 2; height <= 2; ++height) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .iterations(1)
        .Test(xnn_x32_transpose_ukernel__1x4_scalar_float);
  }
}
1335
// Covers block_height 2 against block_width in [5, 8). The height bound is inclusive: a
// half-open range [2, 2) would be empty, so the inner loop and the kernel call would never run.
TEST(X32_TRANSPOSE__1X4_SCALAR_FLOAT, bh_2_2_bw_5_8) {
  for (size_t height = 2; height <= 2; ++height) {
    for (size_t width = 5; width < 8; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width)
          .output_stride(height)
          .block_width(width)
          .block_height(height)
          .iterations(1)
          .Test(xnn_x32_transpose_ukernel__1x4_scalar_float);
    }
  }
}
1349
// 1x4 block where input_stride (8) differs from block_width (4); output_stride equals block_height.
TEST(X32_TRANSPOSE__1X4_SCALAR_FLOAT, bh_1_bw_4_is_8) {
  const size_t width = 4;
  const size_t height = 1;
  const size_t in_stride = 8;
  TransposeMicrokernelTester()
      .input_stride(in_stride)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__1x4_scalar_float);
}
1359
// 1x4 block where output_stride (2) differs from block_height (1); input_stride equals block_width.
TEST(X32_TRANSPOSE__1X4_SCALAR_FLOAT, bh_1_bw_4_os_2) {
  const size_t width = 4;
  const size_t height = 1;
  const size_t out_stride = 2;
  TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(out_stride)
      .block_width(width)
      .block_height(height)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__1x4_scalar_float);
}
1369
// 1x4 block where input_stride (8) and output_stride (2) both differ from the block dimensions.
TEST(X32_TRANSPOSE__1X4_SCALAR_FLOAT, bh_1_bw_4_is_8_os_2) {
  const size_t width = 4;
  const size_t height = 1;
  const size_t in_stride = 8;
  const size_t out_stride = 2;
  TransposeMicrokernelTester()
      .input_stride(in_stride)
      .output_stride(out_stride)
      .block_width(width)
      .block_height(height)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__1x4_scalar_float);
}
1379
// Single configuration: block_height=2, block_width=1, input_stride=1, output_stride=2.
TEST(X32_TRANSPOSE__2X1_SCALAR_FLOAT, bh_2_bw_1) {
  const size_t width = 1;
  const size_t height = 2;
  TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__2x1_scalar_float);
}
1389
// Sweeps block_height over [1, 4] and block_width over [1, 2]; strides equal the block dimensions.
TEST(X32_TRANSPOSE__2X1_SCALAR_FLOAT, bh_1_4_bw_1_2) {
  for (size_t height = 1; height <= 4; ++height) {
    for (size_t width = 1; width <= 2; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width)
          .output_stride(height)
          .block_width(width)
          .block_height(height)
          .iterations(1)
          .Test(xnn_x32_transpose_ukernel__2x1_scalar_float);
    }
  }
}
1403
// Single configuration: block_height=2, block_width=2, input_stride=2, output_stride=2.
TEST(X32_TRANSPOSE__2X1_SCALAR_FLOAT, bh_2_bw_2) {
  const size_t width = 2;
  const size_t height = 2;
  TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__2x1_scalar_float);
}
1413
// Covers block_width 2 with block_height 2. The width bound is inclusive: a half-open
// range [2, 2) would be empty, leaving the body unexecuted and the test vacuously passing.
TEST(X32_TRANSPOSE__2X1_SCALAR_FLOAT, bh_2_bw_2_2) {
  const size_t height = 2;
  for (size_t width = 2; width <= 2; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .iterations(1)
        .Test(xnn_x32_transpose_ukernel__2x1_scalar_float);
  }
}
1425
// Covers block_width 2 with block_height 4. The width bound is inclusive: a half-open
// range [2, 2) would be empty, leaving the body unexecuted and the test vacuously passing.
TEST(X32_TRANSPOSE__2X1_SCALAR_FLOAT, bh_4_bw_2_2) {
  const size_t height = 4;
  for (size_t width = 2; width <= 2; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .iterations(1)
        .Test(xnn_x32_transpose_ukernel__2x1_scalar_float);
  }
}
1437
// Single configuration: block_height=4, block_width=1, input_stride=1, output_stride=4.
TEST(X32_TRANSPOSE__2X1_SCALAR_FLOAT, bh_4_bw_1) {
  const size_t width = 1;
  const size_t height = 4;
  TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__2x1_scalar_float);
}
1447
// Sweeps block_height over [3, 4) (the single value 3) with block_width fixed at 1.
TEST(X32_TRANSPOSE__2X1_SCALAR_FLOAT, bh_3_4_bw_1) {
  const size_t width = 1;
  for (size_t height = 3; height < 4; ++height) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .iterations(1)
        .Test(xnn_x32_transpose_ukernel__2x1_scalar_float);
  }
}
1459
// Sweeps block_height over [3, 4) (the single value 3) with block_width fixed at 2.
TEST(X32_TRANSPOSE__2X1_SCALAR_FLOAT, bh_3_4_bw_2) {
  const size_t width = 2;
  for (size_t height = 3; height < 4; ++height) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .iterations(1)
        .Test(xnn_x32_transpose_ukernel__2x1_scalar_float);
  }
}
1471
// Covers block_height in [3, 4) against block_width 2. The width bound is inclusive: a
// half-open range [2, 2) would be empty, so the kernel call would never run.
TEST(X32_TRANSPOSE__2X1_SCALAR_FLOAT, bh_3_4_bw_2_2) {
  for (size_t height = 3; height < 4; ++height) {
    for (size_t width = 2; width <= 2; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width)
          .output_stride(height)
          .block_width(width)
          .block_height(height)
          .iterations(1)
          .Test(xnn_x32_transpose_ukernel__2x1_scalar_float);
    }
  }
}
1485
// 2x1 block where input_stride (2) differs from block_width (1); output_stride equals block_height.
TEST(X32_TRANSPOSE__2X1_SCALAR_FLOAT, bh_2_bw_1_is_2) {
  const size_t width = 1;
  const size_t height = 2;
  const size_t in_stride = 2;
  TransposeMicrokernelTester()
      .input_stride(in_stride)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__2x1_scalar_float);
}
1495
// 2x1 block where output_stride (4) differs from block_height (2); input_stride equals block_width.
TEST(X32_TRANSPOSE__2X1_SCALAR_FLOAT, bh_2_bw_1_os_4) {
  const size_t width = 1;
  const size_t height = 2;
  const size_t out_stride = 4;
  TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(out_stride)
      .block_width(width)
      .block_height(height)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__2x1_scalar_float);
}
1505
// 2x1 block where input_stride (2) and output_stride (4) both differ from the block dimensions.
TEST(X32_TRANSPOSE__2X1_SCALAR_FLOAT, bh_2_bw_1_is_2_os_4) {
  const size_t width = 1;
  const size_t height = 2;
  const size_t in_stride = 2;
  const size_t out_stride = 4;
  TransposeMicrokernelTester()
      .input_stride(in_stride)
      .output_stride(out_stride)
      .block_width(width)
      .block_height(height)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__2x1_scalar_float);
}
1515
// Single configuration: block_height=2, block_width=2, input_stride=2, output_stride=2.
TEST(X32_TRANSPOSE__2X2_SCALAR_FLOAT, bh_2_bw_2) {
  const size_t width = 2;
  const size_t height = 2;
  TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__2x2_scalar_float);
}
1525
// Sweeps block_height and block_width independently over [1, 4]; strides equal the block dimensions.
TEST(X32_TRANSPOSE__2X2_SCALAR_FLOAT, bh_1_4_bw_1_4) {
  for (size_t height = 1; height <= 4; ++height) {
    for (size_t width = 1; width <= 4; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width)
          .output_stride(height)
          .block_width(width)
          .block_height(height)
          .iterations(1)
          .Test(xnn_x32_transpose_ukernel__2x2_scalar_float);
    }
  }
}
1539
// Single configuration: block_height=2, block_width=4, input_stride=4, output_stride=2.
TEST(X32_TRANSPOSE__2X2_SCALAR_FLOAT, bh_2_bw_4) {
  const size_t width = 4;
  const size_t height = 2;
  TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__2x2_scalar_float);
}
1549
// Sweeps block_width over [3, 4) (the single value 3) with block_height fixed at 2.
TEST(X32_TRANSPOSE__2X2_SCALAR_FLOAT, bh_2_bw_3_4) {
  const size_t height = 2;
  for (size_t width = 3; width < 4; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .iterations(1)
        .Test(xnn_x32_transpose_ukernel__2x2_scalar_float);
  }
}
1561
// Sweeps block_width over [3, 4) (the single value 3) with block_height fixed at 4.
TEST(X32_TRANSPOSE__2X2_SCALAR_FLOAT, bh_4_bw_3_4) {
  const size_t height = 4;
  for (size_t width = 3; width < 4; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .iterations(1)
        .Test(xnn_x32_transpose_ukernel__2x2_scalar_float);
  }
}
1573
// Single configuration: block_height=4, block_width=2, input_stride=2, output_stride=4.
TEST(X32_TRANSPOSE__2X2_SCALAR_FLOAT, bh_4_bw_2) {
  const size_t width = 2;
  const size_t height = 4;
  TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__2x2_scalar_float);
}
1583
// Sweeps block_height over [3, 4) (the single value 3) with block_width fixed at 2.
TEST(X32_TRANSPOSE__2X2_SCALAR_FLOAT, bh_3_4_bw_2) {
  const size_t width = 2;
  for (size_t height = 3; height < 4; ++height) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .iterations(1)
        .Test(xnn_x32_transpose_ukernel__2x2_scalar_float);
  }
}
1595
// Sweeps block_height over [3, 4) (the single value 3) with block_width fixed at 4.
TEST(X32_TRANSPOSE__2X2_SCALAR_FLOAT, bh_3_4_bw_4) {
  const size_t width = 4;
  for (size_t height = 3; height < 4; ++height) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .iterations(1)
        .Test(xnn_x32_transpose_ukernel__2x2_scalar_float);
  }
}
1607
// Sweeps block_height and block_width over [3, 4), i.e. the single 3x3 case.
TEST(X32_TRANSPOSE__2X2_SCALAR_FLOAT, bh_3_4_bw_3_4) {
  for (size_t height = 3; height < 4; ++height) {
    for (size_t width = 3; width < 4; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width)
          .output_stride(height)
          .block_width(width)
          .block_height(height)
          .iterations(1)
          .Test(xnn_x32_transpose_ukernel__2x2_scalar_float);
    }
  }
}
1621
// 2x2 block where input_stride (4) differs from block_width (2); output_stride equals block_height.
TEST(X32_TRANSPOSE__2X2_SCALAR_FLOAT, bh_2_bw_2_is_4) {
  const size_t width = 2;
  const size_t height = 2;
  const size_t in_stride = 4;
  TransposeMicrokernelTester()
      .input_stride(in_stride)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__2x2_scalar_float);
}
1631
// 2x2 block where output_stride (4) differs from block_height (2); input_stride equals block_width.
TEST(X32_TRANSPOSE__2X2_SCALAR_FLOAT, bh_2_bw_2_os_4) {
  const size_t width = 2;
  const size_t height = 2;
  const size_t out_stride = 4;
  TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(out_stride)
      .block_width(width)
      .block_height(height)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__2x2_scalar_float);
}
1641
// 2x2 block where both strides (4) differ from the block dimensions (2).
TEST(X32_TRANSPOSE__2X2_SCALAR_FLOAT, bh_2_bw_2_is_4_os_4) {
  const size_t width = 2;
  const size_t height = 2;
  const size_t in_stride = 4;
  const size_t out_stride = 4;
  TransposeMicrokernelTester()
      .input_stride(in_stride)
      .output_stride(out_stride)
      .block_width(width)
      .block_height(height)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__2x2_scalar_float);
}
1651
// Single configuration: block_height=2, block_width=4, input_stride=4, output_stride=2.
TEST(X32_TRANSPOSE__2X4_SCALAR_FLOAT, bh_2_bw_4) {
  const size_t width = 4;
  const size_t height = 2;
  TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__2x4_scalar_float);
}
1661
// Sweeps block_height over [1, 4] and block_width over [1, 8]; strides equal the block dimensions.
TEST(X32_TRANSPOSE__2X4_SCALAR_FLOAT, bh_1_4_bw_1_8) {
  for (size_t height = 1; height <= 4; ++height) {
    for (size_t width = 1; width <= 8; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width)
          .output_stride(height)
          .block_width(width)
          .block_height(height)
          .iterations(1)
          .Test(xnn_x32_transpose_ukernel__2x4_scalar_float);
    }
  }
}
1675
// Single configuration: block_height=2, block_width=8, input_stride=8, output_stride=2.
TEST(X32_TRANSPOSE__2X4_SCALAR_FLOAT, bh_2_bw_8) {
  const size_t width = 8;
  const size_t height = 2;
  TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__2x4_scalar_float);
}
1685
// Sweeps block_width over [5, 8) with block_height fixed at 2; input_stride follows the width.
TEST(X32_TRANSPOSE__2X4_SCALAR_FLOAT, bh_2_bw_5_8) {
  const size_t height = 2;
  for (size_t width = 5; width < 8; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .iterations(1)
        .Test(xnn_x32_transpose_ukernel__2x4_scalar_float);
  }
}
1697
// Sweeps block_width over [5, 8) with block_height fixed at 4; input_stride follows the width.
TEST(X32_TRANSPOSE__2X4_SCALAR_FLOAT, bh_4_bw_5_8) {
  const size_t height = 4;
  for (size_t width = 5; width < 8; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .iterations(1)
        .Test(xnn_x32_transpose_ukernel__2x4_scalar_float);
  }
}
1709
// Single configuration: block_height=4, block_width=4, input_stride=4, output_stride=4.
TEST(X32_TRANSPOSE__2X4_SCALAR_FLOAT, bh_4_bw_4) {
  const size_t width = 4;
  const size_t height = 4;
  TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__2x4_scalar_float);
}
1719
// Sweeps block_height over [3, 4) (the single value 3) with block_width fixed at 4.
TEST(X32_TRANSPOSE__2X4_SCALAR_FLOAT, bh_3_4_bw_4) {
  const size_t width = 4;
  for (size_t height = 3; height < 4; ++height) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .iterations(1)
        .Test(xnn_x32_transpose_ukernel__2x4_scalar_float);
  }
}
1731
// Sweeps block_height over [3, 4) (the single value 3) with block_width fixed at 8.
TEST(X32_TRANSPOSE__2X4_SCALAR_FLOAT, bh_3_4_bw_8) {
  const size_t width = 8;
  for (size_t height = 3; height < 4; ++height) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .iterations(1)
        .Test(xnn_x32_transpose_ukernel__2x4_scalar_float);
  }
}
1743
// Sweeps block_height over [3, 4) and block_width over [5, 8); strides equal the block dimensions.
TEST(X32_TRANSPOSE__2X4_SCALAR_FLOAT, bh_3_4_bw_5_8) {
  for (size_t height = 3; height < 4; ++height) {
    for (size_t width = 5; width < 8; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width)
          .output_stride(height)
          .block_width(width)
          .block_height(height)
          .iterations(1)
          .Test(xnn_x32_transpose_ukernel__2x4_scalar_float);
    }
  }
}
1757
// 2x4 block where input_stride (8) differs from block_width (4); output_stride equals block_height.
TEST(X32_TRANSPOSE__2X4_SCALAR_FLOAT, bh_2_bw_4_is_8) {
  const size_t width = 4;
  const size_t height = 2;
  const size_t in_stride = 8;
  TransposeMicrokernelTester()
      .input_stride(in_stride)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__2x4_scalar_float);
}
1767
// 2x4 block where output_stride (4) differs from block_height (2); input_stride equals block_width.
TEST(X32_TRANSPOSE__2X4_SCALAR_FLOAT, bh_2_bw_4_os_4) {
  const size_t width = 4;
  const size_t height = 2;
  const size_t out_stride = 4;
  TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(out_stride)
      .block_width(width)
      .block_height(height)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__2x4_scalar_float);
}
1777
// 2x4 block where input_stride (8) and output_stride (4) both differ from the block dimensions.
TEST(X32_TRANSPOSE__2X4_SCALAR_FLOAT, bh_2_bw_4_is_8_os_4) {
  const size_t width = 4;
  const size_t height = 2;
  const size_t in_stride = 8;
  const size_t out_stride = 4;
  TransposeMicrokernelTester()
      .input_stride(in_stride)
      .output_stride(out_stride)
      .block_width(width)
      .block_height(height)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__2x4_scalar_float);
}
1787
// Single configuration: block_height=4, block_width=1, input_stride=1, output_stride=4.
TEST(X32_TRANSPOSE__4X1_SCALAR_FLOAT, bh_4_bw_1) {
  const size_t width = 1;
  const size_t height = 4;
  TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__4x1_scalar_float);
}
1797
// Sweeps block_height over [1, 8] and block_width over [1, 2]; strides equal the block dimensions.
TEST(X32_TRANSPOSE__4X1_SCALAR_FLOAT, bh_1_8_bw_1_2) {
  for (size_t height = 1; height <= 8; ++height) {
    for (size_t width = 1; width <= 2; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width)
          .output_stride(height)
          .block_width(width)
          .block_height(height)
          .iterations(1)
          .Test(xnn_x32_transpose_ukernel__4x1_scalar_float);
    }
  }
}
1811
// Single configuration: block_height=4, block_width=2, input_stride=2, output_stride=4.
TEST(X32_TRANSPOSE__4X1_SCALAR_FLOAT, bh_4_bw_2) {
  const size_t width = 2;
  const size_t height = 4;
  TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__4x1_scalar_float);
}
1821
// Covers block_width 2 with block_height 4. The width bound is inclusive: a half-open
// range [2, 2) would be empty, leaving the body unexecuted and the test vacuously passing.
TEST(X32_TRANSPOSE__4X1_SCALAR_FLOAT, bh_4_bw_2_2) {
  const size_t height = 4;
  for (size_t width = 2; width <= 2; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .iterations(1)
        .Test(xnn_x32_transpose_ukernel__4x1_scalar_float);
  }
}
1833
// Covers block_width 2 with block_height 8. The width bound is inclusive: a half-open
// range [2, 2) would be empty, leaving the body unexecuted and the test vacuously passing.
TEST(X32_TRANSPOSE__4X1_SCALAR_FLOAT, bh_8_bw_2_2) {
  const size_t height = 8;
  for (size_t width = 2; width <= 2; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .iterations(1)
        .Test(xnn_x32_transpose_ukernel__4x1_scalar_float);
  }
}
1845
// Single configuration: block_height=8, block_width=1, input_stride=1, output_stride=8.
TEST(X32_TRANSPOSE__4X1_SCALAR_FLOAT, bh_8_bw_1) {
  const size_t width = 1;
  const size_t height = 8;
  TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__4x1_scalar_float);
}
1855
// Sweeps block_height over [5, 8) with block_width fixed at 1; output_stride follows the height.
TEST(X32_TRANSPOSE__4X1_SCALAR_FLOAT, bh_5_8_bw_1) {
  const size_t width = 1;
  for (size_t height = 5; height < 8; ++height) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .iterations(1)
        .Test(xnn_x32_transpose_ukernel__4x1_scalar_float);
  }
}
1867
// Sweeps block_height over [5, 8) with block_width fixed at 2; output_stride follows the height.
TEST(X32_TRANSPOSE__4X1_SCALAR_FLOAT, bh_5_8_bw_2) {
  const size_t width = 2;
  for (size_t height = 5; height < 8; ++height) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .iterations(1)
        .Test(xnn_x32_transpose_ukernel__4x1_scalar_float);
  }
}
1879
// Covers block_height in [5, 8) against block_width 2. The width bound is inclusive: a
// half-open range [2, 2) would be empty, so the kernel call would never run.
TEST(X32_TRANSPOSE__4X1_SCALAR_FLOAT, bh_5_8_bw_2_2) {
  for (size_t height = 5; height < 8; ++height) {
    for (size_t width = 2; width <= 2; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width)
          .output_stride(height)
          .block_width(width)
          .block_height(height)
          .iterations(1)
          .Test(xnn_x32_transpose_ukernel__4x1_scalar_float);
    }
  }
}
1893
// 4x1 block where input_stride (2) differs from block_width (1); output_stride equals block_height.
TEST(X32_TRANSPOSE__4X1_SCALAR_FLOAT, bh_4_bw_1_is_2) {
  const size_t width = 1;
  const size_t height = 4;
  const size_t in_stride = 2;
  TransposeMicrokernelTester()
      .input_stride(in_stride)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__4x1_scalar_float);
}
1903
// 4x1 block where output_stride (8) differs from block_height (4); input_stride equals block_width.
TEST(X32_TRANSPOSE__4X1_SCALAR_FLOAT, bh_4_bw_1_os_8) {
  const size_t width = 1;
  const size_t height = 4;
  const size_t out_stride = 8;
  TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(out_stride)
      .block_width(width)
      .block_height(height)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__4x1_scalar_float);
}
1913
// 4x1 block where input_stride (2) and output_stride (8) both differ from the block dimensions.
TEST(X32_TRANSPOSE__4X1_SCALAR_FLOAT, bh_4_bw_1_is_2_os_8) {
  const size_t width = 1;
  const size_t height = 4;
  const size_t in_stride = 2;
  const size_t out_stride = 8;
  TransposeMicrokernelTester()
      .input_stride(in_stride)
      .output_stride(out_stride)
      .block_width(width)
      .block_height(height)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__4x1_scalar_float);
}
1923
// Single configuration: block_height=4, block_width=2, input_stride=2, output_stride=4.
TEST(X32_TRANSPOSE__4X2_SCALAR_FLOAT, bh_4_bw_2) {
  const size_t width = 2;
  const size_t height = 4;
  TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__4x2_scalar_float);
}
1933
// Sweeps block_height over [1, 8] and block_width over [1, 4]; strides equal the block dimensions.
TEST(X32_TRANSPOSE__4X2_SCALAR_FLOAT, bh_1_8_bw_1_4) {
  for (size_t height = 1; height <= 8; ++height) {
    for (size_t width = 1; width <= 4; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width)
          .output_stride(height)
          .block_width(width)
          .block_height(height)
          .iterations(1)
          .Test(xnn_x32_transpose_ukernel__4x2_scalar_float);
    }
  }
}
1947
// Single configuration: block_height=4, block_width=4, input_stride=4, output_stride=4.
TEST(X32_TRANSPOSE__4X2_SCALAR_FLOAT, bh_4_bw_4) {
  const size_t width = 4;
  const size_t height = 4;
  TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__4x2_scalar_float);
}
1957
// Sweeps block_width over [3, 4) (the single value 3) with block_height fixed at 4.
TEST(X32_TRANSPOSE__4X2_SCALAR_FLOAT, bh_4_bw_3_4) {
  const size_t height = 4;
  for (size_t width = 3; width < 4; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .iterations(1)
        .Test(xnn_x32_transpose_ukernel__4x2_scalar_float);
  }
}
1969
// Sweeps block_width over [3, 4) (the single value 3) with block_height fixed at 8.
TEST(X32_TRANSPOSE__4X2_SCALAR_FLOAT, bh_8_bw_3_4) {
  const size_t height = 8;
  for (size_t width = 3; width < 4; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .iterations(1)
        .Test(xnn_x32_transpose_ukernel__4x2_scalar_float);
  }
}
1981
// Single configuration: block_height=8, block_width=2, input_stride=2, output_stride=8.
TEST(X32_TRANSPOSE__4X2_SCALAR_FLOAT, bh_8_bw_2) {
  const size_t width = 2;
  const size_t height = 8;
  TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__4x2_scalar_float);
}
1991
// Sweeps block_height over [5, 8) with block_width fixed at 2; output_stride follows the height.
TEST(X32_TRANSPOSE__4X2_SCALAR_FLOAT, bh_5_8_bw_2) {
  const size_t width = 2;
  for (size_t height = 5; height < 8; ++height) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .iterations(1)
        .Test(xnn_x32_transpose_ukernel__4x2_scalar_float);
  }
}
2003
// Sweeps block_height over [5, 8) with block_width fixed at 4; output_stride follows the height.
TEST(X32_TRANSPOSE__4X2_SCALAR_FLOAT, bh_5_8_bw_4) {
  const size_t width = 4;
  for (size_t height = 5; height < 8; ++height) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .iterations(1)
        .Test(xnn_x32_transpose_ukernel__4x2_scalar_float);
  }
}
2015
// Sweeps block_height over [5, 8) and block_width over [3, 4); strides equal the block dimensions.
TEST(X32_TRANSPOSE__4X2_SCALAR_FLOAT, bh_5_8_bw_3_4) {
  for (size_t height = 5; height < 8; ++height) {
    for (size_t width = 3; width < 4; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width)
          .output_stride(height)
          .block_width(width)
          .block_height(height)
          .iterations(1)
          .Test(xnn_x32_transpose_ukernel__4x2_scalar_float);
    }
  }
}
2029
// 4x2 block where input_stride (4) differs from block_width (2); output_stride equals block_height.
TEST(X32_TRANSPOSE__4X2_SCALAR_FLOAT, bh_4_bw_2_is_4) {
  const size_t width = 2;
  const size_t height = 4;
  const size_t in_stride = 4;
  TransposeMicrokernelTester()
      .input_stride(in_stride)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__4x2_scalar_float);
}
2039
// 4x2 block where output_stride (8) differs from block_height (4); input_stride equals block_width.
TEST(X32_TRANSPOSE__4X2_SCALAR_FLOAT, bh_4_bw_2_os_8) {
  const size_t width = 2;
  const size_t height = 4;
  const size_t out_stride = 8;
  TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(out_stride)
      .block_width(width)
      .block_height(height)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__4x2_scalar_float);
}
2049
// 4x2 block where input_stride (4) and output_stride (8) both differ from the block dimensions.
TEST(X32_TRANSPOSE__4X2_SCALAR_FLOAT, bh_4_bw_2_is_4_os_8) {
  const size_t width = 2;
  const size_t height = 4;
  const size_t in_stride = 4;
  const size_t out_stride = 8;
  TransposeMicrokernelTester()
      .input_stride(in_stride)
      .output_stride(out_stride)
      .block_width(width)
      .block_height(height)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__4x2_scalar_float);
}
2059
// Single configuration: block_height=4, block_width=4, input_stride=4, output_stride=4.
TEST(X32_TRANSPOSE__4X4_SCALAR_FLOAT, bh_4_bw_4) {
  const size_t width = 4;
  const size_t height = 4;
  TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__4x4_scalar_float);
}
2069
// Sweeps block_height and block_width independently over [1, 8]; strides equal the block dimensions.
TEST(X32_TRANSPOSE__4X4_SCALAR_FLOAT, bh_1_8_bw_1_8) {
  for (size_t height = 1; height <= 8; ++height) {
    for (size_t width = 1; width <= 8; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width)
          .output_stride(height)
          .block_width(width)
          .block_height(height)
          .iterations(1)
          .Test(xnn_x32_transpose_ukernel__4x4_scalar_float);
    }
  }
}
2083
// Single configuration: block_height=4, block_width=8, input_stride=8, output_stride=4.
TEST(X32_TRANSPOSE__4X4_SCALAR_FLOAT, bh_4_bw_8) {
  const size_t width = 8;
  const size_t height = 4;
  TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__4x4_scalar_float);
}
2093
// Sweeps block_width over [5, 8) with block_height fixed at 4; input_stride follows the width.
TEST(X32_TRANSPOSE__4X4_SCALAR_FLOAT, bh_4_bw_5_8) {
  const size_t height = 4;
  for (size_t width = 5; width < 8; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .iterations(1)
        .Test(xnn_x32_transpose_ukernel__4x4_scalar_float);
  }
}
2105
// Sweeps block_width over [5, 8) with block_height fixed at 8; input_stride follows the width.
TEST(X32_TRANSPOSE__4X4_SCALAR_FLOAT, bh_8_bw_5_8) {
  const size_t height = 8;
  for (size_t width = 5; width < 8; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .iterations(1)
        .Test(xnn_x32_transpose_ukernel__4x4_scalar_float);
  }
}
2117
// Single configuration: block_height=8, block_width=4, input_stride=4, output_stride=8.
TEST(X32_TRANSPOSE__4X4_SCALAR_FLOAT, bh_8_bw_4) {
  const size_t width = 4;
  const size_t height = 8;
  TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__4x4_scalar_float);
}
2127
// Sweeps block_height over [5, 8) with block_width fixed at 4; output_stride follows the height.
TEST(X32_TRANSPOSE__4X4_SCALAR_FLOAT, bh_5_8_bw_4) {
  const size_t width = 4;
  for (size_t height = 5; height < 8; ++height) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .iterations(1)
        .Test(xnn_x32_transpose_ukernel__4x4_scalar_float);
  }
}
2139
TEST(X32_TRANSPOSE__4X4_SCALAR_FLOAT, bh_5_8_bw_8) {
  // Width is fixed at 8 while the height takes the values 5, 6 and 7.
  const size_t width = 8;
  for (size_t height = 5; height <= 7; ++height) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .iterations(1)
        .Test(xnn_x32_transpose_ukernel__4x4_scalar_float);
  }
}
2151
TEST(X32_TRANSPOSE__4X4_SCALAR_FLOAT, bh_5_8_bw_5_8) {
  // Every height/width combination drawn from {5, 6, 7}; each stride is set
  // to the matching block dimension.
  for (size_t height = 5; height <= 7; ++height) {
    for (size_t width = 5; width <= 7; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width)
          .output_stride(height)
          .block_width(width)
          .block_height(height)
          .iterations(1)
          .Test(xnn_x32_transpose_ukernel__4x4_scalar_float);
    }
  }
}
2165
TEST(X32_TRANSPOSE__4X4_SCALAR_FLOAT, bh_4_bw_4_is_8) {
  // 4x4 block with an input stride of 8, larger than the block width of 4.
  const size_t block_dim = 4;
  const size_t wide_input_stride = 8;
  TransposeMicrokernelTester()
      .input_stride(wide_input_stride)
      .output_stride(block_dim)
      .block_width(block_dim)
      .block_height(block_dim)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__4x4_scalar_float);
}
2175
TEST(X32_TRANSPOSE__4X4_SCALAR_FLOAT, bh_4_bw_4_os_8) {
  // 4x4 block with an output stride of 8, larger than the block height of 4.
  const size_t block_dim = 4;
  const size_t wide_output_stride = 8;
  TransposeMicrokernelTester()
      .input_stride(block_dim)
      .output_stride(wide_output_stride)
      .block_width(block_dim)
      .block_height(block_dim)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__4x4_scalar_float);
}
2185
TEST(X32_TRANSPOSE__4X4_SCALAR_FLOAT, bh_4_bw_4_is_8_os_8) {
  // 4x4 block with both the input and the output stride set to 8.
  const size_t block_dim = 4;
  const size_t wide_stride = 8;
  TransposeMicrokernelTester()
      .input_stride(wide_stride)
      .output_stride(wide_stride)
      .block_width(block_dim)
      .block_height(block_dim)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__4x4_scalar_float);
}
2195
2196 #if XNN_ARCH_ARM64
TEST(X32_TRANSPOSE__4X4_AARCH64_NEON_TBL, bh_4_bw_4) {
  TEST_REQUIRES_ARM_NEON;
  // Single 4x4 block; both strides equal the block dimension.
  const size_t block_dim = 4;
  TransposeMicrokernelTester()
      .input_stride(block_dim)
      .output_stride(block_dim)
      .block_width(block_dim)
      .block_height(block_dim)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__4x4_aarch64_neon_tbl);
}
2207
TEST(X32_TRANSPOSE__4X4_AARCH64_NEON_TBL, bh_1_8_bw_1_8) {
  TEST_REQUIRES_ARM_NEON;
  // Sweep heights 1..8 against widths 1..8; each stride is set to the
  // matching block dimension.
  for (size_t height = 1; height <= 8; ++height) {
    for (size_t width = 1; width <= 8; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width)
          .output_stride(height)
          .block_width(width)
          .block_height(height)
          .iterations(1)
          .Test(xnn_x32_transpose_ukernel__4x4_aarch64_neon_tbl);
    }
  }
}
2222
TEST(X32_TRANSPOSE__4X4_AARCH64_NEON_TBL, bh_4_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  // Single block of height 4 and width 8; strides match the block dimensions.
  const size_t height = 4;
  const size_t width = 8;
  TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__4x4_aarch64_neon_tbl);
}
2233
TEST(X32_TRANSPOSE__4X4_AARCH64_NEON_TBL, bh_4_bw_5_8) {
  TEST_REQUIRES_ARM_NEON;
  // Height is fixed at 4 while the width takes the values 5, 6 and 7.
  const size_t height = 4;
  for (size_t width = 5; width <= 7; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .iterations(1)
        .Test(xnn_x32_transpose_ukernel__4x4_aarch64_neon_tbl);
  }
}
2246
TEST(X32_TRANSPOSE__4X4_AARCH64_NEON_TBL, bh_8_bw_5_8) {
  TEST_REQUIRES_ARM_NEON;
  // Height is fixed at 8 while the width takes the values 5, 6 and 7.
  const size_t height = 8;
  for (size_t width = 5; width <= 7; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .iterations(1)
        .Test(xnn_x32_transpose_ukernel__4x4_aarch64_neon_tbl);
  }
}
2259
TEST(X32_TRANSPOSE__4X4_AARCH64_NEON_TBL, bh_8_bw_4) {
  TEST_REQUIRES_ARM_NEON;
  // Single block of height 8 and width 4; strides match the block dimensions.
  const size_t height = 8;
  const size_t width = 4;
  TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__4x4_aarch64_neon_tbl);
}
2270
TEST(X32_TRANSPOSE__4X4_AARCH64_NEON_TBL, bh_5_8_bw_4) {
  TEST_REQUIRES_ARM_NEON;
  // Width is fixed at 4 while the height takes the values 5, 6 and 7.
  const size_t width = 4;
  for (size_t height = 5; height <= 7; ++height) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .iterations(1)
        .Test(xnn_x32_transpose_ukernel__4x4_aarch64_neon_tbl);
  }
}
2283
TEST(X32_TRANSPOSE__4X4_AARCH64_NEON_TBL, bh_5_8_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  // Width is fixed at 8 while the height takes the values 5, 6 and 7.
  const size_t width = 8;
  for (size_t height = 5; height <= 7; ++height) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .iterations(1)
        .Test(xnn_x32_transpose_ukernel__4x4_aarch64_neon_tbl);
  }
}
2296
TEST(X32_TRANSPOSE__4X4_AARCH64_NEON_TBL, bh_5_8_bw_5_8) {
  TEST_REQUIRES_ARM_NEON;
  // Every height/width combination drawn from {5, 6, 7}; each stride is set
  // to the matching block dimension.
  for (size_t height = 5; height <= 7; ++height) {
    for (size_t width = 5; width <= 7; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width)
          .output_stride(height)
          .block_width(width)
          .block_height(height)
          .iterations(1)
          .Test(xnn_x32_transpose_ukernel__4x4_aarch64_neon_tbl);
    }
  }
}
2311
TEST(X32_TRANSPOSE__4X4_AARCH64_NEON_TBL, bh_4_bw_4_is_8) {
  TEST_REQUIRES_ARM_NEON;
  // 4x4 block with an input stride of 8, larger than the block width of 4.
  const size_t block_dim = 4;
  const size_t wide_input_stride = 8;
  TransposeMicrokernelTester()
      .input_stride(wide_input_stride)
      .output_stride(block_dim)
      .block_width(block_dim)
      .block_height(block_dim)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__4x4_aarch64_neon_tbl);
}
2322
TEST(X32_TRANSPOSE__4X4_AARCH64_NEON_TBL, bh_4_bw_4_os_8) {
  TEST_REQUIRES_ARM_NEON;
  // 4x4 block with an output stride of 8, larger than the block height of 4.
  const size_t block_dim = 4;
  const size_t wide_output_stride = 8;
  TransposeMicrokernelTester()
      .input_stride(block_dim)
      .output_stride(wide_output_stride)
      .block_width(block_dim)
      .block_height(block_dim)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__4x4_aarch64_neon_tbl);
}
2333
TEST(X32_TRANSPOSE__4X4_AARCH64_NEON_TBL, bh_4_bw_4_is_8_os_8) {
  TEST_REQUIRES_ARM_NEON;
  // 4x4 block with both the input and the output stride set to 8.
  const size_t block_dim = 4;
  const size_t wide_stride = 8;
  TransposeMicrokernelTester()
      .input_stride(wide_stride)
      .output_stride(wide_stride)
      .block_width(block_dim)
      .block_height(block_dim)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__4x4_aarch64_neon_tbl);
}
2344 #endif // XNN_ARCH_ARM64
2345
2346
2347 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(X32_TRANSPOSE__4X4_MULTI_MOV_SSE2, bh_4_bw_4) {
  TEST_REQUIRES_X86_SSE2;
  // Single 4x4 block; both strides equal the block dimension.
  const size_t block_dim = 4;
  TransposeMicrokernelTester()
      .input_stride(block_dim)
      .output_stride(block_dim)
      .block_width(block_dim)
      .block_height(block_dim)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__4x4_multi_mov_sse2);
}
2358
TEST(X32_TRANSPOSE__4X4_MULTI_MOV_SSE2, bh_1_8_bw_1_8) {
  TEST_REQUIRES_X86_SSE2;
  // Sweep heights 1..8 against widths 1..8; each stride is set to the
  // matching block dimension.
  for (size_t height = 1; height <= 8; ++height) {
    for (size_t width = 1; width <= 8; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width)
          .output_stride(height)
          .block_width(width)
          .block_height(height)
          .iterations(1)
          .Test(xnn_x32_transpose_ukernel__4x4_multi_mov_sse2);
    }
  }
}
2373
TEST(X32_TRANSPOSE__4X4_MULTI_MOV_SSE2, bh_4_bw_8) {
  TEST_REQUIRES_X86_SSE2;
  // Single block of height 4 and width 8; strides match the block dimensions.
  const size_t height = 4;
  const size_t width = 8;
  TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__4x4_multi_mov_sse2);
}
2384
TEST(X32_TRANSPOSE__4X4_MULTI_MOV_SSE2, bh_4_bw_5_8) {
  TEST_REQUIRES_X86_SSE2;
  // Height is fixed at 4 while the width takes the values 5, 6 and 7.
  const size_t height = 4;
  for (size_t width = 5; width <= 7; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .iterations(1)
        .Test(xnn_x32_transpose_ukernel__4x4_multi_mov_sse2);
  }
}
2397
TEST(X32_TRANSPOSE__4X4_MULTI_MOV_SSE2, bh_8_bw_5_8) {
  TEST_REQUIRES_X86_SSE2;
  // Height is fixed at 8 while the width takes the values 5, 6 and 7.
  const size_t height = 8;
  for (size_t width = 5; width <= 7; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .iterations(1)
        .Test(xnn_x32_transpose_ukernel__4x4_multi_mov_sse2);
  }
}
2410
TEST(X32_TRANSPOSE__4X4_MULTI_MOV_SSE2, bh_8_bw_4) {
  TEST_REQUIRES_X86_SSE2;
  // Single block of height 8 and width 4; strides match the block dimensions.
  const size_t height = 8;
  const size_t width = 4;
  TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__4x4_multi_mov_sse2);
}
2421
TEST(X32_TRANSPOSE__4X4_MULTI_MOV_SSE2, bh_5_8_bw_4) {
  TEST_REQUIRES_X86_SSE2;
  // Width is fixed at 4 while the height takes the values 5, 6 and 7.
  const size_t width = 4;
  for (size_t height = 5; height <= 7; ++height) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .iterations(1)
        .Test(xnn_x32_transpose_ukernel__4x4_multi_mov_sse2);
  }
}
2434
TEST(X32_TRANSPOSE__4X4_MULTI_MOV_SSE2, bh_5_8_bw_8) {
  TEST_REQUIRES_X86_SSE2;
  // Width is fixed at 8 while the height takes the values 5, 6 and 7.
  const size_t width = 8;
  for (size_t height = 5; height <= 7; ++height) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .iterations(1)
        .Test(xnn_x32_transpose_ukernel__4x4_multi_mov_sse2);
  }
}
2447
TEST(X32_TRANSPOSE__4X4_MULTI_MOV_SSE2, bh_5_8_bw_5_8) {
  TEST_REQUIRES_X86_SSE2;
  // Every height/width combination drawn from {5, 6, 7}; each stride is set
  // to the matching block dimension.
  for (size_t height = 5; height <= 7; ++height) {
    for (size_t width = 5; width <= 7; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width)
          .output_stride(height)
          .block_width(width)
          .block_height(height)
          .iterations(1)
          .Test(xnn_x32_transpose_ukernel__4x4_multi_mov_sse2);
    }
  }
}
2462
TEST(X32_TRANSPOSE__4X4_MULTI_MOV_SSE2, bh_4_bw_4_is_8) {
  TEST_REQUIRES_X86_SSE2;
  // 4x4 block with an input stride of 8, larger than the block width of 4.
  const size_t block_dim = 4;
  const size_t wide_input_stride = 8;
  TransposeMicrokernelTester()
      .input_stride(wide_input_stride)
      .output_stride(block_dim)
      .block_width(block_dim)
      .block_height(block_dim)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__4x4_multi_mov_sse2);
}
2473
TEST(X32_TRANSPOSE__4X4_MULTI_MOV_SSE2, bh_4_bw_4_os_8) {
  TEST_REQUIRES_X86_SSE2;
  // 4x4 block with an output stride of 8, larger than the block height of 4.
  const size_t block_dim = 4;
  const size_t wide_output_stride = 8;
  TransposeMicrokernelTester()
      .input_stride(block_dim)
      .output_stride(wide_output_stride)
      .block_width(block_dim)
      .block_height(block_dim)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__4x4_multi_mov_sse2);
}
2484
TEST(X32_TRANSPOSE__4X4_MULTI_MOV_SSE2, bh_4_bw_4_is_8_os_8) {
  TEST_REQUIRES_X86_SSE2;
  // 4x4 block with both the input and the output stride set to 8.
  const size_t block_dim = 4;
  const size_t wide_stride = 8;
  TransposeMicrokernelTester()
      .input_stride(wide_stride)
      .output_stride(wide_stride)
      .block_width(block_dim)
      .block_height(block_dim)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__4x4_multi_mov_sse2);
}
2495 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
2496
2497
2498 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(X32_TRANSPOSE__4X4_MULTI_MULTI_SSE2, bh_4_bw_4) {
  TEST_REQUIRES_X86_SSE2;
  // Single 4x4 block; both strides equal the block dimension.
  const size_t block_dim = 4;
  TransposeMicrokernelTester()
      .input_stride(block_dim)
      .output_stride(block_dim)
      .block_width(block_dim)
      .block_height(block_dim)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__4x4_multi_multi_sse2);
}
2509
TEST(X32_TRANSPOSE__4X4_MULTI_MULTI_SSE2, bh_1_8_bw_1_8) {
  TEST_REQUIRES_X86_SSE2;
  // Sweep heights 1..8 against widths 1..8; each stride is set to the
  // matching block dimension.
  for (size_t height = 1; height <= 8; ++height) {
    for (size_t width = 1; width <= 8; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width)
          .output_stride(height)
          .block_width(width)
          .block_height(height)
          .iterations(1)
          .Test(xnn_x32_transpose_ukernel__4x4_multi_multi_sse2);
    }
  }
}
2524
TEST(X32_TRANSPOSE__4X4_MULTI_MULTI_SSE2, bh_4_bw_8) {
  TEST_REQUIRES_X86_SSE2;
  // Single block of height 4 and width 8; strides match the block dimensions.
  const size_t height = 4;
  const size_t width = 8;
  TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__4x4_multi_multi_sse2);
}
2535
TEST(X32_TRANSPOSE__4X4_MULTI_MULTI_SSE2, bh_4_bw_5_8) {
  TEST_REQUIRES_X86_SSE2;
  // Height is fixed at 4 while the width takes the values 5, 6 and 7.
  const size_t height = 4;
  for (size_t width = 5; width <= 7; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .iterations(1)
        .Test(xnn_x32_transpose_ukernel__4x4_multi_multi_sse2);
  }
}
2548
TEST(X32_TRANSPOSE__4X4_MULTI_MULTI_SSE2, bh_8_bw_5_8) {
  TEST_REQUIRES_X86_SSE2;
  // Height is fixed at 8 while the width takes the values 5, 6 and 7.
  const size_t height = 8;
  for (size_t width = 5; width <= 7; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .iterations(1)
        .Test(xnn_x32_transpose_ukernel__4x4_multi_multi_sse2);
  }
}
2561
TEST(X32_TRANSPOSE__4X4_MULTI_MULTI_SSE2, bh_8_bw_4) {
  TEST_REQUIRES_X86_SSE2;
  // Single block of height 8 and width 4; strides match the block dimensions.
  const size_t height = 8;
  const size_t width = 4;
  TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__4x4_multi_multi_sse2);
}
2572
TEST(X32_TRANSPOSE__4X4_MULTI_MULTI_SSE2, bh_5_8_bw_4) {
  TEST_REQUIRES_X86_SSE2;
  // Width is fixed at 4 while the height takes the values 5, 6 and 7.
  const size_t width = 4;
  for (size_t height = 5; height <= 7; ++height) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .iterations(1)
        .Test(xnn_x32_transpose_ukernel__4x4_multi_multi_sse2);
  }
}
2585
TEST(X32_TRANSPOSE__4X4_MULTI_MULTI_SSE2, bh_5_8_bw_8) {
  TEST_REQUIRES_X86_SSE2;
  // Width is fixed at 8 while the height takes the values 5, 6 and 7.
  const size_t width = 8;
  for (size_t height = 5; height <= 7; ++height) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .iterations(1)
        .Test(xnn_x32_transpose_ukernel__4x4_multi_multi_sse2);
  }
}
2598
TEST(X32_TRANSPOSE__4X4_MULTI_MULTI_SSE2, bh_5_8_bw_5_8) {
  TEST_REQUIRES_X86_SSE2;
  // Every height/width combination drawn from {5, 6, 7}; each stride is set
  // to the matching block dimension.
  for (size_t height = 5; height <= 7; ++height) {
    for (size_t width = 5; width <= 7; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width)
          .output_stride(height)
          .block_width(width)
          .block_height(height)
          .iterations(1)
          .Test(xnn_x32_transpose_ukernel__4x4_multi_multi_sse2);
    }
  }
}
2613
TEST(X32_TRANSPOSE__4X4_MULTI_MULTI_SSE2, bh_4_bw_4_is_8) {
  TEST_REQUIRES_X86_SSE2;
  // 4x4 block with an input stride of 8, larger than the block width of 4.
  const size_t block_dim = 4;
  const size_t wide_input_stride = 8;
  TransposeMicrokernelTester()
      .input_stride(wide_input_stride)
      .output_stride(block_dim)
      .block_width(block_dim)
      .block_height(block_dim)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__4x4_multi_multi_sse2);
}
2624
TEST(X32_TRANSPOSE__4X4_MULTI_MULTI_SSE2, bh_4_bw_4_os_8) {
  TEST_REQUIRES_X86_SSE2;
  // 4x4 block with an output stride of 8, larger than the block height of 4.
  const size_t block_dim = 4;
  const size_t wide_output_stride = 8;
  TransposeMicrokernelTester()
      .input_stride(block_dim)
      .output_stride(wide_output_stride)
      .block_width(block_dim)
      .block_height(block_dim)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__4x4_multi_multi_sse2);
}
2635
TEST(X32_TRANSPOSE__4X4_MULTI_MULTI_SSE2, bh_4_bw_4_is_8_os_8) {
  TEST_REQUIRES_X86_SSE2;
  // 4x4 block with both the input and the output stride set to 8.
  const size_t block_dim = 4;
  const size_t wide_stride = 8;
  TransposeMicrokernelTester()
      .input_stride(wide_stride)
      .output_stride(wide_stride)
      .block_width(block_dim)
      .block_height(block_dim)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__4x4_multi_multi_sse2);
}
2646 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
2647
2648
2649 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(X32_TRANSPOSE__4X4_MULTI_SWITCH_SSE2, bh_4_bw_4) {
  TEST_REQUIRES_X86_SSE2;
  // Single 4x4 block; both strides equal the block dimension.
  const size_t block_dim = 4;
  TransposeMicrokernelTester()
      .input_stride(block_dim)
      .output_stride(block_dim)
      .block_width(block_dim)
      .block_height(block_dim)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__4x4_multi_switch_sse2);
}
2660
TEST(X32_TRANSPOSE__4X4_MULTI_SWITCH_SSE2, bh_1_8_bw_1_8) {
  TEST_REQUIRES_X86_SSE2;
  // Sweep heights 1..8 against widths 1..8; each stride is set to the
  // matching block dimension.
  for (size_t height = 1; height <= 8; ++height) {
    for (size_t width = 1; width <= 8; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width)
          .output_stride(height)
          .block_width(width)
          .block_height(height)
          .iterations(1)
          .Test(xnn_x32_transpose_ukernel__4x4_multi_switch_sse2);
    }
  }
}
2675
TEST(X32_TRANSPOSE__4X4_MULTI_SWITCH_SSE2, bh_4_bw_8) {
  TEST_REQUIRES_X86_SSE2;
  // Single block of height 4 and width 8; strides match the block dimensions.
  const size_t height = 4;
  const size_t width = 8;
  TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__4x4_multi_switch_sse2);
}
2686
TEST(X32_TRANSPOSE__4X4_MULTI_SWITCH_SSE2, bh_4_bw_5_8) {
  TEST_REQUIRES_X86_SSE2;
  // Height is fixed at 4 while the width takes the values 5, 6 and 7.
  const size_t height = 4;
  for (size_t width = 5; width <= 7; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .iterations(1)
        .Test(xnn_x32_transpose_ukernel__4x4_multi_switch_sse2);
  }
}
2699
TEST(X32_TRANSPOSE__4X4_MULTI_SWITCH_SSE2, bh_8_bw_5_8) {
  TEST_REQUIRES_X86_SSE2;
  // Height is fixed at 8 while the width takes the values 5, 6 and 7.
  const size_t height = 8;
  for (size_t width = 5; width <= 7; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .iterations(1)
        .Test(xnn_x32_transpose_ukernel__4x4_multi_switch_sse2);
  }
}
2712
TEST(X32_TRANSPOSE__4X4_MULTI_SWITCH_SSE2, bh_8_bw_4) {
  TEST_REQUIRES_X86_SSE2;
  // Single block of height 8 and width 4; strides match the block dimensions.
  const size_t height = 8;
  const size_t width = 4;
  TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__4x4_multi_switch_sse2);
}
2723
TEST(X32_TRANSPOSE__4X4_MULTI_SWITCH_SSE2, bh_5_8_bw_4) {
  TEST_REQUIRES_X86_SSE2;
  // Width is fixed at 4 while the height takes the values 5, 6 and 7.
  const size_t width = 4;
  for (size_t height = 5; height <= 7; ++height) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .iterations(1)
        .Test(xnn_x32_transpose_ukernel__4x4_multi_switch_sse2);
  }
}
2736
TEST(X32_TRANSPOSE__4X4_MULTI_SWITCH_SSE2, bh_5_8_bw_8) {
  TEST_REQUIRES_X86_SSE2;
  // Width is fixed at 8 while the height takes the values 5, 6 and 7.
  const size_t width = 8;
  for (size_t height = 5; height <= 7; ++height) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .iterations(1)
        .Test(xnn_x32_transpose_ukernel__4x4_multi_switch_sse2);
  }
}
2749
TEST(X32_TRANSPOSE__4X4_MULTI_SWITCH_SSE2, bh_5_8_bw_5_8) {
  TEST_REQUIRES_X86_SSE2;
  // Every height/width combination drawn from {5, 6, 7}; each stride is set
  // to the matching block dimension.
  for (size_t height = 5; height <= 7; ++height) {
    for (size_t width = 5; width <= 7; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width)
          .output_stride(height)
          .block_width(width)
          .block_height(height)
          .iterations(1)
          .Test(xnn_x32_transpose_ukernel__4x4_multi_switch_sse2);
    }
  }
}
2764
TEST(X32_TRANSPOSE__4X4_MULTI_SWITCH_SSE2, bh_4_bw_4_is_8) {
  TEST_REQUIRES_X86_SSE2;
  // 4x4 block with an input stride of 8, larger than the block width of 4.
  const size_t block_dim = 4;
  const size_t wide_input_stride = 8;
  TransposeMicrokernelTester()
      .input_stride(wide_input_stride)
      .output_stride(block_dim)
      .block_width(block_dim)
      .block_height(block_dim)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__4x4_multi_switch_sse2);
}
2775
TEST(X32_TRANSPOSE__4X4_MULTI_SWITCH_SSE2, bh_4_bw_4_os_8) {
  TEST_REQUIRES_X86_SSE2;
  // 4x4 block with an output stride of 8, larger than the block height of 4.
  const size_t block_dim = 4;
  const size_t wide_output_stride = 8;
  TransposeMicrokernelTester()
      .input_stride(block_dim)
      .output_stride(wide_output_stride)
      .block_width(block_dim)
      .block_height(block_dim)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__4x4_multi_switch_sse2);
}
2786
TEST(X32_TRANSPOSE__4X4_MULTI_SWITCH_SSE2, bh_4_bw_4_is_8_os_8) {
  TEST_REQUIRES_X86_SSE2;
  // 4x4 block with both the input and the output stride set to 8.
  const size_t block_dim = 4;
  const size_t wide_stride = 8;
  TransposeMicrokernelTester()
      .input_stride(wide_stride)
      .output_stride(wide_stride)
      .block_width(block_dim)
      .block_height(block_dim)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__4x4_multi_switch_sse2);
}
2797 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
2798
2799
2800 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(X32_TRANSPOSE__4X4_REUSE_MOV_SSE2, bh_4_bw_4) {
  TEST_REQUIRES_X86_SSE2;
  // Single 4x4 block; both strides equal the block dimension.
  const size_t block_dim = 4;
  TransposeMicrokernelTester()
      .input_stride(block_dim)
      .output_stride(block_dim)
      .block_width(block_dim)
      .block_height(block_dim)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__4x4_reuse_mov_sse2);
}
2811
TEST(X32_TRANSPOSE__4X4_REUSE_MOV_SSE2, bh_1_8_bw_1_8) {
  TEST_REQUIRES_X86_SSE2;
  // Sweep heights 1..8 against widths 1..8; each stride is set to the
  // matching block dimension.
  for (size_t height = 1; height <= 8; ++height) {
    for (size_t width = 1; width <= 8; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width)
          .output_stride(height)
          .block_width(width)
          .block_height(height)
          .iterations(1)
          .Test(xnn_x32_transpose_ukernel__4x4_reuse_mov_sse2);
    }
  }
}
2826
TEST(X32_TRANSPOSE__4X4_REUSE_MOV_SSE2, bh_4_bw_8) {
  TEST_REQUIRES_X86_SSE2;
  // Single block of height 4 and width 8; strides match the block dimensions.
  const size_t height = 4;
  const size_t width = 8;
  TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__4x4_reuse_mov_sse2);
}
2837
TEST(X32_TRANSPOSE__4X4_REUSE_MOV_SSE2, bh_4_bw_5_8) {
  TEST_REQUIRES_X86_SSE2;
  // Height is fixed at 4 while the width takes the values 5, 6 and 7.
  const size_t height = 4;
  for (size_t width = 5; width <= 7; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .iterations(1)
        .Test(xnn_x32_transpose_ukernel__4x4_reuse_mov_sse2);
  }
}
2850
TEST(X32_TRANSPOSE__4X4_REUSE_MOV_SSE2, bh_8_bw_5_8) {
  TEST_REQUIRES_X86_SSE2;
  // Height is fixed at 8 while the width takes the values 5, 6 and 7.
  const size_t height = 8;
  for (size_t width = 5; width <= 7; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .iterations(1)
        .Test(xnn_x32_transpose_ukernel__4x4_reuse_mov_sse2);
  }
}
2863
TEST(X32_TRANSPOSE__4X4_REUSE_MOV_SSE2, bh_8_bw_4) {
  TEST_REQUIRES_X86_SSE2;
  // Single block of height 8 and width 4; strides match the block dimensions.
  const size_t height = 8;
  const size_t width = 4;
  TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__4x4_reuse_mov_sse2);
}
2874
TEST(X32_TRANSPOSE__4X4_REUSE_MOV_SSE2, bh_5_8_bw_4) {
  TEST_REQUIRES_X86_SSE2;
  // Width is fixed at 4 while the height takes the values 5, 6 and 7.
  const size_t width = 4;
  for (size_t height = 5; height <= 7; ++height) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .iterations(1)
        .Test(xnn_x32_transpose_ukernel__4x4_reuse_mov_sse2);
  }
}
2887
TEST(X32_TRANSPOSE__4X4_REUSE_MOV_SSE2, bh_5_8_bw_8) {
  TEST_REQUIRES_X86_SSE2;
  // Width is fixed at 8 while the height takes the values 5, 6 and 7.
  const size_t width = 8;
  for (size_t height = 5; height <= 7; ++height) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .iterations(1)
        .Test(xnn_x32_transpose_ukernel__4x4_reuse_mov_sse2);
  }
}
2900
TEST(X32_TRANSPOSE__4X4_REUSE_MOV_SSE2, bh_5_8_bw_5_8) {
  TEST_REQUIRES_X86_SSE2;
  // Every height/width combination drawn from {5, 6, 7}; each stride is set
  // to the matching block dimension.
  for (size_t height = 5; height <= 7; ++height) {
    for (size_t width = 5; width <= 7; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width)
          .output_stride(height)
          .block_width(width)
          .block_height(height)
          .iterations(1)
          .Test(xnn_x32_transpose_ukernel__4x4_reuse_mov_sse2);
    }
  }
}
2915
TEST(X32_TRANSPOSE__4X4_REUSE_MOV_SSE2, bh_4_bw_4_is_8) {
  TEST_REQUIRES_X86_SSE2;
  // 4x4 block with an input stride of 8, larger than the block width of 4.
  const size_t block_dim = 4;
  const size_t wide_input_stride = 8;
  TransposeMicrokernelTester()
      .input_stride(wide_input_stride)
      .output_stride(block_dim)
      .block_width(block_dim)
      .block_height(block_dim)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__4x4_reuse_mov_sse2);
}
2926
TEST(X32_TRANSPOSE__4X4_REUSE_MOV_SSE2, bh_4_bw_4_os_8) {
  TEST_REQUIRES_X86_SSE2;
  // 4x4 block with an output stride of 8, larger than the block height of 4.
  const size_t block_dim = 4;
  const size_t wide_output_stride = 8;
  TransposeMicrokernelTester()
      .input_stride(block_dim)
      .output_stride(wide_output_stride)
      .block_width(block_dim)
      .block_height(block_dim)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__4x4_reuse_mov_sse2);
}
2937
TEST(X32_TRANSPOSE__4X4_REUSE_MOV_SSE2, bh_4_bw_4_is_8_os_8) {
  TEST_REQUIRES_X86_SSE2;
  // 4x4 block with both the input and the output stride set to 8.
  const size_t block_dim = 4;
  const size_t wide_stride = 8;
  TransposeMicrokernelTester()
      .input_stride(wide_stride)
      .output_stride(wide_stride)
      .block_width(block_dim)
      .block_height(block_dim)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__4x4_reuse_mov_sse2);
}
2948 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
2949
2950
2951 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(X32_TRANSPOSE__4X4_REUSE_MULTI_SSE2, bh_4_bw_4) {
  TEST_REQUIRES_X86_SSE2;
  // Single 4x4 block; both strides equal the block dimension.
  const size_t block_dim = 4;
  TransposeMicrokernelTester()
      .input_stride(block_dim)
      .output_stride(block_dim)
      .block_width(block_dim)
      .block_height(block_dim)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__4x4_reuse_multi_sse2);
}
2962
TEST(X32_TRANSPOSE__4X4_REUSE_MULTI_SSE2, bh_1_8_bw_1_8) {
  TEST_REQUIRES_X86_SSE2;
  // Sweep heights 1..8 against widths 1..8; each stride is set to the
  // matching block dimension.
  for (size_t height = 1; height <= 8; ++height) {
    for (size_t width = 1; width <= 8; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width)
          .output_stride(height)
          .block_width(width)
          .block_height(height)
          .iterations(1)
          .Test(xnn_x32_transpose_ukernel__4x4_reuse_multi_sse2);
    }
  }
}
2977
TEST(X32_TRANSPOSE__4X4_REUSE_MULTI_SSE2, bh_4_bw_8) {
  TEST_REQUIRES_X86_SSE2;
  // Single block of height 4 and width 8; strides match the block dimensions.
  const size_t height = 4;
  const size_t width = 8;
  TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__4x4_reuse_multi_sse2);
}
2988
TEST(X32_TRANSPOSE__4X4_REUSE_MULTI_SSE2, bh_4_bw_5_8) {
  TEST_REQUIRES_X86_SSE2;
  // Height is fixed at 4 while the width takes the values 5, 6 and 7.
  const size_t height = 4;
  for (size_t width = 5; width <= 7; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .iterations(1)
        .Test(xnn_x32_transpose_ukernel__4x4_reuse_multi_sse2);
  }
}
3001
TEST(X32_TRANSPOSE__4X4_REUSE_MULTI_SSE2, bh_8_bw_5_8) {
  TEST_REQUIRES_X86_SSE2;
  // Height is fixed at 8 while the width takes the values 5, 6 and 7.
  const size_t height = 8;
  for (size_t width = 5; width <= 7; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .iterations(1)
        .Test(xnn_x32_transpose_ukernel__4x4_reuse_multi_sse2);
  }
}
3014
TEST(X32_TRANSPOSE__4X4_REUSE_MULTI_SSE2, bh_8_bw_4) {
  TEST_REQUIRES_X86_SSE2;
  // Single block of height 8 and width 4; strides match the block dimensions.
  const size_t height = 8;
  const size_t width = 4;
  TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__4x4_reuse_multi_sse2);
}
3025
TEST(X32_TRANSPOSE__4X4_REUSE_MULTI_SSE2, bh_5_8_bw_4) {
  TEST_REQUIRES_X86_SSE2;
  // Width is fixed at 4 while the height takes the values 5, 6 and 7.
  const size_t width = 4;
  for (size_t height = 5; height <= 7; ++height) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .iterations(1)
        .Test(xnn_x32_transpose_ukernel__4x4_reuse_multi_sse2);
  }
}
3038
TEST(X32_TRANSPOSE__4X4_REUSE_MULTI_SSE2, bh_5_8_bw_8) {
  TEST_REQUIRES_X86_SSE2;
  // Width is fixed at 8 while the height takes the values 5, 6 and 7.
  const size_t width = 8;
  for (size_t height = 5; height <= 7; ++height) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .iterations(1)
        .Test(xnn_x32_transpose_ukernel__4x4_reuse_multi_sse2);
  }
}
3051
TEST(X32_TRANSPOSE__4X4_REUSE_MULTI_SSE2, bh_5_8_bw_5_8) {
  TEST_REQUIRES_X86_SSE2;
  // Every height/width combination drawn from {5, 6, 7}; each stride is set
  // to the matching block dimension.
  for (size_t height = 5; height <= 7; ++height) {
    for (size_t width = 5; width <= 7; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width)
          .output_stride(height)
          .block_width(width)
          .block_height(height)
          .iterations(1)
          .Test(xnn_x32_transpose_ukernel__4x4_reuse_multi_sse2);
    }
  }
}
3066
TEST(X32_TRANSPOSE__4X4_REUSE_MULTI_SSE2, bh_4_bw_4_is_8) {
  TEST_REQUIRES_X86_SSE2;
  // 4x4 block with an input stride of 8, larger than the block width of 4.
  const size_t block_dim = 4;
  const size_t wide_input_stride = 8;
  TransposeMicrokernelTester()
      .input_stride(wide_input_stride)
      .output_stride(block_dim)
      .block_width(block_dim)
      .block_height(block_dim)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__4x4_reuse_multi_sse2);
}
3077
TEST(X32_TRANSPOSE__4X4_REUSE_MULTI_SSE2, bh_4_bw_4_os_8) {
  TEST_REQUIRES_X86_SSE2;
  // 4x4 block with an output stride of 8, larger than the block height of 4.
  const size_t block_dim = 4;
  const size_t wide_output_stride = 8;
  TransposeMicrokernelTester()
      .input_stride(block_dim)
      .output_stride(wide_output_stride)
      .block_width(block_dim)
      .block_height(block_dim)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__4x4_reuse_multi_sse2);
}
3088
TEST(X32_TRANSPOSE__4X4_REUSE_MULTI_SSE2, bh_4_bw_4_is_8_os_8) {
  TEST_REQUIRES_X86_SSE2;
  // 4x4 block with both the input and the output stride set to 8.
  const size_t block_dim = 4;
  const size_t wide_stride = 8;
  TransposeMicrokernelTester()
      .input_stride(wide_stride)
      .output_stride(wide_stride)
      .block_width(block_dim)
      .block_height(block_dim)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__4x4_reuse_multi_sse2);
}
3099 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
3100
3101
3102 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(X32_TRANSPOSE__4X4_REUSE_SWITCH_SSE2, bh_4_bw_4) {
  TEST_REQUIRES_X86_SSE2;
  // Single 4x4 block; both strides equal the block dimension.
  const size_t block_dim = 4;
  TransposeMicrokernelTester()
      .input_stride(block_dim)
      .output_stride(block_dim)
      .block_width(block_dim)
      .block_height(block_dim)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__4x4_reuse_switch_sse2);
}
3113
TEST(X32_TRANSPOSE__4X4_REUSE_SWITCH_SSE2, bh_1_8_bw_1_8) {
  TEST_REQUIRES_X86_SSE2;
  // Sweep heights 1..8 against widths 1..8; each stride is set to the
  // matching block dimension.
  for (size_t height = 1; height <= 8; ++height) {
    for (size_t width = 1; width <= 8; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width)
          .output_stride(height)
          .block_width(width)
          .block_height(height)
          .iterations(1)
          .Test(xnn_x32_transpose_ukernel__4x4_reuse_switch_sse2);
    }
  }
}
3128
TEST(X32_TRANSPOSE__4X4_REUSE_SWITCH_SSE2, bh_4_bw_8) {
  TEST_REQUIRES_X86_SSE2;
  // Single block of height 4 and width 8; strides match the block dimensions.
  const size_t height = 4;
  const size_t width = 8;
  TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__4x4_reuse_switch_sse2);
}
3139
TEST(X32_TRANSPOSE__4X4_REUSE_SWITCH_SSE2, bh_4_bw_5_8) {
  TEST_REQUIRES_X86_SSE2;
  // Height is fixed at 4 while the width takes the values 5, 6 and 7.
  const size_t height = 4;
  for (size_t width = 5; width <= 7; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .iterations(1)
        .Test(xnn_x32_transpose_ukernel__4x4_reuse_switch_sse2);
  }
}
3152
TEST(X32_TRANSPOSE__4X4_REUSE_SWITCH_SSE2, bh_8_bw_5_8) {
  TEST_REQUIRES_X86_SSE2;
  // Height is fixed at 8 while the width takes the values 5, 6 and 7.
  const size_t height = 8;
  for (size_t width = 5; width <= 7; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .iterations(1)
        .Test(xnn_x32_transpose_ukernel__4x4_reuse_switch_sse2);
  }
}
3165
// Block of height 8 and width 4; strides equal the block dimensions.
TEST(X32_TRANSPOSE__4X4_REUSE_SWITCH_SSE2, bh_8_bw_4) {
  TEST_REQUIRES_X86_SSE2;
  const size_t bw = 4;
  const size_t bh = 8;
  TransposeMicrokernelTester()
      .input_stride(bw)
      .output_stride(bh)
      .block_width(bw)
      .block_height(bh)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__4x4_reuse_switch_sse2);
}
3176
// Width fixed at 4; height takes 5, 6 and 7 (the upper bound 8 is exclusive).
TEST(X32_TRANSPOSE__4X4_REUSE_SWITCH_SSE2, bh_5_8_bw_4) {
  TEST_REQUIRES_X86_SSE2;
  const size_t bw = 4;
  for (size_t bh = 5; bh < 8; ++bh) {
    TransposeMicrokernelTester()
        .input_stride(bw)
        .output_stride(bh)
        .block_width(bw)
        .block_height(bh)
        .iterations(1)
        .Test(xnn_x32_transpose_ukernel__4x4_reuse_switch_sse2);
  }
}
3189
// Width fixed at 8; height takes 5, 6 and 7 (the upper bound 8 is exclusive).
TEST(X32_TRANSPOSE__4X4_REUSE_SWITCH_SSE2, bh_5_8_bw_8) {
  TEST_REQUIRES_X86_SSE2;
  const size_t bw = 8;
  for (size_t bh = 5; bh < 8; ++bh) {
    TransposeMicrokernelTester()
        .input_stride(bw)
        .output_stride(bh)
        .block_width(bw)
        .block_height(bh)
        .iterations(1)
        .Test(xnn_x32_transpose_ukernel__4x4_reuse_switch_sse2);
  }
}
3202
// Sweeps height and width over 5, 6 and 7 (the upper bound 8 is exclusive);
// each stride equals the matching block dimension.
TEST(X32_TRANSPOSE__4X4_REUSE_SWITCH_SSE2, bh_5_8_bw_5_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t bh = 5; bh < 8; ++bh) {
    for (size_t bw = 5; bw < 8; ++bw) {
      TransposeMicrokernelTester()
          .input_stride(bw)
          .output_stride(bh)
          .block_width(bw)
          .block_height(bh)
          .iterations(1)
          .Test(xnn_x32_transpose_ukernel__4x4_reuse_switch_sse2);
    }
  }
}
3217
// 4x4 block with the input stride (8) larger than the block width.
TEST(X32_TRANSPOSE__4X4_REUSE_SWITCH_SSE2, bh_4_bw_4_is_8) {
  TEST_REQUIRES_X86_SSE2;
  const size_t dim = 4;
  const size_t in_stride = 8;
  TransposeMicrokernelTester()
      .input_stride(in_stride)
      .output_stride(dim)
      .block_width(dim)
      .block_height(dim)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__4x4_reuse_switch_sse2);
}
3228
// 4x4 block with the output stride (8) larger than the block height.
TEST(X32_TRANSPOSE__4X4_REUSE_SWITCH_SSE2, bh_4_bw_4_os_8) {
  TEST_REQUIRES_X86_SSE2;
  const size_t dim = 4;
  const size_t out_stride = 8;
  TransposeMicrokernelTester()
      .input_stride(dim)
      .output_stride(out_stride)
      .block_width(dim)
      .block_height(dim)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__4x4_reuse_switch_sse2);
}
3239
// 4x4 block with both input and output strides set to 8.
TEST(X32_TRANSPOSE__4X4_REUSE_SWITCH_SSE2, bh_4_bw_4_is_8_os_8) {
  TEST_REQUIRES_X86_SSE2;
  const size_t dim = 4;
  const size_t stride = 8;
  TransposeMicrokernelTester()
      .input_stride(stride)
      .output_stride(stride)
      .block_width(dim)
      .block_height(dim)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__4x4_reuse_switch_sse2);
}
3250 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
3251
3252
3253 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Exercises the kernel on a 4x4 block with input and output strides of 4.
TEST(X32_TRANSPOSE__4X4_SSE, bh_4_bw_4) {
  TEST_REQUIRES_X86_SSE;
  const size_t dim = 4;
  TransposeMicrokernelTester()
      .input_stride(dim)
      .output_stride(dim)
      .block_width(dim)
      .block_height(dim)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__4x4_sse);
}
3264
// Sweeps block height and width over 1..8 (inclusive); each stride equals
// the matching block dimension.
TEST(X32_TRANSPOSE__4X4_SSE, bh_1_8_bw_1_8) {
  TEST_REQUIRES_X86_SSE;
  for (size_t bh = 1; bh <= 8; ++bh) {
    for (size_t bw = 1; bw <= 8; ++bw) {
      TransposeMicrokernelTester()
          .input_stride(bw)
          .output_stride(bh)
          .block_width(bw)
          .block_height(bh)
          .iterations(1)
          .Test(xnn_x32_transpose_ukernel__4x4_sse);
    }
  }
}
3279
// Block of height 4 and width 8; strides equal the block dimensions.
TEST(X32_TRANSPOSE__4X4_SSE, bh_4_bw_8) {
  TEST_REQUIRES_X86_SSE;
  const size_t bw = 8;
  const size_t bh = 4;
  TransposeMicrokernelTester()
      .input_stride(bw)
      .output_stride(bh)
      .block_width(bw)
      .block_height(bh)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__4x4_sse);
}
3290
// Height fixed at 4; width takes 5, 6 and 7 (the upper bound 8 is exclusive).
TEST(X32_TRANSPOSE__4X4_SSE, bh_4_bw_5_8) {
  TEST_REQUIRES_X86_SSE;
  const size_t bh = 4;
  for (size_t bw = 5; bw < 8; ++bw) {
    TransposeMicrokernelTester()
        .input_stride(bw)
        .output_stride(bh)
        .block_width(bw)
        .block_height(bh)
        .iterations(1)
        .Test(xnn_x32_transpose_ukernel__4x4_sse);
  }
}
3303
// Height fixed at 8; width takes 5, 6 and 7 (the upper bound 8 is exclusive).
TEST(X32_TRANSPOSE__4X4_SSE, bh_8_bw_5_8) {
  TEST_REQUIRES_X86_SSE;
  const size_t bh = 8;
  for (size_t bw = 5; bw < 8; ++bw) {
    TransposeMicrokernelTester()
        .input_stride(bw)
        .output_stride(bh)
        .block_width(bw)
        .block_height(bh)
        .iterations(1)
        .Test(xnn_x32_transpose_ukernel__4x4_sse);
  }
}
3316
// Block of height 8 and width 4; strides equal the block dimensions.
TEST(X32_TRANSPOSE__4X4_SSE, bh_8_bw_4) {
  TEST_REQUIRES_X86_SSE;
  const size_t bw = 4;
  const size_t bh = 8;
  TransposeMicrokernelTester()
      .input_stride(bw)
      .output_stride(bh)
      .block_width(bw)
      .block_height(bh)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__4x4_sse);
}
3327
// Width fixed at 4; height takes 5, 6 and 7 (the upper bound 8 is exclusive).
TEST(X32_TRANSPOSE__4X4_SSE, bh_5_8_bw_4) {
  TEST_REQUIRES_X86_SSE;
  const size_t bw = 4;
  for (size_t bh = 5; bh < 8; ++bh) {
    TransposeMicrokernelTester()
        .input_stride(bw)
        .output_stride(bh)
        .block_width(bw)
        .block_height(bh)
        .iterations(1)
        .Test(xnn_x32_transpose_ukernel__4x4_sse);
  }
}
3340
// Width fixed at 8; height takes 5, 6 and 7 (the upper bound 8 is exclusive).
TEST(X32_TRANSPOSE__4X4_SSE, bh_5_8_bw_8) {
  TEST_REQUIRES_X86_SSE;
  const size_t bw = 8;
  for (size_t bh = 5; bh < 8; ++bh) {
    TransposeMicrokernelTester()
        .input_stride(bw)
        .output_stride(bh)
        .block_width(bw)
        .block_height(bh)
        .iterations(1)
        .Test(xnn_x32_transpose_ukernel__4x4_sse);
  }
}
3353
// Sweeps height and width over 5, 6 and 7 (the upper bound 8 is exclusive);
// each stride equals the matching block dimension.
TEST(X32_TRANSPOSE__4X4_SSE, bh_5_8_bw_5_8) {
  TEST_REQUIRES_X86_SSE;
  for (size_t bh = 5; bh < 8; ++bh) {
    for (size_t bw = 5; bw < 8; ++bw) {
      TransposeMicrokernelTester()
          .input_stride(bw)
          .output_stride(bh)
          .block_width(bw)
          .block_height(bh)
          .iterations(1)
          .Test(xnn_x32_transpose_ukernel__4x4_sse);
    }
  }
}
3368
// 4x4 block with the input stride (8) larger than the block width.
TEST(X32_TRANSPOSE__4X4_SSE, bh_4_bw_4_is_8) {
  TEST_REQUIRES_X86_SSE;
  const size_t dim = 4;
  const size_t in_stride = 8;
  TransposeMicrokernelTester()
      .input_stride(in_stride)
      .output_stride(dim)
      .block_width(dim)
      .block_height(dim)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__4x4_sse);
}
3379
// 4x4 block with the output stride (8) larger than the block height.
TEST(X32_TRANSPOSE__4X4_SSE, bh_4_bw_4_os_8) {
  TEST_REQUIRES_X86_SSE;
  const size_t dim = 4;
  const size_t out_stride = 8;
  TransposeMicrokernelTester()
      .input_stride(dim)
      .output_stride(out_stride)
      .block_width(dim)
      .block_height(dim)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__4x4_sse);
}
3390
// 4x4 block with both input and output strides set to 8.
TEST(X32_TRANSPOSE__4X4_SSE, bh_4_bw_4_is_8_os_8) {
  TEST_REQUIRES_X86_SSE;
  const size_t dim = 4;
  const size_t stride = 8;
  TransposeMicrokernelTester()
      .input_stride(stride)
      .output_stride(stride)
      .block_width(dim)
      .block_height(dim)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__4x4_sse);
}
3401 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
3402
3403
3404 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Exercises the kernel on a 4x4 block with input and output strides of 4.
TEST(X32_TRANSPOSE__4X4_WASMSIMD, bh_4_bw_4) {
  const size_t dim = 4;
  TransposeMicrokernelTester()
      .input_stride(dim)
      .output_stride(dim)
      .block_width(dim)
      .block_height(dim)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__4x4_wasmsimd);
}
3414
// Sweeps block height and width over 1..8 (inclusive); each stride equals
// the matching block dimension.
TEST(X32_TRANSPOSE__4X4_WASMSIMD, bh_1_8_bw_1_8) {
  for (size_t bh = 1; bh <= 8; ++bh) {
    for (size_t bw = 1; bw <= 8; ++bw) {
      TransposeMicrokernelTester()
          .input_stride(bw)
          .output_stride(bh)
          .block_width(bw)
          .block_height(bh)
          .iterations(1)
          .Test(xnn_x32_transpose_ukernel__4x4_wasmsimd);
    }
  }
}
3428
// Block of height 4 and width 8; strides equal the block dimensions.
TEST(X32_TRANSPOSE__4X4_WASMSIMD, bh_4_bw_8) {
  const size_t bw = 8;
  const size_t bh = 4;
  TransposeMicrokernelTester()
      .input_stride(bw)
      .output_stride(bh)
      .block_width(bw)
      .block_height(bh)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__4x4_wasmsimd);
}
3438
// Height fixed at 4; width takes 5, 6 and 7 (the upper bound 8 is exclusive).
TEST(X32_TRANSPOSE__4X4_WASMSIMD, bh_4_bw_5_8) {
  const size_t bh = 4;
  for (size_t bw = 5; bw < 8; ++bw) {
    TransposeMicrokernelTester()
        .input_stride(bw)
        .output_stride(bh)
        .block_width(bw)
        .block_height(bh)
        .iterations(1)
        .Test(xnn_x32_transpose_ukernel__4x4_wasmsimd);
  }
}
3450
// Height fixed at 8; width takes 5, 6 and 7 (the upper bound 8 is exclusive).
TEST(X32_TRANSPOSE__4X4_WASMSIMD, bh_8_bw_5_8) {
  const size_t bh = 8;
  for (size_t bw = 5; bw < 8; ++bw) {
    TransposeMicrokernelTester()
        .input_stride(bw)
        .output_stride(bh)
        .block_width(bw)
        .block_height(bh)
        .iterations(1)
        .Test(xnn_x32_transpose_ukernel__4x4_wasmsimd);
  }
}
3462
// Block of height 8 and width 4; strides equal the block dimensions.
TEST(X32_TRANSPOSE__4X4_WASMSIMD, bh_8_bw_4) {
  const size_t bw = 4;
  const size_t bh = 8;
  TransposeMicrokernelTester()
      .input_stride(bw)
      .output_stride(bh)
      .block_width(bw)
      .block_height(bh)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__4x4_wasmsimd);
}
3472
// Width fixed at 4; height takes 5, 6 and 7 (the upper bound 8 is exclusive).
TEST(X32_TRANSPOSE__4X4_WASMSIMD, bh_5_8_bw_4) {
  const size_t bw = 4;
  for (size_t bh = 5; bh < 8; ++bh) {
    TransposeMicrokernelTester()
        .input_stride(bw)
        .output_stride(bh)
        .block_width(bw)
        .block_height(bh)
        .iterations(1)
        .Test(xnn_x32_transpose_ukernel__4x4_wasmsimd);
  }
}
3484
// Width fixed at 8; height takes 5, 6 and 7 (the upper bound 8 is exclusive).
TEST(X32_TRANSPOSE__4X4_WASMSIMD, bh_5_8_bw_8) {
  const size_t bw = 8;
  for (size_t bh = 5; bh < 8; ++bh) {
    TransposeMicrokernelTester()
        .input_stride(bw)
        .output_stride(bh)
        .block_width(bw)
        .block_height(bh)
        .iterations(1)
        .Test(xnn_x32_transpose_ukernel__4x4_wasmsimd);
  }
}
3496
// Sweeps height and width over 5, 6 and 7 (the upper bound 8 is exclusive);
// each stride equals the matching block dimension.
TEST(X32_TRANSPOSE__4X4_WASMSIMD, bh_5_8_bw_5_8) {
  for (size_t bh = 5; bh < 8; ++bh) {
    for (size_t bw = 5; bw < 8; ++bw) {
      TransposeMicrokernelTester()
          .input_stride(bw)
          .output_stride(bh)
          .block_width(bw)
          .block_height(bh)
          .iterations(1)
          .Test(xnn_x32_transpose_ukernel__4x4_wasmsimd);
    }
  }
}
3510
// 4x4 block with the input stride (8) larger than the block width.
TEST(X32_TRANSPOSE__4X4_WASMSIMD, bh_4_bw_4_is_8) {
  const size_t dim = 4;
  const size_t in_stride = 8;
  TransposeMicrokernelTester()
      .input_stride(in_stride)
      .output_stride(dim)
      .block_width(dim)
      .block_height(dim)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__4x4_wasmsimd);
}
3520
// 4x4 block with the output stride (8) larger than the block height.
TEST(X32_TRANSPOSE__4X4_WASMSIMD, bh_4_bw_4_os_8) {
  const size_t dim = 4;
  const size_t out_stride = 8;
  TransposeMicrokernelTester()
      .input_stride(dim)
      .output_stride(out_stride)
      .block_width(dim)
      .block_height(dim)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__4x4_wasmsimd);
}
3530
// 4x4 block with both input and output strides set to 8.
TEST(X32_TRANSPOSE__4X4_WASMSIMD, bh_4_bw_4_is_8_os_8) {
  const size_t dim = 4;
  const size_t stride = 8;
  TransposeMicrokernelTester()
      .input_stride(stride)
      .output_stride(stride)
      .block_width(dim)
      .block_height(dim)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__4x4_wasmsimd);
}
3540 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
3541
3542
3543 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Exercises the kernel on a 4x4 block with input and output strides of 4.
TEST(X32_TRANSPOSE__4X4_MULTI_DEC_ZIP_NEON, bh_4_bw_4) {
  TEST_REQUIRES_ARM_NEON;
  const size_t dim = 4;
  TransposeMicrokernelTester()
      .input_stride(dim)
      .output_stride(dim)
      .block_width(dim)
      .block_height(dim)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__4x4_multi_dec_zip_neon);
}
3554
// Sweeps block height and width over 1..8 (inclusive); each stride equals
// the matching block dimension.
TEST(X32_TRANSPOSE__4X4_MULTI_DEC_ZIP_NEON, bh_1_8_bw_1_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bh = 1; bh <= 8; ++bh) {
    for (size_t bw = 1; bw <= 8; ++bw) {
      TransposeMicrokernelTester()
          .input_stride(bw)
          .output_stride(bh)
          .block_width(bw)
          .block_height(bh)
          .iterations(1)
          .Test(xnn_x32_transpose_ukernel__4x4_multi_dec_zip_neon);
    }
  }
}
3569
// Block of height 4 and width 8; strides equal the block dimensions.
TEST(X32_TRANSPOSE__4X4_MULTI_DEC_ZIP_NEON, bh_4_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  const size_t bw = 8;
  const size_t bh = 4;
  TransposeMicrokernelTester()
      .input_stride(bw)
      .output_stride(bh)
      .block_width(bw)
      .block_height(bh)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__4x4_multi_dec_zip_neon);
}
3580
// Height fixed at 4; width takes 5, 6 and 7 (the upper bound 8 is exclusive).
TEST(X32_TRANSPOSE__4X4_MULTI_DEC_ZIP_NEON, bh_4_bw_5_8) {
  TEST_REQUIRES_ARM_NEON;
  const size_t bh = 4;
  for (size_t bw = 5; bw < 8; ++bw) {
    TransposeMicrokernelTester()
        .input_stride(bw)
        .output_stride(bh)
        .block_width(bw)
        .block_height(bh)
        .iterations(1)
        .Test(xnn_x32_transpose_ukernel__4x4_multi_dec_zip_neon);
  }
}
3593
// Height fixed at 8; width takes 5, 6 and 7 (the upper bound 8 is exclusive).
TEST(X32_TRANSPOSE__4X4_MULTI_DEC_ZIP_NEON, bh_8_bw_5_8) {
  TEST_REQUIRES_ARM_NEON;
  const size_t bh = 8;
  for (size_t bw = 5; bw < 8; ++bw) {
    TransposeMicrokernelTester()
        .input_stride(bw)
        .output_stride(bh)
        .block_width(bw)
        .block_height(bh)
        .iterations(1)
        .Test(xnn_x32_transpose_ukernel__4x4_multi_dec_zip_neon);
  }
}
3606
// Block of height 8 and width 4; strides equal the block dimensions.
TEST(X32_TRANSPOSE__4X4_MULTI_DEC_ZIP_NEON, bh_8_bw_4) {
  TEST_REQUIRES_ARM_NEON;
  const size_t bw = 4;
  const size_t bh = 8;
  TransposeMicrokernelTester()
      .input_stride(bw)
      .output_stride(bh)
      .block_width(bw)
      .block_height(bh)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__4x4_multi_dec_zip_neon);
}
3617
// Width fixed at 4; height takes 5, 6 and 7 (the upper bound 8 is exclusive).
TEST(X32_TRANSPOSE__4X4_MULTI_DEC_ZIP_NEON, bh_5_8_bw_4) {
  TEST_REQUIRES_ARM_NEON;
  const size_t bw = 4;
  for (size_t bh = 5; bh < 8; ++bh) {
    TransposeMicrokernelTester()
        .input_stride(bw)
        .output_stride(bh)
        .block_width(bw)
        .block_height(bh)
        .iterations(1)
        .Test(xnn_x32_transpose_ukernel__4x4_multi_dec_zip_neon);
  }
}
3630
// Width fixed at 8; height takes 5, 6 and 7 (the upper bound 8 is exclusive).
TEST(X32_TRANSPOSE__4X4_MULTI_DEC_ZIP_NEON, bh_5_8_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  const size_t bw = 8;
  for (size_t bh = 5; bh < 8; ++bh) {
    TransposeMicrokernelTester()
        .input_stride(bw)
        .output_stride(bh)
        .block_width(bw)
        .block_height(bh)
        .iterations(1)
        .Test(xnn_x32_transpose_ukernel__4x4_multi_dec_zip_neon);
  }
}
3643
// Sweeps height and width over 5, 6 and 7 (the upper bound 8 is exclusive);
// each stride equals the matching block dimension.
TEST(X32_TRANSPOSE__4X4_MULTI_DEC_ZIP_NEON, bh_5_8_bw_5_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bh = 5; bh < 8; ++bh) {
    for (size_t bw = 5; bw < 8; ++bw) {
      TransposeMicrokernelTester()
          .input_stride(bw)
          .output_stride(bh)
          .block_width(bw)
          .block_height(bh)
          .iterations(1)
          .Test(xnn_x32_transpose_ukernel__4x4_multi_dec_zip_neon);
    }
  }
}
3658
// 4x4 block with the input stride (8) larger than the block width.
TEST(X32_TRANSPOSE__4X4_MULTI_DEC_ZIP_NEON, bh_4_bw_4_is_8) {
  TEST_REQUIRES_ARM_NEON;
  const size_t dim = 4;
  const size_t in_stride = 8;
  TransposeMicrokernelTester()
      .input_stride(in_stride)
      .output_stride(dim)
      .block_width(dim)
      .block_height(dim)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__4x4_multi_dec_zip_neon);
}
3669
// 4x4 block with the output stride (8) larger than the block height.
TEST(X32_TRANSPOSE__4X4_MULTI_DEC_ZIP_NEON, bh_4_bw_4_os_8) {
  TEST_REQUIRES_ARM_NEON;
  const size_t dim = 4;
  const size_t out_stride = 8;
  TransposeMicrokernelTester()
      .input_stride(dim)
      .output_stride(out_stride)
      .block_width(dim)
      .block_height(dim)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__4x4_multi_dec_zip_neon);
}
3680
// 4x4 block with both input and output strides set to 8.
TEST(X32_TRANSPOSE__4X4_MULTI_DEC_ZIP_NEON, bh_4_bw_4_is_8_os_8) {
  TEST_REQUIRES_ARM_NEON;
  const size_t dim = 4;
  const size_t stride = 8;
  TransposeMicrokernelTester()
      .input_stride(stride)
      .output_stride(stride)
      .block_width(dim)
      .block_height(dim)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__4x4_multi_dec_zip_neon);
}
3691 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
3692
3693
3694 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Exercises the kernel on a 4x4 block with input and output strides of 4.
TEST(X32_TRANSPOSE__4X4_MULTI_MOV_ZIP_NEON, bh_4_bw_4) {
  TEST_REQUIRES_ARM_NEON;
  const size_t dim = 4;
  TransposeMicrokernelTester()
      .input_stride(dim)
      .output_stride(dim)
      .block_width(dim)
      .block_height(dim)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__4x4_multi_mov_zip_neon);
}
3705
// Sweeps block height and width over 1..8 (inclusive); each stride equals
// the matching block dimension.
TEST(X32_TRANSPOSE__4X4_MULTI_MOV_ZIP_NEON, bh_1_8_bw_1_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bh = 1; bh <= 8; ++bh) {
    for (size_t bw = 1; bw <= 8; ++bw) {
      TransposeMicrokernelTester()
          .input_stride(bw)
          .output_stride(bh)
          .block_width(bw)
          .block_height(bh)
          .iterations(1)
          .Test(xnn_x32_transpose_ukernel__4x4_multi_mov_zip_neon);
    }
  }
}
3720
// Block of height 4 and width 8; strides equal the block dimensions.
TEST(X32_TRANSPOSE__4X4_MULTI_MOV_ZIP_NEON, bh_4_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  const size_t bw = 8;
  const size_t bh = 4;
  TransposeMicrokernelTester()
      .input_stride(bw)
      .output_stride(bh)
      .block_width(bw)
      .block_height(bh)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__4x4_multi_mov_zip_neon);
}
3731
// Height fixed at 4; width takes 5, 6 and 7 (the upper bound 8 is exclusive).
TEST(X32_TRANSPOSE__4X4_MULTI_MOV_ZIP_NEON, bh_4_bw_5_8) {
  TEST_REQUIRES_ARM_NEON;
  const size_t bh = 4;
  for (size_t bw = 5; bw < 8; ++bw) {
    TransposeMicrokernelTester()
        .input_stride(bw)
        .output_stride(bh)
        .block_width(bw)
        .block_height(bh)
        .iterations(1)
        .Test(xnn_x32_transpose_ukernel__4x4_multi_mov_zip_neon);
  }
}
3744
// Height fixed at 8; width takes 5, 6 and 7 (the upper bound 8 is exclusive).
TEST(X32_TRANSPOSE__4X4_MULTI_MOV_ZIP_NEON, bh_8_bw_5_8) {
  TEST_REQUIRES_ARM_NEON;
  const size_t bh = 8;
  for (size_t bw = 5; bw < 8; ++bw) {
    TransposeMicrokernelTester()
        .input_stride(bw)
        .output_stride(bh)
        .block_width(bw)
        .block_height(bh)
        .iterations(1)
        .Test(xnn_x32_transpose_ukernel__4x4_multi_mov_zip_neon);
  }
}
3757
// Block of height 8 and width 4; strides equal the block dimensions.
TEST(X32_TRANSPOSE__4X4_MULTI_MOV_ZIP_NEON, bh_8_bw_4) {
  TEST_REQUIRES_ARM_NEON;
  const size_t bw = 4;
  const size_t bh = 8;
  TransposeMicrokernelTester()
      .input_stride(bw)
      .output_stride(bh)
      .block_width(bw)
      .block_height(bh)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__4x4_multi_mov_zip_neon);
}
3768
// Width fixed at 4; height takes 5, 6 and 7 (the upper bound 8 is exclusive).
TEST(X32_TRANSPOSE__4X4_MULTI_MOV_ZIP_NEON, bh_5_8_bw_4) {
  TEST_REQUIRES_ARM_NEON;
  const size_t bw = 4;
  for (size_t bh = 5; bh < 8; ++bh) {
    TransposeMicrokernelTester()
        .input_stride(bw)
        .output_stride(bh)
        .block_width(bw)
        .block_height(bh)
        .iterations(1)
        .Test(xnn_x32_transpose_ukernel__4x4_multi_mov_zip_neon);
  }
}
3781
// Width fixed at 8; height takes 5, 6 and 7 (the upper bound 8 is exclusive).
TEST(X32_TRANSPOSE__4X4_MULTI_MOV_ZIP_NEON, bh_5_8_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  const size_t bw = 8;
  for (size_t bh = 5; bh < 8; ++bh) {
    TransposeMicrokernelTester()
        .input_stride(bw)
        .output_stride(bh)
        .block_width(bw)
        .block_height(bh)
        .iterations(1)
        .Test(xnn_x32_transpose_ukernel__4x4_multi_mov_zip_neon);
  }
}
3794
// Sweeps height and width over 5, 6 and 7 (the upper bound 8 is exclusive);
// each stride equals the matching block dimension.
TEST(X32_TRANSPOSE__4X4_MULTI_MOV_ZIP_NEON, bh_5_8_bw_5_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bh = 5; bh < 8; ++bh) {
    for (size_t bw = 5; bw < 8; ++bw) {
      TransposeMicrokernelTester()
          .input_stride(bw)
          .output_stride(bh)
          .block_width(bw)
          .block_height(bh)
          .iterations(1)
          .Test(xnn_x32_transpose_ukernel__4x4_multi_mov_zip_neon);
    }
  }
}
3809
// 4x4 block with the input stride (8) larger than the block width.
TEST(X32_TRANSPOSE__4X4_MULTI_MOV_ZIP_NEON, bh_4_bw_4_is_8) {
  TEST_REQUIRES_ARM_NEON;
  const size_t dim = 4;
  const size_t in_stride = 8;
  TransposeMicrokernelTester()
      .input_stride(in_stride)
      .output_stride(dim)
      .block_width(dim)
      .block_height(dim)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__4x4_multi_mov_zip_neon);
}
3820
// 4x4 block with the output stride (8) larger than the block height.
TEST(X32_TRANSPOSE__4X4_MULTI_MOV_ZIP_NEON, bh_4_bw_4_os_8) {
  TEST_REQUIRES_ARM_NEON;
  const size_t dim = 4;
  const size_t out_stride = 8;
  TransposeMicrokernelTester()
      .input_stride(dim)
      .output_stride(out_stride)
      .block_width(dim)
      .block_height(dim)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__4x4_multi_mov_zip_neon);
}
3831
// 4x4 block with both input and output strides set to 8.
TEST(X32_TRANSPOSE__4X4_MULTI_MOV_ZIP_NEON, bh_4_bw_4_is_8_os_8) {
  TEST_REQUIRES_ARM_NEON;
  const size_t dim = 4;
  const size_t stride = 8;
  TransposeMicrokernelTester()
      .input_stride(stride)
      .output_stride(stride)
      .block_width(dim)
      .block_height(dim)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__4x4_multi_mov_zip_neon);
}
3842 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
3843
3844
3845 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Exercises the kernel on a 4x4 block with input and output strides of 4.
TEST(X32_TRANSPOSE__4X4_MULTI_MULTI_ZIP_NEON, bh_4_bw_4) {
  TEST_REQUIRES_ARM_NEON;
  const size_t dim = 4;
  TransposeMicrokernelTester()
      .input_stride(dim)
      .output_stride(dim)
      .block_width(dim)
      .block_height(dim)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__4x4_multi_multi_zip_neon);
}
3856
// Sweeps block height and width over 1..8 (inclusive); each stride equals
// the matching block dimension.
TEST(X32_TRANSPOSE__4X4_MULTI_MULTI_ZIP_NEON, bh_1_8_bw_1_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bh = 1; bh <= 8; ++bh) {
    for (size_t bw = 1; bw <= 8; ++bw) {
      TransposeMicrokernelTester()
          .input_stride(bw)
          .output_stride(bh)
          .block_width(bw)
          .block_height(bh)
          .iterations(1)
          .Test(xnn_x32_transpose_ukernel__4x4_multi_multi_zip_neon);
    }
  }
}
3871
// Block of height 4 and width 8; strides equal the block dimensions.
TEST(X32_TRANSPOSE__4X4_MULTI_MULTI_ZIP_NEON, bh_4_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  const size_t bw = 8;
  const size_t bh = 4;
  TransposeMicrokernelTester()
      .input_stride(bw)
      .output_stride(bh)
      .block_width(bw)
      .block_height(bh)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__4x4_multi_multi_zip_neon);
}
3882
// Height fixed at 4; width takes 5, 6 and 7 (the upper bound 8 is exclusive).
TEST(X32_TRANSPOSE__4X4_MULTI_MULTI_ZIP_NEON, bh_4_bw_5_8) {
  TEST_REQUIRES_ARM_NEON;
  const size_t bh = 4;
  for (size_t bw = 5; bw < 8; ++bw) {
    TransposeMicrokernelTester()
        .input_stride(bw)
        .output_stride(bh)
        .block_width(bw)
        .block_height(bh)
        .iterations(1)
        .Test(xnn_x32_transpose_ukernel__4x4_multi_multi_zip_neon);
  }
}
3895
// Height fixed at 8; width takes 5, 6 and 7 (the upper bound 8 is exclusive).
TEST(X32_TRANSPOSE__4X4_MULTI_MULTI_ZIP_NEON, bh_8_bw_5_8) {
  TEST_REQUIRES_ARM_NEON;
  const size_t bh = 8;
  for (size_t bw = 5; bw < 8; ++bw) {
    TransposeMicrokernelTester()
        .input_stride(bw)
        .output_stride(bh)
        .block_width(bw)
        .block_height(bh)
        .iterations(1)
        .Test(xnn_x32_transpose_ukernel__4x4_multi_multi_zip_neon);
  }
}
3908
// Block of height 8 and width 4; strides equal the block dimensions.
TEST(X32_TRANSPOSE__4X4_MULTI_MULTI_ZIP_NEON, bh_8_bw_4) {
  TEST_REQUIRES_ARM_NEON;
  const size_t bw = 4;
  const size_t bh = 8;
  TransposeMicrokernelTester()
      .input_stride(bw)
      .output_stride(bh)
      .block_width(bw)
      .block_height(bh)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__4x4_multi_multi_zip_neon);
}
3919
// Width fixed at 4; height takes 5, 6 and 7 (the upper bound 8 is exclusive).
TEST(X32_TRANSPOSE__4X4_MULTI_MULTI_ZIP_NEON, bh_5_8_bw_4) {
  TEST_REQUIRES_ARM_NEON;
  const size_t bw = 4;
  for (size_t bh = 5; bh < 8; ++bh) {
    TransposeMicrokernelTester()
        .input_stride(bw)
        .output_stride(bh)
        .block_width(bw)
        .block_height(bh)
        .iterations(1)
        .Test(xnn_x32_transpose_ukernel__4x4_multi_multi_zip_neon);
  }
}
3932
// Width fixed at 8; height takes 5, 6 and 7 (the upper bound 8 is exclusive).
TEST(X32_TRANSPOSE__4X4_MULTI_MULTI_ZIP_NEON, bh_5_8_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  const size_t bw = 8;
  for (size_t bh = 5; bh < 8; ++bh) {
    TransposeMicrokernelTester()
        .input_stride(bw)
        .output_stride(bh)
        .block_width(bw)
        .block_height(bh)
        .iterations(1)
        .Test(xnn_x32_transpose_ukernel__4x4_multi_multi_zip_neon);
  }
}
3945
// Sweeps height and width over 5, 6 and 7 (the upper bound 8 is exclusive);
// each stride equals the matching block dimension.
TEST(X32_TRANSPOSE__4X4_MULTI_MULTI_ZIP_NEON, bh_5_8_bw_5_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bh = 5; bh < 8; ++bh) {
    for (size_t bw = 5; bw < 8; ++bw) {
      TransposeMicrokernelTester()
          .input_stride(bw)
          .output_stride(bh)
          .block_width(bw)
          .block_height(bh)
          .iterations(1)
          .Test(xnn_x32_transpose_ukernel__4x4_multi_multi_zip_neon);
    }
  }
}
3960
// 4x4 block with the input stride (8) larger than the block width.
TEST(X32_TRANSPOSE__4X4_MULTI_MULTI_ZIP_NEON, bh_4_bw_4_is_8) {
  TEST_REQUIRES_ARM_NEON;
  const size_t dim = 4;
  const size_t in_stride = 8;
  TransposeMicrokernelTester()
      .input_stride(in_stride)
      .output_stride(dim)
      .block_width(dim)
      .block_height(dim)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__4x4_multi_multi_zip_neon);
}
3971
// 4x4 block with the output stride (8) larger than the block height.
TEST(X32_TRANSPOSE__4X4_MULTI_MULTI_ZIP_NEON, bh_4_bw_4_os_8) {
  TEST_REQUIRES_ARM_NEON;
  const size_t dim = 4;
  const size_t out_stride = 8;
  TransposeMicrokernelTester()
      .input_stride(dim)
      .output_stride(out_stride)
      .block_width(dim)
      .block_height(dim)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__4x4_multi_multi_zip_neon);
}
3982
// 4x4 block with both input and output strides set to 8.
TEST(X32_TRANSPOSE__4X4_MULTI_MULTI_ZIP_NEON, bh_4_bw_4_is_8_os_8) {
  TEST_REQUIRES_ARM_NEON;
  const size_t dim = 4;
  const size_t stride = 8;
  TransposeMicrokernelTester()
      .input_stride(stride)
      .output_stride(stride)
      .block_width(dim)
      .block_height(dim)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__4x4_multi_multi_zip_neon);
}
3993 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
3994
3995
3996 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Exercises the kernel on a 4x4 block with input and output strides of 4.
TEST(X32_TRANSPOSE__4X4_MULTI_SWITCH_ZIP_NEON, bh_4_bw_4) {
  TEST_REQUIRES_ARM_NEON;
  const size_t dim = 4;
  TransposeMicrokernelTester()
      .input_stride(dim)
      .output_stride(dim)
      .block_width(dim)
      .block_height(dim)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__4x4_multi_switch_zip_neon);
}
4007
// Sweeps block height and width over 1..8 (inclusive); each stride equals
// the matching block dimension.
TEST(X32_TRANSPOSE__4X4_MULTI_SWITCH_ZIP_NEON, bh_1_8_bw_1_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bh = 1; bh <= 8; ++bh) {
    for (size_t bw = 1; bw <= 8; ++bw) {
      TransposeMicrokernelTester()
          .input_stride(bw)
          .output_stride(bh)
          .block_width(bw)
          .block_height(bh)
          .iterations(1)
          .Test(xnn_x32_transpose_ukernel__4x4_multi_switch_zip_neon);
    }
  }
}
4022
// Block of height 4 and width 8; strides equal the block dimensions.
TEST(X32_TRANSPOSE__4X4_MULTI_SWITCH_ZIP_NEON, bh_4_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  const size_t bw = 8;
  const size_t bh = 4;
  TransposeMicrokernelTester()
      .input_stride(bw)
      .output_stride(bh)
      .block_width(bw)
      .block_height(bh)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__4x4_multi_switch_zip_neon);
}
4033
// Height fixed at 4; width takes 5, 6 and 7 (the upper bound 8 is exclusive).
TEST(X32_TRANSPOSE__4X4_MULTI_SWITCH_ZIP_NEON, bh_4_bw_5_8) {
  TEST_REQUIRES_ARM_NEON;
  const size_t bh = 4;
  for (size_t bw = 5; bw < 8; ++bw) {
    TransposeMicrokernelTester()
        .input_stride(bw)
        .output_stride(bh)
        .block_width(bw)
        .block_height(bh)
        .iterations(1)
        .Test(xnn_x32_transpose_ukernel__4x4_multi_switch_zip_neon);
  }
}
4046
// Height fixed at 8; width takes 5, 6 and 7 (the upper bound 8 is exclusive).
TEST(X32_TRANSPOSE__4X4_MULTI_SWITCH_ZIP_NEON, bh_8_bw_5_8) {
  TEST_REQUIRES_ARM_NEON;
  const size_t bh = 8;
  for (size_t bw = 5; bw < 8; ++bw) {
    TransposeMicrokernelTester()
        .input_stride(bw)
        .output_stride(bh)
        .block_width(bw)
        .block_height(bh)
        .iterations(1)
        .Test(xnn_x32_transpose_ukernel__4x4_multi_switch_zip_neon);
  }
}
4059
// Block of height 8 and width 4; strides equal the block dimensions.
TEST(X32_TRANSPOSE__4X4_MULTI_SWITCH_ZIP_NEON, bh_8_bw_4) {
  TEST_REQUIRES_ARM_NEON;
  const size_t bw = 4;
  const size_t bh = 8;
  TransposeMicrokernelTester()
      .input_stride(bw)
      .output_stride(bh)
      .block_width(bw)
      .block_height(bh)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__4x4_multi_switch_zip_neon);
}
4070
// Width fixed at 4; height takes 5, 6 and 7 (the upper bound 8 is exclusive).
TEST(X32_TRANSPOSE__4X4_MULTI_SWITCH_ZIP_NEON, bh_5_8_bw_4) {
  TEST_REQUIRES_ARM_NEON;
  const size_t bw = 4;
  for (size_t bh = 5; bh < 8; ++bh) {
    TransposeMicrokernelTester()
        .input_stride(bw)
        .output_stride(bh)
        .block_width(bw)
        .block_height(bh)
        .iterations(1)
        .Test(xnn_x32_transpose_ukernel__4x4_multi_switch_zip_neon);
  }
}
4083
// Width fixed at 8; height takes 5, 6 and 7 (the upper bound 8 is exclusive).
TEST(X32_TRANSPOSE__4X4_MULTI_SWITCH_ZIP_NEON, bh_5_8_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  const size_t bw = 8;
  for (size_t bh = 5; bh < 8; ++bh) {
    TransposeMicrokernelTester()
        .input_stride(bw)
        .output_stride(bh)
        .block_width(bw)
        .block_height(bh)
        .iterations(1)
        .Test(xnn_x32_transpose_ukernel__4x4_multi_switch_zip_neon);
  }
}
4096
// Sweeps height and width over 5, 6 and 7 (the upper bound 8 is exclusive);
// each stride equals the matching block dimension.
TEST(X32_TRANSPOSE__4X4_MULTI_SWITCH_ZIP_NEON, bh_5_8_bw_5_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bh = 5; bh < 8; ++bh) {
    for (size_t bw = 5; bw < 8; ++bw) {
      TransposeMicrokernelTester()
          .input_stride(bw)
          .output_stride(bh)
          .block_width(bw)
          .block_height(bh)
          .iterations(1)
          .Test(xnn_x32_transpose_ukernel__4x4_multi_switch_zip_neon);
    }
  }
}
4111
// 4x4 block with the input stride (8) larger than the block width.
TEST(X32_TRANSPOSE__4X4_MULTI_SWITCH_ZIP_NEON, bh_4_bw_4_is_8) {
  TEST_REQUIRES_ARM_NEON;
  const size_t dim = 4;
  const size_t in_stride = 8;
  TransposeMicrokernelTester()
      .input_stride(in_stride)
      .output_stride(dim)
      .block_width(dim)
      .block_height(dim)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__4x4_multi_switch_zip_neon);
}
4122
// 4x4 block with the output stride (8) larger than the block height.
TEST(X32_TRANSPOSE__4X4_MULTI_SWITCH_ZIP_NEON, bh_4_bw_4_os_8) {
  TEST_REQUIRES_ARM_NEON;
  const size_t dim = 4;
  const size_t out_stride = 8;
  TransposeMicrokernelTester()
      .input_stride(dim)
      .output_stride(out_stride)
      .block_width(dim)
      .block_height(dim)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__4x4_multi_switch_zip_neon);
}
4133
// 4x4 block with both input and output strides set to 8.
TEST(X32_TRANSPOSE__4X4_MULTI_SWITCH_ZIP_NEON, bh_4_bw_4_is_8_os_8) {
  TEST_REQUIRES_ARM_NEON;
  const size_t dim = 4;
  const size_t stride = 8;
  TransposeMicrokernelTester()
      .input_stride(stride)
      .output_stride(stride)
      .block_width(dim)
      .block_height(dim)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__4x4_multi_switch_zip_neon);
}
4144 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
4145
4146
4147 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Exercises the kernel on a 4x4 block with input and output strides of 4.
TEST(X32_TRANSPOSE__4X4_REUSE_DEC_ZIP_NEON, bh_4_bw_4) {
  TEST_REQUIRES_ARM_NEON;
  const size_t dim = 4;
  TransposeMicrokernelTester()
      .input_stride(dim)
      .output_stride(dim)
      .block_width(dim)
      .block_height(dim)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__4x4_reuse_dec_zip_neon);
}
4158
// Sweeps block height and width over 1..8 (inclusive); each stride equals
// the matching block dimension.
TEST(X32_TRANSPOSE__4X4_REUSE_DEC_ZIP_NEON, bh_1_8_bw_1_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bh = 1; bh <= 8; ++bh) {
    for (size_t bw = 1; bw <= 8; ++bw) {
      TransposeMicrokernelTester()
          .input_stride(bw)
          .output_stride(bh)
          .block_width(bw)
          .block_height(bh)
          .iterations(1)
          .Test(xnn_x32_transpose_ukernel__4x4_reuse_dec_zip_neon);
    }
  }
}
4173
// Block of height 4 and width 8; strides equal the block dimensions.
TEST(X32_TRANSPOSE__4X4_REUSE_DEC_ZIP_NEON, bh_4_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  const size_t bw = 8;
  const size_t bh = 4;
  TransposeMicrokernelTester()
      .input_stride(bw)
      .output_stride(bh)
      .block_width(bw)
      .block_height(bh)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__4x4_reuse_dec_zip_neon);
}
4184
// Block height fixed at 4; block widths 5, 6 and 7 (the loop stops before 8).
// input_stride tracks the block width.
TEST(X32_TRANSPOSE__4X4_REUSE_DEC_ZIP_NEON, bh_4_bw_5_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bw = 5; bw <= 7; bw++) {
    TransposeMicrokernelTester()
        .input_stride(bw).output_stride(4)
        .block_width(bw).block_height(4)
        .iterations(1)
        .Test(xnn_x32_transpose_ukernel__4x4_reuse_dec_zip_neon);
  }
}
4197
// Block height fixed at 8; block widths 5, 6 and 7 (the loop stops before 8).
// input_stride tracks the block width.
TEST(X32_TRANSPOSE__4X4_REUSE_DEC_ZIP_NEON, bh_8_bw_5_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bw = 5; bw <= 7; bw++) {
    TransposeMicrokernelTester()
        .input_stride(bw).output_stride(8)
        .block_width(bw).block_height(8)
        .iterations(1)
        .Test(xnn_x32_transpose_ukernel__4x4_reuse_dec_zip_neon);
  }
}
4210
// Block height 8, block width 4; strides equal the matching block dimensions.
TEST(X32_TRANSPOSE__4X4_REUSE_DEC_ZIP_NEON, bh_8_bw_4) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(4).output_stride(8)
      .block_width(4).block_height(8)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__4x4_reuse_dec_zip_neon);
}
4221
// Block width fixed at 4; block heights 5, 6 and 7 (the loop stops before 8).
// output_stride tracks the block height.
TEST(X32_TRANSPOSE__4X4_REUSE_DEC_ZIP_NEON, bh_5_8_bw_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bh = 5; bh <= 7; bh++) {
    TransposeMicrokernelTester()
        .input_stride(4).output_stride(bh)
        .block_width(4).block_height(bh)
        .iterations(1)
        .Test(xnn_x32_transpose_ukernel__4x4_reuse_dec_zip_neon);
  }
}
4234
// Block width fixed at 8; block heights 5, 6 and 7 (the loop stops before 8).
// output_stride tracks the block height.
TEST(X32_TRANSPOSE__4X4_REUSE_DEC_ZIP_NEON, bh_5_8_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bh = 5; bh <= 7; bh++) {
    TransposeMicrokernelTester()
        .input_stride(8).output_stride(bh)
        .block_width(8).block_height(bh)
        .iterations(1)
        .Test(xnn_x32_transpose_ukernel__4x4_reuse_dec_zip_neon);
  }
}
4247
// Every combination of block height and block width in {5, 6, 7}; each
// stride equals the matching block dimension.
TEST(X32_TRANSPOSE__4X4_REUSE_DEC_ZIP_NEON, bh_5_8_bw_5_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bh = 5; bh <= 7; bh++) {
    for (size_t bw = 5; bw <= 7; bw++) {
      TransposeMicrokernelTester()
          .input_stride(bw).output_stride(bh)
          .block_width(bw).block_height(bh)
          .iterations(1)
          .Test(xnn_x32_transpose_ukernel__4x4_reuse_dec_zip_neon);
    }
  }
}
4262
// 4x4 block where input_stride (8) is larger than block_width (4);
// output_stride stays equal to block_height (4).
TEST(X32_TRANSPOSE__4X4_REUSE_DEC_ZIP_NEON, bh_4_bw_4_is_8) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(8).output_stride(4)
      .block_width(4).block_height(4)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__4x4_reuse_dec_zip_neon);
}
4273
// 4x4 block where output_stride (8) is larger than block_height (4);
// input_stride stays equal to block_width (4).
TEST(X32_TRANSPOSE__4X4_REUSE_DEC_ZIP_NEON, bh_4_bw_4_os_8) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(4).output_stride(8)
      .block_width(4).block_height(4)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__4x4_reuse_dec_zip_neon);
}
4284
// 4x4 block where both input_stride and output_stride (8) are larger than
// the block dimensions (4).
TEST(X32_TRANSPOSE__4X4_REUSE_DEC_ZIP_NEON, bh_4_bw_4_is_8_os_8) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(8).output_stride(8)
      .block_width(4).block_height(4)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__4x4_reuse_dec_zip_neon);
}
4295 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
4296
4297
4298 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// 4x4 block with both strides equal to the block dimensions (4).
TEST(X32_TRANSPOSE__4X4_REUSE_MOV_ZIP_NEON, bh_4_bw_4) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(4).output_stride(4)
      .block_width(4).block_height(4)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__4x4_reuse_mov_zip_neon);
}
4309
// Sweeps every block height and block width from 1 through 8 inclusive, with
// output_stride == block_height and input_stride == block_width.
TEST(X32_TRANSPOSE__4X4_REUSE_MOV_ZIP_NEON, bh_1_8_bw_1_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bh = 1; bh < 9; bh++) {
    for (size_t bw = 1; bw < 9; bw++) {
      TransposeMicrokernelTester()
          .input_stride(bw).output_stride(bh)
          .block_width(bw).block_height(bh)
          .iterations(1)
          .Test(xnn_x32_transpose_ukernel__4x4_reuse_mov_zip_neon);
    }
  }
}
4324
// Block height 4, block width 8; strides equal the matching block dimensions.
TEST(X32_TRANSPOSE__4X4_REUSE_MOV_ZIP_NEON, bh_4_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(8).output_stride(4)
      .block_width(8).block_height(4)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__4x4_reuse_mov_zip_neon);
}
4335
// Block height fixed at 4; block widths 5, 6 and 7 (the loop stops before 8).
// input_stride tracks the block width.
TEST(X32_TRANSPOSE__4X4_REUSE_MOV_ZIP_NEON, bh_4_bw_5_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bw = 5; bw <= 7; bw++) {
    TransposeMicrokernelTester()
        .input_stride(bw).output_stride(4)
        .block_width(bw).block_height(4)
        .iterations(1)
        .Test(xnn_x32_transpose_ukernel__4x4_reuse_mov_zip_neon);
  }
}
4348
// Block height fixed at 8; block widths 5, 6 and 7 (the loop stops before 8).
// input_stride tracks the block width.
TEST(X32_TRANSPOSE__4X4_REUSE_MOV_ZIP_NEON, bh_8_bw_5_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bw = 5; bw <= 7; bw++) {
    TransposeMicrokernelTester()
        .input_stride(bw).output_stride(8)
        .block_width(bw).block_height(8)
        .iterations(1)
        .Test(xnn_x32_transpose_ukernel__4x4_reuse_mov_zip_neon);
  }
}
4361
// Block height 8, block width 4; strides equal the matching block dimensions.
TEST(X32_TRANSPOSE__4X4_REUSE_MOV_ZIP_NEON, bh_8_bw_4) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(4).output_stride(8)
      .block_width(4).block_height(8)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__4x4_reuse_mov_zip_neon);
}
4372
// Block width fixed at 4; block heights 5, 6 and 7 (the loop stops before 8).
// output_stride tracks the block height.
TEST(X32_TRANSPOSE__4X4_REUSE_MOV_ZIP_NEON, bh_5_8_bw_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bh = 5; bh <= 7; bh++) {
    TransposeMicrokernelTester()
        .input_stride(4).output_stride(bh)
        .block_width(4).block_height(bh)
        .iterations(1)
        .Test(xnn_x32_transpose_ukernel__4x4_reuse_mov_zip_neon);
  }
}
4385
// Block width fixed at 8; block heights 5, 6 and 7 (the loop stops before 8).
// output_stride tracks the block height.
TEST(X32_TRANSPOSE__4X4_REUSE_MOV_ZIP_NEON, bh_5_8_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bh = 5; bh <= 7; bh++) {
    TransposeMicrokernelTester()
        .input_stride(8).output_stride(bh)
        .block_width(8).block_height(bh)
        .iterations(1)
        .Test(xnn_x32_transpose_ukernel__4x4_reuse_mov_zip_neon);
  }
}
4398
// Every combination of block height and block width in {5, 6, 7}; each
// stride equals the matching block dimension.
TEST(X32_TRANSPOSE__4X4_REUSE_MOV_ZIP_NEON, bh_5_8_bw_5_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bh = 5; bh <= 7; bh++) {
    for (size_t bw = 5; bw <= 7; bw++) {
      TransposeMicrokernelTester()
          .input_stride(bw).output_stride(bh)
          .block_width(bw).block_height(bh)
          .iterations(1)
          .Test(xnn_x32_transpose_ukernel__4x4_reuse_mov_zip_neon);
    }
  }
}
4413
// 4x4 block where input_stride (8) is larger than block_width (4);
// output_stride stays equal to block_height (4).
TEST(X32_TRANSPOSE__4X4_REUSE_MOV_ZIP_NEON, bh_4_bw_4_is_8) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(8).output_stride(4)
      .block_width(4).block_height(4)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__4x4_reuse_mov_zip_neon);
}
4424
// 4x4 block where output_stride (8) is larger than block_height (4);
// input_stride stays equal to block_width (4).
TEST(X32_TRANSPOSE__4X4_REUSE_MOV_ZIP_NEON, bh_4_bw_4_os_8) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(4).output_stride(8)
      .block_width(4).block_height(4)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__4x4_reuse_mov_zip_neon);
}
4435
// 4x4 block where both input_stride and output_stride (8) are larger than
// the block dimensions (4).
TEST(X32_TRANSPOSE__4X4_REUSE_MOV_ZIP_NEON, bh_4_bw_4_is_8_os_8) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(8).output_stride(8)
      .block_width(4).block_height(4)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__4x4_reuse_mov_zip_neon);
}
4446 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
4447
4448
4449 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// 4x4 block with both strides equal to the block dimensions (4).
TEST(X32_TRANSPOSE__4X4_REUSE_MULTI_ZIP_NEON, bh_4_bw_4) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(4).output_stride(4)
      .block_width(4).block_height(4)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__4x4_reuse_multi_zip_neon);
}
4460
// Sweeps every block height and block width from 1 through 8 inclusive, with
// output_stride == block_height and input_stride == block_width.
TEST(X32_TRANSPOSE__4X4_REUSE_MULTI_ZIP_NEON, bh_1_8_bw_1_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bh = 1; bh < 9; bh++) {
    for (size_t bw = 1; bw < 9; bw++) {
      TransposeMicrokernelTester()
          .input_stride(bw).output_stride(bh)
          .block_width(bw).block_height(bh)
          .iterations(1)
          .Test(xnn_x32_transpose_ukernel__4x4_reuse_multi_zip_neon);
    }
  }
}
4475
// Block height 4, block width 8; strides equal the matching block dimensions.
TEST(X32_TRANSPOSE__4X4_REUSE_MULTI_ZIP_NEON, bh_4_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(8).output_stride(4)
      .block_width(8).block_height(4)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__4x4_reuse_multi_zip_neon);
}
4486
// Block height fixed at 4; block widths 5, 6 and 7 (the loop stops before 8).
// input_stride tracks the block width.
TEST(X32_TRANSPOSE__4X4_REUSE_MULTI_ZIP_NEON, bh_4_bw_5_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bw = 5; bw <= 7; bw++) {
    TransposeMicrokernelTester()
        .input_stride(bw).output_stride(4)
        .block_width(bw).block_height(4)
        .iterations(1)
        .Test(xnn_x32_transpose_ukernel__4x4_reuse_multi_zip_neon);
  }
}
4499
// Block height fixed at 8; block widths 5, 6 and 7 (the loop stops before 8).
// input_stride tracks the block width.
TEST(X32_TRANSPOSE__4X4_REUSE_MULTI_ZIP_NEON, bh_8_bw_5_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bw = 5; bw <= 7; bw++) {
    TransposeMicrokernelTester()
        .input_stride(bw).output_stride(8)
        .block_width(bw).block_height(8)
        .iterations(1)
        .Test(xnn_x32_transpose_ukernel__4x4_reuse_multi_zip_neon);
  }
}
4512
// Block height 8, block width 4; strides equal the matching block dimensions.
TEST(X32_TRANSPOSE__4X4_REUSE_MULTI_ZIP_NEON, bh_8_bw_4) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(4).output_stride(8)
      .block_width(4).block_height(8)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__4x4_reuse_multi_zip_neon);
}
4523
// Block width fixed at 4; block heights 5, 6 and 7 (the loop stops before 8).
// output_stride tracks the block height.
TEST(X32_TRANSPOSE__4X4_REUSE_MULTI_ZIP_NEON, bh_5_8_bw_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bh = 5; bh <= 7; bh++) {
    TransposeMicrokernelTester()
        .input_stride(4).output_stride(bh)
        .block_width(4).block_height(bh)
        .iterations(1)
        .Test(xnn_x32_transpose_ukernel__4x4_reuse_multi_zip_neon);
  }
}
4536
// Block width fixed at 8; block heights 5, 6 and 7 (the loop stops before 8).
// output_stride tracks the block height.
TEST(X32_TRANSPOSE__4X4_REUSE_MULTI_ZIP_NEON, bh_5_8_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bh = 5; bh <= 7; bh++) {
    TransposeMicrokernelTester()
        .input_stride(8).output_stride(bh)
        .block_width(8).block_height(bh)
        .iterations(1)
        .Test(xnn_x32_transpose_ukernel__4x4_reuse_multi_zip_neon);
  }
}
4549
// Every combination of block height and block width in {5, 6, 7}; each
// stride equals the matching block dimension.
TEST(X32_TRANSPOSE__4X4_REUSE_MULTI_ZIP_NEON, bh_5_8_bw_5_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bh = 5; bh <= 7; bh++) {
    for (size_t bw = 5; bw <= 7; bw++) {
      TransposeMicrokernelTester()
          .input_stride(bw).output_stride(bh)
          .block_width(bw).block_height(bh)
          .iterations(1)
          .Test(xnn_x32_transpose_ukernel__4x4_reuse_multi_zip_neon);
    }
  }
}
4564
// 4x4 block where input_stride (8) is larger than block_width (4);
// output_stride stays equal to block_height (4).
TEST(X32_TRANSPOSE__4X4_REUSE_MULTI_ZIP_NEON, bh_4_bw_4_is_8) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(8).output_stride(4)
      .block_width(4).block_height(4)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__4x4_reuse_multi_zip_neon);
}
4575
// 4x4 block where output_stride (8) is larger than block_height (4);
// input_stride stays equal to block_width (4).
TEST(X32_TRANSPOSE__4X4_REUSE_MULTI_ZIP_NEON, bh_4_bw_4_os_8) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(4).output_stride(8)
      .block_width(4).block_height(4)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__4x4_reuse_multi_zip_neon);
}
4586
// 4x4 block where both input_stride and output_stride (8) are larger than
// the block dimensions (4).
TEST(X32_TRANSPOSE__4X4_REUSE_MULTI_ZIP_NEON, bh_4_bw_4_is_8_os_8) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(8).output_stride(8)
      .block_width(4).block_height(4)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__4x4_reuse_multi_zip_neon);
}
4597 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
4598
4599
4600 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// 4x4 block with both strides equal to the block dimensions (4).
TEST(X32_TRANSPOSE__4X4_REUSE_SWITCH_ZIP_NEON, bh_4_bw_4) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(4).output_stride(4)
      .block_width(4).block_height(4)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__4x4_reuse_switch_zip_neon);
}
4611
// Sweeps every block height and block width from 1 through 8 inclusive, with
// output_stride == block_height and input_stride == block_width.
TEST(X32_TRANSPOSE__4X4_REUSE_SWITCH_ZIP_NEON, bh_1_8_bw_1_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bh = 1; bh < 9; bh++) {
    for (size_t bw = 1; bw < 9; bw++) {
      TransposeMicrokernelTester()
          .input_stride(bw).output_stride(bh)
          .block_width(bw).block_height(bh)
          .iterations(1)
          .Test(xnn_x32_transpose_ukernel__4x4_reuse_switch_zip_neon);
    }
  }
}
4626
// Block height 4, block width 8; strides equal the matching block dimensions.
TEST(X32_TRANSPOSE__4X4_REUSE_SWITCH_ZIP_NEON, bh_4_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(8).output_stride(4)
      .block_width(8).block_height(4)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__4x4_reuse_switch_zip_neon);
}
4637
// Block height fixed at 4; block widths 5, 6 and 7 (the loop stops before 8).
// input_stride tracks the block width.
TEST(X32_TRANSPOSE__4X4_REUSE_SWITCH_ZIP_NEON, bh_4_bw_5_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bw = 5; bw <= 7; bw++) {
    TransposeMicrokernelTester()
        .input_stride(bw).output_stride(4)
        .block_width(bw).block_height(4)
        .iterations(1)
        .Test(xnn_x32_transpose_ukernel__4x4_reuse_switch_zip_neon);
  }
}
4650
// Block height fixed at 8; block widths 5, 6 and 7 (the loop stops before 8).
// input_stride tracks the block width.
TEST(X32_TRANSPOSE__4X4_REUSE_SWITCH_ZIP_NEON, bh_8_bw_5_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bw = 5; bw <= 7; bw++) {
    TransposeMicrokernelTester()
        .input_stride(bw).output_stride(8)
        .block_width(bw).block_height(8)
        .iterations(1)
        .Test(xnn_x32_transpose_ukernel__4x4_reuse_switch_zip_neon);
  }
}
4663
// Block height 8, block width 4; strides equal the matching block dimensions.
TEST(X32_TRANSPOSE__4X4_REUSE_SWITCH_ZIP_NEON, bh_8_bw_4) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(4).output_stride(8)
      .block_width(4).block_height(8)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__4x4_reuse_switch_zip_neon);
}
4674
// Block width fixed at 4; block heights 5, 6 and 7 (the loop stops before 8).
// output_stride tracks the block height.
TEST(X32_TRANSPOSE__4X4_REUSE_SWITCH_ZIP_NEON, bh_5_8_bw_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bh = 5; bh <= 7; bh++) {
    TransposeMicrokernelTester()
        .input_stride(4).output_stride(bh)
        .block_width(4).block_height(bh)
        .iterations(1)
        .Test(xnn_x32_transpose_ukernel__4x4_reuse_switch_zip_neon);
  }
}
4687
// Block width fixed at 8; block heights 5, 6 and 7 (the loop stops before 8).
// output_stride tracks the block height.
TEST(X32_TRANSPOSE__4X4_REUSE_SWITCH_ZIP_NEON, bh_5_8_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bh = 5; bh <= 7; bh++) {
    TransposeMicrokernelTester()
        .input_stride(8).output_stride(bh)
        .block_width(8).block_height(bh)
        .iterations(1)
        .Test(xnn_x32_transpose_ukernel__4x4_reuse_switch_zip_neon);
  }
}
4700
// Every combination of block height and block width in {5, 6, 7}; each
// stride equals the matching block dimension.
TEST(X32_TRANSPOSE__4X4_REUSE_SWITCH_ZIP_NEON, bh_5_8_bw_5_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bh = 5; bh <= 7; bh++) {
    for (size_t bw = 5; bw <= 7; bw++) {
      TransposeMicrokernelTester()
          .input_stride(bw).output_stride(bh)
          .block_width(bw).block_height(bh)
          .iterations(1)
          .Test(xnn_x32_transpose_ukernel__4x4_reuse_switch_zip_neon);
    }
  }
}
4715
// 4x4 block where input_stride (8) is larger than block_width (4);
// output_stride stays equal to block_height (4).
TEST(X32_TRANSPOSE__4X4_REUSE_SWITCH_ZIP_NEON, bh_4_bw_4_is_8) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(8).output_stride(4)
      .block_width(4).block_height(4)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__4x4_reuse_switch_zip_neon);
}
4726
// 4x4 block where output_stride (8) is larger than block_height (4);
// input_stride stays equal to block_width (4).
TEST(X32_TRANSPOSE__4X4_REUSE_SWITCH_ZIP_NEON, bh_4_bw_4_os_8) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(4).output_stride(8)
      .block_width(4).block_height(4)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__4x4_reuse_switch_zip_neon);
}
4737
// 4x4 block where both input_stride and output_stride (8) are larger than
// the block dimensions (4).
TEST(X32_TRANSPOSE__4X4_REUSE_SWITCH_ZIP_NEON, bh_4_bw_4_is_8_os_8) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(8).output_stride(8)
      .block_width(4).block_height(4)
      .iterations(1)
      .Test(xnn_x32_transpose_ukernel__4x4_reuse_switch_zip_neon);
}
4748 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
4749