• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
// Copyright 2021 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
//
// Auto-generated file. Do not edit!
//   Specification: test/x8-transpose.yaml
//   Generator: tools/generate-transpose-test.py
9 
10 
11 #include <gtest/gtest.h>
12 
13 #include <xnnpack/common.h>
14 #include <xnnpack/isa-checks.h>
15 
16 #include <xnnpack/transpose.h>
17 #include "transpose-microkernel-tester.h"
18 
19 
// Single run: block height 1, block width 2, input stride 2, output stride 1.
TEST(X8_TRANSPOSE__1X2_SCALAR_INT, bh_1_bw_2) {
  const size_t height = 1;
  const size_t width = 2;
  const size_t in_stride = 2;
  const size_t out_stride = 1;
  TransposeMicrokernelTester()
    .input_stride(in_stride)
    .output_stride(out_stride)
    .block_width(width)
    .block_height(height)
    .iterations(1)
    .Test(xnn_x8_transpose_ukernel__1x2_scalar_int);
}
29 
// Sweeps every block height in [1, 2] against every block width in [1, 4];
// the input stride follows the width and the output stride follows the height.
TEST(X8_TRANSPOSE__1X2_SCALAR_INT, bh_1_2_bw_1_4) {
  const size_t max_height = 2;
  const size_t max_width = 4;
  for (size_t height = 1; height <= max_height; height++) {
    for (size_t width = 1; width <= max_width; width++) {
      TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .iterations(1)
        .Test(xnn_x8_transpose_ukernel__1x2_scalar_int);
    }
  }
}
43 
// Single run: block height 1, block width 4, input stride 4, output stride 1.
TEST(X8_TRANSPOSE__1X2_SCALAR_INT, bh_1_bw_4) {
  const size_t height = 1;
  const size_t width = 4;
  const size_t in_stride = 4;
  const size_t out_stride = 1;
  TransposeMicrokernelTester()
    .input_stride(in_stride)
    .output_stride(out_stride)
    .block_width(width)
    .block_height(height)
    .iterations(1)
    .Test(xnn_x8_transpose_ukernel__1x2_scalar_int);
}
53 
// Block height fixed at 1; block width (also used as the input stride) takes
// each value in [3, 4) -- the upper bound is exclusive.
TEST(X8_TRANSPOSE__1X2_SCALAR_INT, bh_1_bw_3_4) {
  const size_t height = 1;
  for (size_t width = 3; width < 4; width++) {
    TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .iterations(1)
      .Test(xnn_x8_transpose_ukernel__1x2_scalar_int);
  }
}
65 
// Block height fixed at 2; block width (also used as the input stride) takes
// each value in [3, 4) -- the upper bound is exclusive.
TEST(X8_TRANSPOSE__1X2_SCALAR_INT, bh_2_bw_3_4) {
  const size_t height = 2;
  for (size_t width = 3; width < 4; width++) {
    TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .iterations(1)
      .Test(xnn_x8_transpose_ukernel__1x2_scalar_int);
  }
}
77 
// Single run: block height 2, block width 2, input stride 2, output stride 2.
TEST(X8_TRANSPOSE__1X2_SCALAR_INT, bh_2_bw_2) {
  const size_t height = 2;
  const size_t width = 2;
  const size_t in_stride = 2;
  const size_t out_stride = 2;
  TransposeMicrokernelTester()
    .input_stride(in_stride)
    .output_stride(out_stride)
    .block_width(width)
    .block_height(height)
    .iterations(1)
    .Test(xnn_x8_transpose_ukernel__1x2_scalar_int);
}
87 
// Block width and input stride fixed at 2; block height (also used as the
// output stride) runs over [2, 2].
// The upper bound must be inclusive: with an exclusive bound the range [2, 2)
// is empty and the test would pass without ever invoking the tester.
TEST(X8_TRANSPOSE__1X2_SCALAR_INT, bh_2_2_bw_2) {
  for (size_t height = 2; height <= 2; ++height) {
    TransposeMicrokernelTester()
      .input_stride(2)
      .output_stride(height)
      .block_width(2)
      .block_height(height)
      .iterations(1)
      .Test(xnn_x8_transpose_ukernel__1x2_scalar_int);
  }
}
99 
// Block width and input stride fixed at 4; block height (also used as the
// output stride) runs over [2, 2].
// The upper bound must be inclusive: with an exclusive bound the range [2, 2)
// is empty and the test would pass without ever invoking the tester.
TEST(X8_TRANSPOSE__1X2_SCALAR_INT, bh_2_2_bw_4) {
  for (size_t height = 2; height <= 2; ++height) {
    TransposeMicrokernelTester()
      .input_stride(4)
      .output_stride(height)
      .block_width(4)
      .block_height(height)
      .iterations(1)
      .Test(xnn_x8_transpose_ukernel__1x2_scalar_int);
  }
}
111 
// Block heights in [2, 2] crossed with block widths in [3, 4); the input
// stride follows the width and the output stride follows the height.
// The height bound must be inclusive: with an exclusive bound the range
// [2, 2) is empty and the tester would never be invoked.
TEST(X8_TRANSPOSE__1X2_SCALAR_INT, bh_2_2_bw_3_4) {
  for (size_t height = 2; height <= 2; ++height) {
    for (size_t width = 3; width < 4; ++width) {
      TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .iterations(1)
        .Test(xnn_x8_transpose_ukernel__1x2_scalar_int);
    }
  }
}
125 
// Single run: block height 1, block width 2, input stride 4 (larger than the
// block width), output stride 1.
TEST(X8_TRANSPOSE__1X2_SCALAR_INT, bh_1_bw_2_is_4) {
  const size_t height = 1;
  const size_t width = 2;
  const size_t in_stride = 4;
  const size_t out_stride = 1;
  TransposeMicrokernelTester()
    .input_stride(in_stride)
    .output_stride(out_stride)
    .block_width(width)
    .block_height(height)
    .iterations(1)
    .Test(xnn_x8_transpose_ukernel__1x2_scalar_int);
}
135 
// Single run: block height 1, block width 2, input stride 2, output stride 2
// (larger than the block height).
TEST(X8_TRANSPOSE__1X2_SCALAR_INT, bh_1_bw_2_os_2) {
  const size_t height = 1;
  const size_t width = 2;
  const size_t in_stride = 2;
  const size_t out_stride = 2;
  TransposeMicrokernelTester()
    .input_stride(in_stride)
    .output_stride(out_stride)
    .block_width(width)
    .block_height(height)
    .iterations(1)
    .Test(xnn_x8_transpose_ukernel__1x2_scalar_int);
}
145 
// Single run: block height 1, block width 2, input stride 4, output stride 2
// (both strides larger than the matching block dimension).
TEST(X8_TRANSPOSE__1X2_SCALAR_INT, bh_1_bw_2_is_4_os_2) {
  const size_t height = 1;
  const size_t width = 2;
  const size_t in_stride = 4;
  const size_t out_stride = 2;
  TransposeMicrokernelTester()
    .input_stride(in_stride)
    .output_stride(out_stride)
    .block_width(width)
    .block_height(height)
    .iterations(1)
    .Test(xnn_x8_transpose_ukernel__1x2_scalar_int);
}
155 
// Single run: block height 1, block width 4, input stride 4, output stride 1.
TEST(X8_TRANSPOSE__1X4_SCALAR_INT, bh_1_bw_4) {
  const size_t height = 1;
  const size_t width = 4;
  const size_t in_stride = 4;
  const size_t out_stride = 1;
  TransposeMicrokernelTester()
    .input_stride(in_stride)
    .output_stride(out_stride)
    .block_width(width)
    .block_height(height)
    .iterations(1)
    .Test(xnn_x8_transpose_ukernel__1x4_scalar_int);
}
165 
// Sweeps every block height in [1, 2] against every block width in [1, 8];
// the input stride follows the width and the output stride follows the height.
TEST(X8_TRANSPOSE__1X4_SCALAR_INT, bh_1_2_bw_1_8) {
  const size_t max_height = 2;
  const size_t max_width = 8;
  for (size_t height = 1; height <= max_height; height++) {
    for (size_t width = 1; width <= max_width; width++) {
      TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .iterations(1)
        .Test(xnn_x8_transpose_ukernel__1x4_scalar_int);
    }
  }
}
179 
// Single run: block height 1, block width 8, input stride 8, output stride 1.
TEST(X8_TRANSPOSE__1X4_SCALAR_INT, bh_1_bw_8) {
  const size_t height = 1;
  const size_t width = 8;
  const size_t in_stride = 8;
  const size_t out_stride = 1;
  TransposeMicrokernelTester()
    .input_stride(in_stride)
    .output_stride(out_stride)
    .block_width(width)
    .block_height(height)
    .iterations(1)
    .Test(xnn_x8_transpose_ukernel__1x4_scalar_int);
}
189 
// Block height fixed at 1; block width (also used as the input stride) takes
// each value in [5, 8) -- the upper bound is exclusive.
TEST(X8_TRANSPOSE__1X4_SCALAR_INT, bh_1_bw_5_8) {
  const size_t height = 1;
  for (size_t width = 5; width < 8; width++) {
    TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .iterations(1)
      .Test(xnn_x8_transpose_ukernel__1x4_scalar_int);
  }
}
201 
// Block height fixed at 2; block width (also used as the input stride) takes
// each value in [5, 8) -- the upper bound is exclusive.
TEST(X8_TRANSPOSE__1X4_SCALAR_INT, bh_2_bw_5_8) {
  const size_t height = 2;
  for (size_t width = 5; width < 8; width++) {
    TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .iterations(1)
      .Test(xnn_x8_transpose_ukernel__1x4_scalar_int);
  }
}
213 
// Single run: block height 2, block width 4, input stride 4, output stride 2.
TEST(X8_TRANSPOSE__1X4_SCALAR_INT, bh_2_bw_4) {
  const size_t height = 2;
  const size_t width = 4;
  const size_t in_stride = 4;
  const size_t out_stride = 2;
  TransposeMicrokernelTester()
    .input_stride(in_stride)
    .output_stride(out_stride)
    .block_width(width)
    .block_height(height)
    .iterations(1)
    .Test(xnn_x8_transpose_ukernel__1x4_scalar_int);
}
223 
// Block width and input stride fixed at 4; block height (also used as the
// output stride) runs over [2, 2].
// The upper bound must be inclusive: with an exclusive bound the range [2, 2)
// is empty and the test would pass without ever invoking the tester.
TEST(X8_TRANSPOSE__1X4_SCALAR_INT, bh_2_2_bw_4) {
  for (size_t height = 2; height <= 2; ++height) {
    TransposeMicrokernelTester()
      .input_stride(4)
      .output_stride(height)
      .block_width(4)
      .block_height(height)
      .iterations(1)
      .Test(xnn_x8_transpose_ukernel__1x4_scalar_int);
  }
}
235 
// Block width and input stride fixed at 8; block height (also used as the
// output stride) runs over [2, 2].
// The upper bound must be inclusive: with an exclusive bound the range [2, 2)
// is empty and the test would pass without ever invoking the tester.
TEST(X8_TRANSPOSE__1X4_SCALAR_INT, bh_2_2_bw_8) {
  for (size_t height = 2; height <= 2; ++height) {
    TransposeMicrokernelTester()
      .input_stride(8)
      .output_stride(height)
      .block_width(8)
      .block_height(height)
      .iterations(1)
      .Test(xnn_x8_transpose_ukernel__1x4_scalar_int);
  }
}
247 
// Block heights in [2, 2] crossed with block widths in [5, 8); the input
// stride follows the width and the output stride follows the height.
// The height bound must be inclusive: with an exclusive bound the range
// [2, 2) is empty and the tester would never be invoked.
TEST(X8_TRANSPOSE__1X4_SCALAR_INT, bh_2_2_bw_5_8) {
  for (size_t height = 2; height <= 2; ++height) {
    for (size_t width = 5; width < 8; ++width) {
      TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .iterations(1)
        .Test(xnn_x8_transpose_ukernel__1x4_scalar_int);
    }
  }
}
261 
// Single run: block height 1, block width 4, input stride 8 (larger than the
// block width), output stride 1.
TEST(X8_TRANSPOSE__1X4_SCALAR_INT, bh_1_bw_4_is_8) {
  const size_t height = 1;
  const size_t width = 4;
  const size_t in_stride = 8;
  const size_t out_stride = 1;
  TransposeMicrokernelTester()
    .input_stride(in_stride)
    .output_stride(out_stride)
    .block_width(width)
    .block_height(height)
    .iterations(1)
    .Test(xnn_x8_transpose_ukernel__1x4_scalar_int);
}
271 
// Single run: block height 1, block width 4, input stride 4, output stride 2
// (larger than the block height).
TEST(X8_TRANSPOSE__1X4_SCALAR_INT, bh_1_bw_4_os_2) {
  const size_t height = 1;
  const size_t width = 4;
  const size_t in_stride = 4;
  const size_t out_stride = 2;
  TransposeMicrokernelTester()
    .input_stride(in_stride)
    .output_stride(out_stride)
    .block_width(width)
    .block_height(height)
    .iterations(1)
    .Test(xnn_x8_transpose_ukernel__1x4_scalar_int);
}
281 
// Single run: block height 1, block width 4, input stride 8, output stride 2
// (both strides larger than the matching block dimension).
TEST(X8_TRANSPOSE__1X4_SCALAR_INT, bh_1_bw_4_is_8_os_2) {
  const size_t height = 1;
  const size_t width = 4;
  const size_t in_stride = 8;
  const size_t out_stride = 2;
  TransposeMicrokernelTester()
    .input_stride(in_stride)
    .output_stride(out_stride)
    .block_width(width)
    .block_height(height)
    .iterations(1)
    .Test(xnn_x8_transpose_ukernel__1x4_scalar_int);
}
291 
// Single run: block height 2, block width 1, input stride 1, output stride 2.
TEST(X8_TRANSPOSE__2X1_SCALAR_INT, bh_2_bw_1) {
  const size_t height = 2;
  const size_t width = 1;
  const size_t in_stride = 1;
  const size_t out_stride = 2;
  TransposeMicrokernelTester()
    .input_stride(in_stride)
    .output_stride(out_stride)
    .block_width(width)
    .block_height(height)
    .iterations(1)
    .Test(xnn_x8_transpose_ukernel__2x1_scalar_int);
}
301 
// Sweeps every block height in [1, 4] against every block width in [1, 2];
// the input stride follows the width and the output stride follows the height.
TEST(X8_TRANSPOSE__2X1_SCALAR_INT, bh_1_4_bw_1_2) {
  const size_t max_height = 4;
  const size_t max_width = 2;
  for (size_t height = 1; height <= max_height; height++) {
    for (size_t width = 1; width <= max_width; width++) {
      TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .iterations(1)
        .Test(xnn_x8_transpose_ukernel__2x1_scalar_int);
    }
  }
}
315 
// Single run: block height 2, block width 2, input stride 2, output stride 2.
TEST(X8_TRANSPOSE__2X1_SCALAR_INT, bh_2_bw_2) {
  const size_t height = 2;
  const size_t width = 2;
  const size_t in_stride = 2;
  const size_t out_stride = 2;
  TransposeMicrokernelTester()
    .input_stride(in_stride)
    .output_stride(out_stride)
    .block_width(width)
    .block_height(height)
    .iterations(1)
    .Test(xnn_x8_transpose_ukernel__2x1_scalar_int);
}
325 
// Block height and output stride fixed at 2; block width (also used as the
// input stride) runs over [2, 2].
// The upper bound must be inclusive: with an exclusive bound the range [2, 2)
// is empty and the test would pass without ever invoking the tester.
TEST(X8_TRANSPOSE__2X1_SCALAR_INT, bh_2_bw_2_2) {
  for (size_t width = 2; width <= 2; ++width) {
    TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(2)
      .block_width(width)
      .block_height(2)
      .iterations(1)
      .Test(xnn_x8_transpose_ukernel__2x1_scalar_int);
  }
}
337 
// Block height and output stride fixed at 4; block width (also used as the
// input stride) runs over [2, 2].
// The upper bound must be inclusive: with an exclusive bound the range [2, 2)
// is empty and the test would pass without ever invoking the tester.
TEST(X8_TRANSPOSE__2X1_SCALAR_INT, bh_4_bw_2_2) {
  for (size_t width = 2; width <= 2; ++width) {
    TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(4)
      .block_width(width)
      .block_height(4)
      .iterations(1)
      .Test(xnn_x8_transpose_ukernel__2x1_scalar_int);
  }
}
349 
// Single run: block height 4, block width 1, input stride 1, output stride 4.
TEST(X8_TRANSPOSE__2X1_SCALAR_INT, bh_4_bw_1) {
  const size_t height = 4;
  const size_t width = 1;
  const size_t in_stride = 1;
  const size_t out_stride = 4;
  TransposeMicrokernelTester()
    .input_stride(in_stride)
    .output_stride(out_stride)
    .block_width(width)
    .block_height(height)
    .iterations(1)
    .Test(xnn_x8_transpose_ukernel__2x1_scalar_int);
}
359 
// Block width and input stride fixed at 1; block height (also used as the
// output stride) takes each value in [3, 4) -- the upper bound is exclusive.
TEST(X8_TRANSPOSE__2X1_SCALAR_INT, bh_3_4_bw_1) {
  const size_t width = 1;
  for (size_t height = 3; height < 4; height++) {
    TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .iterations(1)
      .Test(xnn_x8_transpose_ukernel__2x1_scalar_int);
  }
}
371 
// Block width and input stride fixed at 2; block height (also used as the
// output stride) takes each value in [3, 4) -- the upper bound is exclusive.
TEST(X8_TRANSPOSE__2X1_SCALAR_INT, bh_3_4_bw_2) {
  const size_t width = 2;
  for (size_t height = 3; height < 4; height++) {
    TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .iterations(1)
      .Test(xnn_x8_transpose_ukernel__2x1_scalar_int);
  }
}
383 
// Block heights in [3, 4) crossed with block widths in [2, 2]; the input
// stride follows the width and the output stride follows the height.
// The width bound must be inclusive: with an exclusive bound the range
// [2, 2) is empty and the tester would never be invoked.
TEST(X8_TRANSPOSE__2X1_SCALAR_INT, bh_3_4_bw_2_2) {
  for (size_t height = 3; height < 4; ++height) {
    for (size_t width = 2; width <= 2; ++width) {
      TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .iterations(1)
        .Test(xnn_x8_transpose_ukernel__2x1_scalar_int);
    }
  }
}
397 
// Single run: block height 2, block width 1, input stride 2 (larger than the
// block width), output stride 2.
TEST(X8_TRANSPOSE__2X1_SCALAR_INT, bh_2_bw_1_is_2) {
  const size_t height = 2;
  const size_t width = 1;
  const size_t in_stride = 2;
  const size_t out_stride = 2;
  TransposeMicrokernelTester()
    .input_stride(in_stride)
    .output_stride(out_stride)
    .block_width(width)
    .block_height(height)
    .iterations(1)
    .Test(xnn_x8_transpose_ukernel__2x1_scalar_int);
}
407 
// Single run: block height 2, block width 1, input stride 1, output stride 4
// (larger than the block height).
TEST(X8_TRANSPOSE__2X1_SCALAR_INT, bh_2_bw_1_os_4) {
  const size_t height = 2;
  const size_t width = 1;
  const size_t in_stride = 1;
  const size_t out_stride = 4;
  TransposeMicrokernelTester()
    .input_stride(in_stride)
    .output_stride(out_stride)
    .block_width(width)
    .block_height(height)
    .iterations(1)
    .Test(xnn_x8_transpose_ukernel__2x1_scalar_int);
}
417 
// Single run: block height 2, block width 1, input stride 2, output stride 4
// (both strides larger than the matching block dimension).
TEST(X8_TRANSPOSE__2X1_SCALAR_INT, bh_2_bw_1_is_2_os_4) {
  const size_t height = 2;
  const size_t width = 1;
  const size_t in_stride = 2;
  const size_t out_stride = 4;
  TransposeMicrokernelTester()
    .input_stride(in_stride)
    .output_stride(out_stride)
    .block_width(width)
    .block_height(height)
    .iterations(1)
    .Test(xnn_x8_transpose_ukernel__2x1_scalar_int);
}
427 
// Single run: block height 2, block width 2, input stride 2, output stride 2.
TEST(X8_TRANSPOSE__2X2_SCALAR_INT, bh_2_bw_2) {
  const size_t height = 2;
  const size_t width = 2;
  const size_t in_stride = 2;
  const size_t out_stride = 2;
  TransposeMicrokernelTester()
    .input_stride(in_stride)
    .output_stride(out_stride)
    .block_width(width)
    .block_height(height)
    .iterations(1)
    .Test(xnn_x8_transpose_ukernel__2x2_scalar_int);
}
437 
// Sweeps every block height in [1, 4] against every block width in [1, 4];
// the input stride follows the width and the output stride follows the height.
TEST(X8_TRANSPOSE__2X2_SCALAR_INT, bh_1_4_bw_1_4) {
  const size_t max_height = 4;
  const size_t max_width = 4;
  for (size_t height = 1; height <= max_height; height++) {
    for (size_t width = 1; width <= max_width; width++) {
      TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .iterations(1)
        .Test(xnn_x8_transpose_ukernel__2x2_scalar_int);
    }
  }
}
451 
// Single run: block height 2, block width 4, input stride 4, output stride 2.
TEST(X8_TRANSPOSE__2X2_SCALAR_INT, bh_2_bw_4) {
  const size_t height = 2;
  const size_t width = 4;
  const size_t in_stride = 4;
  const size_t out_stride = 2;
  TransposeMicrokernelTester()
    .input_stride(in_stride)
    .output_stride(out_stride)
    .block_width(width)
    .block_height(height)
    .iterations(1)
    .Test(xnn_x8_transpose_ukernel__2x2_scalar_int);
}
461 
// Block height fixed at 2; block width (also used as the input stride) takes
// each value in [3, 4) -- the upper bound is exclusive.
TEST(X8_TRANSPOSE__2X2_SCALAR_INT, bh_2_bw_3_4) {
  const size_t height = 2;
  for (size_t width = 3; width < 4; width++) {
    TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .iterations(1)
      .Test(xnn_x8_transpose_ukernel__2x2_scalar_int);
  }
}
473 
// Block height fixed at 4; block width (also used as the input stride) takes
// each value in [3, 4) -- the upper bound is exclusive.
TEST(X8_TRANSPOSE__2X2_SCALAR_INT, bh_4_bw_3_4) {
  const size_t height = 4;
  for (size_t width = 3; width < 4; width++) {
    TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .iterations(1)
      .Test(xnn_x8_transpose_ukernel__2x2_scalar_int);
  }
}
485 
// Single run: block height 4, block width 2, input stride 2, output stride 4.
TEST(X8_TRANSPOSE__2X2_SCALAR_INT, bh_4_bw_2) {
  const size_t height = 4;
  const size_t width = 2;
  const size_t in_stride = 2;
  const size_t out_stride = 4;
  TransposeMicrokernelTester()
    .input_stride(in_stride)
    .output_stride(out_stride)
    .block_width(width)
    .block_height(height)
    .iterations(1)
    .Test(xnn_x8_transpose_ukernel__2x2_scalar_int);
}
495 
// Block width and input stride fixed at 2; block height (also used as the
// output stride) takes each value in [3, 4) -- the upper bound is exclusive.
TEST(X8_TRANSPOSE__2X2_SCALAR_INT, bh_3_4_bw_2) {
  const size_t width = 2;
  for (size_t height = 3; height < 4; height++) {
    TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .iterations(1)
      .Test(xnn_x8_transpose_ukernel__2x2_scalar_int);
  }
}
507 
// Block width and input stride fixed at 4; block height (also used as the
// output stride) takes each value in [3, 4) -- the upper bound is exclusive.
TEST(X8_TRANSPOSE__2X2_SCALAR_INT, bh_3_4_bw_4) {
  const size_t width = 4;
  for (size_t height = 3; height < 4; height++) {
    TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .iterations(1)
      .Test(xnn_x8_transpose_ukernel__2x2_scalar_int);
  }
}
519 
// Block heights in [3, 4) crossed with block widths in [3, 4), both upper
// bounds exclusive; the input stride follows the width and the output stride
// follows the height.
TEST(X8_TRANSPOSE__2X2_SCALAR_INT, bh_3_4_bw_3_4) {
  for (size_t height = 3; height < 4; height++) {
    for (size_t width = 3; width < 4; width++) {
      TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .iterations(1)
        .Test(xnn_x8_transpose_ukernel__2x2_scalar_int);
    }
  }
}
533 
// Single run: block height 2, block width 2, input stride 4 (larger than the
// block width), output stride 2.
TEST(X8_TRANSPOSE__2X2_SCALAR_INT, bh_2_bw_2_is_4) {
  const size_t height = 2;
  const size_t width = 2;
  const size_t in_stride = 4;
  const size_t out_stride = 2;
  TransposeMicrokernelTester()
    .input_stride(in_stride)
    .output_stride(out_stride)
    .block_width(width)
    .block_height(height)
    .iterations(1)
    .Test(xnn_x8_transpose_ukernel__2x2_scalar_int);
}
543 
// Single run: block height 2, block width 2, input stride 2, output stride 4
// (larger than the block height).
TEST(X8_TRANSPOSE__2X2_SCALAR_INT, bh_2_bw_2_os_4) {
  const size_t height = 2;
  const size_t width = 2;
  const size_t in_stride = 2;
  const size_t out_stride = 4;
  TransposeMicrokernelTester()
    .input_stride(in_stride)
    .output_stride(out_stride)
    .block_width(width)
    .block_height(height)
    .iterations(1)
    .Test(xnn_x8_transpose_ukernel__2x2_scalar_int);
}
553 
// Single run: block height 2, block width 2, input stride 4, output stride 4
// (both strides larger than the matching block dimension).
TEST(X8_TRANSPOSE__2X2_SCALAR_INT, bh_2_bw_2_is_4_os_4) {
  const size_t height = 2;
  const size_t width = 2;
  const size_t in_stride = 4;
  const size_t out_stride = 4;
  TransposeMicrokernelTester()
    .input_stride(in_stride)
    .output_stride(out_stride)
    .block_width(width)
    .block_height(height)
    .iterations(1)
    .Test(xnn_x8_transpose_ukernel__2x2_scalar_int);
}
563 
// Single run: block height 2, block width 4, input stride 4, output stride 2.
TEST(X8_TRANSPOSE__2X4_SCALAR_INT, bh_2_bw_4) {
  const size_t height = 2;
  const size_t width = 4;
  const size_t in_stride = 4;
  const size_t out_stride = 2;
  TransposeMicrokernelTester()
    .input_stride(in_stride)
    .output_stride(out_stride)
    .block_width(width)
    .block_height(height)
    .iterations(1)
    .Test(xnn_x8_transpose_ukernel__2x4_scalar_int);
}
573 
// Sweeps every block height in [1, 4] against every block width in [1, 8];
// the input stride follows the width and the output stride follows the height.
TEST(X8_TRANSPOSE__2X4_SCALAR_INT, bh_1_4_bw_1_8) {
  const size_t max_height = 4;
  const size_t max_width = 8;
  for (size_t height = 1; height <= max_height; height++) {
    for (size_t width = 1; width <= max_width; width++) {
      TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .iterations(1)
        .Test(xnn_x8_transpose_ukernel__2x4_scalar_int);
    }
  }
}
587 
// Single run: block height 2, block width 8, input stride 8, output stride 2.
TEST(X8_TRANSPOSE__2X4_SCALAR_INT, bh_2_bw_8) {
  const size_t height = 2;
  const size_t width = 8;
  const size_t in_stride = 8;
  const size_t out_stride = 2;
  TransposeMicrokernelTester()
    .input_stride(in_stride)
    .output_stride(out_stride)
    .block_width(width)
    .block_height(height)
    .iterations(1)
    .Test(xnn_x8_transpose_ukernel__2x4_scalar_int);
}
597 
// Block height fixed at 2; block width (also used as the input stride) takes
// each value in [5, 8) -- the upper bound is exclusive.
TEST(X8_TRANSPOSE__2X4_SCALAR_INT, bh_2_bw_5_8) {
  const size_t height = 2;
  for (size_t width = 5; width < 8; width++) {
    TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .iterations(1)
      .Test(xnn_x8_transpose_ukernel__2x4_scalar_int);
  }
}
609 
// Block height fixed at 4; block width (also used as the input stride) takes
// each value in [5, 8) -- the upper bound is exclusive.
TEST(X8_TRANSPOSE__2X4_SCALAR_INT, bh_4_bw_5_8) {
  const size_t height = 4;
  for (size_t width = 5; width < 8; width++) {
    TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .iterations(1)
      .Test(xnn_x8_transpose_ukernel__2x4_scalar_int);
  }
}
621 
// Single run: block height 4, block width 4, input stride 4, output stride 4.
TEST(X8_TRANSPOSE__2X4_SCALAR_INT, bh_4_bw_4) {
  const size_t height = 4;
  const size_t width = 4;
  const size_t in_stride = 4;
  const size_t out_stride = 4;
  TransposeMicrokernelTester()
    .input_stride(in_stride)
    .output_stride(out_stride)
    .block_width(width)
    .block_height(height)
    .iterations(1)
    .Test(xnn_x8_transpose_ukernel__2x4_scalar_int);
}
631 
// Block width and input stride fixed at 4; block height (also used as the
// output stride) takes each value in [3, 4) -- the upper bound is exclusive.
TEST(X8_TRANSPOSE__2X4_SCALAR_INT, bh_3_4_bw_4) {
  const size_t width = 4;
  for (size_t height = 3; height < 4; height++) {
    TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .iterations(1)
      .Test(xnn_x8_transpose_ukernel__2x4_scalar_int);
  }
}
643 
// Block width and input stride fixed at 8; block height (also used as the
// output stride) takes each value in [3, 4) -- the upper bound is exclusive.
TEST(X8_TRANSPOSE__2X4_SCALAR_INT, bh_3_4_bw_8) {
  const size_t width = 8;
  for (size_t height = 3; height < 4; height++) {
    TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .iterations(1)
      .Test(xnn_x8_transpose_ukernel__2x4_scalar_int);
  }
}
655 
// Block heights in [3, 4) crossed with block widths in [5, 8), both upper
// bounds exclusive; the input stride follows the width and the output stride
// follows the height.
TEST(X8_TRANSPOSE__2X4_SCALAR_INT, bh_3_4_bw_5_8) {
  for (size_t height = 3; height < 4; height++) {
    for (size_t width = 5; width < 8; width++) {
      TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .iterations(1)
        .Test(xnn_x8_transpose_ukernel__2x4_scalar_int);
    }
  }
}
669 
// Single run: block height 2, block width 4, input stride 8 (larger than the
// block width), output stride 2.
TEST(X8_TRANSPOSE__2X4_SCALAR_INT, bh_2_bw_4_is_8) {
  const size_t height = 2;
  const size_t width = 4;
  const size_t in_stride = 8;
  const size_t out_stride = 2;
  TransposeMicrokernelTester()
    .input_stride(in_stride)
    .output_stride(out_stride)
    .block_width(width)
    .block_height(height)
    .iterations(1)
    .Test(xnn_x8_transpose_ukernel__2x4_scalar_int);
}
679 
// Single run: block height 2, block width 4, input stride 4, output stride 4
// (larger than the block height).
TEST(X8_TRANSPOSE__2X4_SCALAR_INT, bh_2_bw_4_os_4) {
  const size_t height = 2;
  const size_t width = 4;
  const size_t in_stride = 4;
  const size_t out_stride = 4;
  TransposeMicrokernelTester()
    .input_stride(in_stride)
    .output_stride(out_stride)
    .block_width(width)
    .block_height(height)
    .iterations(1)
    .Test(xnn_x8_transpose_ukernel__2x4_scalar_int);
}
689 
// Single run: block height 2, block width 4, input stride 8, output stride 4
// (both strides larger than the matching block dimension).
TEST(X8_TRANSPOSE__2X4_SCALAR_INT, bh_2_bw_4_is_8_os_4) {
  const size_t height = 2;
  const size_t width = 4;
  const size_t in_stride = 8;
  const size_t out_stride = 4;
  TransposeMicrokernelTester()
    .input_stride(in_stride)
    .output_stride(out_stride)
    .block_width(width)
    .block_height(height)
    .iterations(1)
    .Test(xnn_x8_transpose_ukernel__2x4_scalar_int);
}
699 
// Single run: block height 4, block width 1, input stride 1, output stride 4.
TEST(X8_TRANSPOSE__4X1_SCALAR_INT, bh_4_bw_1) {
  const size_t height = 4;
  const size_t width = 1;
  const size_t in_stride = 1;
  const size_t out_stride = 4;
  TransposeMicrokernelTester()
    .input_stride(in_stride)
    .output_stride(out_stride)
    .block_width(width)
    .block_height(height)
    .iterations(1)
    .Test(xnn_x8_transpose_ukernel__4x1_scalar_int);
}
709 
// Sweeps every block height in [1, 8] against every block width in [1, 2];
// the input stride follows the width and the output stride follows the height.
TEST(X8_TRANSPOSE__4X1_SCALAR_INT, bh_1_8_bw_1_2) {
  const size_t max_height = 8;
  const size_t max_width = 2;
  for (size_t height = 1; height <= max_height; height++) {
    for (size_t width = 1; width <= max_width; width++) {
      TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .iterations(1)
        .Test(xnn_x8_transpose_ukernel__4x1_scalar_int);
    }
  }
}
723 
// Single run: block height 4, block width 2, input stride 2, output stride 4.
TEST(X8_TRANSPOSE__4X1_SCALAR_INT, bh_4_bw_2) {
  const size_t height = 4;
  const size_t width = 2;
  const size_t in_stride = 2;
  const size_t out_stride = 4;
  TransposeMicrokernelTester()
    .input_stride(in_stride)
    .output_stride(out_stride)
    .block_width(width)
    .block_height(height)
    .iterations(1)
    .Test(xnn_x8_transpose_ukernel__4x1_scalar_int);
}
733 
// Block height and output stride fixed at 4; block width (also used as the
// input stride) runs over [2, 2].
// The upper bound must be inclusive: with an exclusive bound the range [2, 2)
// is empty and the test would pass without ever invoking the tester.
TEST(X8_TRANSPOSE__4X1_SCALAR_INT, bh_4_bw_2_2) {
  for (size_t width = 2; width <= 2; ++width) {
    TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(4)
      .block_width(width)
      .block_height(4)
      .iterations(1)
      .Test(xnn_x8_transpose_ukernel__4x1_scalar_int);
  }
}
745 
// Block height and output stride fixed at 8; block width (also used as the
// input stride) runs over [2, 2].
// The upper bound must be inclusive: with an exclusive bound the range [2, 2)
// is empty and the test would pass without ever invoking the tester.
TEST(X8_TRANSPOSE__4X1_SCALAR_INT, bh_8_bw_2_2) {
  for (size_t width = 2; width <= 2; ++width) {
    TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(8)
      .block_width(width)
      .block_height(8)
      .iterations(1)
      .Test(xnn_x8_transpose_ukernel__4x1_scalar_int);
  }
}
757 
// Single run: block height 8, block width 1, input stride 1, output stride 8.
TEST(X8_TRANSPOSE__4X1_SCALAR_INT, bh_8_bw_1) {
  const size_t height = 8;
  const size_t width = 1;
  const size_t in_stride = 1;
  const size_t out_stride = 8;
  TransposeMicrokernelTester()
    .input_stride(in_stride)
    .output_stride(out_stride)
    .block_width(width)
    .block_height(height)
    .iterations(1)
    .Test(xnn_x8_transpose_ukernel__4x1_scalar_int);
}
767 
// Block width and input stride fixed at 1; block height (also used as the
// output stride) takes each value in [5, 8) -- the upper bound is exclusive.
TEST(X8_TRANSPOSE__4X1_SCALAR_INT, bh_5_8_bw_1) {
  const size_t width = 1;
  for (size_t height = 5; height < 8; height++) {
    TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .iterations(1)
      .Test(xnn_x8_transpose_ukernel__4x1_scalar_int);
  }
}
779 
// Block width and input stride fixed at 2; block height (also used as the
// output stride) takes each value in [5, 8) -- the upper bound is exclusive.
TEST(X8_TRANSPOSE__4X1_SCALAR_INT, bh_5_8_bw_2) {
  const size_t width = 2;
  for (size_t height = 5; height < 8; height++) {
    TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .iterations(1)
      .Test(xnn_x8_transpose_ukernel__4x1_scalar_int);
  }
}
791 
// Block heights in [5, 8) crossed with block widths in [2, 2]; the input
// stride follows the width and the output stride follows the height.
// The width bound must be inclusive: with an exclusive bound the range
// [2, 2) is empty and the tester would never be invoked.
TEST(X8_TRANSPOSE__4X1_SCALAR_INT, bh_5_8_bw_2_2) {
  for (size_t height = 5; height < 8; ++height) {
    for (size_t width = 2; width <= 2; ++width) {
      TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .iterations(1)
        .Test(xnn_x8_transpose_ukernel__4x1_scalar_int);
    }
  }
}
805 
// Single run: block height 4, block width 1, input stride 2 (larger than the
// block width), output stride 4.
TEST(X8_TRANSPOSE__4X1_SCALAR_INT, bh_4_bw_1_is_2) {
  const size_t height = 4;
  const size_t width = 1;
  const size_t in_stride = 2;
  const size_t out_stride = 4;
  TransposeMicrokernelTester()
    .input_stride(in_stride)
    .output_stride(out_stride)
    .block_width(width)
    .block_height(height)
    .iterations(1)
    .Test(xnn_x8_transpose_ukernel__4x1_scalar_int);
}
815 
// Single run: block height 4, block width 1, input stride 1, output stride 8
// (larger than the block height).
TEST(X8_TRANSPOSE__4X1_SCALAR_INT, bh_4_bw_1_os_8) {
  const size_t height = 4;
  const size_t width = 1;
  const size_t in_stride = 1;
  const size_t out_stride = 8;
  TransposeMicrokernelTester()
    .input_stride(in_stride)
    .output_stride(out_stride)
    .block_width(width)
    .block_height(height)
    .iterations(1)
    .Test(xnn_x8_transpose_ukernel__4x1_scalar_int);
}
825 
// Single run: block height 4, block width 1, input stride 2, output stride 8
// (both strides larger than the matching block dimension).
TEST(X8_TRANSPOSE__4X1_SCALAR_INT, bh_4_bw_1_is_2_os_8) {
  const size_t height = 4;
  const size_t width = 1;
  const size_t in_stride = 2;
  const size_t out_stride = 8;
  TransposeMicrokernelTester()
    .input_stride(in_stride)
    .output_stride(out_stride)
    .block_width(width)
    .block_height(height)
    .iterations(1)
    .Test(xnn_x8_transpose_ukernel__4x1_scalar_int);
}
835 
// Single run: block height 4, block width 2, input stride 2, output stride 4.
TEST(X8_TRANSPOSE__4X2_SCALAR_INT, bh_4_bw_2) {
  const size_t height = 4;
  const size_t width = 2;
  const size_t in_stride = 2;
  const size_t out_stride = 4;
  TransposeMicrokernelTester()
    .input_stride(in_stride)
    .output_stride(out_stride)
    .block_width(width)
    .block_height(height)
    .iterations(1)
    .Test(xnn_x8_transpose_ukernel__4x2_scalar_int);
}
845 
// Sweeps every block height in [1, 8] against every block width in [1, 4];
// the input stride follows the width and the output stride follows the height.
TEST(X8_TRANSPOSE__4X2_SCALAR_INT, bh_1_8_bw_1_4) {
  const size_t max_height = 8;
  const size_t max_width = 4;
  for (size_t height = 1; height <= max_height; height++) {
    for (size_t width = 1; width <= max_width; width++) {
      TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .iterations(1)
        .Test(xnn_x8_transpose_ukernel__4x2_scalar_int);
    }
  }
}
859 
// Single run: block height 4, block width 4, input stride 4, output stride 4.
TEST(X8_TRANSPOSE__4X2_SCALAR_INT, bh_4_bw_4) {
  const size_t height = 4;
  const size_t width = 4;
  const size_t in_stride = 4;
  const size_t out_stride = 4;
  TransposeMicrokernelTester()
    .input_stride(in_stride)
    .output_stride(out_stride)
    .block_width(width)
    .block_height(height)
    .iterations(1)
    .Test(xnn_x8_transpose_ukernel__4x2_scalar_int);
}
869 
// Block height fixed at 4; block width (also used as the input stride) takes
// each value in [3, 4) -- the upper bound is exclusive.
TEST(X8_TRANSPOSE__4X2_SCALAR_INT, bh_4_bw_3_4) {
  const size_t height = 4;
  for (size_t width = 3; width < 4; width++) {
    TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .iterations(1)
      .Test(xnn_x8_transpose_ukernel__4x2_scalar_int);
  }
}
881 
// Block height fixed at 8; block width (also used as the input stride) takes
// each value in [3, 4) -- the upper bound is exclusive.
TEST(X8_TRANSPOSE__4X2_SCALAR_INT, bh_8_bw_3_4) {
  const size_t height = 8;
  for (size_t width = 3; width < 4; width++) {
    TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .iterations(1)
      .Test(xnn_x8_transpose_ukernel__4x2_scalar_int);
  }
}
893 
// Single run: block height 8, block width 2, input stride 2, output stride 8.
TEST(X8_TRANSPOSE__4X2_SCALAR_INT, bh_8_bw_2) {
  const size_t height = 8;
  const size_t width = 2;
  const size_t in_stride = 2;
  const size_t out_stride = 8;
  TransposeMicrokernelTester()
    .input_stride(in_stride)
    .output_stride(out_stride)
    .block_width(width)
    .block_height(height)
    .iterations(1)
    .Test(xnn_x8_transpose_ukernel__4x2_scalar_int);
}
903 
// Block width and input stride fixed at 2; block height (also used as the
// output stride) takes each value in [5, 8) -- the upper bound is exclusive.
TEST(X8_TRANSPOSE__4X2_SCALAR_INT, bh_5_8_bw_2) {
  const size_t width = 2;
  for (size_t height = 5; height < 8; height++) {
    TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .iterations(1)
      .Test(xnn_x8_transpose_ukernel__4x2_scalar_int);
  }
}
915 
// Block width and input stride fixed at 4; block height (also used as the
// output stride) takes each value in [5, 8) -- the upper bound is exclusive.
TEST(X8_TRANSPOSE__4X2_SCALAR_INT, bh_5_8_bw_4) {
  const size_t width = 4;
  for (size_t height = 5; height < 8; height++) {
    TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .iterations(1)
      .Test(xnn_x8_transpose_ukernel__4x2_scalar_int);
  }
}
927 
// Block heights in [5, 8) crossed with block widths in [3, 4), both upper
// bounds exclusive; the input stride follows the width and the output stride
// follows the height.
TEST(X8_TRANSPOSE__4X2_SCALAR_INT, bh_5_8_bw_3_4) {
  for (size_t height = 5; height < 8; height++) {
    for (size_t width = 3; width < 4; width++) {
      TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .iterations(1)
        .Test(xnn_x8_transpose_ukernel__4x2_scalar_int);
    }
  }
}
941 
// Single run: block height 4, block width 2, input stride 4 (larger than the
// block width), output stride 4.
TEST(X8_TRANSPOSE__4X2_SCALAR_INT, bh_4_bw_2_is_4) {
  const size_t height = 4;
  const size_t width = 2;
  const size_t in_stride = 4;
  const size_t out_stride = 4;
  TransposeMicrokernelTester()
    .input_stride(in_stride)
    .output_stride(out_stride)
    .block_width(width)
    .block_height(height)
    .iterations(1)
    .Test(xnn_x8_transpose_ukernel__4x2_scalar_int);
}
951 
// Single run: block height 4, block width 2, input stride 2, output stride 8
// (larger than the block height).
TEST(X8_TRANSPOSE__4X2_SCALAR_INT, bh_4_bw_2_os_8) {
  const size_t height = 4;
  const size_t width = 2;
  const size_t in_stride = 2;
  const size_t out_stride = 8;
  TransposeMicrokernelTester()
    .input_stride(in_stride)
    .output_stride(out_stride)
    .block_width(width)
    .block_height(height)
    .iterations(1)
    .Test(xnn_x8_transpose_ukernel__4x2_scalar_int);
}
961 
// Single run: block height 4, block width 2, input stride 4, output stride 8
// (both strides larger than the matching block dimension).
TEST(X8_TRANSPOSE__4X2_SCALAR_INT, bh_4_bw_2_is_4_os_8) {
  const size_t height = 4;
  const size_t width = 2;
  const size_t in_stride = 4;
  const size_t out_stride = 8;
  TransposeMicrokernelTester()
    .input_stride(in_stride)
    .output_stride(out_stride)
    .block_width(width)
    .block_height(height)
    .iterations(1)
    .Test(xnn_x8_transpose_ukernel__4x2_scalar_int);
}
971 
// Single run: block height 4, block width 4, input stride 4, output stride 4.
TEST(X8_TRANSPOSE__4X4_SCALAR_INT, bh_4_bw_4) {
  const size_t height = 4;
  const size_t width = 4;
  const size_t in_stride = 4;
  const size_t out_stride = 4;
  TransposeMicrokernelTester()
    .input_stride(in_stride)
    .output_stride(out_stride)
    .block_width(width)
    .block_height(height)
    .iterations(1)
    .Test(xnn_x8_transpose_ukernel__4x4_scalar_int);
}
981 
// Sweeps every block height in [1, 8] against every block width in [1, 8];
// the input stride follows the width and the output stride follows the height.
TEST(X8_TRANSPOSE__4X4_SCALAR_INT, bh_1_8_bw_1_8) {
  const size_t max_height = 8;
  const size_t max_width = 8;
  for (size_t height = 1; height <= max_height; height++) {
    for (size_t width = 1; width <= max_width; width++) {
      TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .iterations(1)
        .Test(xnn_x8_transpose_ukernel__4x4_scalar_int);
    }
  }
}
995 
// Block height 4 and block width 8; input stride 8, output stride 4.
TEST(X8_TRANSPOSE__4X4_SCALAR_INT, bh_4_bw_8) {
  TransposeMicrokernelTester()
      .input_stride(8).output_stride(4)
      .block_width(8).block_height(4)
      .iterations(1)
      .Test(xnn_x8_transpose_ukernel__4x4_scalar_int);
}
1005 
// Block height fixed at 4; block width takes the values 5, 6 and 7 (the upper
// bound 8 is exclusive). Input stride follows the width; output stride is 4.
TEST(X8_TRANSPOSE__4X4_SCALAR_INT, bh_4_bw_5_8) {
  for (size_t width = 5; width < 8; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width).output_stride(4)
        .block_width(width).block_height(4)
        .iterations(1)
        .Test(xnn_x8_transpose_ukernel__4x4_scalar_int);
  }
}
1017 
// Block height fixed at 8; block width takes the values 5, 6 and 7 (the upper
// bound 8 is exclusive). Input stride follows the width; output stride is 8.
TEST(X8_TRANSPOSE__4X4_SCALAR_INT, bh_8_bw_5_8) {
  for (size_t width = 5; width < 8; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width).output_stride(8)
        .block_width(width).block_height(8)
        .iterations(1)
        .Test(xnn_x8_transpose_ukernel__4x4_scalar_int);
  }
}
1029 
// Block height 8 and block width 4; input stride 4, output stride 8.
TEST(X8_TRANSPOSE__4X4_SCALAR_INT, bh_8_bw_4) {
  TransposeMicrokernelTester()
      .input_stride(4).output_stride(8)
      .block_width(4).block_height(8)
      .iterations(1)
      .Test(xnn_x8_transpose_ukernel__4x4_scalar_int);
}
1039 
// Block width fixed at 4; block height takes the values 5, 6 and 7 (the upper
// bound 8 is exclusive). Output stride follows the height; input stride is 4.
TEST(X8_TRANSPOSE__4X4_SCALAR_INT, bh_5_8_bw_4) {
  for (size_t height = 5; height < 8; ++height) {
    TransposeMicrokernelTester()
        .input_stride(4).output_stride(height)
        .block_width(4).block_height(height)
        .iterations(1)
        .Test(xnn_x8_transpose_ukernel__4x4_scalar_int);
  }
}
1051 
// Block width fixed at 8; block height takes the values 5, 6 and 7 (the upper
// bound 8 is exclusive). Output stride follows the height; input stride is 8.
TEST(X8_TRANSPOSE__4X4_SCALAR_INT, bh_5_8_bw_8) {
  for (size_t height = 5; height < 8; ++height) {
    TransposeMicrokernelTester()
        .input_stride(8).output_stride(height)
        .block_width(8).block_height(height)
        .iterations(1)
        .Test(xnn_x8_transpose_ukernel__4x4_scalar_int);
  }
}
1063 
// Sweeps block heights and block widths over 5, 6 and 7 (the upper bound 8 is
// exclusive for both). Input stride equals the block width and output stride
// equals the block height.
TEST(X8_TRANSPOSE__4X4_SCALAR_INT, bh_5_8_bw_5_8) {
  for (size_t height = 5; height < 8; ++height) {
    for (size_t width = 5; width < 8; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width).output_stride(height)
          .block_width(width).block_height(height)
          .iterations(1)
          .Test(xnn_x8_transpose_ukernel__4x4_scalar_int);
    }
  }
}
1077 
// Block height 4, block width 4, with an input stride (8) larger than the
// block width; output stride is 4.
TEST(X8_TRANSPOSE__4X4_SCALAR_INT, bh_4_bw_4_is_8) {
  TransposeMicrokernelTester()
      .input_stride(8).output_stride(4)
      .block_width(4).block_height(4)
      .iterations(1)
      .Test(xnn_x8_transpose_ukernel__4x4_scalar_int);
}
1087 
// Block height 4, block width 4, with an output stride (8) larger than the
// block height; input stride is 4.
TEST(X8_TRANSPOSE__4X4_SCALAR_INT, bh_4_bw_4_os_8) {
  TransposeMicrokernelTester()
      .input_stride(4).output_stride(8)
      .block_width(4).block_height(4)
      .iterations(1)
      .Test(xnn_x8_transpose_ukernel__4x4_scalar_int);
}
1097 
// Block height 4, block width 4, with both strides enlarged to 8.
TEST(X8_TRANSPOSE__4X4_SCALAR_INT, bh_4_bw_4_is_8_os_8) {
  TransposeMicrokernelTester()
      .input_stride(8).output_stride(8)
      .block_width(4).block_height(4)
      .iterations(1)
      .Test(xnn_x8_transpose_ukernel__4x4_scalar_int);
}
1107 
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // Test cases for xnn_x8_transpose_ukernel__16x16_reuse_mov_sse2, compiled
  // only when targeting x86 or x86-64. Each case begins with
  // TEST_REQUIRES_X86_SSE2 (defined outside this file; presumably it skips the
  // case when SSE2 is not available -- confirm in xnnpack/isa-checks.h).

  // Block height 16 and block width 16; both strides are 16.
  TEST(X8_TRANSPOSE__16X16_REUSE_MOV_SSE2, bh_16_bw_16) {
    TEST_REQUIRES_X86_SSE2;
    TransposeMicrokernelTester()
        .input_stride(16).output_stride(16)
        .block_width(16).block_height(16)
        .iterations(1)
        .Test(xnn_x8_transpose_ukernel__16x16_reuse_mov_sse2);
  }

  // Exhaustive sweep of block heights and widths from 1 through 32 (inclusive);
  // input stride equals the width and output stride equals the height.
  TEST(X8_TRANSPOSE__16X16_REUSE_MOV_SSE2, bh_1_32_bw_1_32) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t height = 1; height <= 32; ++height) {
      for (size_t width = 1; width <= 32; ++width) {
        TransposeMicrokernelTester()
            .input_stride(width).output_stride(height)
            .block_width(width).block_height(height)
            .iterations(1)
            .Test(xnn_x8_transpose_ukernel__16x16_reuse_mov_sse2);
      }
    }
  }

  // Block height 16 and block width 32; input stride 32, output stride 16.
  TEST(X8_TRANSPOSE__16X16_REUSE_MOV_SSE2, bh_16_bw_32) {
    TEST_REQUIRES_X86_SSE2;
    TransposeMicrokernelTester()
        .input_stride(32).output_stride(16)
        .block_width(32).block_height(16)
        .iterations(1)
        .Test(xnn_x8_transpose_ukernel__16x16_reuse_mov_sse2);
  }

  // Block height 16; block widths 17 through 31 (32 is excluded).
  TEST(X8_TRANSPOSE__16X16_REUSE_MOV_SSE2, bh_16_bw_17_32) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t width = 17; width < 32; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width).output_stride(16)
          .block_width(width).block_height(16)
          .iterations(1)
          .Test(xnn_x8_transpose_ukernel__16x16_reuse_mov_sse2);
    }
  }

  // Block height 32; block widths 17 through 31 (32 is excluded).
  TEST(X8_TRANSPOSE__16X16_REUSE_MOV_SSE2, bh_32_bw_17_32) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t width = 17; width < 32; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width).output_stride(32)
          .block_width(width).block_height(32)
          .iterations(1)
          .Test(xnn_x8_transpose_ukernel__16x16_reuse_mov_sse2);
    }
  }

  // Block height 32 and block width 16; input stride 16, output stride 32.
  TEST(X8_TRANSPOSE__16X16_REUSE_MOV_SSE2, bh_32_bw_16) {
    TEST_REQUIRES_X86_SSE2;
    TransposeMicrokernelTester()
        .input_stride(16).output_stride(32)
        .block_width(16).block_height(32)
        .iterations(1)
        .Test(xnn_x8_transpose_ukernel__16x16_reuse_mov_sse2);
  }

  // Block width 16; block heights 17 through 31 (32 is excluded).
  TEST(X8_TRANSPOSE__16X16_REUSE_MOV_SSE2, bh_17_32_bw_16) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t height = 17; height < 32; ++height) {
      TransposeMicrokernelTester()
          .input_stride(16).output_stride(height)
          .block_width(16).block_height(height)
          .iterations(1)
          .Test(xnn_x8_transpose_ukernel__16x16_reuse_mov_sse2);
    }
  }

  // Block width 32; block heights 17 through 31 (32 is excluded).
  TEST(X8_TRANSPOSE__16X16_REUSE_MOV_SSE2, bh_17_32_bw_32) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t height = 17; height < 32; ++height) {
      TransposeMicrokernelTester()
          .input_stride(32).output_stride(height)
          .block_width(32).block_height(height)
          .iterations(1)
          .Test(xnn_x8_transpose_ukernel__16x16_reuse_mov_sse2);
    }
  }

  // Block heights and widths both swept over 17 through 31 (32 is excluded).
  TEST(X8_TRANSPOSE__16X16_REUSE_MOV_SSE2, bh_17_32_bw_17_32) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t height = 17; height < 32; ++height) {
      for (size_t width = 17; width < 32; ++width) {
        TransposeMicrokernelTester()
            .input_stride(width).output_stride(height)
            .block_width(width).block_height(height)
            .iterations(1)
            .Test(xnn_x8_transpose_ukernel__16x16_reuse_mov_sse2);
      }
    }
  }

  // 16x16 block with an input stride (32) larger than the block width.
  TEST(X8_TRANSPOSE__16X16_REUSE_MOV_SSE2, bh_16_bw_16_is_32) {
    TEST_REQUIRES_X86_SSE2;
    TransposeMicrokernelTester()
        .input_stride(32).output_stride(16)
        .block_width(16).block_height(16)
        .iterations(1)
        .Test(xnn_x8_transpose_ukernel__16x16_reuse_mov_sse2);
  }

  // 16x16 block with an output stride (32) larger than the block height.
  TEST(X8_TRANSPOSE__16X16_REUSE_MOV_SSE2, bh_16_bw_16_os_32) {
    TEST_REQUIRES_X86_SSE2;
    TransposeMicrokernelTester()
        .input_stride(16).output_stride(32)
        .block_width(16).block_height(16)
        .iterations(1)
        .Test(xnn_x8_transpose_ukernel__16x16_reuse_mov_sse2);
  }

  // 16x16 block with both strides enlarged to 32.
  TEST(X8_TRANSPOSE__16X16_REUSE_MOV_SSE2, bh_16_bw_16_is_32_os_32) {
    TEST_REQUIRES_X86_SSE2;
    TransposeMicrokernelTester()
        .input_stride(32).output_stride(32)
        .block_width(16).block_height(16)
        .iterations(1)
        .Test(xnn_x8_transpose_ukernel__16x16_reuse_mov_sse2);
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
1257 
1258 
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // Test cases for xnn_x8_transpose_ukernel__16x16_reuse_switch_sse2, compiled
  // only when targeting x86 or x86-64. Each case begins with
  // TEST_REQUIRES_X86_SSE2 (defined outside this file; presumably it skips the
  // case when SSE2 is not available -- confirm in xnnpack/isa-checks.h).

  // Block height 16 and block width 16; both strides are 16.
  TEST(X8_TRANSPOSE__16X16_REUSE_SWITCH_SSE2, bh_16_bw_16) {
    TEST_REQUIRES_X86_SSE2;
    TransposeMicrokernelTester()
        .input_stride(16).output_stride(16)
        .block_width(16).block_height(16)
        .iterations(1)
        .Test(xnn_x8_transpose_ukernel__16x16_reuse_switch_sse2);
  }

  // Exhaustive sweep of block heights and widths from 1 through 32 (inclusive);
  // input stride equals the width and output stride equals the height.
  TEST(X8_TRANSPOSE__16X16_REUSE_SWITCH_SSE2, bh_1_32_bw_1_32) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t height = 1; height <= 32; ++height) {
      for (size_t width = 1; width <= 32; ++width) {
        TransposeMicrokernelTester()
            .input_stride(width).output_stride(height)
            .block_width(width).block_height(height)
            .iterations(1)
            .Test(xnn_x8_transpose_ukernel__16x16_reuse_switch_sse2);
      }
    }
  }

  // Block height 16 and block width 32; input stride 32, output stride 16.
  TEST(X8_TRANSPOSE__16X16_REUSE_SWITCH_SSE2, bh_16_bw_32) {
    TEST_REQUIRES_X86_SSE2;
    TransposeMicrokernelTester()
        .input_stride(32).output_stride(16)
        .block_width(32).block_height(16)
        .iterations(1)
        .Test(xnn_x8_transpose_ukernel__16x16_reuse_switch_sse2);
  }

  // Block height 16; block widths 17 through 31 (32 is excluded).
  TEST(X8_TRANSPOSE__16X16_REUSE_SWITCH_SSE2, bh_16_bw_17_32) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t width = 17; width < 32; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width).output_stride(16)
          .block_width(width).block_height(16)
          .iterations(1)
          .Test(xnn_x8_transpose_ukernel__16x16_reuse_switch_sse2);
    }
  }

  // Block height 32; block widths 17 through 31 (32 is excluded).
  TEST(X8_TRANSPOSE__16X16_REUSE_SWITCH_SSE2, bh_32_bw_17_32) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t width = 17; width < 32; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width).output_stride(32)
          .block_width(width).block_height(32)
          .iterations(1)
          .Test(xnn_x8_transpose_ukernel__16x16_reuse_switch_sse2);
    }
  }

  // Block height 32 and block width 16; input stride 16, output stride 32.
  TEST(X8_TRANSPOSE__16X16_REUSE_SWITCH_SSE2, bh_32_bw_16) {
    TEST_REQUIRES_X86_SSE2;
    TransposeMicrokernelTester()
        .input_stride(16).output_stride(32)
        .block_width(16).block_height(32)
        .iterations(1)
        .Test(xnn_x8_transpose_ukernel__16x16_reuse_switch_sse2);
  }

  // Block width 16; block heights 17 through 31 (32 is excluded).
  TEST(X8_TRANSPOSE__16X16_REUSE_SWITCH_SSE2, bh_17_32_bw_16) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t height = 17; height < 32; ++height) {
      TransposeMicrokernelTester()
          .input_stride(16).output_stride(height)
          .block_width(16).block_height(height)
          .iterations(1)
          .Test(xnn_x8_transpose_ukernel__16x16_reuse_switch_sse2);
    }
  }

  // Block width 32; block heights 17 through 31 (32 is excluded).
  TEST(X8_TRANSPOSE__16X16_REUSE_SWITCH_SSE2, bh_17_32_bw_32) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t height = 17; height < 32; ++height) {
      TransposeMicrokernelTester()
          .input_stride(32).output_stride(height)
          .block_width(32).block_height(height)
          .iterations(1)
          .Test(xnn_x8_transpose_ukernel__16x16_reuse_switch_sse2);
    }
  }

  // Block heights and widths both swept over 17 through 31 (32 is excluded).
  TEST(X8_TRANSPOSE__16X16_REUSE_SWITCH_SSE2, bh_17_32_bw_17_32) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t height = 17; height < 32; ++height) {
      for (size_t width = 17; width < 32; ++width) {
        TransposeMicrokernelTester()
            .input_stride(width).output_stride(height)
            .block_width(width).block_height(height)
            .iterations(1)
            .Test(xnn_x8_transpose_ukernel__16x16_reuse_switch_sse2);
      }
    }
  }

  // 16x16 block with an input stride (32) larger than the block width.
  TEST(X8_TRANSPOSE__16X16_REUSE_SWITCH_SSE2, bh_16_bw_16_is_32) {
    TEST_REQUIRES_X86_SSE2;
    TransposeMicrokernelTester()
        .input_stride(32).output_stride(16)
        .block_width(16).block_height(16)
        .iterations(1)
        .Test(xnn_x8_transpose_ukernel__16x16_reuse_switch_sse2);
  }

  // 16x16 block with an output stride (32) larger than the block height.
  TEST(X8_TRANSPOSE__16X16_REUSE_SWITCH_SSE2, bh_16_bw_16_os_32) {
    TEST_REQUIRES_X86_SSE2;
    TransposeMicrokernelTester()
        .input_stride(16).output_stride(32)
        .block_width(16).block_height(16)
        .iterations(1)
        .Test(xnn_x8_transpose_ukernel__16x16_reuse_switch_sse2);
  }

  // 16x16 block with both strides enlarged to 32.
  TEST(X8_TRANSPOSE__16X16_REUSE_SWITCH_SSE2, bh_16_bw_16_is_32_os_32) {
    TEST_REQUIRES_X86_SSE2;
    TransposeMicrokernelTester()
        .input_stride(32).output_stride(32)
        .block_width(16).block_height(16)
        .iterations(1)
        .Test(xnn_x8_transpose_ukernel__16x16_reuse_switch_sse2);
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
1408 
1409 
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  // Test cases for xnn_x8_transpose_ukernel__16x16_reuse_dec_zip_neon, compiled
  // only when targeting ARM or ARM64. Each case begins with
  // TEST_REQUIRES_ARM_NEON (defined outside this file; presumably it skips the
  // case when NEON is not available -- confirm in xnnpack/isa-checks.h).

  // Block height 16 and block width 16; both strides are 16.
  TEST(X8_TRANSPOSE__16X16_REUSE_DEC_ZIP_NEON, bh_16_bw_16) {
    TEST_REQUIRES_ARM_NEON;
    TransposeMicrokernelTester()
        .input_stride(16).output_stride(16)
        .block_width(16).block_height(16)
        .iterations(1)
        .Test(xnn_x8_transpose_ukernel__16x16_reuse_dec_zip_neon);
  }

  // Exhaustive sweep of block heights and widths from 1 through 32 (inclusive);
  // input stride equals the width and output stride equals the height.
  TEST(X8_TRANSPOSE__16X16_REUSE_DEC_ZIP_NEON, bh_1_32_bw_1_32) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t height = 1; height <= 32; ++height) {
      for (size_t width = 1; width <= 32; ++width) {
        TransposeMicrokernelTester()
            .input_stride(width).output_stride(height)
            .block_width(width).block_height(height)
            .iterations(1)
            .Test(xnn_x8_transpose_ukernel__16x16_reuse_dec_zip_neon);
      }
    }
  }

  // Block height 16 and block width 32; input stride 32, output stride 16.
  TEST(X8_TRANSPOSE__16X16_REUSE_DEC_ZIP_NEON, bh_16_bw_32) {
    TEST_REQUIRES_ARM_NEON;
    TransposeMicrokernelTester()
        .input_stride(32).output_stride(16)
        .block_width(32).block_height(16)
        .iterations(1)
        .Test(xnn_x8_transpose_ukernel__16x16_reuse_dec_zip_neon);
  }

  // Block height 16; block widths 17 through 31 (32 is excluded).
  TEST(X8_TRANSPOSE__16X16_REUSE_DEC_ZIP_NEON, bh_16_bw_17_32) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t width = 17; width < 32; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width).output_stride(16)
          .block_width(width).block_height(16)
          .iterations(1)
          .Test(xnn_x8_transpose_ukernel__16x16_reuse_dec_zip_neon);
    }
  }

  // Block height 32; block widths 17 through 31 (32 is excluded).
  TEST(X8_TRANSPOSE__16X16_REUSE_DEC_ZIP_NEON, bh_32_bw_17_32) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t width = 17; width < 32; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width).output_stride(32)
          .block_width(width).block_height(32)
          .iterations(1)
          .Test(xnn_x8_transpose_ukernel__16x16_reuse_dec_zip_neon);
    }
  }

  // Block height 32 and block width 16; input stride 16, output stride 32.
  TEST(X8_TRANSPOSE__16X16_REUSE_DEC_ZIP_NEON, bh_32_bw_16) {
    TEST_REQUIRES_ARM_NEON;
    TransposeMicrokernelTester()
        .input_stride(16).output_stride(32)
        .block_width(16).block_height(32)
        .iterations(1)
        .Test(xnn_x8_transpose_ukernel__16x16_reuse_dec_zip_neon);
  }

  // Block width 16; block heights 17 through 31 (32 is excluded).
  TEST(X8_TRANSPOSE__16X16_REUSE_DEC_ZIP_NEON, bh_17_32_bw_16) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t height = 17; height < 32; ++height) {
      TransposeMicrokernelTester()
          .input_stride(16).output_stride(height)
          .block_width(16).block_height(height)
          .iterations(1)
          .Test(xnn_x8_transpose_ukernel__16x16_reuse_dec_zip_neon);
    }
  }

  // Block width 32; block heights 17 through 31 (32 is excluded).
  TEST(X8_TRANSPOSE__16X16_REUSE_DEC_ZIP_NEON, bh_17_32_bw_32) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t height = 17; height < 32; ++height) {
      TransposeMicrokernelTester()
          .input_stride(32).output_stride(height)
          .block_width(32).block_height(height)
          .iterations(1)
          .Test(xnn_x8_transpose_ukernel__16x16_reuse_dec_zip_neon);
    }
  }

  // Block heights and widths both swept over 17 through 31 (32 is excluded).
  TEST(X8_TRANSPOSE__16X16_REUSE_DEC_ZIP_NEON, bh_17_32_bw_17_32) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t height = 17; height < 32; ++height) {
      for (size_t width = 17; width < 32; ++width) {
        TransposeMicrokernelTester()
            .input_stride(width).output_stride(height)
            .block_width(width).block_height(height)
            .iterations(1)
            .Test(xnn_x8_transpose_ukernel__16x16_reuse_dec_zip_neon);
      }
    }
  }

  // 16x16 block with an input stride (32) larger than the block width.
  TEST(X8_TRANSPOSE__16X16_REUSE_DEC_ZIP_NEON, bh_16_bw_16_is_32) {
    TEST_REQUIRES_ARM_NEON;
    TransposeMicrokernelTester()
        .input_stride(32).output_stride(16)
        .block_width(16).block_height(16)
        .iterations(1)
        .Test(xnn_x8_transpose_ukernel__16x16_reuse_dec_zip_neon);
  }

  // 16x16 block with an output stride (32) larger than the block height.
  TEST(X8_TRANSPOSE__16X16_REUSE_DEC_ZIP_NEON, bh_16_bw_16_os_32) {
    TEST_REQUIRES_ARM_NEON;
    TransposeMicrokernelTester()
        .input_stride(16).output_stride(32)
        .block_width(16).block_height(16)
        .iterations(1)
        .Test(xnn_x8_transpose_ukernel__16x16_reuse_dec_zip_neon);
  }

  // 16x16 block with both strides enlarged to 32.
  TEST(X8_TRANSPOSE__16X16_REUSE_DEC_ZIP_NEON, bh_16_bw_16_is_32_os_32) {
    TEST_REQUIRES_ARM_NEON;
    TransposeMicrokernelTester()
        .input_stride(32).output_stride(32)
        .block_width(16).block_height(16)
        .iterations(1)
        .Test(xnn_x8_transpose_ukernel__16x16_reuse_dec_zip_neon);
  }
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
1559 
1560 
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  // Test cases for xnn_x8_transpose_ukernel__16x16_reuse_mov_zip_neon, compiled
  // only when targeting ARM or ARM64. Each case begins with
  // TEST_REQUIRES_ARM_NEON (defined outside this file; presumably it skips the
  // case when NEON is not available -- confirm in xnnpack/isa-checks.h).

  // Block height 16 and block width 16; both strides are 16.
  TEST(X8_TRANSPOSE__16X16_REUSE_MOV_ZIP_NEON, bh_16_bw_16) {
    TEST_REQUIRES_ARM_NEON;
    TransposeMicrokernelTester()
        .input_stride(16).output_stride(16)
        .block_width(16).block_height(16)
        .iterations(1)
        .Test(xnn_x8_transpose_ukernel__16x16_reuse_mov_zip_neon);
  }

  // Exhaustive sweep of block heights and widths from 1 through 32 (inclusive);
  // input stride equals the width and output stride equals the height.
  TEST(X8_TRANSPOSE__16X16_REUSE_MOV_ZIP_NEON, bh_1_32_bw_1_32) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t height = 1; height <= 32; ++height) {
      for (size_t width = 1; width <= 32; ++width) {
        TransposeMicrokernelTester()
            .input_stride(width).output_stride(height)
            .block_width(width).block_height(height)
            .iterations(1)
            .Test(xnn_x8_transpose_ukernel__16x16_reuse_mov_zip_neon);
      }
    }
  }

  // Block height 16 and block width 32; input stride 32, output stride 16.
  TEST(X8_TRANSPOSE__16X16_REUSE_MOV_ZIP_NEON, bh_16_bw_32) {
    TEST_REQUIRES_ARM_NEON;
    TransposeMicrokernelTester()
        .input_stride(32).output_stride(16)
        .block_width(32).block_height(16)
        .iterations(1)
        .Test(xnn_x8_transpose_ukernel__16x16_reuse_mov_zip_neon);
  }

  // Block height 16; block widths 17 through 31 (32 is excluded).
  TEST(X8_TRANSPOSE__16X16_REUSE_MOV_ZIP_NEON, bh_16_bw_17_32) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t width = 17; width < 32; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width).output_stride(16)
          .block_width(width).block_height(16)
          .iterations(1)
          .Test(xnn_x8_transpose_ukernel__16x16_reuse_mov_zip_neon);
    }
  }

  // Block height 32; block widths 17 through 31 (32 is excluded).
  TEST(X8_TRANSPOSE__16X16_REUSE_MOV_ZIP_NEON, bh_32_bw_17_32) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t width = 17; width < 32; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width).output_stride(32)
          .block_width(width).block_height(32)
          .iterations(1)
          .Test(xnn_x8_transpose_ukernel__16x16_reuse_mov_zip_neon);
    }
  }

  // Block height 32 and block width 16; input stride 16, output stride 32.
  TEST(X8_TRANSPOSE__16X16_REUSE_MOV_ZIP_NEON, bh_32_bw_16) {
    TEST_REQUIRES_ARM_NEON;
    TransposeMicrokernelTester()
        .input_stride(16).output_stride(32)
        .block_width(16).block_height(32)
        .iterations(1)
        .Test(xnn_x8_transpose_ukernel__16x16_reuse_mov_zip_neon);
  }

  // Block width 16; block heights 17 through 31 (32 is excluded).
  TEST(X8_TRANSPOSE__16X16_REUSE_MOV_ZIP_NEON, bh_17_32_bw_16) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t height = 17; height < 32; ++height) {
      TransposeMicrokernelTester()
          .input_stride(16).output_stride(height)
          .block_width(16).block_height(height)
          .iterations(1)
          .Test(xnn_x8_transpose_ukernel__16x16_reuse_mov_zip_neon);
    }
  }

  // Block width 32; block heights 17 through 31 (32 is excluded).
  TEST(X8_TRANSPOSE__16X16_REUSE_MOV_ZIP_NEON, bh_17_32_bw_32) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t height = 17; height < 32; ++height) {
      TransposeMicrokernelTester()
          .input_stride(32).output_stride(height)
          .block_width(32).block_height(height)
          .iterations(1)
          .Test(xnn_x8_transpose_ukernel__16x16_reuse_mov_zip_neon);
    }
  }

  // Block heights and widths both swept over 17 through 31 (32 is excluded).
  TEST(X8_TRANSPOSE__16X16_REUSE_MOV_ZIP_NEON, bh_17_32_bw_17_32) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t height = 17; height < 32; ++height) {
      for (size_t width = 17; width < 32; ++width) {
        TransposeMicrokernelTester()
            .input_stride(width).output_stride(height)
            .block_width(width).block_height(height)
            .iterations(1)
            .Test(xnn_x8_transpose_ukernel__16x16_reuse_mov_zip_neon);
      }
    }
  }

  // 16x16 block with an input stride (32) larger than the block width.
  TEST(X8_TRANSPOSE__16X16_REUSE_MOV_ZIP_NEON, bh_16_bw_16_is_32) {
    TEST_REQUIRES_ARM_NEON;
    TransposeMicrokernelTester()
        .input_stride(32).output_stride(16)
        .block_width(16).block_height(16)
        .iterations(1)
        .Test(xnn_x8_transpose_ukernel__16x16_reuse_mov_zip_neon);
  }

  // 16x16 block with an output stride (32) larger than the block height.
  TEST(X8_TRANSPOSE__16X16_REUSE_MOV_ZIP_NEON, bh_16_bw_16_os_32) {
    TEST_REQUIRES_ARM_NEON;
    TransposeMicrokernelTester()
        .input_stride(16).output_stride(32)
        .block_width(16).block_height(16)
        .iterations(1)
        .Test(xnn_x8_transpose_ukernel__16x16_reuse_mov_zip_neon);
  }

  // 16x16 block with both strides enlarged to 32.
  TEST(X8_TRANSPOSE__16X16_REUSE_MOV_ZIP_NEON, bh_16_bw_16_is_32_os_32) {
    TEST_REQUIRES_ARM_NEON;
    TransposeMicrokernelTester()
        .input_stride(32).output_stride(32)
        .block_width(16).block_height(16)
        .iterations(1)
        .Test(xnn_x8_transpose_ukernel__16x16_reuse_mov_zip_neon);
  }
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
1710 
1711 
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  // Test cases for xnn_x8_transpose_ukernel__16x16_reuse_switch_zip_neon,
  // compiled only when targeting ARM or ARM64. Each case begins with
  // TEST_REQUIRES_ARM_NEON (defined outside this file; presumably it skips the
  // case when NEON is not available -- confirm in xnnpack/isa-checks.h).

  // Block height 16 and block width 16; both strides are 16.
  TEST(X8_TRANSPOSE__16X16_REUSE_SWITCH_ZIP_NEON, bh_16_bw_16) {
    TEST_REQUIRES_ARM_NEON;
    TransposeMicrokernelTester()
        .input_stride(16).output_stride(16)
        .block_width(16).block_height(16)
        .iterations(1)
        .Test(xnn_x8_transpose_ukernel__16x16_reuse_switch_zip_neon);
  }

  // Exhaustive sweep of block heights and widths from 1 through 32 (inclusive);
  // input stride equals the width and output stride equals the height.
  TEST(X8_TRANSPOSE__16X16_REUSE_SWITCH_ZIP_NEON, bh_1_32_bw_1_32) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t height = 1; height <= 32; ++height) {
      for (size_t width = 1; width <= 32; ++width) {
        TransposeMicrokernelTester()
            .input_stride(width).output_stride(height)
            .block_width(width).block_height(height)
            .iterations(1)
            .Test(xnn_x8_transpose_ukernel__16x16_reuse_switch_zip_neon);
      }
    }
  }

  // Block height 16 and block width 32; input stride 32, output stride 16.
  TEST(X8_TRANSPOSE__16X16_REUSE_SWITCH_ZIP_NEON, bh_16_bw_32) {
    TEST_REQUIRES_ARM_NEON;
    TransposeMicrokernelTester()
        .input_stride(32).output_stride(16)
        .block_width(32).block_height(16)
        .iterations(1)
        .Test(xnn_x8_transpose_ukernel__16x16_reuse_switch_zip_neon);
  }

  // Block height 16; block widths 17 through 31 (32 is excluded).
  TEST(X8_TRANSPOSE__16X16_REUSE_SWITCH_ZIP_NEON, bh_16_bw_17_32) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t width = 17; width < 32; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width).output_stride(16)
          .block_width(width).block_height(16)
          .iterations(1)
          .Test(xnn_x8_transpose_ukernel__16x16_reuse_switch_zip_neon);
    }
  }

  // Block height 32; block widths 17 through 31 (32 is excluded).
  TEST(X8_TRANSPOSE__16X16_REUSE_SWITCH_ZIP_NEON, bh_32_bw_17_32) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t width = 17; width < 32; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width).output_stride(32)
          .block_width(width).block_height(32)
          .iterations(1)
          .Test(xnn_x8_transpose_ukernel__16x16_reuse_switch_zip_neon);
    }
  }

  // Block height 32 and block width 16; input stride 16, output stride 32.
  TEST(X8_TRANSPOSE__16X16_REUSE_SWITCH_ZIP_NEON, bh_32_bw_16) {
    TEST_REQUIRES_ARM_NEON;
    TransposeMicrokernelTester()
        .input_stride(16).output_stride(32)
        .block_width(16).block_height(32)
        .iterations(1)
        .Test(xnn_x8_transpose_ukernel__16x16_reuse_switch_zip_neon);
  }

  // Block width 16; block heights 17 through 31 (32 is excluded).
  TEST(X8_TRANSPOSE__16X16_REUSE_SWITCH_ZIP_NEON, bh_17_32_bw_16) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t height = 17; height < 32; ++height) {
      TransposeMicrokernelTester()
          .input_stride(16).output_stride(height)
          .block_width(16).block_height(height)
          .iterations(1)
          .Test(xnn_x8_transpose_ukernel__16x16_reuse_switch_zip_neon);
    }
  }

  // Block width 32; block heights 17 through 31 (32 is excluded).
  TEST(X8_TRANSPOSE__16X16_REUSE_SWITCH_ZIP_NEON, bh_17_32_bw_32) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t height = 17; height < 32; ++height) {
      TransposeMicrokernelTester()
          .input_stride(32).output_stride(height)
          .block_width(32).block_height(height)
          .iterations(1)
          .Test(xnn_x8_transpose_ukernel__16x16_reuse_switch_zip_neon);
    }
  }

  // Block heights and widths both swept over 17 through 31 (32 is excluded).
  TEST(X8_TRANSPOSE__16X16_REUSE_SWITCH_ZIP_NEON, bh_17_32_bw_17_32) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t height = 17; height < 32; ++height) {
      for (size_t width = 17; width < 32; ++width) {
        TransposeMicrokernelTester()
            .input_stride(width).output_stride(height)
            .block_width(width).block_height(height)
            .iterations(1)
            .Test(xnn_x8_transpose_ukernel__16x16_reuse_switch_zip_neon);
      }
    }
  }

  // 16x16 block with an input stride (32) larger than the block width.
  TEST(X8_TRANSPOSE__16X16_REUSE_SWITCH_ZIP_NEON, bh_16_bw_16_is_32) {
    TEST_REQUIRES_ARM_NEON;
    TransposeMicrokernelTester()
        .input_stride(32).output_stride(16)
        .block_width(16).block_height(16)
        .iterations(1)
        .Test(xnn_x8_transpose_ukernel__16x16_reuse_switch_zip_neon);
  }

  // 16x16 block with an output stride (32) larger than the block height.
  TEST(X8_TRANSPOSE__16X16_REUSE_SWITCH_ZIP_NEON, bh_16_bw_16_os_32) {
    TEST_REQUIRES_ARM_NEON;
    TransposeMicrokernelTester()
        .input_stride(16).output_stride(32)
        .block_width(16).block_height(16)
        .iterations(1)
        .Test(xnn_x8_transpose_ukernel__16x16_reuse_switch_zip_neon);
  }

  // 16x16 block with both strides enlarged to 32.
  TEST(X8_TRANSPOSE__16X16_REUSE_SWITCH_ZIP_NEON, bh_16_bw_16_is_32_os_32) {
    TEST_REQUIRES_ARM_NEON;
    TransposeMicrokernelTester()
        .input_stride(32).output_stride(32)
        .block_width(16).block_height(16)
        .iterations(1)
        .Test(xnn_x8_transpose_ukernel__16x16_reuse_switch_zip_neon);
  }
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
1861