• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2021 Google LLC
2 //
3 // This source code is licensed under the BSD-style license found in the
4 // LICENSE file in the root directory of this source tree.
5 //
6 // Auto-generated file. Do not edit!
7 //   Specification: test/x16-transpose.yaml
8 //   Generator: tools/generate-transpose-test.py
9 
10 
11 #include <gtest/gtest.h>
12 
13 #include <xnnpack/common.h>
14 #include <xnnpack/isa-checks.h>
15 
16 #include <xnnpack/transpose.h>
17 #include "transpose-microkernel-tester.h"
18 
19 
// Fixed case for the 1x2 kernel: block 1 high x 2 wide, input stride 2, output stride 1.
TEST(X16_TRANSPOSE__1X2_SCALAR_INT, bh_1_bw_2) {
  const size_t kBlockHeight = 1;
  const size_t kBlockWidth = 2;
  TransposeMicrokernelTester()
    .input_stride(kBlockWidth)
    .output_stride(kBlockHeight)
    .block_width(kBlockWidth)
    .block_height(kBlockHeight)
    .iterations(1)
    .Test(xnn_x16_transpose_ukernel__1x2_scalar_int);
}
29 
// 1x2 kernel over every block height in [1, 2] and block width in [1, 4];
// input stride equals the width and output stride equals the height.
TEST(X16_TRANSPOSE__1X2_SCALAR_INT, bh_1_2_bw_1_4) {
  for (size_t height = 1; height <= 2; ++height) {
    for (size_t width = 1; width <= 4; ++width) {
      TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .iterations(1)
        .Test(xnn_x16_transpose_ukernel__1x2_scalar_int);
    }
  }
}
43 
// Fixed case for the 1x2 kernel: block 1 high x 4 wide, input stride 4, output stride 1.
TEST(X16_TRANSPOSE__1X2_SCALAR_INT, bh_1_bw_4) {
  const size_t kBlockHeight = 1;
  const size_t kBlockWidth = 4;
  TransposeMicrokernelTester()
    .input_stride(kBlockWidth)
    .output_stride(kBlockHeight)
    .block_width(kBlockWidth)
    .block_height(kBlockHeight)
    .iterations(1)
    .Test(xnn_x16_transpose_ukernel__1x2_scalar_int);
}
53 
// 1x2 kernel over block widths in [3, 4) at block height 1;
// input stride equals the width, output stride is 1.
TEST(X16_TRANSPOSE__1X2_SCALAR_INT, bh_1_bw_3_4) {
  for (size_t width = 3; width < 4; ++width) {
    TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(1)
      .block_width(width)
      .block_height(1)
      .iterations(1)
      .Test(xnn_x16_transpose_ukernel__1x2_scalar_int);
  }
}
65 
// 1x2 kernel over block widths in [3, 4) at block height 2;
// input stride equals the width, output stride is 2.
TEST(X16_TRANSPOSE__1X2_SCALAR_INT, bh_2_bw_3_4) {
  for (size_t width = 3; width < 4; ++width) {
    TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(2)
      .block_width(width)
      .block_height(2)
      .iterations(1)
      .Test(xnn_x16_transpose_ukernel__1x2_scalar_int);
  }
}
77 
// Fixed case for the 1x2 kernel: block 2 high x 2 wide, input stride 2, output stride 2.
TEST(X16_TRANSPOSE__1X2_SCALAR_INT, bh_2_bw_2) {
  const size_t kBlockHeight = 2;
  const size_t kBlockWidth = 2;
  TransposeMicrokernelTester()
    .input_stride(kBlockWidth)
    .output_stride(kBlockHeight)
    .block_width(kBlockWidth)
    .block_height(kBlockHeight)
    .iterations(1)
    .Test(xnn_x16_transpose_ukernel__1x2_scalar_int);
}
87 
// 1x2 kernel at block height 2 and block width 2 (input stride 2, output
// stride equal to the height).
// The generated bound was `i < 2`, an empty range, so the test passed without
// ever invoking the kernel; the inclusive bound makes the body execute.
// NOTE: this file is generated, so the generator template needs the same fix.
TEST(X16_TRANSPOSE__1X2_SCALAR_INT, bh_2_2_bw_2) {
  for (size_t height = 2; height <= 2; ++height) {
    TransposeMicrokernelTester()
      .input_stride(2)
      .output_stride(height)
      .block_width(2)
      .block_height(height)
      .iterations(1)
      .Test(xnn_x16_transpose_ukernel__1x2_scalar_int);
  }
}
99 
// 1x2 kernel at block height 2 and block width 4 (input stride 4, output
// stride equal to the height).
// The generated bound was `i < 2`, an empty range, so the test passed without
// ever invoking the kernel; the inclusive bound makes the body execute.
// NOTE: this file is generated, so the generator template needs the same fix.
TEST(X16_TRANSPOSE__1X2_SCALAR_INT, bh_2_2_bw_4) {
  for (size_t height = 2; height <= 2; ++height) {
    TransposeMicrokernelTester()
      .input_stride(4)
      .output_stride(height)
      .block_width(4)
      .block_height(height)
      .iterations(1)
      .Test(xnn_x16_transpose_ukernel__1x2_scalar_int);
  }
}
111 
// 1x2 kernel at block height 2 over block widths in [3, 4); input stride
// equals the width and output stride equals the height.
// The generated outer bound was `i < 2`, an empty range, so the test passed
// without ever invoking the kernel; the inclusive bound makes the body execute.
// NOTE: this file is generated, so the generator template needs the same fix.
TEST(X16_TRANSPOSE__1X2_SCALAR_INT, bh_2_2_bw_3_4) {
  for (size_t height = 2; height <= 2; ++height) {
    for (size_t width = 3; width < 4; ++width) {
      TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .iterations(1)
        .Test(xnn_x16_transpose_ukernel__1x2_scalar_int);
    }
  }
}
125 
// Fixed case for the 1x2 kernel: block 1 high x 2 wide with input stride 4
// (larger than the block width) and output stride 1.
TEST(X16_TRANSPOSE__1X2_SCALAR_INT, bh_1_bw_2_is_4) {
  const size_t kBlockHeight = 1;
  const size_t kBlockWidth = 2;
  const size_t kInputStride = 4;
  TransposeMicrokernelTester()
    .input_stride(kInputStride)
    .output_stride(kBlockHeight)
    .block_width(kBlockWidth)
    .block_height(kBlockHeight)
    .iterations(1)
    .Test(xnn_x16_transpose_ukernel__1x2_scalar_int);
}
135 
// Fixed case for the 1x2 kernel: block 1 high x 2 wide with input stride 2
// and output stride 2 (larger than the block height).
TEST(X16_TRANSPOSE__1X2_SCALAR_INT, bh_1_bw_2_os_2) {
  const size_t kBlockHeight = 1;
  const size_t kBlockWidth = 2;
  const size_t kOutputStride = 2;
  TransposeMicrokernelTester()
    .input_stride(kBlockWidth)
    .output_stride(kOutputStride)
    .block_width(kBlockWidth)
    .block_height(kBlockHeight)
    .iterations(1)
    .Test(xnn_x16_transpose_ukernel__1x2_scalar_int);
}
145 
// Fixed case for the 1x2 kernel: block 1 high x 2 wide with input stride 4
// and output stride 2 (both larger than the matching block dimension).
TEST(X16_TRANSPOSE__1X2_SCALAR_INT, bh_1_bw_2_is_4_os_2) {
  const size_t kBlockHeight = 1;
  const size_t kBlockWidth = 2;
  const size_t kInputStride = 4;
  const size_t kOutputStride = 2;
  TransposeMicrokernelTester()
    .input_stride(kInputStride)
    .output_stride(kOutputStride)
    .block_width(kBlockWidth)
    .block_height(kBlockHeight)
    .iterations(1)
    .Test(xnn_x16_transpose_ukernel__1x2_scalar_int);
}
155 
// Fixed case for the 1x4 kernel: block 1 high x 4 wide, input stride 4, output stride 1.
TEST(X16_TRANSPOSE__1X4_SCALAR_INT, bh_1_bw_4) {
  const size_t kBlockHeight = 1;
  const size_t kBlockWidth = 4;
  TransposeMicrokernelTester()
    .input_stride(kBlockWidth)
    .output_stride(kBlockHeight)
    .block_width(kBlockWidth)
    .block_height(kBlockHeight)
    .iterations(1)
    .Test(xnn_x16_transpose_ukernel__1x4_scalar_int);
}
165 
// 1x4 kernel over every block height in [1, 2] and block width in [1, 8];
// input stride equals the width and output stride equals the height.
TEST(X16_TRANSPOSE__1X4_SCALAR_INT, bh_1_2_bw_1_8) {
  for (size_t height = 1; height <= 2; ++height) {
    for (size_t width = 1; width <= 8; ++width) {
      TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .iterations(1)
        .Test(xnn_x16_transpose_ukernel__1x4_scalar_int);
    }
  }
}
179 
// Fixed case for the 1x4 kernel: block 1 high x 8 wide, input stride 8, output stride 1.
TEST(X16_TRANSPOSE__1X4_SCALAR_INT, bh_1_bw_8) {
  const size_t kBlockHeight = 1;
  const size_t kBlockWidth = 8;
  TransposeMicrokernelTester()
    .input_stride(kBlockWidth)
    .output_stride(kBlockHeight)
    .block_width(kBlockWidth)
    .block_height(kBlockHeight)
    .iterations(1)
    .Test(xnn_x16_transpose_ukernel__1x4_scalar_int);
}
189 
// 1x4 kernel over block widths in [5, 8) at block height 1;
// input stride equals the width, output stride is 1.
TEST(X16_TRANSPOSE__1X4_SCALAR_INT, bh_1_bw_5_8) {
  for (size_t width = 5; width < 8; ++width) {
    TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(1)
      .block_width(width)
      .block_height(1)
      .iterations(1)
      .Test(xnn_x16_transpose_ukernel__1x4_scalar_int);
  }
}
201 
// 1x4 kernel over block widths in [5, 8) at block height 2;
// input stride equals the width, output stride is 2.
TEST(X16_TRANSPOSE__1X4_SCALAR_INT, bh_2_bw_5_8) {
  for (size_t width = 5; width < 8; ++width) {
    TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(2)
      .block_width(width)
      .block_height(2)
      .iterations(1)
      .Test(xnn_x16_transpose_ukernel__1x4_scalar_int);
  }
}
213 
// Fixed case for the 1x4 kernel: block 2 high x 4 wide, input stride 4, output stride 2.
TEST(X16_TRANSPOSE__1X4_SCALAR_INT, bh_2_bw_4) {
  const size_t kBlockHeight = 2;
  const size_t kBlockWidth = 4;
  TransposeMicrokernelTester()
    .input_stride(kBlockWidth)
    .output_stride(kBlockHeight)
    .block_width(kBlockWidth)
    .block_height(kBlockHeight)
    .iterations(1)
    .Test(xnn_x16_transpose_ukernel__1x4_scalar_int);
}
223 
// 1x4 kernel at block height 2 and block width 4 (input stride 4, output
// stride equal to the height).
// The generated bound was `i < 2`, an empty range, so the test passed without
// ever invoking the kernel; the inclusive bound makes the body execute.
// NOTE: this file is generated, so the generator template needs the same fix.
TEST(X16_TRANSPOSE__1X4_SCALAR_INT, bh_2_2_bw_4) {
  for (size_t height = 2; height <= 2; ++height) {
    TransposeMicrokernelTester()
      .input_stride(4)
      .output_stride(height)
      .block_width(4)
      .block_height(height)
      .iterations(1)
      .Test(xnn_x16_transpose_ukernel__1x4_scalar_int);
  }
}
235 
// 1x4 kernel at block height 2 and block width 8 (input stride 8, output
// stride equal to the height).
// The generated bound was `i < 2`, an empty range, so the test passed without
// ever invoking the kernel; the inclusive bound makes the body execute.
// NOTE: this file is generated, so the generator template needs the same fix.
TEST(X16_TRANSPOSE__1X4_SCALAR_INT, bh_2_2_bw_8) {
  for (size_t height = 2; height <= 2; ++height) {
    TransposeMicrokernelTester()
      .input_stride(8)
      .output_stride(height)
      .block_width(8)
      .block_height(height)
      .iterations(1)
      .Test(xnn_x16_transpose_ukernel__1x4_scalar_int);
  }
}
247 
// 1x4 kernel at block height 2 over block widths in [5, 8); input stride
// equals the width and output stride equals the height.
// The generated outer bound was `i < 2`, an empty range, so the test passed
// without ever invoking the kernel; the inclusive bound makes the body execute.
// NOTE: this file is generated, so the generator template needs the same fix.
TEST(X16_TRANSPOSE__1X4_SCALAR_INT, bh_2_2_bw_5_8) {
  for (size_t height = 2; height <= 2; ++height) {
    for (size_t width = 5; width < 8; ++width) {
      TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .iterations(1)
        .Test(xnn_x16_transpose_ukernel__1x4_scalar_int);
    }
  }
}
261 
// Fixed case for the 1x4 kernel: block 1 high x 4 wide with input stride 8
// (larger than the block width) and output stride 1.
TEST(X16_TRANSPOSE__1X4_SCALAR_INT, bh_1_bw_4_is_8) {
  const size_t kBlockHeight = 1;
  const size_t kBlockWidth = 4;
  const size_t kInputStride = 8;
  TransposeMicrokernelTester()
    .input_stride(kInputStride)
    .output_stride(kBlockHeight)
    .block_width(kBlockWidth)
    .block_height(kBlockHeight)
    .iterations(1)
    .Test(xnn_x16_transpose_ukernel__1x4_scalar_int);
}
271 
// Fixed case for the 1x4 kernel: block 1 high x 4 wide with input stride 4
// and output stride 2 (larger than the block height).
TEST(X16_TRANSPOSE__1X4_SCALAR_INT, bh_1_bw_4_os_2) {
  const size_t kBlockHeight = 1;
  const size_t kBlockWidth = 4;
  const size_t kOutputStride = 2;
  TransposeMicrokernelTester()
    .input_stride(kBlockWidth)
    .output_stride(kOutputStride)
    .block_width(kBlockWidth)
    .block_height(kBlockHeight)
    .iterations(1)
    .Test(xnn_x16_transpose_ukernel__1x4_scalar_int);
}
281 
// Fixed case for the 1x4 kernel: block 1 high x 4 wide with input stride 8
// and output stride 2 (both larger than the matching block dimension).
TEST(X16_TRANSPOSE__1X4_SCALAR_INT, bh_1_bw_4_is_8_os_2) {
  const size_t kBlockHeight = 1;
  const size_t kBlockWidth = 4;
  const size_t kInputStride = 8;
  const size_t kOutputStride = 2;
  TransposeMicrokernelTester()
    .input_stride(kInputStride)
    .output_stride(kOutputStride)
    .block_width(kBlockWidth)
    .block_height(kBlockHeight)
    .iterations(1)
    .Test(xnn_x16_transpose_ukernel__1x4_scalar_int);
}
291 
// Fixed case for the 2x1 kernel: block 2 high x 1 wide, input stride 1, output stride 2.
TEST(X16_TRANSPOSE__2X1_SCALAR_INT, bh_2_bw_1) {
  const size_t kBlockHeight = 2;
  const size_t kBlockWidth = 1;
  TransposeMicrokernelTester()
    .input_stride(kBlockWidth)
    .output_stride(kBlockHeight)
    .block_width(kBlockWidth)
    .block_height(kBlockHeight)
    .iterations(1)
    .Test(xnn_x16_transpose_ukernel__2x1_scalar_int);
}
301 
// 2x1 kernel over every block height in [1, 4] and block width in [1, 2];
// input stride equals the width and output stride equals the height.
TEST(X16_TRANSPOSE__2X1_SCALAR_INT, bh_1_4_bw_1_2) {
  for (size_t height = 1; height <= 4; ++height) {
    for (size_t width = 1; width <= 2; ++width) {
      TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .iterations(1)
        .Test(xnn_x16_transpose_ukernel__2x1_scalar_int);
    }
  }
}
315 
// Fixed case for the 2x1 kernel: block 2 high x 2 wide, input stride 2, output stride 2.
TEST(X16_TRANSPOSE__2X1_SCALAR_INT, bh_2_bw_2) {
  const size_t kBlockHeight = 2;
  const size_t kBlockWidth = 2;
  TransposeMicrokernelTester()
    .input_stride(kBlockWidth)
    .output_stride(kBlockHeight)
    .block_width(kBlockWidth)
    .block_height(kBlockHeight)
    .iterations(1)
    .Test(xnn_x16_transpose_ukernel__2x1_scalar_int);
}
325 
// 2x1 kernel at block width 2 and block height 2 (input stride equal to the
// width, output stride 2).
// The generated bound was `i < 2`, an empty range, so the test passed without
// ever invoking the kernel; the inclusive bound makes the body execute.
// NOTE: this file is generated, so the generator template needs the same fix.
TEST(X16_TRANSPOSE__2X1_SCALAR_INT, bh_2_bw_2_2) {
  for (size_t width = 2; width <= 2; ++width) {
    TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(2)
      .block_width(width)
      .block_height(2)
      .iterations(1)
      .Test(xnn_x16_transpose_ukernel__2x1_scalar_int);
  }
}
337 
// 2x1 kernel at block width 2 and block height 4 (input stride equal to the
// width, output stride 4).
// The generated bound was `i < 2`, an empty range, so the test passed without
// ever invoking the kernel; the inclusive bound makes the body execute.
// NOTE: this file is generated, so the generator template needs the same fix.
TEST(X16_TRANSPOSE__2X1_SCALAR_INT, bh_4_bw_2_2) {
  for (size_t width = 2; width <= 2; ++width) {
    TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(4)
      .block_width(width)
      .block_height(4)
      .iterations(1)
      .Test(xnn_x16_transpose_ukernel__2x1_scalar_int);
  }
}
349 
// Fixed case for the 2x1 kernel: block 4 high x 1 wide, input stride 1, output stride 4.
TEST(X16_TRANSPOSE__2X1_SCALAR_INT, bh_4_bw_1) {
  const size_t kBlockHeight = 4;
  const size_t kBlockWidth = 1;
  TransposeMicrokernelTester()
    .input_stride(kBlockWidth)
    .output_stride(kBlockHeight)
    .block_width(kBlockWidth)
    .block_height(kBlockHeight)
    .iterations(1)
    .Test(xnn_x16_transpose_ukernel__2x1_scalar_int);
}
359 
// 2x1 kernel over block heights in [3, 4) at block width 1;
// output stride equals the height, input stride is 1.
TEST(X16_TRANSPOSE__2X1_SCALAR_INT, bh_3_4_bw_1) {
  for (size_t height = 3; height < 4; ++height) {
    TransposeMicrokernelTester()
      .input_stride(1)
      .output_stride(height)
      .block_width(1)
      .block_height(height)
      .iterations(1)
      .Test(xnn_x16_transpose_ukernel__2x1_scalar_int);
  }
}
371 
// 2x1 kernel over block heights in [3, 4) at block width 2;
// output stride equals the height, input stride is 2.
TEST(X16_TRANSPOSE__2X1_SCALAR_INT, bh_3_4_bw_2) {
  for (size_t height = 3; height < 4; ++height) {
    TransposeMicrokernelTester()
      .input_stride(2)
      .output_stride(height)
      .block_width(2)
      .block_height(height)
      .iterations(1)
      .Test(xnn_x16_transpose_ukernel__2x1_scalar_int);
  }
}
383 
// 2x1 kernel over block heights in [3, 4) at block width 2; input stride
// equals the width and output stride equals the height.
// The generated inner bound was `j < 2`, an empty range, so the test passed
// without ever invoking the kernel; the inclusive bound makes the body execute.
// NOTE: this file is generated, so the generator template needs the same fix.
TEST(X16_TRANSPOSE__2X1_SCALAR_INT, bh_3_4_bw_2_2) {
  for (size_t height = 3; height < 4; ++height) {
    for (size_t width = 2; width <= 2; ++width) {
      TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .iterations(1)
        .Test(xnn_x16_transpose_ukernel__2x1_scalar_int);
    }
  }
}
397 
// Fixed case for the 2x1 kernel: block 2 high x 1 wide with input stride 2
// (larger than the block width) and output stride 2.
TEST(X16_TRANSPOSE__2X1_SCALAR_INT, bh_2_bw_1_is_2) {
  const size_t kBlockHeight = 2;
  const size_t kBlockWidth = 1;
  const size_t kInputStride = 2;
  TransposeMicrokernelTester()
    .input_stride(kInputStride)
    .output_stride(kBlockHeight)
    .block_width(kBlockWidth)
    .block_height(kBlockHeight)
    .iterations(1)
    .Test(xnn_x16_transpose_ukernel__2x1_scalar_int);
}
407 
// Fixed case for the 2x1 kernel: block 2 high x 1 wide with input stride 1
// and output stride 4 (larger than the block height).
TEST(X16_TRANSPOSE__2X1_SCALAR_INT, bh_2_bw_1_os_4) {
  const size_t kBlockHeight = 2;
  const size_t kBlockWidth = 1;
  const size_t kOutputStride = 4;
  TransposeMicrokernelTester()
    .input_stride(kBlockWidth)
    .output_stride(kOutputStride)
    .block_width(kBlockWidth)
    .block_height(kBlockHeight)
    .iterations(1)
    .Test(xnn_x16_transpose_ukernel__2x1_scalar_int);
}
417 
// Fixed case for the 2x1 kernel: block 2 high x 1 wide with input stride 2
// and output stride 4 (both larger than the matching block dimension).
TEST(X16_TRANSPOSE__2X1_SCALAR_INT, bh_2_bw_1_is_2_os_4) {
  const size_t kBlockHeight = 2;
  const size_t kBlockWidth = 1;
  const size_t kInputStride = 2;
  const size_t kOutputStride = 4;
  TransposeMicrokernelTester()
    .input_stride(kInputStride)
    .output_stride(kOutputStride)
    .block_width(kBlockWidth)
    .block_height(kBlockHeight)
    .iterations(1)
    .Test(xnn_x16_transpose_ukernel__2x1_scalar_int);
}
427 
// Fixed case for the 2x2 kernel: block 2 high x 2 wide, input stride 2, output stride 2.
TEST(X16_TRANSPOSE__2X2_SCALAR_INT, bh_2_bw_2) {
  const size_t kBlockHeight = 2;
  const size_t kBlockWidth = 2;
  TransposeMicrokernelTester()
    .input_stride(kBlockWidth)
    .output_stride(kBlockHeight)
    .block_width(kBlockWidth)
    .block_height(kBlockHeight)
    .iterations(1)
    .Test(xnn_x16_transpose_ukernel__2x2_scalar_int);
}
437 
// 2x2 kernel over every block height in [1, 4] and block width in [1, 4];
// input stride equals the width and output stride equals the height.
TEST(X16_TRANSPOSE__2X2_SCALAR_INT, bh_1_4_bw_1_4) {
  for (size_t height = 1; height <= 4; ++height) {
    for (size_t width = 1; width <= 4; ++width) {
      TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .iterations(1)
        .Test(xnn_x16_transpose_ukernel__2x2_scalar_int);
    }
  }
}
451 
// Fixed case for the 2x2 kernel: block 2 high x 4 wide, input stride 4, output stride 2.
TEST(X16_TRANSPOSE__2X2_SCALAR_INT, bh_2_bw_4) {
  const size_t kBlockHeight = 2;
  const size_t kBlockWidth = 4;
  TransposeMicrokernelTester()
    .input_stride(kBlockWidth)
    .output_stride(kBlockHeight)
    .block_width(kBlockWidth)
    .block_height(kBlockHeight)
    .iterations(1)
    .Test(xnn_x16_transpose_ukernel__2x2_scalar_int);
}
461 
// 2x2 kernel over block widths in [3, 4) at block height 2;
// input stride equals the width, output stride is 2.
TEST(X16_TRANSPOSE__2X2_SCALAR_INT, bh_2_bw_3_4) {
  for (size_t width = 3; width < 4; ++width) {
    TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(2)
      .block_width(width)
      .block_height(2)
      .iterations(1)
      .Test(xnn_x16_transpose_ukernel__2x2_scalar_int);
  }
}
473 
// 2x2 kernel over block widths in [3, 4) at block height 4;
// input stride equals the width, output stride is 4.
TEST(X16_TRANSPOSE__2X2_SCALAR_INT, bh_4_bw_3_4) {
  for (size_t width = 3; width < 4; ++width) {
    TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(4)
      .block_width(width)
      .block_height(4)
      .iterations(1)
      .Test(xnn_x16_transpose_ukernel__2x2_scalar_int);
  }
}
485 
// Fixed case for the 2x2 kernel: block 4 high x 2 wide, input stride 2, output stride 4.
TEST(X16_TRANSPOSE__2X2_SCALAR_INT, bh_4_bw_2) {
  const size_t kBlockHeight = 4;
  const size_t kBlockWidth = 2;
  TransposeMicrokernelTester()
    .input_stride(kBlockWidth)
    .output_stride(kBlockHeight)
    .block_width(kBlockWidth)
    .block_height(kBlockHeight)
    .iterations(1)
    .Test(xnn_x16_transpose_ukernel__2x2_scalar_int);
}
495 
// 2x2 kernel over block heights in [3, 4) at block width 2;
// output stride equals the height, input stride is 2.
TEST(X16_TRANSPOSE__2X2_SCALAR_INT, bh_3_4_bw_2) {
  for (size_t height = 3; height < 4; ++height) {
    TransposeMicrokernelTester()
      .input_stride(2)
      .output_stride(height)
      .block_width(2)
      .block_height(height)
      .iterations(1)
      .Test(xnn_x16_transpose_ukernel__2x2_scalar_int);
  }
}
507 
// 2x2 kernel over block heights in [3, 4) at block width 4;
// output stride equals the height, input stride is 4.
TEST(X16_TRANSPOSE__2X2_SCALAR_INT, bh_3_4_bw_4) {
  for (size_t height = 3; height < 4; ++height) {
    TransposeMicrokernelTester()
      .input_stride(4)
      .output_stride(height)
      .block_width(4)
      .block_height(height)
      .iterations(1)
      .Test(xnn_x16_transpose_ukernel__2x2_scalar_int);
  }
}
519 
// 2x2 kernel over block heights in [3, 4) and block widths in [3, 4);
// input stride equals the width and output stride equals the height.
TEST(X16_TRANSPOSE__2X2_SCALAR_INT, bh_3_4_bw_3_4) {
  for (size_t height = 3; height < 4; ++height) {
    for (size_t width = 3; width < 4; ++width) {
      TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .iterations(1)
        .Test(xnn_x16_transpose_ukernel__2x2_scalar_int);
    }
  }
}
533 
// Fixed case for the 2x2 kernel: block 2 high x 2 wide with input stride 4
// (larger than the block width) and output stride 2.
TEST(X16_TRANSPOSE__2X2_SCALAR_INT, bh_2_bw_2_is_4) {
  const size_t kBlockHeight = 2;
  const size_t kBlockWidth = 2;
  const size_t kInputStride = 4;
  TransposeMicrokernelTester()
    .input_stride(kInputStride)
    .output_stride(kBlockHeight)
    .block_width(kBlockWidth)
    .block_height(kBlockHeight)
    .iterations(1)
    .Test(xnn_x16_transpose_ukernel__2x2_scalar_int);
}
543 
// Fixed case for the 2x2 kernel: block 2 high x 2 wide with input stride 2
// and output stride 4 (larger than the block height).
TEST(X16_TRANSPOSE__2X2_SCALAR_INT, bh_2_bw_2_os_4) {
  const size_t kBlockHeight = 2;
  const size_t kBlockWidth = 2;
  const size_t kOutputStride = 4;
  TransposeMicrokernelTester()
    .input_stride(kBlockWidth)
    .output_stride(kOutputStride)
    .block_width(kBlockWidth)
    .block_height(kBlockHeight)
    .iterations(1)
    .Test(xnn_x16_transpose_ukernel__2x2_scalar_int);
}
553 
// Fixed case for the 2x2 kernel: block 2 high x 2 wide with input stride 4
// and output stride 4 (both larger than the matching block dimension).
TEST(X16_TRANSPOSE__2X2_SCALAR_INT, bh_2_bw_2_is_4_os_4) {
  const size_t kBlockHeight = 2;
  const size_t kBlockWidth = 2;
  const size_t kInputStride = 4;
  const size_t kOutputStride = 4;
  TransposeMicrokernelTester()
    .input_stride(kInputStride)
    .output_stride(kOutputStride)
    .block_width(kBlockWidth)
    .block_height(kBlockHeight)
    .iterations(1)
    .Test(xnn_x16_transpose_ukernel__2x2_scalar_int);
}
563 
// Fixed case for the 2x4 kernel: block 2 high x 4 wide, input stride 4, output stride 2.
TEST(X16_TRANSPOSE__2X4_SCALAR_INT, bh_2_bw_4) {
  const size_t kBlockHeight = 2;
  const size_t kBlockWidth = 4;
  TransposeMicrokernelTester()
    .input_stride(kBlockWidth)
    .output_stride(kBlockHeight)
    .block_width(kBlockWidth)
    .block_height(kBlockHeight)
    .iterations(1)
    .Test(xnn_x16_transpose_ukernel__2x4_scalar_int);
}
573 
// 2x4 kernel over every block height in [1, 4] and block width in [1, 8];
// input stride equals the width and output stride equals the height.
TEST(X16_TRANSPOSE__2X4_SCALAR_INT, bh_1_4_bw_1_8) {
  for (size_t height = 1; height <= 4; ++height) {
    for (size_t width = 1; width <= 8; ++width) {
      TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .iterations(1)
        .Test(xnn_x16_transpose_ukernel__2x4_scalar_int);
    }
  }
}
587 
// Fixed case for the 2x4 kernel: block 2 high x 8 wide, input stride 8, output stride 2.
TEST(X16_TRANSPOSE__2X4_SCALAR_INT, bh_2_bw_8) {
  const size_t kBlockHeight = 2;
  const size_t kBlockWidth = 8;
  TransposeMicrokernelTester()
    .input_stride(kBlockWidth)
    .output_stride(kBlockHeight)
    .block_width(kBlockWidth)
    .block_height(kBlockHeight)
    .iterations(1)
    .Test(xnn_x16_transpose_ukernel__2x4_scalar_int);
}
597 
// 2x4 kernel over block widths in [5, 8) at block height 2;
// input stride equals the width, output stride is 2.
TEST(X16_TRANSPOSE__2X4_SCALAR_INT, bh_2_bw_5_8) {
  for (size_t width = 5; width < 8; ++width) {
    TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(2)
      .block_width(width)
      .block_height(2)
      .iterations(1)
      .Test(xnn_x16_transpose_ukernel__2x4_scalar_int);
  }
}
609 
// 2x4 kernel over block widths in [5, 8) at block height 4;
// input stride equals the width, output stride is 4.
TEST(X16_TRANSPOSE__2X4_SCALAR_INT, bh_4_bw_5_8) {
  for (size_t width = 5; width < 8; ++width) {
    TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(4)
      .block_width(width)
      .block_height(4)
      .iterations(1)
      .Test(xnn_x16_transpose_ukernel__2x4_scalar_int);
  }
}
621 
// Fixed case for the 2x4 kernel: block 4 high x 4 wide, input stride 4, output stride 4.
TEST(X16_TRANSPOSE__2X4_SCALAR_INT, bh_4_bw_4) {
  const size_t kBlockHeight = 4;
  const size_t kBlockWidth = 4;
  TransposeMicrokernelTester()
    .input_stride(kBlockWidth)
    .output_stride(kBlockHeight)
    .block_width(kBlockWidth)
    .block_height(kBlockHeight)
    .iterations(1)
    .Test(xnn_x16_transpose_ukernel__2x4_scalar_int);
}
631 
// 2x4 kernel over block heights in [3, 4) at block width 4;
// output stride equals the height, input stride is 4.
TEST(X16_TRANSPOSE__2X4_SCALAR_INT, bh_3_4_bw_4) {
  for (size_t height = 3; height < 4; ++height) {
    TransposeMicrokernelTester()
      .input_stride(4)
      .output_stride(height)
      .block_width(4)
      .block_height(height)
      .iterations(1)
      .Test(xnn_x16_transpose_ukernel__2x4_scalar_int);
  }
}
643 
// 2x4 kernel over block heights in [3, 4) at block width 8;
// output stride equals the height, input stride is 8.
TEST(X16_TRANSPOSE__2X4_SCALAR_INT, bh_3_4_bw_8) {
  for (size_t height = 3; height < 4; ++height) {
    TransposeMicrokernelTester()
      .input_stride(8)
      .output_stride(height)
      .block_width(8)
      .block_height(height)
      .iterations(1)
      .Test(xnn_x16_transpose_ukernel__2x4_scalar_int);
  }
}
655 
// 2x4 kernel over block heights in [3, 4) and block widths in [5, 8);
// input stride equals the width and output stride equals the height.
TEST(X16_TRANSPOSE__2X4_SCALAR_INT, bh_3_4_bw_5_8) {
  for (size_t height = 3; height < 4; ++height) {
    for (size_t width = 5; width < 8; ++width) {
      TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .iterations(1)
        .Test(xnn_x16_transpose_ukernel__2x4_scalar_int);
    }
  }
}
669 
// Fixed case for the 2x4 kernel: block 2 high x 4 wide with input stride 8
// (larger than the block width) and output stride 2.
TEST(X16_TRANSPOSE__2X4_SCALAR_INT, bh_2_bw_4_is_8) {
  const size_t kBlockHeight = 2;
  const size_t kBlockWidth = 4;
  const size_t kInputStride = 8;
  TransposeMicrokernelTester()
    .input_stride(kInputStride)
    .output_stride(kBlockHeight)
    .block_width(kBlockWidth)
    .block_height(kBlockHeight)
    .iterations(1)
    .Test(xnn_x16_transpose_ukernel__2x4_scalar_int);
}
679 
// Fixed case for the 2x4 kernel: block 2 high x 4 wide with input stride 4
// and output stride 4 (larger than the block height).
TEST(X16_TRANSPOSE__2X4_SCALAR_INT, bh_2_bw_4_os_4) {
  const size_t kBlockHeight = 2;
  const size_t kBlockWidth = 4;
  const size_t kOutputStride = 4;
  TransposeMicrokernelTester()
    .input_stride(kBlockWidth)
    .output_stride(kOutputStride)
    .block_width(kBlockWidth)
    .block_height(kBlockHeight)
    .iterations(1)
    .Test(xnn_x16_transpose_ukernel__2x4_scalar_int);
}
689 
// Fixed case for the 2x4 kernel: block 2 high x 4 wide with input stride 8
// and output stride 4 (both larger than the matching block dimension).
TEST(X16_TRANSPOSE__2X4_SCALAR_INT, bh_2_bw_4_is_8_os_4) {
  const size_t kBlockHeight = 2;
  const size_t kBlockWidth = 4;
  const size_t kInputStride = 8;
  const size_t kOutputStride = 4;
  TransposeMicrokernelTester()
    .input_stride(kInputStride)
    .output_stride(kOutputStride)
    .block_width(kBlockWidth)
    .block_height(kBlockHeight)
    .iterations(1)
    .Test(xnn_x16_transpose_ukernel__2x4_scalar_int);
}
699 
// Fixed case for the 4x1 kernel: block 4 high x 1 wide, input stride 1, output stride 4.
TEST(X16_TRANSPOSE__4X1_SCALAR_INT, bh_4_bw_1) {
  const size_t kBlockHeight = 4;
  const size_t kBlockWidth = 1;
  TransposeMicrokernelTester()
    .input_stride(kBlockWidth)
    .output_stride(kBlockHeight)
    .block_width(kBlockWidth)
    .block_height(kBlockHeight)
    .iterations(1)
    .Test(xnn_x16_transpose_ukernel__4x1_scalar_int);
}
709 
// 4x1 kernel over every block height in [1, 8] and block width in [1, 2];
// input stride equals the width and output stride equals the height.
TEST(X16_TRANSPOSE__4X1_SCALAR_INT, bh_1_8_bw_1_2) {
  for (size_t height = 1; height <= 8; ++height) {
    for (size_t width = 1; width <= 2; ++width) {
      TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .iterations(1)
        .Test(xnn_x16_transpose_ukernel__4x1_scalar_int);
    }
  }
}
723 
// Fixed case for the 4x1 kernel: block 4 high x 2 wide, input stride 2, output stride 4.
TEST(X16_TRANSPOSE__4X1_SCALAR_INT, bh_4_bw_2) {
  const size_t kBlockHeight = 4;
  const size_t kBlockWidth = 2;
  TransposeMicrokernelTester()
    .input_stride(kBlockWidth)
    .output_stride(kBlockHeight)
    .block_width(kBlockWidth)
    .block_height(kBlockHeight)
    .iterations(1)
    .Test(xnn_x16_transpose_ukernel__4x1_scalar_int);
}
733 
// 4x1 kernel at block width 2 and block height 4 (input stride equal to the
// width, output stride 4).
// The generated bound was `i < 2`, an empty range, so the test passed without
// ever invoking the kernel; the inclusive bound makes the body execute.
// NOTE: this file is generated, so the generator template needs the same fix.
TEST(X16_TRANSPOSE__4X1_SCALAR_INT, bh_4_bw_2_2) {
  for (size_t width = 2; width <= 2; ++width) {
    TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(4)
      .block_width(width)
      .block_height(4)
      .iterations(1)
      .Test(xnn_x16_transpose_ukernel__4x1_scalar_int);
  }
}
745 
// 4x1 kernel at block width 2 and block height 8 (input stride equal to the
// width, output stride 8).
// The generated bound was `i < 2`, an empty range, so the test passed without
// ever invoking the kernel; the inclusive bound makes the body execute.
// NOTE: this file is generated, so the generator template needs the same fix.
TEST(X16_TRANSPOSE__4X1_SCALAR_INT, bh_8_bw_2_2) {
  for (size_t width = 2; width <= 2; ++width) {
    TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(8)
      .block_width(width)
      .block_height(8)
      .iterations(1)
      .Test(xnn_x16_transpose_ukernel__4x1_scalar_int);
  }
}
757 
// Fixed case for the 4x1 kernel: block 8 high x 1 wide, input stride 1, output stride 8.
TEST(X16_TRANSPOSE__4X1_SCALAR_INT, bh_8_bw_1) {
  const size_t kBlockHeight = 8;
  const size_t kBlockWidth = 1;
  TransposeMicrokernelTester()
    .input_stride(kBlockWidth)
    .output_stride(kBlockHeight)
    .block_width(kBlockWidth)
    .block_height(kBlockHeight)
    .iterations(1)
    .Test(xnn_x16_transpose_ukernel__4x1_scalar_int);
}
767 
// 4x1 kernel over block heights in [5, 8) at block width 1;
// output stride equals the height, input stride is 1.
TEST(X16_TRANSPOSE__4X1_SCALAR_INT, bh_5_8_bw_1) {
  for (size_t height = 5; height < 8; ++height) {
    TransposeMicrokernelTester()
      .input_stride(1)
      .output_stride(height)
      .block_width(1)
      .block_height(height)
      .iterations(1)
      .Test(xnn_x16_transpose_ukernel__4x1_scalar_int);
  }
}
779 
// 4x1 kernel over block heights in [5, 8) at block width 2;
// output stride equals the height, input stride is 2.
TEST(X16_TRANSPOSE__4X1_SCALAR_INT, bh_5_8_bw_2) {
  for (size_t height = 5; height < 8; ++height) {
    TransposeMicrokernelTester()
      .input_stride(2)
      .output_stride(height)
      .block_width(2)
      .block_height(height)
      .iterations(1)
      .Test(xnn_x16_transpose_ukernel__4x1_scalar_int);
  }
}
791 
// 4x1 kernel over block heights in [5, 8) at block width 2; input stride
// equals the width and output stride equals the height.
// The generated inner bound was `j < 2`, an empty range, so the test passed
// without ever invoking the kernel; the inclusive bound makes the body execute.
// NOTE: this file is generated, so the generator template needs the same fix.
TEST(X16_TRANSPOSE__4X1_SCALAR_INT, bh_5_8_bw_2_2) {
  for (size_t height = 5; height < 8; ++height) {
    for (size_t width = 2; width <= 2; ++width) {
      TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .iterations(1)
        .Test(xnn_x16_transpose_ukernel__4x1_scalar_int);
    }
  }
}
805 
// Fixed case for the 4x1 kernel: block 4 high x 1 wide with input stride 2
// (larger than the block width) and output stride 4.
TEST(X16_TRANSPOSE__4X1_SCALAR_INT, bh_4_bw_1_is_2) {
  const size_t kBlockHeight = 4;
  const size_t kBlockWidth = 1;
  const size_t kInputStride = 2;
  TransposeMicrokernelTester()
    .input_stride(kInputStride)
    .output_stride(kBlockHeight)
    .block_width(kBlockWidth)
    .block_height(kBlockHeight)
    .iterations(1)
    .Test(xnn_x16_transpose_ukernel__4x1_scalar_int);
}
815 
// Fixed case for the 4x1 kernel: block 4 high x 1 wide with input stride 1
// and output stride 8 (larger than the block height).
TEST(X16_TRANSPOSE__4X1_SCALAR_INT, bh_4_bw_1_os_8) {
  const size_t kBlockHeight = 4;
  const size_t kBlockWidth = 1;
  const size_t kOutputStride = 8;
  TransposeMicrokernelTester()
    .input_stride(kBlockWidth)
    .output_stride(kOutputStride)
    .block_width(kBlockWidth)
    .block_height(kBlockHeight)
    .iterations(1)
    .Test(xnn_x16_transpose_ukernel__4x1_scalar_int);
}
825 
// Fixed case for the 4x1 kernel: block 4 high x 1 wide with input stride 2
// and output stride 8 (both larger than the matching block dimension).
TEST(X16_TRANSPOSE__4X1_SCALAR_INT, bh_4_bw_1_is_2_os_8) {
  const size_t kBlockHeight = 4;
  const size_t kBlockWidth = 1;
  const size_t kInputStride = 2;
  const size_t kOutputStride = 8;
  TransposeMicrokernelTester()
    .input_stride(kInputStride)
    .output_stride(kOutputStride)
    .block_width(kBlockWidth)
    .block_height(kBlockHeight)
    .iterations(1)
    .Test(xnn_x16_transpose_ukernel__4x1_scalar_int);
}
835 
// Fixed case for the 4x2 kernel: block 4 high x 2 wide, input stride 2, output stride 4.
TEST(X16_TRANSPOSE__4X2_SCALAR_INT, bh_4_bw_2) {
  const size_t kBlockHeight = 4;
  const size_t kBlockWidth = 2;
  TransposeMicrokernelTester()
    .input_stride(kBlockWidth)
    .output_stride(kBlockHeight)
    .block_width(kBlockWidth)
    .block_height(kBlockHeight)
    .iterations(1)
    .Test(xnn_x16_transpose_ukernel__4x2_scalar_int);
}
845 
// 4x2 kernel over every block height in [1, 8] and block width in [1, 4];
// input stride equals the width and output stride equals the height.
TEST(X16_TRANSPOSE__4X2_SCALAR_INT, bh_1_8_bw_1_4) {
  for (size_t height = 1; height <= 8; ++height) {
    for (size_t width = 1; width <= 4; ++width) {
      TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .iterations(1)
        .Test(xnn_x16_transpose_ukernel__4x2_scalar_int);
    }
  }
}
859 
// Fixed case for the 4x2 kernel: block 4 high x 4 wide, input stride 4, output stride 4.
TEST(X16_TRANSPOSE__4X2_SCALAR_INT, bh_4_bw_4) {
  const size_t kBlockHeight = 4;
  const size_t kBlockWidth = 4;
  TransposeMicrokernelTester()
    .input_stride(kBlockWidth)
    .output_stride(kBlockHeight)
    .block_width(kBlockWidth)
    .block_height(kBlockHeight)
    .iterations(1)
    .Test(xnn_x16_transpose_ukernel__4x2_scalar_int);
}
869 
// 4x2 kernel over block widths in [3, 4) at block height 4;
// input stride equals the width, output stride is 4.
TEST(X16_TRANSPOSE__4X2_SCALAR_INT, bh_4_bw_3_4) {
  for (size_t width = 3; width < 4; ++width) {
    TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(4)
      .block_width(width)
      .block_height(4)
      .iterations(1)
      .Test(xnn_x16_transpose_ukernel__4x2_scalar_int);
  }
}
881 
// 4x2 kernel over block widths in [3, 4) at block height 8;
// input stride equals the width, output stride is 8.
TEST(X16_TRANSPOSE__4X2_SCALAR_INT, bh_8_bw_3_4) {
  for (size_t width = 3; width < 4; ++width) {
    TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(8)
      .block_width(width)
      .block_height(8)
      .iterations(1)
      .Test(xnn_x16_transpose_ukernel__4x2_scalar_int);
  }
}
893 
// Fixed case for the 4x2 kernel: block 8 high x 2 wide, input stride 2, output stride 8.
TEST(X16_TRANSPOSE__4X2_SCALAR_INT, bh_8_bw_2) {
  const size_t kBlockHeight = 8;
  const size_t kBlockWidth = 2;
  TransposeMicrokernelTester()
    .input_stride(kBlockWidth)
    .output_stride(kBlockHeight)
    .block_width(kBlockWidth)
    .block_height(kBlockHeight)
    .iterations(1)
    .Test(xnn_x16_transpose_ukernel__4x2_scalar_int);
}
903 
// 4x2 kernel over block heights in [5, 8) at block width 2;
// output stride equals the height, input stride is 2.
TEST(X16_TRANSPOSE__4X2_SCALAR_INT, bh_5_8_bw_2) {
  for (size_t height = 5; height < 8; ++height) {
    TransposeMicrokernelTester()
      .input_stride(2)
      .output_stride(height)
      .block_width(2)
      .block_height(height)
      .iterations(1)
      .Test(xnn_x16_transpose_ukernel__4x2_scalar_int);
  }
}
915 
// 4x2 kernel over block heights in [5, 8) at block width 4;
// output stride equals the height, input stride is 4.
TEST(X16_TRANSPOSE__4X2_SCALAR_INT, bh_5_8_bw_4) {
  for (size_t height = 5; height < 8; ++height) {
    TransposeMicrokernelTester()
      .input_stride(4)
      .output_stride(height)
      .block_width(4)
      .block_height(height)
      .iterations(1)
      .Test(xnn_x16_transpose_ukernel__4x2_scalar_int);
  }
}
927 
// 4x2 kernel over block heights in [5, 8) and block widths in [3, 4);
// input stride equals the width and output stride equals the height.
TEST(X16_TRANSPOSE__4X2_SCALAR_INT, bh_5_8_bw_3_4) {
  for (size_t height = 5; height < 8; ++height) {
    for (size_t width = 3; width < 4; ++width) {
      TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .iterations(1)
        .Test(xnn_x16_transpose_ukernel__4x2_scalar_int);
    }
  }
}
941 
// Fixed case for the 4x2 kernel: block 4 high x 2 wide with input stride 4
// (larger than the block width) and output stride 4.
TEST(X16_TRANSPOSE__4X2_SCALAR_INT, bh_4_bw_2_is_4) {
  const size_t kBlockHeight = 4;
  const size_t kBlockWidth = 2;
  const size_t kInputStride = 4;
  TransposeMicrokernelTester()
    .input_stride(kInputStride)
    .output_stride(kBlockHeight)
    .block_width(kBlockWidth)
    .block_height(kBlockHeight)
    .iterations(1)
    .Test(xnn_x16_transpose_ukernel__4x2_scalar_int);
}
951 
// Fixed case for the 4x2 kernel: block 4 high x 2 wide with input stride 2
// and output stride 8 (larger than the block height).
TEST(X16_TRANSPOSE__4X2_SCALAR_INT, bh_4_bw_2_os_8) {
  const size_t kBlockHeight = 4;
  const size_t kBlockWidth = 2;
  const size_t kOutputStride = 8;
  TransposeMicrokernelTester()
    .input_stride(kBlockWidth)
    .output_stride(kOutputStride)
    .block_width(kBlockWidth)
    .block_height(kBlockHeight)
    .iterations(1)
    .Test(xnn_x16_transpose_ukernel__4x2_scalar_int);
}
961 
// Fixed case for the 4x2 kernel: block 4 high x 2 wide with input stride 4
// and output stride 8 (both larger than the matching block dimension).
TEST(X16_TRANSPOSE__4X2_SCALAR_INT, bh_4_bw_2_is_4_os_8) {
  const size_t kBlockHeight = 4;
  const size_t kBlockWidth = 2;
  const size_t kInputStride = 4;
  const size_t kOutputStride = 8;
  TransposeMicrokernelTester()
    .input_stride(kInputStride)
    .output_stride(kOutputStride)
    .block_width(kBlockWidth)
    .block_height(kBlockHeight)
    .iterations(1)
    .Test(xnn_x16_transpose_ukernel__4x2_scalar_int);
}
971 
// Fixed case for the 4x4 kernel: block 4 high x 4 wide, input stride 4, output stride 4.
TEST(X16_TRANSPOSE__4X4_SCALAR_INT, bh_4_bw_4) {
  const size_t kBlockHeight = 4;
  const size_t kBlockWidth = 4;
  TransposeMicrokernelTester()
    .input_stride(kBlockWidth)
    .output_stride(kBlockHeight)
    .block_width(kBlockWidth)
    .block_height(kBlockHeight)
    .iterations(1)
    .Test(xnn_x16_transpose_ukernel__4x4_scalar_int);
}
981 
// 4x4 kernel over every block height in [1, 8] and block width in [1, 8];
// input stride equals the width and output stride equals the height.
TEST(X16_TRANSPOSE__4X4_SCALAR_INT, bh_1_8_bw_1_8) {
  for (size_t height = 1; height <= 8; ++height) {
    for (size_t width = 1; width <= 8; ++width) {
      TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .iterations(1)
        .Test(xnn_x16_transpose_ukernel__4x4_scalar_int);
    }
  }
}
995 
// One run with block_height 4 and block_width 8; both strides equal the
// block dimensions.
TEST(X16_TRANSPOSE__4X4_SCALAR_INT, bh_4_bw_8) {
  TransposeMicrokernelTester()
      .input_stride(8).output_stride(4)
      .block_width(8).block_height(4)
      .iterations(1)
      .Test(xnn_x16_transpose_ukernel__4x4_scalar_int);
}
1005 
// block_width sweeps 5..7 (upper bound exclusive) at a fixed block_height of
// 4; input_stride follows block_width.
TEST(X16_TRANSPOSE__4X4_SCALAR_INT, bh_4_bw_5_8) {
  for (size_t width = 5; width < 8; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width).output_stride(4)
        .block_width(width).block_height(4)
        .iterations(1)
        .Test(xnn_x16_transpose_ukernel__4x4_scalar_int);
  }
}
1017 
// block_width sweeps 5..7 (upper bound exclusive) at a fixed block_height of
// 8; input_stride follows block_width.
TEST(X16_TRANSPOSE__4X4_SCALAR_INT, bh_8_bw_5_8) {
  for (size_t width = 5; width < 8; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width).output_stride(8)
        .block_width(width).block_height(8)
        .iterations(1)
        .Test(xnn_x16_transpose_ukernel__4x4_scalar_int);
  }
}
1029 
// One run with block_height 8 and block_width 4; both strides equal the
// block dimensions.
TEST(X16_TRANSPOSE__4X4_SCALAR_INT, bh_8_bw_4) {
  TransposeMicrokernelTester()
      .input_stride(4).output_stride(8)
      .block_width(4).block_height(8)
      .iterations(1)
      .Test(xnn_x16_transpose_ukernel__4x4_scalar_int);
}
1039 
// block_height sweeps 5..7 (upper bound exclusive) at a fixed block_width of
// 4; output_stride follows block_height.
TEST(X16_TRANSPOSE__4X4_SCALAR_INT, bh_5_8_bw_4) {
  for (size_t height = 5; height < 8; ++height) {
    TransposeMicrokernelTester()
        .input_stride(4).output_stride(height)
        .block_width(4).block_height(height)
        .iterations(1)
        .Test(xnn_x16_transpose_ukernel__4x4_scalar_int);
  }
}
1051 
// block_height sweeps 5..7 (upper bound exclusive) at a fixed block_width of
// 8; output_stride follows block_height.
TEST(X16_TRANSPOSE__4X4_SCALAR_INT, bh_5_8_bw_8) {
  for (size_t height = 5; height < 8; ++height) {
    TransposeMicrokernelTester()
        .input_stride(8).output_stride(height)
        .block_width(8).block_height(height)
        .iterations(1)
        .Test(xnn_x16_transpose_ukernel__4x4_scalar_int);
  }
}
1063 
// Sweeps block_height and block_width independently over 5..7 (upper bounds
// exclusive), with strides tracking the block dimensions.
TEST(X16_TRANSPOSE__4X4_SCALAR_INT, bh_5_8_bw_5_8) {
  for (size_t height = 5; height < 8; ++height) {
    for (size_t width = 5; width < 8; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width).output_stride(height)
          .block_width(width).block_height(height)
          .iterations(1)
          .Test(xnn_x16_transpose_ukernel__4x4_scalar_int);
    }
  }
}
1077 
// One run with block_height 4 and block_width 4 where input_stride (8) is
// larger than block_width; output_stride equals block_height.
TEST(X16_TRANSPOSE__4X4_SCALAR_INT, bh_4_bw_4_is_8) {
  TransposeMicrokernelTester()
      .input_stride(8).output_stride(4)
      .block_width(4).block_height(4)
      .iterations(1)
      .Test(xnn_x16_transpose_ukernel__4x4_scalar_int);
}
1087 
// One run with block_height 4 and block_width 4 where output_stride (8) is
// larger than block_height; input_stride equals block_width.
TEST(X16_TRANSPOSE__4X4_SCALAR_INT, bh_4_bw_4_os_8) {
  TransposeMicrokernelTester()
      .input_stride(4).output_stride(8)
      .block_width(4).block_height(4)
      .iterations(1)
      .Test(xnn_x16_transpose_ukernel__4x4_scalar_int);
}
1097 
// One run with block_height 4 and block_width 4 where both strides (8) are
// larger than the block dimensions.
TEST(X16_TRANSPOSE__4X4_SCALAR_INT, bh_4_bw_4_is_8_os_8) {
  TransposeMicrokernelTester()
      .input_stride(8).output_stride(8)
      .block_width(4).block_height(4)
      .iterations(1)
      .Test(xnn_x16_transpose_ukernel__4x4_scalar_int);
}
1107 
1108 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// One run with block_height 4 and block_width 8; both strides equal the
// block dimensions.
TEST(X16_TRANSPOSE__4X8_SSE2, bh_4_bw_8) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(8).output_stride(4)
      .block_width(8).block_height(4)
      .iterations(1)
      .Test(xnn_x16_transpose_ukernel__4x8_sse2);
}
1119 
// Full sweep: block_height 1..8 and block_width 1..16 (both bounds
// inclusive), with strides tracking the block dimensions.
TEST(X16_TRANSPOSE__4X8_SSE2, bh_1_8_bw_1_16) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t height = 1; height <= 8; ++height) {
    for (size_t width = 1; width <= 16; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width).output_stride(height)
          .block_width(width).block_height(height)
          .iterations(1)
          .Test(xnn_x16_transpose_ukernel__4x8_sse2);
    }
  }
}
1134 
// One run with block_height 4 and block_width 16; both strides equal the
// block dimensions.
TEST(X16_TRANSPOSE__4X8_SSE2, bh_4_bw_16) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(16).output_stride(4)
      .block_width(16).block_height(4)
      .iterations(1)
      .Test(xnn_x16_transpose_ukernel__4x8_sse2);
}
1145 
// block_width sweeps 9..15 (upper bound exclusive) at a fixed block_height of
// 4; input_stride follows block_width.
TEST(X16_TRANSPOSE__4X8_SSE2, bh_4_bw_9_16) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t width = 9; width < 16; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width).output_stride(4)
        .block_width(width).block_height(4)
        .iterations(1)
        .Test(xnn_x16_transpose_ukernel__4x8_sse2);
  }
}
1158 
// block_width sweeps 9..15 (upper bound exclusive) at a fixed block_height of
// 8; input_stride follows block_width.
TEST(X16_TRANSPOSE__4X8_SSE2, bh_8_bw_9_16) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t width = 9; width < 16; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width).output_stride(8)
        .block_width(width).block_height(8)
        .iterations(1)
        .Test(xnn_x16_transpose_ukernel__4x8_sse2);
  }
}
1171 
// One run with block_height 8 and block_width 8; both strides equal the
// block dimensions.
TEST(X16_TRANSPOSE__4X8_SSE2, bh_8_bw_8) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(8).output_stride(8)
      .block_width(8).block_height(8)
      .iterations(1)
      .Test(xnn_x16_transpose_ukernel__4x8_sse2);
}
1182 
// block_height sweeps 5..7 (upper bound exclusive) at a fixed block_width of
// 8; output_stride follows block_height.
TEST(X16_TRANSPOSE__4X8_SSE2, bh_5_8_bw_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t height = 5; height < 8; ++height) {
    TransposeMicrokernelTester()
        .input_stride(8).output_stride(height)
        .block_width(8).block_height(height)
        .iterations(1)
        .Test(xnn_x16_transpose_ukernel__4x8_sse2);
  }
}
1195 
// block_height sweeps 5..7 (upper bound exclusive) at a fixed block_width of
// 16; output_stride follows block_height.
TEST(X16_TRANSPOSE__4X8_SSE2, bh_5_8_bw_16) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t height = 5; height < 8; ++height) {
    TransposeMicrokernelTester()
        .input_stride(16).output_stride(height)
        .block_width(16).block_height(height)
        .iterations(1)
        .Test(xnn_x16_transpose_ukernel__4x8_sse2);
  }
}
1208 
// Sweeps block_height over 5..7 and block_width over 9..15 (upper bounds
// exclusive), with strides tracking the block dimensions.
TEST(X16_TRANSPOSE__4X8_SSE2, bh_5_8_bw_9_16) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t height = 5; height < 8; ++height) {
    for (size_t width = 9; width < 16; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width).output_stride(height)
          .block_width(width).block_height(height)
          .iterations(1)
          .Test(xnn_x16_transpose_ukernel__4x8_sse2);
    }
  }
}
1223 
// One run with block_height 4 and block_width 8 where input_stride (16) is
// larger than block_width; output_stride equals block_height.
TEST(X16_TRANSPOSE__4X8_SSE2, bh_4_bw_8_is_16) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(16).output_stride(4)
      .block_width(8).block_height(4)
      .iterations(1)
      .Test(xnn_x16_transpose_ukernel__4x8_sse2);
}
1234 
// One run with block_height 4 and block_width 8 where output_stride (8) is
// larger than block_height; input_stride equals block_width.
TEST(X16_TRANSPOSE__4X8_SSE2, bh_4_bw_8_os_8) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(8).output_stride(8)
      .block_width(8).block_height(4)
      .iterations(1)
      .Test(xnn_x16_transpose_ukernel__4x8_sse2);
}
1245 
// One run with block_height 4 and block_width 8 where both strides exceed
// the block dimensions: input_stride 16 and output_stride 8.
TEST(X16_TRANSPOSE__4X8_SSE2, bh_4_bw_8_is_16_os_8) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(16).output_stride(8)
      .block_width(8).block_height(4)
      .iterations(1)
      .Test(xnn_x16_transpose_ukernel__4x8_sse2);
}
1256 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
1257 
1258 
1259 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// One run with block_height 8 and block_width 8; both strides equal the
// block dimensions.
TEST(X16_TRANSPOSE__8X8_MULTI_MOV_SSE2, bh_8_bw_8) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(8).output_stride(8)
      .block_width(8).block_height(8)
      .iterations(1)
      .Test(xnn_x16_transpose_ukernel__8x8_multi_mov_sse2);
}
1270 
// Full sweep: block_height 1..16 and block_width 1..16 (both bounds
// inclusive), with strides tracking the block dimensions.
TEST(X16_TRANSPOSE__8X8_MULTI_MOV_SSE2, bh_1_16_bw_1_16) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t height = 1; height <= 16; ++height) {
    for (size_t width = 1; width <= 16; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width).output_stride(height)
          .block_width(width).block_height(height)
          .iterations(1)
          .Test(xnn_x16_transpose_ukernel__8x8_multi_mov_sse2);
    }
  }
}
1285 
// One run with block_height 8 and block_width 16; both strides equal the
// block dimensions.
TEST(X16_TRANSPOSE__8X8_MULTI_MOV_SSE2, bh_8_bw_16) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(16).output_stride(8)
      .block_width(16).block_height(8)
      .iterations(1)
      .Test(xnn_x16_transpose_ukernel__8x8_multi_mov_sse2);
}
1296 
// block_width sweeps 9..15 (upper bound exclusive) at a fixed block_height of
// 8; input_stride follows block_width.
TEST(X16_TRANSPOSE__8X8_MULTI_MOV_SSE2, bh_8_bw_9_16) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t width = 9; width < 16; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width).output_stride(8)
        .block_width(width).block_height(8)
        .iterations(1)
        .Test(xnn_x16_transpose_ukernel__8x8_multi_mov_sse2);
  }
}
1309 
// block_width sweeps 9..15 (upper bound exclusive) at a fixed block_height of
// 16; input_stride follows block_width.
TEST(X16_TRANSPOSE__8X8_MULTI_MOV_SSE2, bh_16_bw_9_16) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t width = 9; width < 16; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width).output_stride(16)
        .block_width(width).block_height(16)
        .iterations(1)
        .Test(xnn_x16_transpose_ukernel__8x8_multi_mov_sse2);
  }
}
1322 
// One run with block_height 16 and block_width 8; both strides equal the
// block dimensions.
TEST(X16_TRANSPOSE__8X8_MULTI_MOV_SSE2, bh_16_bw_8) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(8).output_stride(16)
      .block_width(8).block_height(16)
      .iterations(1)
      .Test(xnn_x16_transpose_ukernel__8x8_multi_mov_sse2);
}
1333 
// block_height sweeps 9..15 (upper bound exclusive) at a fixed block_width of
// 8; output_stride follows block_height.
TEST(X16_TRANSPOSE__8X8_MULTI_MOV_SSE2, bh_9_16_bw_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t height = 9; height < 16; ++height) {
    TransposeMicrokernelTester()
        .input_stride(8).output_stride(height)
        .block_width(8).block_height(height)
        .iterations(1)
        .Test(xnn_x16_transpose_ukernel__8x8_multi_mov_sse2);
  }
}
1346 
// block_height sweeps 9..15 (upper bound exclusive) at a fixed block_width of
// 16; output_stride follows block_height.
TEST(X16_TRANSPOSE__8X8_MULTI_MOV_SSE2, bh_9_16_bw_16) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t height = 9; height < 16; ++height) {
    TransposeMicrokernelTester()
        .input_stride(16).output_stride(height)
        .block_width(16).block_height(height)
        .iterations(1)
        .Test(xnn_x16_transpose_ukernel__8x8_multi_mov_sse2);
  }
}
1359 
// Sweeps block_height and block_width independently over 9..15 (upper bounds
// exclusive), with strides tracking the block dimensions.
TEST(X16_TRANSPOSE__8X8_MULTI_MOV_SSE2, bh_9_16_bw_9_16) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t height = 9; height < 16; ++height) {
    for (size_t width = 9; width < 16; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width).output_stride(height)
          .block_width(width).block_height(height)
          .iterations(1)
          .Test(xnn_x16_transpose_ukernel__8x8_multi_mov_sse2);
    }
  }
}
1374 
// One run with block_height 8 and block_width 8 where input_stride (16) is
// larger than block_width; output_stride equals block_height.
TEST(X16_TRANSPOSE__8X8_MULTI_MOV_SSE2, bh_8_bw_8_is_16) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(16).output_stride(8)
      .block_width(8).block_height(8)
      .iterations(1)
      .Test(xnn_x16_transpose_ukernel__8x8_multi_mov_sse2);
}
1385 
// One run with block_height 8 and block_width 8 where output_stride (16) is
// larger than block_height; input_stride equals block_width.
TEST(X16_TRANSPOSE__8X8_MULTI_MOV_SSE2, bh_8_bw_8_os_16) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(8).output_stride(16)
      .block_width(8).block_height(8)
      .iterations(1)
      .Test(xnn_x16_transpose_ukernel__8x8_multi_mov_sse2);
}
1396 
// One run with block_height 8 and block_width 8 where both strides (16) are
// larger than the block dimensions.
TEST(X16_TRANSPOSE__8X8_MULTI_MOV_SSE2, bh_8_bw_8_is_16_os_16) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(16).output_stride(16)
      .block_width(8).block_height(8)
      .iterations(1)
      .Test(xnn_x16_transpose_ukernel__8x8_multi_mov_sse2);
}
1407 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
1408 
1409 
1410 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// One run with block_height 8 and block_width 8; both strides equal the
// block dimensions.
TEST(X16_TRANSPOSE__8X8_MULTI_SWITCH_SSE2, bh_8_bw_8) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(8).output_stride(8)
      .block_width(8).block_height(8)
      .iterations(1)
      .Test(xnn_x16_transpose_ukernel__8x8_multi_switch_sse2);
}
1421 
// Full sweep: block_height 1..16 and block_width 1..16 (both bounds
// inclusive), with strides tracking the block dimensions.
TEST(X16_TRANSPOSE__8X8_MULTI_SWITCH_SSE2, bh_1_16_bw_1_16) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t height = 1; height <= 16; ++height) {
    for (size_t width = 1; width <= 16; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width).output_stride(height)
          .block_width(width).block_height(height)
          .iterations(1)
          .Test(xnn_x16_transpose_ukernel__8x8_multi_switch_sse2);
    }
  }
}
1436 
// One run with block_height 8 and block_width 16; both strides equal the
// block dimensions.
TEST(X16_TRANSPOSE__8X8_MULTI_SWITCH_SSE2, bh_8_bw_16) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(16).output_stride(8)
      .block_width(16).block_height(8)
      .iterations(1)
      .Test(xnn_x16_transpose_ukernel__8x8_multi_switch_sse2);
}
1447 
// block_width sweeps 9..15 (upper bound exclusive) at a fixed block_height of
// 8; input_stride follows block_width.
TEST(X16_TRANSPOSE__8X8_MULTI_SWITCH_SSE2, bh_8_bw_9_16) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t width = 9; width < 16; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width).output_stride(8)
        .block_width(width).block_height(8)
        .iterations(1)
        .Test(xnn_x16_transpose_ukernel__8x8_multi_switch_sse2);
  }
}
1460 
// block_width sweeps 9..15 (upper bound exclusive) at a fixed block_height of
// 16; input_stride follows block_width.
TEST(X16_TRANSPOSE__8X8_MULTI_SWITCH_SSE2, bh_16_bw_9_16) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t width = 9; width < 16; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width).output_stride(16)
        .block_width(width).block_height(16)
        .iterations(1)
        .Test(xnn_x16_transpose_ukernel__8x8_multi_switch_sse2);
  }
}
1473 
// One run with block_height 16 and block_width 8; both strides equal the
// block dimensions.
TEST(X16_TRANSPOSE__8X8_MULTI_SWITCH_SSE2, bh_16_bw_8) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(8).output_stride(16)
      .block_width(8).block_height(16)
      .iterations(1)
      .Test(xnn_x16_transpose_ukernel__8x8_multi_switch_sse2);
}
1484 
// block_height sweeps 9..15 (upper bound exclusive) at a fixed block_width of
// 8; output_stride follows block_height.
TEST(X16_TRANSPOSE__8X8_MULTI_SWITCH_SSE2, bh_9_16_bw_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t height = 9; height < 16; ++height) {
    TransposeMicrokernelTester()
        .input_stride(8).output_stride(height)
        .block_width(8).block_height(height)
        .iterations(1)
        .Test(xnn_x16_transpose_ukernel__8x8_multi_switch_sse2);
  }
}
1497 
// block_height sweeps 9..15 (upper bound exclusive) at a fixed block_width of
// 16; output_stride follows block_height.
TEST(X16_TRANSPOSE__8X8_MULTI_SWITCH_SSE2, bh_9_16_bw_16) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t height = 9; height < 16; ++height) {
    TransposeMicrokernelTester()
        .input_stride(16).output_stride(height)
        .block_width(16).block_height(height)
        .iterations(1)
        .Test(xnn_x16_transpose_ukernel__8x8_multi_switch_sse2);
  }
}
1510 
// Sweeps block_height and block_width independently over 9..15 (upper bounds
// exclusive), with strides tracking the block dimensions.
TEST(X16_TRANSPOSE__8X8_MULTI_SWITCH_SSE2, bh_9_16_bw_9_16) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t height = 9; height < 16; ++height) {
    for (size_t width = 9; width < 16; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width).output_stride(height)
          .block_width(width).block_height(height)
          .iterations(1)
          .Test(xnn_x16_transpose_ukernel__8x8_multi_switch_sse2);
    }
  }
}
1525 
// One run with block_height 8 and block_width 8 where input_stride (16) is
// larger than block_width; output_stride equals block_height.
TEST(X16_TRANSPOSE__8X8_MULTI_SWITCH_SSE2, bh_8_bw_8_is_16) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(16).output_stride(8)
      .block_width(8).block_height(8)
      .iterations(1)
      .Test(xnn_x16_transpose_ukernel__8x8_multi_switch_sse2);
}
1536 
// One run with block_height 8 and block_width 8 where output_stride (16) is
// larger than block_height; input_stride equals block_width.
TEST(X16_TRANSPOSE__8X8_MULTI_SWITCH_SSE2, bh_8_bw_8_os_16) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(8).output_stride(16)
      .block_width(8).block_height(8)
      .iterations(1)
      .Test(xnn_x16_transpose_ukernel__8x8_multi_switch_sse2);
}
1547 
// One run with block_height 8 and block_width 8 where both strides (16) are
// larger than the block dimensions.
TEST(X16_TRANSPOSE__8X8_MULTI_SWITCH_SSE2, bh_8_bw_8_is_16_os_16) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(16).output_stride(16)
      .block_width(8).block_height(8)
      .iterations(1)
      .Test(xnn_x16_transpose_ukernel__8x8_multi_switch_sse2);
}
1558 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
1559 
1560 
1561 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// One run with block_height 8 and block_width 8; both strides equal the
// block dimensions.
TEST(X16_TRANSPOSE__8X8_REUSE_MOV_SSE2, bh_8_bw_8) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(8).output_stride(8)
      .block_width(8).block_height(8)
      .iterations(1)
      .Test(xnn_x16_transpose_ukernel__8x8_reuse_mov_sse2);
}
1572 
// Full sweep: block_height 1..16 and block_width 1..16 (both bounds
// inclusive), with strides tracking the block dimensions.
TEST(X16_TRANSPOSE__8X8_REUSE_MOV_SSE2, bh_1_16_bw_1_16) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t height = 1; height <= 16; ++height) {
    for (size_t width = 1; width <= 16; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width).output_stride(height)
          .block_width(width).block_height(height)
          .iterations(1)
          .Test(xnn_x16_transpose_ukernel__8x8_reuse_mov_sse2);
    }
  }
}
1587 
// One run with block_height 8 and block_width 16; both strides equal the
// block dimensions.
TEST(X16_TRANSPOSE__8X8_REUSE_MOV_SSE2, bh_8_bw_16) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(16).output_stride(8)
      .block_width(16).block_height(8)
      .iterations(1)
      .Test(xnn_x16_transpose_ukernel__8x8_reuse_mov_sse2);
}
1598 
// block_width sweeps 9..15 (upper bound exclusive) at a fixed block_height of
// 8; input_stride follows block_width.
TEST(X16_TRANSPOSE__8X8_REUSE_MOV_SSE2, bh_8_bw_9_16) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t width = 9; width < 16; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width).output_stride(8)
        .block_width(width).block_height(8)
        .iterations(1)
        .Test(xnn_x16_transpose_ukernel__8x8_reuse_mov_sse2);
  }
}
1611 
// block_width sweeps 9..15 (upper bound exclusive) at a fixed block_height of
// 16; input_stride follows block_width.
TEST(X16_TRANSPOSE__8X8_REUSE_MOV_SSE2, bh_16_bw_9_16) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t width = 9; width < 16; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width).output_stride(16)
        .block_width(width).block_height(16)
        .iterations(1)
        .Test(xnn_x16_transpose_ukernel__8x8_reuse_mov_sse2);
  }
}
1624 
// One run with block_height 16 and block_width 8; both strides equal the
// block dimensions.
TEST(X16_TRANSPOSE__8X8_REUSE_MOV_SSE2, bh_16_bw_8) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(8).output_stride(16)
      .block_width(8).block_height(16)
      .iterations(1)
      .Test(xnn_x16_transpose_ukernel__8x8_reuse_mov_sse2);
}
1635 
// block_height sweeps 9..15 (upper bound exclusive) at a fixed block_width of
// 8; output_stride follows block_height.
TEST(X16_TRANSPOSE__8X8_REUSE_MOV_SSE2, bh_9_16_bw_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t height = 9; height < 16; ++height) {
    TransposeMicrokernelTester()
        .input_stride(8).output_stride(height)
        .block_width(8).block_height(height)
        .iterations(1)
        .Test(xnn_x16_transpose_ukernel__8x8_reuse_mov_sse2);
  }
}
1648 
// block_height sweeps 9..15 (upper bound exclusive) at a fixed block_width of
// 16; output_stride follows block_height.
TEST(X16_TRANSPOSE__8X8_REUSE_MOV_SSE2, bh_9_16_bw_16) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t height = 9; height < 16; ++height) {
    TransposeMicrokernelTester()
        .input_stride(16).output_stride(height)
        .block_width(16).block_height(height)
        .iterations(1)
        .Test(xnn_x16_transpose_ukernel__8x8_reuse_mov_sse2);
  }
}
1661 
// Sweeps block_height and block_width independently over 9..15 (upper bounds
// exclusive), with strides tracking the block dimensions.
TEST(X16_TRANSPOSE__8X8_REUSE_MOV_SSE2, bh_9_16_bw_9_16) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t height = 9; height < 16; ++height) {
    for (size_t width = 9; width < 16; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width).output_stride(height)
          .block_width(width).block_height(height)
          .iterations(1)
          .Test(xnn_x16_transpose_ukernel__8x8_reuse_mov_sse2);
    }
  }
}
1676 
// One run with block_height 8 and block_width 8 where input_stride (16) is
// larger than block_width; output_stride equals block_height.
TEST(X16_TRANSPOSE__8X8_REUSE_MOV_SSE2, bh_8_bw_8_is_16) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(16).output_stride(8)
      .block_width(8).block_height(8)
      .iterations(1)
      .Test(xnn_x16_transpose_ukernel__8x8_reuse_mov_sse2);
}
1687 
// One run with block_height 8 and block_width 8 where output_stride (16) is
// larger than block_height; input_stride equals block_width.
TEST(X16_TRANSPOSE__8X8_REUSE_MOV_SSE2, bh_8_bw_8_os_16) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(8).output_stride(16)
      .block_width(8).block_height(8)
      .iterations(1)
      .Test(xnn_x16_transpose_ukernel__8x8_reuse_mov_sse2);
}
1698 
// One run with block_height 8 and block_width 8 where both strides (16) are
// larger than the block dimensions.
TEST(X16_TRANSPOSE__8X8_REUSE_MOV_SSE2, bh_8_bw_8_is_16_os_16) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(16).output_stride(16)
      .block_width(8).block_height(8)
      .iterations(1)
      .Test(xnn_x16_transpose_ukernel__8x8_reuse_mov_sse2);
}
1709 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
1710 
1711 
1712 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// One run with block_height 8 and block_width 8; both strides equal the
// block dimensions.
TEST(X16_TRANSPOSE__8X8_REUSE_MULTI_SSE2, bh_8_bw_8) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(8).output_stride(8)
      .block_width(8).block_height(8)
      .iterations(1)
      .Test(xnn_x16_transpose_ukernel__8x8_reuse_multi_sse2);
}
1723 
// Full sweep: block_height 1..16 and block_width 1..16 (both bounds
// inclusive), with strides tracking the block dimensions.
TEST(X16_TRANSPOSE__8X8_REUSE_MULTI_SSE2, bh_1_16_bw_1_16) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t height = 1; height <= 16; ++height) {
    for (size_t width = 1; width <= 16; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width).output_stride(height)
          .block_width(width).block_height(height)
          .iterations(1)
          .Test(xnn_x16_transpose_ukernel__8x8_reuse_multi_sse2);
    }
  }
}
1738 
// One run with block_height 8 and block_width 16; both strides equal the
// block dimensions.
TEST(X16_TRANSPOSE__8X8_REUSE_MULTI_SSE2, bh_8_bw_16) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(16).output_stride(8)
      .block_width(16).block_height(8)
      .iterations(1)
      .Test(xnn_x16_transpose_ukernel__8x8_reuse_multi_sse2);
}
1749 
// block_width sweeps 9..15 (upper bound exclusive) at a fixed block_height of
// 8; input_stride follows block_width.
TEST(X16_TRANSPOSE__8X8_REUSE_MULTI_SSE2, bh_8_bw_9_16) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t width = 9; width < 16; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width).output_stride(8)
        .block_width(width).block_height(8)
        .iterations(1)
        .Test(xnn_x16_transpose_ukernel__8x8_reuse_multi_sse2);
  }
}
1762 
// block_width sweeps 9..15 (upper bound exclusive) at a fixed block_height of
// 16; input_stride follows block_width.
TEST(X16_TRANSPOSE__8X8_REUSE_MULTI_SSE2, bh_16_bw_9_16) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t width = 9; width < 16; ++width) {
    TransposeMicrokernelTester()
        .input_stride(width).output_stride(16)
        .block_width(width).block_height(16)
        .iterations(1)
        .Test(xnn_x16_transpose_ukernel__8x8_reuse_multi_sse2);
  }
}
1775 
// One run with block_height 16 and block_width 8; both strides equal the
// block dimensions.
TEST(X16_TRANSPOSE__8X8_REUSE_MULTI_SSE2, bh_16_bw_8) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(8).output_stride(16)
      .block_width(8).block_height(16)
      .iterations(1)
      .Test(xnn_x16_transpose_ukernel__8x8_reuse_multi_sse2);
}
1786 
// block_height sweeps 9..15 (upper bound exclusive) at a fixed block_width of
// 8; output_stride follows block_height.
TEST(X16_TRANSPOSE__8X8_REUSE_MULTI_SSE2, bh_9_16_bw_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t height = 9; height < 16; ++height) {
    TransposeMicrokernelTester()
        .input_stride(8).output_stride(height)
        .block_width(8).block_height(height)
        .iterations(1)
        .Test(xnn_x16_transpose_ukernel__8x8_reuse_multi_sse2);
  }
}
1799 
// block_height sweeps 9..15 (upper bound exclusive) at a fixed block_width of
// 16; output_stride follows block_height.
TEST(X16_TRANSPOSE__8X8_REUSE_MULTI_SSE2, bh_9_16_bw_16) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t height = 9; height < 16; ++height) {
    TransposeMicrokernelTester()
        .input_stride(16).output_stride(height)
        .block_width(16).block_height(height)
        .iterations(1)
        .Test(xnn_x16_transpose_ukernel__8x8_reuse_multi_sse2);
  }
}
1812 
// Sweeps block_height and block_width independently over 9..15 (upper bounds
// exclusive), with strides tracking the block dimensions.
TEST(X16_TRANSPOSE__8X8_REUSE_MULTI_SSE2, bh_9_16_bw_9_16) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t height = 9; height < 16; ++height) {
    for (size_t width = 9; width < 16; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width).output_stride(height)
          .block_width(width).block_height(height)
          .iterations(1)
          .Test(xnn_x16_transpose_ukernel__8x8_reuse_multi_sse2);
    }
  }
}
1827 
// One run with block_height 8 and block_width 8 where input_stride (16) is
// larger than block_width; output_stride equals block_height.
TEST(X16_TRANSPOSE__8X8_REUSE_MULTI_SSE2, bh_8_bw_8_is_16) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(16).output_stride(8)
      .block_width(8).block_height(8)
      .iterations(1)
      .Test(xnn_x16_transpose_ukernel__8x8_reuse_multi_sse2);
}
1838 
// One run with block_height 8 and block_width 8 where output_stride (16) is
// larger than block_height; input_stride equals block_width.
TEST(X16_TRANSPOSE__8X8_REUSE_MULTI_SSE2, bh_8_bw_8_os_16) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(8).output_stride(16)
      .block_width(8).block_height(8)
      .iterations(1)
      .Test(xnn_x16_transpose_ukernel__8x8_reuse_multi_sse2);
}
1849 
// One run with block_height 8 and block_width 8 where both strides (16) are
// larger than the block dimensions.
TEST(X16_TRANSPOSE__8X8_REUSE_MULTI_SSE2, bh_8_bw_8_is_16_os_16) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(16).output_stride(16)
      .block_width(8).block_height(8)
      .iterations(1)
      .Test(xnn_x16_transpose_ukernel__8x8_reuse_multi_sse2);
}
1860 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
1861 
1862 
1863 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// One run with block_height 8 and block_width 8; both strides equal the
// block dimensions.
TEST(X16_TRANSPOSE__8X8_REUSE_SWITCH_SSE2, bh_8_bw_8) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(8).output_stride(8)
      .block_width(8).block_height(8)
      .iterations(1)
      .Test(xnn_x16_transpose_ukernel__8x8_reuse_switch_sse2);
}
1874 
// Full sweep: block_height 1..16 and block_width 1..16 (both bounds
// inclusive), with strides tracking the block dimensions.
TEST(X16_TRANSPOSE__8X8_REUSE_SWITCH_SSE2, bh_1_16_bw_1_16) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t height = 1; height <= 16; ++height) {
    for (size_t width = 1; width <= 16; ++width) {
      TransposeMicrokernelTester()
          .input_stride(width).output_stride(height)
          .block_width(width).block_height(height)
          .iterations(1)
          .Test(xnn_x16_transpose_ukernel__8x8_reuse_switch_sse2);
    }
  }
}
1889 
// Single pass over a block of width 16 and height 8 (input stride 16,
// output stride 8).
TEST(X16_TRANSPOSE__8X8_REUSE_SWITCH_SSE2, bh_8_bw_16) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(16).output_stride(8)
      .block_width(16).block_height(8)
      .iterations(1)
      .Test(xnn_x16_transpose_ukernel__8x8_reuse_switch_sse2);
}
1900 
// Block height fixed at 8; block width runs over 9..15 (the `< 16` bound
// excludes 16) with the input stride tracking the width.
TEST(X16_TRANSPOSE__8X8_REUSE_SWITCH_SSE2, bh_8_bw_9_16) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t bw = 9; bw < 16; ++bw) {
    TransposeMicrokernelTester()
        .input_stride(bw).output_stride(8)
        .block_width(bw).block_height(8)
        .iterations(1)
        .Test(xnn_x16_transpose_ukernel__8x8_reuse_switch_sse2);
  }
}
1913 
// Block height fixed at 16; block width runs over 9..15 (the `< 16` bound
// excludes 16) with the input stride tracking the width.
TEST(X16_TRANSPOSE__8X8_REUSE_SWITCH_SSE2, bh_16_bw_9_16) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t bw = 9; bw < 16; ++bw) {
    TransposeMicrokernelTester()
        .input_stride(bw).output_stride(16)
        .block_width(bw).block_height(16)
        .iterations(1)
        .Test(xnn_x16_transpose_ukernel__8x8_reuse_switch_sse2);
  }
}
1926 
// Single pass over a block of width 8 and height 16 (input stride 8,
// output stride 16).
TEST(X16_TRANSPOSE__8X8_REUSE_SWITCH_SSE2, bh_16_bw_8) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(8).output_stride(16)
      .block_width(8).block_height(16)
      .iterations(1)
      .Test(xnn_x16_transpose_ukernel__8x8_reuse_switch_sse2);
}
1937 
// Block width fixed at 8; block height runs over 9..15 (the `< 16` bound
// excludes 16) with the output stride tracking the height.
TEST(X16_TRANSPOSE__8X8_REUSE_SWITCH_SSE2, bh_9_16_bw_8) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t bh = 9; bh < 16; ++bh) {
    TransposeMicrokernelTester()
        .input_stride(8).output_stride(bh)
        .block_width(8).block_height(bh)
        .iterations(1)
        .Test(xnn_x16_transpose_ukernel__8x8_reuse_switch_sse2);
  }
}
1950 
// Block width fixed at 16; block height runs over 9..15 (the `< 16` bound
// excludes 16) with the output stride tracking the height.
TEST(X16_TRANSPOSE__8X8_REUSE_SWITCH_SSE2, bh_9_16_bw_16) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t bh = 9; bh < 16; ++bh) {
    TransposeMicrokernelTester()
        .input_stride(16).output_stride(bh)
        .block_width(16).block_height(bh)
        .iterations(1)
        .Test(xnn_x16_transpose_ukernel__8x8_reuse_switch_sse2);
  }
}
1963 
// Sweeps block heights and block widths independently over 9..15 (the `< 16`
// bounds exclude 16); strides follow the matching block dimension.
TEST(X16_TRANSPOSE__8X8_REUSE_SWITCH_SSE2, bh_9_16_bw_9_16) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t bh = 9; bh < 16; ++bh) {
    for (size_t bw = 9; bw < 16; ++bw) {
      TransposeMicrokernelTester()
          .input_stride(bw).output_stride(bh)
          .block_width(bw).block_height(bh)
          .iterations(1)
          .Test(xnn_x16_transpose_ukernel__8x8_reuse_switch_sse2);
    }
  }
}
1978 
// 8x8 block read with an input stride of 16; output stride stays at 8.
TEST(X16_TRANSPOSE__8X8_REUSE_SWITCH_SSE2, bh_8_bw_8_is_16) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(16).output_stride(8)
      .block_width(8).block_height(8)
      .iterations(1)
      .Test(xnn_x16_transpose_ukernel__8x8_reuse_switch_sse2);
}
1989 
// 8x8 block written with an output stride of 16; input stride stays at 8.
TEST(X16_TRANSPOSE__8X8_REUSE_SWITCH_SSE2, bh_8_bw_8_os_16) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(8).output_stride(16)
      .block_width(8).block_height(8)
      .iterations(1)
      .Test(xnn_x16_transpose_ukernel__8x8_reuse_switch_sse2);
}
2000 
// 8x8 block with both the input and the output stride set to 16.
TEST(X16_TRANSPOSE__8X8_REUSE_SWITCH_SSE2, bh_8_bw_8_is_16_os_16) {
  TEST_REQUIRES_X86_SSE2;
  TransposeMicrokernelTester()
      .input_stride(16).output_stride(16)
      .block_width(8).block_height(8)
      .iterations(1)
      .Test(xnn_x16_transpose_ukernel__8x8_reuse_switch_sse2);
}
2011 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
2012 
2013 
2014 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single pass over one 8x8 block; input and output strides are both 8.
TEST(X16_TRANSPOSE__8X8_MULTI_DEC_ZIP_NEON, bh_8_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(8).output_stride(8)
      .block_width(8).block_height(8)
      .iterations(1)
      .Test(xnn_x16_transpose_ukernel__8x8_multi_dec_zip_neon);
}
2025 
// Sweeps every block height and block width from 1 through 16. The input
// stride follows the width and the output stride follows the height.
TEST(X16_TRANSPOSE__8X8_MULTI_DEC_ZIP_NEON, bh_1_16_bw_1_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bh = 1; bh <= 16; ++bh) {
    for (size_t bw = 1; bw <= 16; ++bw) {
      TransposeMicrokernelTester()
          .input_stride(bw).output_stride(bh)
          .block_width(bw).block_height(bh)
          .iterations(1)
          .Test(xnn_x16_transpose_ukernel__8x8_multi_dec_zip_neon);
    }
  }
}
2040 
// Single pass over a block of width 16 and height 8 (input stride 16,
// output stride 8).
TEST(X16_TRANSPOSE__8X8_MULTI_DEC_ZIP_NEON, bh_8_bw_16) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(16).output_stride(8)
      .block_width(16).block_height(8)
      .iterations(1)
      .Test(xnn_x16_transpose_ukernel__8x8_multi_dec_zip_neon);
}
2051 
// Block height fixed at 8; block width runs over 9..15 (the `< 16` bound
// excludes 16) with the input stride tracking the width.
TEST(X16_TRANSPOSE__8X8_MULTI_DEC_ZIP_NEON, bh_8_bw_9_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bw = 9; bw < 16; ++bw) {
    TransposeMicrokernelTester()
        .input_stride(bw).output_stride(8)
        .block_width(bw).block_height(8)
        .iterations(1)
        .Test(xnn_x16_transpose_ukernel__8x8_multi_dec_zip_neon);
  }
}
2064 
// Block height fixed at 16; block width runs over 9..15 (the `< 16` bound
// excludes 16) with the input stride tracking the width.
TEST(X16_TRANSPOSE__8X8_MULTI_DEC_ZIP_NEON, bh_16_bw_9_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bw = 9; bw < 16; ++bw) {
    TransposeMicrokernelTester()
        .input_stride(bw).output_stride(16)
        .block_width(bw).block_height(16)
        .iterations(1)
        .Test(xnn_x16_transpose_ukernel__8x8_multi_dec_zip_neon);
  }
}
2077 
// Single pass over a block of width 8 and height 16 (input stride 8,
// output stride 16).
TEST(X16_TRANSPOSE__8X8_MULTI_DEC_ZIP_NEON, bh_16_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(8).output_stride(16)
      .block_width(8).block_height(16)
      .iterations(1)
      .Test(xnn_x16_transpose_ukernel__8x8_multi_dec_zip_neon);
}
2088 
// Block width fixed at 8; block height runs over 9..15 (the `< 16` bound
// excludes 16) with the output stride tracking the height.
TEST(X16_TRANSPOSE__8X8_MULTI_DEC_ZIP_NEON, bh_9_16_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bh = 9; bh < 16; ++bh) {
    TransposeMicrokernelTester()
        .input_stride(8).output_stride(bh)
        .block_width(8).block_height(bh)
        .iterations(1)
        .Test(xnn_x16_transpose_ukernel__8x8_multi_dec_zip_neon);
  }
}
2101 
// Block width fixed at 16; block height runs over 9..15 (the `< 16` bound
// excludes 16) with the output stride tracking the height.
TEST(X16_TRANSPOSE__8X8_MULTI_DEC_ZIP_NEON, bh_9_16_bw_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bh = 9; bh < 16; ++bh) {
    TransposeMicrokernelTester()
        .input_stride(16).output_stride(bh)
        .block_width(16).block_height(bh)
        .iterations(1)
        .Test(xnn_x16_transpose_ukernel__8x8_multi_dec_zip_neon);
  }
}
2114 
// Sweeps block heights and block widths independently over 9..15 (the `< 16`
// bounds exclude 16); strides follow the matching block dimension.
TEST(X16_TRANSPOSE__8X8_MULTI_DEC_ZIP_NEON, bh_9_16_bw_9_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bh = 9; bh < 16; ++bh) {
    for (size_t bw = 9; bw < 16; ++bw) {
      TransposeMicrokernelTester()
          .input_stride(bw).output_stride(bh)
          .block_width(bw).block_height(bh)
          .iterations(1)
          .Test(xnn_x16_transpose_ukernel__8x8_multi_dec_zip_neon);
    }
  }
}
2129 
// 8x8 block read with an input stride of 16; output stride stays at 8.
TEST(X16_TRANSPOSE__8X8_MULTI_DEC_ZIP_NEON, bh_8_bw_8_is_16) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(16).output_stride(8)
      .block_width(8).block_height(8)
      .iterations(1)
      .Test(xnn_x16_transpose_ukernel__8x8_multi_dec_zip_neon);
}
2140 
// 8x8 block written with an output stride of 16; input stride stays at 8.
TEST(X16_TRANSPOSE__8X8_MULTI_DEC_ZIP_NEON, bh_8_bw_8_os_16) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(8).output_stride(16)
      .block_width(8).block_height(8)
      .iterations(1)
      .Test(xnn_x16_transpose_ukernel__8x8_multi_dec_zip_neon);
}
2151 
// 8x8 block with both the input and the output stride set to 16.
TEST(X16_TRANSPOSE__8X8_MULTI_DEC_ZIP_NEON, bh_8_bw_8_is_16_os_16) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(16).output_stride(16)
      .block_width(8).block_height(8)
      .iterations(1)
      .Test(xnn_x16_transpose_ukernel__8x8_multi_dec_zip_neon);
}
2162 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
2163 
2164 
2165 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single pass over one 8x8 block; input and output strides are both 8.
TEST(X16_TRANSPOSE__8X8_MULTI_MOV_ZIP_NEON, bh_8_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(8).output_stride(8)
      .block_width(8).block_height(8)
      .iterations(1)
      .Test(xnn_x16_transpose_ukernel__8x8_multi_mov_zip_neon);
}
2176 
// Sweeps every block height and block width from 1 through 16. The input
// stride follows the width and the output stride follows the height.
TEST(X16_TRANSPOSE__8X8_MULTI_MOV_ZIP_NEON, bh_1_16_bw_1_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bh = 1; bh <= 16; ++bh) {
    for (size_t bw = 1; bw <= 16; ++bw) {
      TransposeMicrokernelTester()
          .input_stride(bw).output_stride(bh)
          .block_width(bw).block_height(bh)
          .iterations(1)
          .Test(xnn_x16_transpose_ukernel__8x8_multi_mov_zip_neon);
    }
  }
}
2191 
// Single pass over a block of width 16 and height 8 (input stride 16,
// output stride 8).
TEST(X16_TRANSPOSE__8X8_MULTI_MOV_ZIP_NEON, bh_8_bw_16) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(16).output_stride(8)
      .block_width(16).block_height(8)
      .iterations(1)
      .Test(xnn_x16_transpose_ukernel__8x8_multi_mov_zip_neon);
}
2202 
// Block height fixed at 8; block width runs over 9..15 (the `< 16` bound
// excludes 16) with the input stride tracking the width.
TEST(X16_TRANSPOSE__8X8_MULTI_MOV_ZIP_NEON, bh_8_bw_9_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bw = 9; bw < 16; ++bw) {
    TransposeMicrokernelTester()
        .input_stride(bw).output_stride(8)
        .block_width(bw).block_height(8)
        .iterations(1)
        .Test(xnn_x16_transpose_ukernel__8x8_multi_mov_zip_neon);
  }
}
2215 
// Block height fixed at 16; block width runs over 9..15 (the `< 16` bound
// excludes 16) with the input stride tracking the width.
TEST(X16_TRANSPOSE__8X8_MULTI_MOV_ZIP_NEON, bh_16_bw_9_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bw = 9; bw < 16; ++bw) {
    TransposeMicrokernelTester()
        .input_stride(bw).output_stride(16)
        .block_width(bw).block_height(16)
        .iterations(1)
        .Test(xnn_x16_transpose_ukernel__8x8_multi_mov_zip_neon);
  }
}
2228 
// Single pass over a block of width 8 and height 16 (input stride 8,
// output stride 16).
TEST(X16_TRANSPOSE__8X8_MULTI_MOV_ZIP_NEON, bh_16_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(8).output_stride(16)
      .block_width(8).block_height(16)
      .iterations(1)
      .Test(xnn_x16_transpose_ukernel__8x8_multi_mov_zip_neon);
}
2239 
// Block width fixed at 8; block height runs over 9..15 (the `< 16` bound
// excludes 16) with the output stride tracking the height.
TEST(X16_TRANSPOSE__8X8_MULTI_MOV_ZIP_NEON, bh_9_16_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bh = 9; bh < 16; ++bh) {
    TransposeMicrokernelTester()
        .input_stride(8).output_stride(bh)
        .block_width(8).block_height(bh)
        .iterations(1)
        .Test(xnn_x16_transpose_ukernel__8x8_multi_mov_zip_neon);
  }
}
2252 
// Block width fixed at 16; block height runs over 9..15 (the `< 16` bound
// excludes 16) with the output stride tracking the height.
TEST(X16_TRANSPOSE__8X8_MULTI_MOV_ZIP_NEON, bh_9_16_bw_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bh = 9; bh < 16; ++bh) {
    TransposeMicrokernelTester()
        .input_stride(16).output_stride(bh)
        .block_width(16).block_height(bh)
        .iterations(1)
        .Test(xnn_x16_transpose_ukernel__8x8_multi_mov_zip_neon);
  }
}
2265 
// Sweeps block heights and block widths independently over 9..15 (the `< 16`
// bounds exclude 16); strides follow the matching block dimension.
TEST(X16_TRANSPOSE__8X8_MULTI_MOV_ZIP_NEON, bh_9_16_bw_9_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bh = 9; bh < 16; ++bh) {
    for (size_t bw = 9; bw < 16; ++bw) {
      TransposeMicrokernelTester()
          .input_stride(bw).output_stride(bh)
          .block_width(bw).block_height(bh)
          .iterations(1)
          .Test(xnn_x16_transpose_ukernel__8x8_multi_mov_zip_neon);
    }
  }
}
2280 
// 8x8 block read with an input stride of 16; output stride stays at 8.
TEST(X16_TRANSPOSE__8X8_MULTI_MOV_ZIP_NEON, bh_8_bw_8_is_16) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(16).output_stride(8)
      .block_width(8).block_height(8)
      .iterations(1)
      .Test(xnn_x16_transpose_ukernel__8x8_multi_mov_zip_neon);
}
2291 
// 8x8 block written with an output stride of 16; input stride stays at 8.
TEST(X16_TRANSPOSE__8X8_MULTI_MOV_ZIP_NEON, bh_8_bw_8_os_16) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(8).output_stride(16)
      .block_width(8).block_height(8)
      .iterations(1)
      .Test(xnn_x16_transpose_ukernel__8x8_multi_mov_zip_neon);
}
2302 
// 8x8 block with both the input and the output stride set to 16.
TEST(X16_TRANSPOSE__8X8_MULTI_MOV_ZIP_NEON, bh_8_bw_8_is_16_os_16) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(16).output_stride(16)
      .block_width(8).block_height(8)
      .iterations(1)
      .Test(xnn_x16_transpose_ukernel__8x8_multi_mov_zip_neon);
}
2313 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
2314 
2315 
2316 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single pass over one 8x8 block; input and output strides are both 8.
TEST(X16_TRANSPOSE__8X8_MULTI_SWITCH_ZIP_NEON, bh_8_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(8).output_stride(8)
      .block_width(8).block_height(8)
      .iterations(1)
      .Test(xnn_x16_transpose_ukernel__8x8_multi_switch_zip_neon);
}
2327 
// Sweeps every block height and block width from 1 through 16. The input
// stride follows the width and the output stride follows the height.
TEST(X16_TRANSPOSE__8X8_MULTI_SWITCH_ZIP_NEON, bh_1_16_bw_1_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bh = 1; bh <= 16; ++bh) {
    for (size_t bw = 1; bw <= 16; ++bw) {
      TransposeMicrokernelTester()
          .input_stride(bw).output_stride(bh)
          .block_width(bw).block_height(bh)
          .iterations(1)
          .Test(xnn_x16_transpose_ukernel__8x8_multi_switch_zip_neon);
    }
  }
}
2342 
// Single pass over a block of width 16 and height 8 (input stride 16,
// output stride 8).
TEST(X16_TRANSPOSE__8X8_MULTI_SWITCH_ZIP_NEON, bh_8_bw_16) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(16).output_stride(8)
      .block_width(16).block_height(8)
      .iterations(1)
      .Test(xnn_x16_transpose_ukernel__8x8_multi_switch_zip_neon);
}
2353 
// Block height fixed at 8; block width runs over 9..15 (the `< 16` bound
// excludes 16) with the input stride tracking the width.
TEST(X16_TRANSPOSE__8X8_MULTI_SWITCH_ZIP_NEON, bh_8_bw_9_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bw = 9; bw < 16; ++bw) {
    TransposeMicrokernelTester()
        .input_stride(bw).output_stride(8)
        .block_width(bw).block_height(8)
        .iterations(1)
        .Test(xnn_x16_transpose_ukernel__8x8_multi_switch_zip_neon);
  }
}
2366 
// Block height fixed at 16; block width runs over 9..15 (the `< 16` bound
// excludes 16) with the input stride tracking the width.
TEST(X16_TRANSPOSE__8X8_MULTI_SWITCH_ZIP_NEON, bh_16_bw_9_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bw = 9; bw < 16; ++bw) {
    TransposeMicrokernelTester()
        .input_stride(bw).output_stride(16)
        .block_width(bw).block_height(16)
        .iterations(1)
        .Test(xnn_x16_transpose_ukernel__8x8_multi_switch_zip_neon);
  }
}
2379 
// Single pass over a block of width 8 and height 16 (input stride 8,
// output stride 16).
TEST(X16_TRANSPOSE__8X8_MULTI_SWITCH_ZIP_NEON, bh_16_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(8).output_stride(16)
      .block_width(8).block_height(16)
      .iterations(1)
      .Test(xnn_x16_transpose_ukernel__8x8_multi_switch_zip_neon);
}
2390 
// Block width fixed at 8; block height runs over 9..15 (the `< 16` bound
// excludes 16) with the output stride tracking the height.
TEST(X16_TRANSPOSE__8X8_MULTI_SWITCH_ZIP_NEON, bh_9_16_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bh = 9; bh < 16; ++bh) {
    TransposeMicrokernelTester()
        .input_stride(8).output_stride(bh)
        .block_width(8).block_height(bh)
        .iterations(1)
        .Test(xnn_x16_transpose_ukernel__8x8_multi_switch_zip_neon);
  }
}
2403 
// Block width fixed at 16; block height runs over 9..15 (the `< 16` bound
// excludes 16) with the output stride tracking the height.
TEST(X16_TRANSPOSE__8X8_MULTI_SWITCH_ZIP_NEON, bh_9_16_bw_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bh = 9; bh < 16; ++bh) {
    TransposeMicrokernelTester()
        .input_stride(16).output_stride(bh)
        .block_width(16).block_height(bh)
        .iterations(1)
        .Test(xnn_x16_transpose_ukernel__8x8_multi_switch_zip_neon);
  }
}
2416 
// Sweeps block heights and block widths independently over 9..15 (the `< 16`
// bounds exclude 16); strides follow the matching block dimension.
TEST(X16_TRANSPOSE__8X8_MULTI_SWITCH_ZIP_NEON, bh_9_16_bw_9_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bh = 9; bh < 16; ++bh) {
    for (size_t bw = 9; bw < 16; ++bw) {
      TransposeMicrokernelTester()
          .input_stride(bw).output_stride(bh)
          .block_width(bw).block_height(bh)
          .iterations(1)
          .Test(xnn_x16_transpose_ukernel__8x8_multi_switch_zip_neon);
    }
  }
}
2431 
// 8x8 block read with an input stride of 16; output stride stays at 8.
TEST(X16_TRANSPOSE__8X8_MULTI_SWITCH_ZIP_NEON, bh_8_bw_8_is_16) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(16).output_stride(8)
      .block_width(8).block_height(8)
      .iterations(1)
      .Test(xnn_x16_transpose_ukernel__8x8_multi_switch_zip_neon);
}
2442 
// 8x8 block written with an output stride of 16; input stride stays at 8.
TEST(X16_TRANSPOSE__8X8_MULTI_SWITCH_ZIP_NEON, bh_8_bw_8_os_16) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(8).output_stride(16)
      .block_width(8).block_height(8)
      .iterations(1)
      .Test(xnn_x16_transpose_ukernel__8x8_multi_switch_zip_neon);
}
2453 
// 8x8 block with both the input and the output stride set to 16.
TEST(X16_TRANSPOSE__8X8_MULTI_SWITCH_ZIP_NEON, bh_8_bw_8_is_16_os_16) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(16).output_stride(16)
      .block_width(8).block_height(8)
      .iterations(1)
      .Test(xnn_x16_transpose_ukernel__8x8_multi_switch_zip_neon);
}
2464 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
2465 
2466 
2467 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single pass over one 8x8 block; input and output strides are both 8.
TEST(X16_TRANSPOSE__8X8_REUSE_DEC_ZIP_NEON, bh_8_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(8).output_stride(8)
      .block_width(8).block_height(8)
      .iterations(1)
      .Test(xnn_x16_transpose_ukernel__8x8_reuse_dec_zip_neon);
}
2478 
// Sweeps every block height and block width from 1 through 16. The input
// stride follows the width and the output stride follows the height.
TEST(X16_TRANSPOSE__8X8_REUSE_DEC_ZIP_NEON, bh_1_16_bw_1_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bh = 1; bh <= 16; ++bh) {
    for (size_t bw = 1; bw <= 16; ++bw) {
      TransposeMicrokernelTester()
          .input_stride(bw).output_stride(bh)
          .block_width(bw).block_height(bh)
          .iterations(1)
          .Test(xnn_x16_transpose_ukernel__8x8_reuse_dec_zip_neon);
    }
  }
}
2493 
// Single pass over a block of width 16 and height 8 (input stride 16,
// output stride 8).
TEST(X16_TRANSPOSE__8X8_REUSE_DEC_ZIP_NEON, bh_8_bw_16) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(16).output_stride(8)
      .block_width(16).block_height(8)
      .iterations(1)
      .Test(xnn_x16_transpose_ukernel__8x8_reuse_dec_zip_neon);
}
2504 
// Block height fixed at 8; block width runs over 9..15 (the `< 16` bound
// excludes 16) with the input stride tracking the width.
TEST(X16_TRANSPOSE__8X8_REUSE_DEC_ZIP_NEON, bh_8_bw_9_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bw = 9; bw < 16; ++bw) {
    TransposeMicrokernelTester()
        .input_stride(bw).output_stride(8)
        .block_width(bw).block_height(8)
        .iterations(1)
        .Test(xnn_x16_transpose_ukernel__8x8_reuse_dec_zip_neon);
  }
}
2517 
// Block height fixed at 16; block width runs over 9..15 (the `< 16` bound
// excludes 16) with the input stride tracking the width.
TEST(X16_TRANSPOSE__8X8_REUSE_DEC_ZIP_NEON, bh_16_bw_9_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bw = 9; bw < 16; ++bw) {
    TransposeMicrokernelTester()
        .input_stride(bw).output_stride(16)
        .block_width(bw).block_height(16)
        .iterations(1)
        .Test(xnn_x16_transpose_ukernel__8x8_reuse_dec_zip_neon);
  }
}
2530 
// Single pass over a block of width 8 and height 16 (input stride 8,
// output stride 16).
TEST(X16_TRANSPOSE__8X8_REUSE_DEC_ZIP_NEON, bh_16_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(8).output_stride(16)
      .block_width(8).block_height(16)
      .iterations(1)
      .Test(xnn_x16_transpose_ukernel__8x8_reuse_dec_zip_neon);
}
2541 
// Block width fixed at 8; block height runs over 9..15 (the `< 16` bound
// excludes 16) with the output stride tracking the height.
TEST(X16_TRANSPOSE__8X8_REUSE_DEC_ZIP_NEON, bh_9_16_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bh = 9; bh < 16; ++bh) {
    TransposeMicrokernelTester()
        .input_stride(8).output_stride(bh)
        .block_width(8).block_height(bh)
        .iterations(1)
        .Test(xnn_x16_transpose_ukernel__8x8_reuse_dec_zip_neon);
  }
}
2554 
// Block width fixed at 16; block height runs over 9..15 (the `< 16` bound
// excludes 16) with the output stride tracking the height.
TEST(X16_TRANSPOSE__8X8_REUSE_DEC_ZIP_NEON, bh_9_16_bw_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bh = 9; bh < 16; ++bh) {
    TransposeMicrokernelTester()
        .input_stride(16).output_stride(bh)
        .block_width(16).block_height(bh)
        .iterations(1)
        .Test(xnn_x16_transpose_ukernel__8x8_reuse_dec_zip_neon);
  }
}
2567 
// Sweeps block heights and block widths independently over 9..15 (the `< 16`
// bounds exclude 16); strides follow the matching block dimension.
TEST(X16_TRANSPOSE__8X8_REUSE_DEC_ZIP_NEON, bh_9_16_bw_9_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bh = 9; bh < 16; ++bh) {
    for (size_t bw = 9; bw < 16; ++bw) {
      TransposeMicrokernelTester()
          .input_stride(bw).output_stride(bh)
          .block_width(bw).block_height(bh)
          .iterations(1)
          .Test(xnn_x16_transpose_ukernel__8x8_reuse_dec_zip_neon);
    }
  }
}
2582 
// 8x8 block read with an input stride of 16; output stride stays at 8.
TEST(X16_TRANSPOSE__8X8_REUSE_DEC_ZIP_NEON, bh_8_bw_8_is_16) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(16).output_stride(8)
      .block_width(8).block_height(8)
      .iterations(1)
      .Test(xnn_x16_transpose_ukernel__8x8_reuse_dec_zip_neon);
}
2593 
// 8x8 block written with an output stride of 16; input stride stays at 8.
TEST(X16_TRANSPOSE__8X8_REUSE_DEC_ZIP_NEON, bh_8_bw_8_os_16) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(8).output_stride(16)
      .block_width(8).block_height(8)
      .iterations(1)
      .Test(xnn_x16_transpose_ukernel__8x8_reuse_dec_zip_neon);
}
2604 
// 8x8 block with both the input and the output stride set to 16.
TEST(X16_TRANSPOSE__8X8_REUSE_DEC_ZIP_NEON, bh_8_bw_8_is_16_os_16) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(16).output_stride(16)
      .block_width(8).block_height(8)
      .iterations(1)
      .Test(xnn_x16_transpose_ukernel__8x8_reuse_dec_zip_neon);
}
2615 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
2616 
2617 
2618 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single pass over one 8x8 block; input and output strides are both 8.
TEST(X16_TRANSPOSE__8X8_REUSE_MOV_ZIP_NEON, bh_8_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(8).output_stride(8)
      .block_width(8).block_height(8)
      .iterations(1)
      .Test(xnn_x16_transpose_ukernel__8x8_reuse_mov_zip_neon);
}
2629 
// Sweeps every block height and block width from 1 through 16. The input
// stride follows the width and the output stride follows the height.
TEST(X16_TRANSPOSE__8X8_REUSE_MOV_ZIP_NEON, bh_1_16_bw_1_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bh = 1; bh <= 16; ++bh) {
    for (size_t bw = 1; bw <= 16; ++bw) {
      TransposeMicrokernelTester()
          .input_stride(bw).output_stride(bh)
          .block_width(bw).block_height(bh)
          .iterations(1)
          .Test(xnn_x16_transpose_ukernel__8x8_reuse_mov_zip_neon);
    }
  }
}
2644 
// Single pass over a block of width 16 and height 8 (input stride 16,
// output stride 8).
TEST(X16_TRANSPOSE__8X8_REUSE_MOV_ZIP_NEON, bh_8_bw_16) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(16).output_stride(8)
      .block_width(16).block_height(8)
      .iterations(1)
      .Test(xnn_x16_transpose_ukernel__8x8_reuse_mov_zip_neon);
}
2655 
// Block height fixed at 8; block width runs over 9..15 (the `< 16` bound
// excludes 16) with the input stride tracking the width.
TEST(X16_TRANSPOSE__8X8_REUSE_MOV_ZIP_NEON, bh_8_bw_9_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bw = 9; bw < 16; ++bw) {
    TransposeMicrokernelTester()
        .input_stride(bw).output_stride(8)
        .block_width(bw).block_height(8)
        .iterations(1)
        .Test(xnn_x16_transpose_ukernel__8x8_reuse_mov_zip_neon);
  }
}
2668 
// Block height fixed at 16; block width runs over 9..15 (the `< 16` bound
// excludes 16) with the input stride tracking the width.
TEST(X16_TRANSPOSE__8X8_REUSE_MOV_ZIP_NEON, bh_16_bw_9_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bw = 9; bw < 16; ++bw) {
    TransposeMicrokernelTester()
        .input_stride(bw).output_stride(16)
        .block_width(bw).block_height(16)
        .iterations(1)
        .Test(xnn_x16_transpose_ukernel__8x8_reuse_mov_zip_neon);
  }
}
2681 
// Single pass over a block of width 8 and height 16 (input stride 8,
// output stride 16).
TEST(X16_TRANSPOSE__8X8_REUSE_MOV_ZIP_NEON, bh_16_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  TransposeMicrokernelTester()
      .input_stride(8).output_stride(16)
      .block_width(8).block_height(16)
      .iterations(1)
      .Test(xnn_x16_transpose_ukernel__8x8_reuse_mov_zip_neon);
}
2692 
// Block width fixed at 8; block height runs over 9..15 (the `< 16` bound
// excludes 16) with the output stride tracking the height.
TEST(X16_TRANSPOSE__8X8_REUSE_MOV_ZIP_NEON, bh_9_16_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bh = 9; bh < 16; ++bh) {
    TransposeMicrokernelTester()
        .input_stride(8).output_stride(bh)
        .block_width(8).block_height(bh)
        .iterations(1)
        .Test(xnn_x16_transpose_ukernel__8x8_reuse_mov_zip_neon);
  }
}
2705 
// Block width fixed at 16; block height runs over 9..15 (the `< 16` bound
// excludes 16) with the output stride tracking the height.
TEST(X16_TRANSPOSE__8X8_REUSE_MOV_ZIP_NEON, bh_9_16_bw_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bh = 9; bh < 16; ++bh) {
    TransposeMicrokernelTester()
        .input_stride(16).output_stride(bh)
        .block_width(16).block_height(bh)
        .iterations(1)
        .Test(xnn_x16_transpose_ukernel__8x8_reuse_mov_zip_neon);
  }
}
2718 
// Sweeps block heights and block widths independently over 9..15 (the `< 16`
// bounds exclude 16); strides follow the matching block dimension.
TEST(X16_TRANSPOSE__8X8_REUSE_MOV_ZIP_NEON, bh_9_16_bw_9_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t bh = 9; bh < 16; ++bh) {
    for (size_t bw = 9; bw < 16; ++bw) {
      TransposeMicrokernelTester()
          .input_stride(bw).output_stride(bh)
          .block_width(bw).block_height(bh)
          .iterations(1)
          .Test(xnn_x16_transpose_ukernel__8x8_reuse_mov_zip_neon);
    }
  }
}
2733 
// Single 8x8 block whose input_stride (16) is larger than block_width;
// output_stride equals block_height.
TEST(X16_TRANSPOSE__8X8_REUSE_MOV_ZIP_NEON, bh_8_bw_8_is_16) {
  TEST_REQUIRES_ARM_NEON;
  const size_t block_dim = 8;
  const size_t wide_stride = 16;
  TransposeMicrokernelTester()
    .input_stride(wide_stride)
    .output_stride(block_dim)
    .block_width(block_dim)
    .block_height(block_dim)
    .iterations(1)
    .Test(xnn_x16_transpose_ukernel__8x8_reuse_mov_zip_neon);
}
2744 
// Single 8x8 block whose output_stride (16) is larger than block_height;
// input_stride equals block_width.
TEST(X16_TRANSPOSE__8X8_REUSE_MOV_ZIP_NEON, bh_8_bw_8_os_16) {
  TEST_REQUIRES_ARM_NEON;
  const size_t block_dim = 8;
  const size_t wide_stride = 16;
  TransposeMicrokernelTester()
    .input_stride(block_dim)
    .output_stride(wide_stride)
    .block_width(block_dim)
    .block_height(block_dim)
    .iterations(1)
    .Test(xnn_x16_transpose_ukernel__8x8_reuse_mov_zip_neon);
}
2755 
// Single 8x8 block where both input_stride and output_stride (16) are larger
// than the block dimensions.
TEST(X16_TRANSPOSE__8X8_REUSE_MOV_ZIP_NEON, bh_8_bw_8_is_16_os_16) {
  TEST_REQUIRES_ARM_NEON;
  const size_t block_dim = 8;
  const size_t wide_stride = 16;
  TransposeMicrokernelTester()
    .input_stride(wide_stride)
    .output_stride(wide_stride)
    .block_width(block_dim)
    .block_height(block_dim)
    .iterations(1)
    .Test(xnn_x16_transpose_ukernel__8x8_reuse_mov_zip_neon);
}
2766 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
2767 
2768 
2769 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single case: an 8x8 block with both strides equal to the block dimension.
TEST(X16_TRANSPOSE__8X8_REUSE_MULTI_ZIP_NEON, bh_8_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  const size_t block_dim = 8;
  TransposeMicrokernelTester()
    .input_stride(block_dim)
    .output_stride(block_dim)
    .block_width(block_dim)
    .block_height(block_dim)
    .iterations(1)
    .Test(xnn_x16_transpose_ukernel__8x8_reuse_multi_zip_neon);
}
2780 
// Sweeps every (block_height, block_width) pair with both in 1..16 inclusive;
// each stride equals the matching block dimension.
TEST(X16_TRANSPOSE__8X8_REUSE_MULTI_ZIP_NEON, bh_1_16_bw_1_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 1; height <= 16; ++height) {
    for (size_t width = 1; width <= 16; ++width) {
      TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .iterations(1)
        .Test(xnn_x16_transpose_ukernel__8x8_reuse_multi_zip_neon);
    }
  }
}
2795 
// Single case: block_width 16, block_height 8; input_stride equals the width
// and output_stride equals the height.
TEST(X16_TRANSPOSE__8X8_REUSE_MULTI_ZIP_NEON, bh_8_bw_16) {
  TEST_REQUIRES_ARM_NEON;
  const size_t width = 16;
  const size_t height = 8;
  TransposeMicrokernelTester()
    .input_stride(width)
    .output_stride(height)
    .block_width(width)
    .block_height(height)
    .iterations(1)
    .Test(xnn_x16_transpose_ukernel__8x8_reuse_multi_zip_neon);
}
2806 
// Sweeps block_width over 9..15 (the bound 16 is exclusive) at block_height 8;
// input_stride tracks the width, output_stride equals the height.
TEST(X16_TRANSPOSE__8X8_REUSE_MULTI_ZIP_NEON, bh_8_bw_9_16) {
  TEST_REQUIRES_ARM_NEON;
  const size_t height = 8;
  for (size_t width = 9; width < 16; ++width) {
    TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .iterations(1)
      .Test(xnn_x16_transpose_ukernel__8x8_reuse_multi_zip_neon);
  }
}
2819 
// Sweeps block_width over 9..15 (the bound 16 is exclusive) at block_height 16;
// input_stride tracks the width, output_stride equals the height.
TEST(X16_TRANSPOSE__8X8_REUSE_MULTI_ZIP_NEON, bh_16_bw_9_16) {
  TEST_REQUIRES_ARM_NEON;
  const size_t height = 16;
  for (size_t width = 9; width < 16; ++width) {
    TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .iterations(1)
      .Test(xnn_x16_transpose_ukernel__8x8_reuse_multi_zip_neon);
  }
}
2832 
// Single case: block_width 8, block_height 16; input_stride equals the width
// and output_stride equals the height.
TEST(X16_TRANSPOSE__8X8_REUSE_MULTI_ZIP_NEON, bh_16_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  const size_t width = 8;
  const size_t height = 16;
  TransposeMicrokernelTester()
    .input_stride(width)
    .output_stride(height)
    .block_width(width)
    .block_height(height)
    .iterations(1)
    .Test(xnn_x16_transpose_ukernel__8x8_reuse_multi_zip_neon);
}
2843 
// Sweeps block_height over 9..15 (the bound 16 is exclusive) at block_width 8;
// output_stride tracks the height, input_stride equals the width.
TEST(X16_TRANSPOSE__8X8_REUSE_MULTI_ZIP_NEON, bh_9_16_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  const size_t width = 8;
  for (size_t height = 9; height < 16; ++height) {
    TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .iterations(1)
      .Test(xnn_x16_transpose_ukernel__8x8_reuse_multi_zip_neon);
  }
}
2856 
// Sweeps block_height over 9..15 (the bound 16 is exclusive) at block_width 16;
// output_stride tracks the height, input_stride equals the width.
TEST(X16_TRANSPOSE__8X8_REUSE_MULTI_ZIP_NEON, bh_9_16_bw_16) {
  TEST_REQUIRES_ARM_NEON;
  const size_t width = 16;
  for (size_t height = 9; height < 16; ++height) {
    TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .iterations(1)
      .Test(xnn_x16_transpose_ukernel__8x8_reuse_multi_zip_neon);
  }
}
2869 
// Sweeps every (block_height, block_width) pair with both in 9..15 (the bound
// 16 is exclusive); each stride equals the matching block dimension.
TEST(X16_TRANSPOSE__8X8_REUSE_MULTI_ZIP_NEON, bh_9_16_bw_9_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 9; height < 16; ++height) {
    for (size_t width = 9; width < 16; ++width) {
      TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .iterations(1)
        .Test(xnn_x16_transpose_ukernel__8x8_reuse_multi_zip_neon);
    }
  }
}
2884 
// Single 8x8 block whose input_stride (16) is larger than block_width;
// output_stride equals block_height.
TEST(X16_TRANSPOSE__8X8_REUSE_MULTI_ZIP_NEON, bh_8_bw_8_is_16) {
  TEST_REQUIRES_ARM_NEON;
  const size_t block_dim = 8;
  const size_t wide_stride = 16;
  TransposeMicrokernelTester()
    .input_stride(wide_stride)
    .output_stride(block_dim)
    .block_width(block_dim)
    .block_height(block_dim)
    .iterations(1)
    .Test(xnn_x16_transpose_ukernel__8x8_reuse_multi_zip_neon);
}
2895 
// Single 8x8 block whose output_stride (16) is larger than block_height;
// input_stride equals block_width.
TEST(X16_TRANSPOSE__8X8_REUSE_MULTI_ZIP_NEON, bh_8_bw_8_os_16) {
  TEST_REQUIRES_ARM_NEON;
  const size_t block_dim = 8;
  const size_t wide_stride = 16;
  TransposeMicrokernelTester()
    .input_stride(block_dim)
    .output_stride(wide_stride)
    .block_width(block_dim)
    .block_height(block_dim)
    .iterations(1)
    .Test(xnn_x16_transpose_ukernel__8x8_reuse_multi_zip_neon);
}
2906 
// Single 8x8 block where both input_stride and output_stride (16) are larger
// than the block dimensions.
TEST(X16_TRANSPOSE__8X8_REUSE_MULTI_ZIP_NEON, bh_8_bw_8_is_16_os_16) {
  TEST_REQUIRES_ARM_NEON;
  const size_t block_dim = 8;
  const size_t wide_stride = 16;
  TransposeMicrokernelTester()
    .input_stride(wide_stride)
    .output_stride(wide_stride)
    .block_width(block_dim)
    .block_height(block_dim)
    .iterations(1)
    .Test(xnn_x16_transpose_ukernel__8x8_reuse_multi_zip_neon);
}
2917 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
2918 
2919 
2920 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single case: an 8x8 block with both strides equal to the block dimension.
TEST(X16_TRANSPOSE__8X8_REUSE_SWITCH_ZIP_NEON, bh_8_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  const size_t block_dim = 8;
  TransposeMicrokernelTester()
    .input_stride(block_dim)
    .output_stride(block_dim)
    .block_width(block_dim)
    .block_height(block_dim)
    .iterations(1)
    .Test(xnn_x16_transpose_ukernel__8x8_reuse_switch_zip_neon);
}
2931 
// Sweeps every (block_height, block_width) pair with both in 1..16 inclusive;
// each stride equals the matching block dimension.
TEST(X16_TRANSPOSE__8X8_REUSE_SWITCH_ZIP_NEON, bh_1_16_bw_1_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 1; height <= 16; ++height) {
    for (size_t width = 1; width <= 16; ++width) {
      TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .iterations(1)
        .Test(xnn_x16_transpose_ukernel__8x8_reuse_switch_zip_neon);
    }
  }
}
2946 
// Single case: block_width 16, block_height 8; input_stride equals the width
// and output_stride equals the height.
TEST(X16_TRANSPOSE__8X8_REUSE_SWITCH_ZIP_NEON, bh_8_bw_16) {
  TEST_REQUIRES_ARM_NEON;
  const size_t width = 16;
  const size_t height = 8;
  TransposeMicrokernelTester()
    .input_stride(width)
    .output_stride(height)
    .block_width(width)
    .block_height(height)
    .iterations(1)
    .Test(xnn_x16_transpose_ukernel__8x8_reuse_switch_zip_neon);
}
2957 
// Sweeps block_width over 9..15 (the bound 16 is exclusive) at block_height 8;
// input_stride tracks the width, output_stride equals the height.
TEST(X16_TRANSPOSE__8X8_REUSE_SWITCH_ZIP_NEON, bh_8_bw_9_16) {
  TEST_REQUIRES_ARM_NEON;
  const size_t height = 8;
  for (size_t width = 9; width < 16; ++width) {
    TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .iterations(1)
      .Test(xnn_x16_transpose_ukernel__8x8_reuse_switch_zip_neon);
  }
}
2970 
// Sweeps block_width over 9..15 (the bound 16 is exclusive) at block_height 16;
// input_stride tracks the width, output_stride equals the height.
TEST(X16_TRANSPOSE__8X8_REUSE_SWITCH_ZIP_NEON, bh_16_bw_9_16) {
  TEST_REQUIRES_ARM_NEON;
  const size_t height = 16;
  for (size_t width = 9; width < 16; ++width) {
    TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .iterations(1)
      .Test(xnn_x16_transpose_ukernel__8x8_reuse_switch_zip_neon);
  }
}
2983 
// Single case: block_width 8, block_height 16; input_stride equals the width
// and output_stride equals the height.
TEST(X16_TRANSPOSE__8X8_REUSE_SWITCH_ZIP_NEON, bh_16_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  const size_t width = 8;
  const size_t height = 16;
  TransposeMicrokernelTester()
    .input_stride(width)
    .output_stride(height)
    .block_width(width)
    .block_height(height)
    .iterations(1)
    .Test(xnn_x16_transpose_ukernel__8x8_reuse_switch_zip_neon);
}
2994 
// Sweeps block_height over 9..15 (the bound 16 is exclusive) at block_width 8;
// output_stride tracks the height, input_stride equals the width.
TEST(X16_TRANSPOSE__8X8_REUSE_SWITCH_ZIP_NEON, bh_9_16_bw_8) {
  TEST_REQUIRES_ARM_NEON;
  const size_t width = 8;
  for (size_t height = 9; height < 16; ++height) {
    TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .iterations(1)
      .Test(xnn_x16_transpose_ukernel__8x8_reuse_switch_zip_neon);
  }
}
3007 
// Sweeps block_height over 9..15 (the bound 16 is exclusive) at block_width 16;
// output_stride tracks the height, input_stride equals the width.
TEST(X16_TRANSPOSE__8X8_REUSE_SWITCH_ZIP_NEON, bh_9_16_bw_16) {
  TEST_REQUIRES_ARM_NEON;
  const size_t width = 16;
  for (size_t height = 9; height < 16; ++height) {
    TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .iterations(1)
      .Test(xnn_x16_transpose_ukernel__8x8_reuse_switch_zip_neon);
  }
}
3020 
// Sweeps every (block_height, block_width) pair with both in 9..15 (the bound
// 16 is exclusive); each stride equals the matching block dimension.
TEST(X16_TRANSPOSE__8X8_REUSE_SWITCH_ZIP_NEON, bh_9_16_bw_9_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t height = 9; height < 16; ++height) {
    for (size_t width = 9; width < 16; ++width) {
      TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .iterations(1)
        .Test(xnn_x16_transpose_ukernel__8x8_reuse_switch_zip_neon);
    }
  }
}
3035 
// Single 8x8 block whose input_stride (16) is larger than block_width;
// output_stride equals block_height.
TEST(X16_TRANSPOSE__8X8_REUSE_SWITCH_ZIP_NEON, bh_8_bw_8_is_16) {
  TEST_REQUIRES_ARM_NEON;
  const size_t block_dim = 8;
  const size_t wide_stride = 16;
  TransposeMicrokernelTester()
    .input_stride(wide_stride)
    .output_stride(block_dim)
    .block_width(block_dim)
    .block_height(block_dim)
    .iterations(1)
    .Test(xnn_x16_transpose_ukernel__8x8_reuse_switch_zip_neon);
}
3046 
// Single 8x8 block whose output_stride (16) is larger than block_height;
// input_stride equals block_width.
TEST(X16_TRANSPOSE__8X8_REUSE_SWITCH_ZIP_NEON, bh_8_bw_8_os_16) {
  TEST_REQUIRES_ARM_NEON;
  const size_t block_dim = 8;
  const size_t wide_stride = 16;
  TransposeMicrokernelTester()
    .input_stride(block_dim)
    .output_stride(wide_stride)
    .block_width(block_dim)
    .block_height(block_dim)
    .iterations(1)
    .Test(xnn_x16_transpose_ukernel__8x8_reuse_switch_zip_neon);
}
3057 
// Single 8x8 block where both input_stride and output_stride (16) are larger
// than the block dimensions.
TEST(X16_TRANSPOSE__8X8_REUSE_SWITCH_ZIP_NEON, bh_8_bw_8_is_16_os_16) {
  TEST_REQUIRES_ARM_NEON;
  const size_t block_dim = 8;
  const size_t wide_stride = 16;
  TransposeMicrokernelTester()
    .input_stride(wide_stride)
    .output_stride(wide_stride)
    .block_width(block_dim)
    .block_height(block_dim)
    .iterations(1)
    .Test(xnn_x16_transpose_ukernel__8x8_reuse_switch_zip_neon);
}
3068 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
3069