• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2021 Google LLC
2 //
3 // This source code is licensed under the BSD-style license found in the
4 // LICENSE file in the root directory of this source tree.
5 //
6 // Auto-generated file. Do not edit!
7 //   Specification: test/x64-transpose.yaml
8 //   Generator: tools/generate-transpose-test.py
9 
10 
11 #include <gtest/gtest.h>
12 
13 #include <xnnpack/common.h>
14 #include <xnnpack/isa-checks.h>
15 
16 #include <xnnpack/transpose.h>
17 #include "transpose-microkernel-tester.h"
18 
19 
// Single configuration: block_height 1, block_width 2, input_stride 2, output_stride 1.
TEST(X64_TRANSPOSE__1X2_SCALAR_INT, bh_1_bw_2) {
  TransposeMicrokernelTester tester;
  tester.input_stride(2).output_stride(1).block_width(2).block_height(1).iterations(1);
  tester.Test(xnn_x64_transpose_ukernel__1x2_scalar_int);
}
29 
// Sweeps block_height over [1, 2] and block_width over [1, 4]; output_stride
// follows the height and input_stride follows the width.
TEST(X64_TRANSPOSE__1X2_SCALAR_INT, bh_1_2_bw_1_4) {
  for (size_t height = 1; height <= 2; height++) {
    for (size_t width = 1; width <= 4; width++) {
      TransposeMicrokernelTester tester;
      tester.input_stride(width).output_stride(height);
      tester.block_width(width).block_height(height).iterations(1);
      tester.Test(xnn_x64_transpose_ukernel__1x2_scalar_int);
    }
  }
}
43 
// Single configuration: block_height 1, block_width 4, input_stride 4, output_stride 1.
TEST(X64_TRANSPOSE__1X2_SCALAR_INT, bh_1_bw_4) {
  TransposeMicrokernelTester tester;
  tester.input_stride(4).output_stride(1).block_width(4).block_height(1).iterations(1);
  tester.Test(xnn_x64_transpose_ukernel__1x2_scalar_int);
}
53 
// Sweeps block_width (with matching input_stride) over [3, 4); block_height and
// output_stride stay at 1.
TEST(X64_TRANSPOSE__1X2_SCALAR_INT, bh_1_bw_3_4) {
  for (size_t width = 3; width < 4; width++) {
    TransposeMicrokernelTester tester;
    tester.input_stride(width).output_stride(1);
    tester.block_width(width).block_height(1).iterations(1);
    tester.Test(xnn_x64_transpose_ukernel__1x2_scalar_int);
  }
}
65 
// Sweeps block_width (with matching input_stride) over [3, 4); block_height and
// output_stride stay at 2.
TEST(X64_TRANSPOSE__1X2_SCALAR_INT, bh_2_bw_3_4) {
  for (size_t width = 3; width < 4; width++) {
    TransposeMicrokernelTester tester;
    tester.input_stride(width).output_stride(2);
    tester.block_width(width).block_height(2).iterations(1);
    tester.Test(xnn_x64_transpose_ukernel__1x2_scalar_int);
  }
}
77 
// Single configuration: block_height 2, block_width 2, input_stride 2, output_stride 2.
TEST(X64_TRANSPOSE__1X2_SCALAR_INT, bh_2_bw_2) {
  TransposeMicrokernelTester tester;
  tester.input_stride(2).output_stride(2).block_width(2).block_height(2).iterations(1);
  tester.Test(xnn_x64_transpose_ukernel__1x2_scalar_int);
}
87 
// Sweeps block_height (with matching output_stride) over [2, 2); block_width and
// input_stride stay at 2.
// NOTE(review): that range is empty, so the loop body never runs and this test
// does not invoke the kernel.
TEST(X64_TRANSPOSE__1X2_SCALAR_INT, bh_2_2_bw_2) {
  for (size_t height = 2; height < 2; height++) {
    TransposeMicrokernelTester tester;
    tester.input_stride(2).output_stride(height);
    tester.block_width(2).block_height(height).iterations(1);
    tester.Test(xnn_x64_transpose_ukernel__1x2_scalar_int);
  }
}
99 
// Sweeps block_height (with matching output_stride) over [2, 2); block_width and
// input_stride stay at 4.
// NOTE(review): that range is empty, so the loop body never runs and this test
// does not invoke the kernel.
TEST(X64_TRANSPOSE__1X2_SCALAR_INT, bh_2_2_bw_4) {
  for (size_t height = 2; height < 2; height++) {
    TransposeMicrokernelTester tester;
    tester.input_stride(4).output_stride(height);
    tester.block_width(4).block_height(height).iterations(1);
    tester.Test(xnn_x64_transpose_ukernel__1x2_scalar_int);
  }
}
111 
// Nested sweep: block_height/output_stride over [2, 2) and
// block_width/input_stride over [3, 4).
// NOTE(review): the outer range is empty, so nothing inside executes and this
// test does not invoke the kernel.
TEST(X64_TRANSPOSE__1X2_SCALAR_INT, bh_2_2_bw_3_4) {
  for (size_t height = 2; height < 2; height++) {
    for (size_t width = 3; width < 4; width++) {
      TransposeMicrokernelTester tester;
      tester.input_stride(width).output_stride(height);
      tester.block_width(width).block_height(height).iterations(1);
      tester.Test(xnn_x64_transpose_ukernel__1x2_scalar_int);
    }
  }
}
125 
// Single configuration: block_height 1, block_width 2, input_stride 4, output_stride 1.
TEST(X64_TRANSPOSE__1X2_SCALAR_INT, bh_1_bw_2_is_4) {
  TransposeMicrokernelTester tester;
  tester.input_stride(4).output_stride(1).block_width(2).block_height(1).iterations(1);
  tester.Test(xnn_x64_transpose_ukernel__1x2_scalar_int);
}
135 
// Single configuration: block_height 1, block_width 2, input_stride 2, output_stride 2.
TEST(X64_TRANSPOSE__1X2_SCALAR_INT, bh_1_bw_2_os_2) {
  TransposeMicrokernelTester tester;
  tester.input_stride(2).output_stride(2).block_width(2).block_height(1).iterations(1);
  tester.Test(xnn_x64_transpose_ukernel__1x2_scalar_int);
}
145 
// Single configuration: block_height 1, block_width 2, input_stride 4, output_stride 2.
TEST(X64_TRANSPOSE__1X2_SCALAR_INT, bh_1_bw_2_is_4_os_2) {
  TransposeMicrokernelTester tester;
  tester.input_stride(4).output_stride(2).block_width(2).block_height(1).iterations(1);
  tester.Test(xnn_x64_transpose_ukernel__1x2_scalar_int);
}
155 
// Single configuration: block_height 2, block_width 1, input_stride 1, output_stride 2.
TEST(X64_TRANSPOSE__2X1_SCALAR_INT, bh_2_bw_1) {
  TransposeMicrokernelTester tester;
  tester.input_stride(1).output_stride(2).block_width(1).block_height(2).iterations(1);
  tester.Test(xnn_x64_transpose_ukernel__2x1_scalar_int);
}
165 
// Sweeps block_height over [1, 4] and block_width over [1, 2]; output_stride
// follows the height and input_stride follows the width.
TEST(X64_TRANSPOSE__2X1_SCALAR_INT, bh_1_4_bw_1_2) {
  for (size_t height = 1; height <= 4; height++) {
    for (size_t width = 1; width <= 2; width++) {
      TransposeMicrokernelTester tester;
      tester.input_stride(width).output_stride(height);
      tester.block_width(width).block_height(height).iterations(1);
      tester.Test(xnn_x64_transpose_ukernel__2x1_scalar_int);
    }
  }
}
179 
// Single configuration: block_height 2, block_width 2, input_stride 2, output_stride 2.
TEST(X64_TRANSPOSE__2X1_SCALAR_INT, bh_2_bw_2) {
  TransposeMicrokernelTester tester;
  tester.input_stride(2).output_stride(2).block_width(2).block_height(2).iterations(1);
  tester.Test(xnn_x64_transpose_ukernel__2x1_scalar_int);
}
189 
// Sweeps block_width (with matching input_stride) over [2, 2); block_height and
// output_stride stay at 2.
// NOTE(review): that range is empty, so the loop body never runs and this test
// does not invoke the kernel.
TEST(X64_TRANSPOSE__2X1_SCALAR_INT, bh_2_bw_2_2) {
  for (size_t width = 2; width < 2; width++) {
    TransposeMicrokernelTester tester;
    tester.input_stride(width).output_stride(2);
    tester.block_width(width).block_height(2).iterations(1);
    tester.Test(xnn_x64_transpose_ukernel__2x1_scalar_int);
  }
}
201 
// Sweeps block_width (with matching input_stride) over [2, 2); block_height and
// output_stride stay at 4.
// NOTE(review): that range is empty, so the loop body never runs and this test
// does not invoke the kernel.
TEST(X64_TRANSPOSE__2X1_SCALAR_INT, bh_4_bw_2_2) {
  for (size_t width = 2; width < 2; width++) {
    TransposeMicrokernelTester tester;
    tester.input_stride(width).output_stride(4);
    tester.block_width(width).block_height(4).iterations(1);
    tester.Test(xnn_x64_transpose_ukernel__2x1_scalar_int);
  }
}
213 
// Single configuration: block_height 4, block_width 1, input_stride 1, output_stride 4.
TEST(X64_TRANSPOSE__2X1_SCALAR_INT, bh_4_bw_1) {
  TransposeMicrokernelTester tester;
  tester.input_stride(1).output_stride(4).block_width(1).block_height(4).iterations(1);
  tester.Test(xnn_x64_transpose_ukernel__2x1_scalar_int);
}
223 
// Sweeps block_height (with matching output_stride) over [3, 4); block_width and
// input_stride stay at 1.
TEST(X64_TRANSPOSE__2X1_SCALAR_INT, bh_3_4_bw_1) {
  for (size_t height = 3; height < 4; height++) {
    TransposeMicrokernelTester tester;
    tester.input_stride(1).output_stride(height);
    tester.block_width(1).block_height(height).iterations(1);
    tester.Test(xnn_x64_transpose_ukernel__2x1_scalar_int);
  }
}
235 
// Sweeps block_height (with matching output_stride) over [3, 4); block_width and
// input_stride stay at 2.
TEST(X64_TRANSPOSE__2X1_SCALAR_INT, bh_3_4_bw_2) {
  for (size_t height = 3; height < 4; height++) {
    TransposeMicrokernelTester tester;
    tester.input_stride(2).output_stride(height);
    tester.block_width(2).block_height(height).iterations(1);
    tester.Test(xnn_x64_transpose_ukernel__2x1_scalar_int);
  }
}
247 
// Nested sweep: block_height/output_stride over [3, 4) and
// block_width/input_stride over [2, 2).
// NOTE(review): the inner range is empty, so the innermost body never executes
// and this test does not invoke the kernel.
TEST(X64_TRANSPOSE__2X1_SCALAR_INT, bh_3_4_bw_2_2) {
  for (size_t height = 3; height < 4; height++) {
    for (size_t width = 2; width < 2; width++) {
      TransposeMicrokernelTester tester;
      tester.input_stride(width).output_stride(height);
      tester.block_width(width).block_height(height).iterations(1);
      tester.Test(xnn_x64_transpose_ukernel__2x1_scalar_int);
    }
  }
}
261 
// Single configuration: block_height 2, block_width 1, input_stride 2, output_stride 2.
TEST(X64_TRANSPOSE__2X1_SCALAR_INT, bh_2_bw_1_is_2) {
  TransposeMicrokernelTester tester;
  tester.input_stride(2).output_stride(2).block_width(1).block_height(2).iterations(1);
  tester.Test(xnn_x64_transpose_ukernel__2x1_scalar_int);
}
271 
// Single configuration: block_height 2, block_width 1, input_stride 1, output_stride 4.
TEST(X64_TRANSPOSE__2X1_SCALAR_INT, bh_2_bw_1_os_4) {
  TransposeMicrokernelTester tester;
  tester.input_stride(1).output_stride(4).block_width(1).block_height(2).iterations(1);
  tester.Test(xnn_x64_transpose_ukernel__2x1_scalar_int);
}
281 
// Single configuration: block_height 2, block_width 1, input_stride 2, output_stride 4.
TEST(X64_TRANSPOSE__2X1_SCALAR_INT, bh_2_bw_1_is_2_os_4) {
  TransposeMicrokernelTester tester;
  tester.input_stride(2).output_stride(4).block_width(1).block_height(2).iterations(1);
  tester.Test(xnn_x64_transpose_ukernel__2x1_scalar_int);
}
291 
// Single configuration: block_height 2, block_width 2, input_stride 2, output_stride 2.
TEST(X64_TRANSPOSE__2X2_SCALAR_INT, bh_2_bw_2) {
  TransposeMicrokernelTester tester;
  tester.input_stride(2).output_stride(2).block_width(2).block_height(2).iterations(1);
  tester.Test(xnn_x64_transpose_ukernel__2x2_scalar_int);
}
301 
// Sweeps block_height over [1, 4] and block_width over [1, 4]; output_stride
// follows the height and input_stride follows the width.
TEST(X64_TRANSPOSE__2X2_SCALAR_INT, bh_1_4_bw_1_4) {
  for (size_t height = 1; height <= 4; height++) {
    for (size_t width = 1; width <= 4; width++) {
      TransposeMicrokernelTester tester;
      tester.input_stride(width).output_stride(height);
      tester.block_width(width).block_height(height).iterations(1);
      tester.Test(xnn_x64_transpose_ukernel__2x2_scalar_int);
    }
  }
}
315 
// Single configuration: block_height 2, block_width 4, input_stride 4, output_stride 2.
TEST(X64_TRANSPOSE__2X2_SCALAR_INT, bh_2_bw_4) {
  TransposeMicrokernelTester tester;
  tester.input_stride(4).output_stride(2).block_width(4).block_height(2).iterations(1);
  tester.Test(xnn_x64_transpose_ukernel__2x2_scalar_int);
}
325 
// Sweeps block_width (with matching input_stride) over [3, 4); block_height and
// output_stride stay at 2.
TEST(X64_TRANSPOSE__2X2_SCALAR_INT, bh_2_bw_3_4) {
  for (size_t width = 3; width < 4; width++) {
    TransposeMicrokernelTester tester;
    tester.input_stride(width).output_stride(2);
    tester.block_width(width).block_height(2).iterations(1);
    tester.Test(xnn_x64_transpose_ukernel__2x2_scalar_int);
  }
}
337 
// Sweeps block_width (with matching input_stride) over [3, 4); block_height and
// output_stride stay at 4.
TEST(X64_TRANSPOSE__2X2_SCALAR_INT, bh_4_bw_3_4) {
  for (size_t width = 3; width < 4; width++) {
    TransposeMicrokernelTester tester;
    tester.input_stride(width).output_stride(4);
    tester.block_width(width).block_height(4).iterations(1);
    tester.Test(xnn_x64_transpose_ukernel__2x2_scalar_int);
  }
}
349 
// Single configuration: block_height 4, block_width 2, input_stride 2, output_stride 4.
TEST(X64_TRANSPOSE__2X2_SCALAR_INT, bh_4_bw_2) {
  TransposeMicrokernelTester tester;
  tester.input_stride(2).output_stride(4).block_width(2).block_height(4).iterations(1);
  tester.Test(xnn_x64_transpose_ukernel__2x2_scalar_int);
}
359 
// Sweeps block_height (with matching output_stride) over [3, 4); block_width and
// input_stride stay at 2.
TEST(X64_TRANSPOSE__2X2_SCALAR_INT, bh_3_4_bw_2) {
  for (size_t height = 3; height < 4; height++) {
    TransposeMicrokernelTester tester;
    tester.input_stride(2).output_stride(height);
    tester.block_width(2).block_height(height).iterations(1);
    tester.Test(xnn_x64_transpose_ukernel__2x2_scalar_int);
  }
}
371 
// Sweeps block_height (with matching output_stride) over [3, 4); block_width and
// input_stride stay at 4.
TEST(X64_TRANSPOSE__2X2_SCALAR_INT, bh_3_4_bw_4) {
  for (size_t height = 3; height < 4; height++) {
    TransposeMicrokernelTester tester;
    tester.input_stride(4).output_stride(height);
    tester.block_width(4).block_height(height).iterations(1);
    tester.Test(xnn_x64_transpose_ukernel__2x2_scalar_int);
  }
}
383 
// Nested sweep: block_height/output_stride over [3, 4) and
// block_width/input_stride over [3, 4).
TEST(X64_TRANSPOSE__2X2_SCALAR_INT, bh_3_4_bw_3_4) {
  for (size_t height = 3; height < 4; height++) {
    for (size_t width = 3; width < 4; width++) {
      TransposeMicrokernelTester tester;
      tester.input_stride(width).output_stride(height);
      tester.block_width(width).block_height(height).iterations(1);
      tester.Test(xnn_x64_transpose_ukernel__2x2_scalar_int);
    }
  }
}
397 
// Single configuration: block_height 2, block_width 2, input_stride 4, output_stride 2.
TEST(X64_TRANSPOSE__2X2_SCALAR_INT, bh_2_bw_2_is_4) {
  TransposeMicrokernelTester tester;
  tester.input_stride(4).output_stride(2).block_width(2).block_height(2).iterations(1);
  tester.Test(xnn_x64_transpose_ukernel__2x2_scalar_int);
}
407 
// Single configuration: block_height 2, block_width 2, input_stride 2, output_stride 4.
TEST(X64_TRANSPOSE__2X2_SCALAR_INT, bh_2_bw_2_os_4) {
  TransposeMicrokernelTester tester;
  tester.input_stride(2).output_stride(4).block_width(2).block_height(2).iterations(1);
  tester.Test(xnn_x64_transpose_ukernel__2x2_scalar_int);
}
417 
// Single configuration: block_height 2, block_width 2, input_stride 4, output_stride 4.
TEST(X64_TRANSPOSE__2X2_SCALAR_INT, bh_2_bw_2_is_4_os_4) {
  TransposeMicrokernelTester tester;
  tester.input_stride(4).output_stride(4).block_width(2).block_height(2).iterations(1);
  tester.Test(xnn_x64_transpose_ukernel__2x2_scalar_int);
}
427 
// Single configuration: block_height 4, block_width 1, input_stride 1, output_stride 4.
TEST(X64_TRANSPOSE__4X1_SCALAR_INT, bh_4_bw_1) {
  TransposeMicrokernelTester tester;
  tester.input_stride(1).output_stride(4).block_width(1).block_height(4).iterations(1);
  tester.Test(xnn_x64_transpose_ukernel__4x1_scalar_int);
}
437 
// Sweeps block_height over [1, 8] and block_width over [1, 2]; output_stride
// follows the height and input_stride follows the width.
TEST(X64_TRANSPOSE__4X1_SCALAR_INT, bh_1_8_bw_1_2) {
  for (size_t height = 1; height <= 8; height++) {
    for (size_t width = 1; width <= 2; width++) {
      TransposeMicrokernelTester tester;
      tester.input_stride(width).output_stride(height);
      tester.block_width(width).block_height(height).iterations(1);
      tester.Test(xnn_x64_transpose_ukernel__4x1_scalar_int);
    }
  }
}
451 
// Single configuration: block_height 4, block_width 2, input_stride 2, output_stride 4.
TEST(X64_TRANSPOSE__4X1_SCALAR_INT, bh_4_bw_2) {
  TransposeMicrokernelTester tester;
  tester.input_stride(2).output_stride(4).block_width(2).block_height(4).iterations(1);
  tester.Test(xnn_x64_transpose_ukernel__4x1_scalar_int);
}
461 
// Sweeps block_width (with matching input_stride) over [2, 2); block_height and
// output_stride stay at 4.
// NOTE(review): that range is empty, so the loop body never runs and this test
// does not invoke the kernel.
TEST(X64_TRANSPOSE__4X1_SCALAR_INT, bh_4_bw_2_2) {
  for (size_t width = 2; width < 2; width++) {
    TransposeMicrokernelTester tester;
    tester.input_stride(width).output_stride(4);
    tester.block_width(width).block_height(4).iterations(1);
    tester.Test(xnn_x64_transpose_ukernel__4x1_scalar_int);
  }
}
473 
// Sweeps block_width (with matching input_stride) over [2, 2); block_height and
// output_stride stay at 8.
// NOTE(review): that range is empty, so the loop body never runs and this test
// does not invoke the kernel.
TEST(X64_TRANSPOSE__4X1_SCALAR_INT, bh_8_bw_2_2) {
  for (size_t width = 2; width < 2; width++) {
    TransposeMicrokernelTester tester;
    tester.input_stride(width).output_stride(8);
    tester.block_width(width).block_height(8).iterations(1);
    tester.Test(xnn_x64_transpose_ukernel__4x1_scalar_int);
  }
}
485 
// Single configuration: block_height 8, block_width 1, input_stride 1, output_stride 8.
TEST(X64_TRANSPOSE__4X1_SCALAR_INT, bh_8_bw_1) {
  TransposeMicrokernelTester tester;
  tester.input_stride(1).output_stride(8).block_width(1).block_height(8).iterations(1);
  tester.Test(xnn_x64_transpose_ukernel__4x1_scalar_int);
}
495 
// Sweeps block_height (with matching output_stride) over [5, 8); block_width and
// input_stride stay at 1.
TEST(X64_TRANSPOSE__4X1_SCALAR_INT, bh_5_8_bw_1) {
  for (size_t height = 5; height < 8; height++) {
    TransposeMicrokernelTester tester;
    tester.input_stride(1).output_stride(height);
    tester.block_width(1).block_height(height).iterations(1);
    tester.Test(xnn_x64_transpose_ukernel__4x1_scalar_int);
  }
}
507 
// Sweeps block_height (with matching output_stride) over [5, 8); block_width and
// input_stride stay at 2.
TEST(X64_TRANSPOSE__4X1_SCALAR_INT, bh_5_8_bw_2) {
  for (size_t height = 5; height < 8; height++) {
    TransposeMicrokernelTester tester;
    tester.input_stride(2).output_stride(height);
    tester.block_width(2).block_height(height).iterations(1);
    tester.Test(xnn_x64_transpose_ukernel__4x1_scalar_int);
  }
}
519 
// Nested sweep: block_height/output_stride over [5, 8) and
// block_width/input_stride over [2, 2).
// NOTE(review): the inner range is empty, so the innermost body never executes
// and this test does not invoke the kernel.
TEST(X64_TRANSPOSE__4X1_SCALAR_INT, bh_5_8_bw_2_2) {
  for (size_t height = 5; height < 8; height++) {
    for (size_t width = 2; width < 2; width++) {
      TransposeMicrokernelTester tester;
      tester.input_stride(width).output_stride(height);
      tester.block_width(width).block_height(height).iterations(1);
      tester.Test(xnn_x64_transpose_ukernel__4x1_scalar_int);
    }
  }
}
533 
// Single configuration: block_height 4, block_width 1, input_stride 2, output_stride 4.
TEST(X64_TRANSPOSE__4X1_SCALAR_INT, bh_4_bw_1_is_2) {
  TransposeMicrokernelTester tester;
  tester.input_stride(2).output_stride(4).block_width(1).block_height(4).iterations(1);
  tester.Test(xnn_x64_transpose_ukernel__4x1_scalar_int);
}
543 
// Single configuration: block_height 4, block_width 1, input_stride 1, output_stride 8.
TEST(X64_TRANSPOSE__4X1_SCALAR_INT, bh_4_bw_1_os_8) {
  TransposeMicrokernelTester tester;
  tester.input_stride(1).output_stride(8).block_width(1).block_height(4).iterations(1);
  tester.Test(xnn_x64_transpose_ukernel__4x1_scalar_int);
}
553 
// Single configuration: block_height 4, block_width 1, input_stride 2, output_stride 8.
TEST(X64_TRANSPOSE__4X1_SCALAR_INT, bh_4_bw_1_is_2_os_8) {
  TransposeMicrokernelTester tester;
  tester.input_stride(2).output_stride(8).block_width(1).block_height(4).iterations(1);
  tester.Test(xnn_x64_transpose_ukernel__4x1_scalar_int);
}
563 
// Single configuration: block_height 4, block_width 2, input_stride 2, output_stride 4.
TEST(X64_TRANSPOSE__4X2_SCALAR_INT, bh_4_bw_2) {
  TransposeMicrokernelTester tester;
  tester.input_stride(2).output_stride(4).block_width(2).block_height(4).iterations(1);
  tester.Test(xnn_x64_transpose_ukernel__4x2_scalar_int);
}
573 
// Sweeps block_height over [1, 8] and block_width over [1, 4]; output_stride
// follows the height and input_stride follows the width.
TEST(X64_TRANSPOSE__4X2_SCALAR_INT, bh_1_8_bw_1_4) {
  for (size_t height = 1; height <= 8; height++) {
    for (size_t width = 1; width <= 4; width++) {
      TransposeMicrokernelTester tester;
      tester.input_stride(width).output_stride(height);
      tester.block_width(width).block_height(height).iterations(1);
      tester.Test(xnn_x64_transpose_ukernel__4x2_scalar_int);
    }
  }
}
587 
// Single configuration: block_height 4, block_width 4, input_stride 4, output_stride 4.
TEST(X64_TRANSPOSE__4X2_SCALAR_INT, bh_4_bw_4) {
  TransposeMicrokernelTester tester;
  tester.input_stride(4).output_stride(4).block_width(4).block_height(4).iterations(1);
  tester.Test(xnn_x64_transpose_ukernel__4x2_scalar_int);
}
597 
// Sweeps block_width (with matching input_stride) over [3, 4); block_height and
// output_stride stay at 4.
TEST(X64_TRANSPOSE__4X2_SCALAR_INT, bh_4_bw_3_4) {
  for (size_t width = 3; width < 4; width++) {
    TransposeMicrokernelTester tester;
    tester.input_stride(width).output_stride(4);
    tester.block_width(width).block_height(4).iterations(1);
    tester.Test(xnn_x64_transpose_ukernel__4x2_scalar_int);
  }
}
609 
// Sweeps block_width (with matching input_stride) over [3, 4); block_height and
// output_stride stay at 8.
TEST(X64_TRANSPOSE__4X2_SCALAR_INT, bh_8_bw_3_4) {
  for (size_t width = 3; width < 4; width++) {
    TransposeMicrokernelTester tester;
    tester.input_stride(width).output_stride(8);
    tester.block_width(width).block_height(8).iterations(1);
    tester.Test(xnn_x64_transpose_ukernel__4x2_scalar_int);
  }
}
621 
// Single configuration: block_height 8, block_width 2, input_stride 2, output_stride 8.
TEST(X64_TRANSPOSE__4X2_SCALAR_INT, bh_8_bw_2) {
  TransposeMicrokernelTester tester;
  tester.input_stride(2).output_stride(8).block_width(2).block_height(8).iterations(1);
  tester.Test(xnn_x64_transpose_ukernel__4x2_scalar_int);
}
631 
// Sweeps block_height (with matching output_stride) over [5, 8); block_width and
// input_stride stay at 2.
TEST(X64_TRANSPOSE__4X2_SCALAR_INT, bh_5_8_bw_2) {
  for (size_t height = 5; height < 8; height++) {
    TransposeMicrokernelTester tester;
    tester.input_stride(2).output_stride(height);
    tester.block_width(2).block_height(height).iterations(1);
    tester.Test(xnn_x64_transpose_ukernel__4x2_scalar_int);
  }
}
643 
// Sweeps block_height (with matching output_stride) over [5, 8); block_width and
// input_stride stay at 4.
TEST(X64_TRANSPOSE__4X2_SCALAR_INT, bh_5_8_bw_4) {
  for (size_t height = 5; height < 8; height++) {
    TransposeMicrokernelTester tester;
    tester.input_stride(4).output_stride(height);
    tester.block_width(4).block_height(height).iterations(1);
    tester.Test(xnn_x64_transpose_ukernel__4x2_scalar_int);
  }
}
655 
// Nested sweep: block_height/output_stride over [5, 8) and
// block_width/input_stride over [3, 4).
TEST(X64_TRANSPOSE__4X2_SCALAR_INT, bh_5_8_bw_3_4) {
  for (size_t height = 5; height < 8; height++) {
    for (size_t width = 3; width < 4; width++) {
      TransposeMicrokernelTester tester;
      tester.input_stride(width).output_stride(height);
      tester.block_width(width).block_height(height).iterations(1);
      tester.Test(xnn_x64_transpose_ukernel__4x2_scalar_int);
    }
  }
}
669 
// Single configuration: block_height 4, block_width 2, input_stride 4, output_stride 4.
TEST(X64_TRANSPOSE__4X2_SCALAR_INT, bh_4_bw_2_is_4) {
  TransposeMicrokernelTester tester;
  tester.input_stride(4).output_stride(4).block_width(2).block_height(4).iterations(1);
  tester.Test(xnn_x64_transpose_ukernel__4x2_scalar_int);
}
679 
// Single configuration: block_height 4, block_width 2, input_stride 2, output_stride 8.
TEST(X64_TRANSPOSE__4X2_SCALAR_INT, bh_4_bw_2_os_8) {
  TransposeMicrokernelTester tester;
  tester.input_stride(2).output_stride(8).block_width(2).block_height(4).iterations(1);
  tester.Test(xnn_x64_transpose_ukernel__4x2_scalar_int);
}
689 
// Single configuration: block_height 4, block_width 2, input_stride 4, output_stride 8.
TEST(X64_TRANSPOSE__4X2_SCALAR_INT, bh_4_bw_2_is_4_os_8) {
  TransposeMicrokernelTester tester;
  tester.input_stride(4).output_stride(8).block_width(2).block_height(4).iterations(1);
  tester.Test(xnn_x64_transpose_ukernel__4x2_scalar_int);
}
699 
// Single configuration: block_height 1, block_width 2, input_stride 2, output_stride 1.
TEST(X64_TRANSPOSE__1X2_SCALAR_FLOAT, bh_1_bw_2) {
  TransposeMicrokernelTester tester;
  tester.input_stride(2).output_stride(1).block_width(2).block_height(1).iterations(1);
  tester.Test(xnn_x64_transpose_ukernel__1x2_scalar_float);
}
709 
// Sweeps block_height over [1, 2] and block_width over [1, 4]; output_stride
// follows the height and input_stride follows the width.
TEST(X64_TRANSPOSE__1X2_SCALAR_FLOAT, bh_1_2_bw_1_4) {
  for (size_t height = 1; height <= 2; height++) {
    for (size_t width = 1; width <= 4; width++) {
      TransposeMicrokernelTester tester;
      tester.input_stride(width).output_stride(height);
      tester.block_width(width).block_height(height).iterations(1);
      tester.Test(xnn_x64_transpose_ukernel__1x2_scalar_float);
    }
  }
}
723 
// Single configuration: block_height 1, block_width 4, input_stride 4, output_stride 1.
TEST(X64_TRANSPOSE__1X2_SCALAR_FLOAT, bh_1_bw_4) {
  TransposeMicrokernelTester tester;
  tester.input_stride(4).output_stride(1).block_width(4).block_height(1).iterations(1);
  tester.Test(xnn_x64_transpose_ukernel__1x2_scalar_float);
}
733 
// Sweeps block_width (with matching input_stride) over [3, 4); block_height and
// output_stride stay at 1.
TEST(X64_TRANSPOSE__1X2_SCALAR_FLOAT, bh_1_bw_3_4) {
  for (size_t width = 3; width < 4; width++) {
    TransposeMicrokernelTester tester;
    tester.input_stride(width).output_stride(1);
    tester.block_width(width).block_height(1).iterations(1);
    tester.Test(xnn_x64_transpose_ukernel__1x2_scalar_float);
  }
}
745 
// Sweeps block_width (with matching input_stride) over [3, 4); block_height and
// output_stride stay at 2.
TEST(X64_TRANSPOSE__1X2_SCALAR_FLOAT, bh_2_bw_3_4) {
  for (size_t width = 3; width < 4; width++) {
    TransposeMicrokernelTester tester;
    tester.input_stride(width).output_stride(2);
    tester.block_width(width).block_height(2).iterations(1);
    tester.Test(xnn_x64_transpose_ukernel__1x2_scalar_float);
  }
}
757 
// Single configuration: block_height 2, block_width 2, input_stride 2, output_stride 2.
TEST(X64_TRANSPOSE__1X2_SCALAR_FLOAT, bh_2_bw_2) {
  TransposeMicrokernelTester tester;
  tester.input_stride(2).output_stride(2).block_width(2).block_height(2).iterations(1);
  tester.Test(xnn_x64_transpose_ukernel__1x2_scalar_float);
}
767 
// Sweeps block_height (with matching output_stride) over [2, 2); block_width and
// input_stride stay at 2.
// NOTE(review): that range is empty, so the loop body never runs and this test
// does not invoke the kernel.
TEST(X64_TRANSPOSE__1X2_SCALAR_FLOAT, bh_2_2_bw_2) {
  for (size_t height = 2; height < 2; height++) {
    TransposeMicrokernelTester tester;
    tester.input_stride(2).output_stride(height);
    tester.block_width(2).block_height(height).iterations(1);
    tester.Test(xnn_x64_transpose_ukernel__1x2_scalar_float);
  }
}
779 
// Sweeps block_height (with matching output_stride) over [2, 2); block_width and
// input_stride stay at 4.
// NOTE(review): that range is empty, so the loop body never runs and this test
// does not invoke the kernel.
TEST(X64_TRANSPOSE__1X2_SCALAR_FLOAT, bh_2_2_bw_4) {
  for (size_t height = 2; height < 2; height++) {
    TransposeMicrokernelTester tester;
    tester.input_stride(4).output_stride(height);
    tester.block_width(4).block_height(height).iterations(1);
    tester.Test(xnn_x64_transpose_ukernel__1x2_scalar_float);
  }
}
791 
// Nested sweep: block_height/output_stride over [2, 2) and
// block_width/input_stride over [3, 4).
// NOTE(review): the outer range is empty, so nothing inside executes and this
// test does not invoke the kernel.
TEST(X64_TRANSPOSE__1X2_SCALAR_FLOAT, bh_2_2_bw_3_4) {
  for (size_t height = 2; height < 2; height++) {
    for (size_t width = 3; width < 4; width++) {
      TransposeMicrokernelTester tester;
      tester.input_stride(width).output_stride(height);
      tester.block_width(width).block_height(height).iterations(1);
      tester.Test(xnn_x64_transpose_ukernel__1x2_scalar_float);
    }
  }
}
805 
// Single configuration: block_height 1, block_width 2, input_stride 4, output_stride 1.
TEST(X64_TRANSPOSE__1X2_SCALAR_FLOAT, bh_1_bw_2_is_4) {
  TransposeMicrokernelTester tester;
  tester.input_stride(4).output_stride(1).block_width(2).block_height(1).iterations(1);
  tester.Test(xnn_x64_transpose_ukernel__1x2_scalar_float);
}
815 
// Single configuration: block_height 1, block_width 2, input_stride 2, output_stride 2.
TEST(X64_TRANSPOSE__1X2_SCALAR_FLOAT, bh_1_bw_2_os_2) {
  TransposeMicrokernelTester tester;
  tester.input_stride(2).output_stride(2).block_width(2).block_height(1).iterations(1);
  tester.Test(xnn_x64_transpose_ukernel__1x2_scalar_float);
}
825 
// Single configuration: block_height 1, block_width 2, input_stride 4, output_stride 2.
TEST(X64_TRANSPOSE__1X2_SCALAR_FLOAT, bh_1_bw_2_is_4_os_2) {
  TransposeMicrokernelTester tester;
  tester.input_stride(4).output_stride(2).block_width(2).block_height(1).iterations(1);
  tester.Test(xnn_x64_transpose_ukernel__1x2_scalar_float);
}
835 
// Single configuration: block_height 2, block_width 1, input_stride 1, output_stride 2.
TEST(X64_TRANSPOSE__2X1_SCALAR_FLOAT, bh_2_bw_1) {
  TransposeMicrokernelTester tester;
  tester.input_stride(1).output_stride(2).block_width(1).block_height(2).iterations(1);
  tester.Test(xnn_x64_transpose_ukernel__2x1_scalar_float);
}
845 
// Sweeps block_height over [1, 4] and block_width over [1, 2]; output_stride
// follows the height and input_stride follows the width.
TEST(X64_TRANSPOSE__2X1_SCALAR_FLOAT, bh_1_4_bw_1_2) {
  for (size_t height = 1; height <= 4; height++) {
    for (size_t width = 1; width <= 2; width++) {
      TransposeMicrokernelTester tester;
      tester.input_stride(width).output_stride(height);
      tester.block_width(width).block_height(height).iterations(1);
      tester.Test(xnn_x64_transpose_ukernel__2x1_scalar_float);
    }
  }
}
859 
// Single configuration: block_height 2, block_width 2, input_stride 2, output_stride 2.
TEST(X64_TRANSPOSE__2X1_SCALAR_FLOAT, bh_2_bw_2) {
  TransposeMicrokernelTester tester;
  tester.input_stride(2).output_stride(2).block_width(2).block_height(2).iterations(1);
  tester.Test(xnn_x64_transpose_ukernel__2x1_scalar_float);
}
869 
// Sweeps block_width (with matching input_stride) over [2, 2); block_height and
// output_stride stay at 2.
// NOTE(review): that range is empty, so the loop body never runs and this test
// does not invoke the kernel.
TEST(X64_TRANSPOSE__2X1_SCALAR_FLOAT, bh_2_bw_2_2) {
  for (size_t width = 2; width < 2; width++) {
    TransposeMicrokernelTester tester;
    tester.input_stride(width).output_stride(2);
    tester.block_width(width).block_height(2).iterations(1);
    tester.Test(xnn_x64_transpose_ukernel__2x1_scalar_float);
  }
}
881 
// Sweeps block_width (with matching input_stride) over [2, 2); block_height and
// output_stride stay at 4.
// NOTE(review): that range is empty, so the loop body never runs and this test
// does not invoke the kernel.
TEST(X64_TRANSPOSE__2X1_SCALAR_FLOAT, bh_4_bw_2_2) {
  for (size_t width = 2; width < 2; width++) {
    TransposeMicrokernelTester tester;
    tester.input_stride(width).output_stride(4);
    tester.block_width(width).block_height(4).iterations(1);
    tester.Test(xnn_x64_transpose_ukernel__2x1_scalar_float);
  }
}
893 
// Single configuration: block_height 4, block_width 1, input_stride 1, output_stride 4.
TEST(X64_TRANSPOSE__2X1_SCALAR_FLOAT, bh_4_bw_1) {
  TransposeMicrokernelTester tester;
  tester.input_stride(1).output_stride(4).block_width(1).block_height(4).iterations(1);
  tester.Test(xnn_x64_transpose_ukernel__2x1_scalar_float);
}
903 
// Sweeps block_height (with matching output_stride) over [3, 4); block_width and
// input_stride stay at 1.
TEST(X64_TRANSPOSE__2X1_SCALAR_FLOAT, bh_3_4_bw_1) {
  for (size_t height = 3; height < 4; height++) {
    TransposeMicrokernelTester tester;
    tester.input_stride(1).output_stride(height);
    tester.block_width(1).block_height(height).iterations(1);
    tester.Test(xnn_x64_transpose_ukernel__2x1_scalar_float);
  }
}
915 
// Sweeps block_height (with matching output_stride) over [3, 4); block_width and
// input_stride stay at 2.
TEST(X64_TRANSPOSE__2X1_SCALAR_FLOAT, bh_3_4_bw_2) {
  for (size_t height = 3; height < 4; height++) {
    TransposeMicrokernelTester tester;
    tester.input_stride(2).output_stride(height);
    tester.block_width(2).block_height(height).iterations(1);
    tester.Test(xnn_x64_transpose_ukernel__2x1_scalar_float);
  }
}
927 
// Nested sweep: block_height/output_stride over [3, 4) and
// block_width/input_stride over [2, 2).
// NOTE(review): the inner range is empty, so the innermost body never executes
// and this test does not invoke the kernel.
TEST(X64_TRANSPOSE__2X1_SCALAR_FLOAT, bh_3_4_bw_2_2) {
  for (size_t height = 3; height < 4; height++) {
    for (size_t width = 2; width < 2; width++) {
      TransposeMicrokernelTester tester;
      tester.input_stride(width).output_stride(height);
      tester.block_width(width).block_height(height).iterations(1);
      tester.Test(xnn_x64_transpose_ukernel__2x1_scalar_float);
    }
  }
}
941 
// Single configuration: block_height 2, block_width 1, input_stride 2, output_stride 2.
TEST(X64_TRANSPOSE__2X1_SCALAR_FLOAT, bh_2_bw_1_is_2) {
  TransposeMicrokernelTester tester;
  tester.input_stride(2).output_stride(2).block_width(1).block_height(2).iterations(1);
  tester.Test(xnn_x64_transpose_ukernel__2x1_scalar_float);
}
951 
// Single configuration: block_height 2, block_width 1, input_stride 1, output_stride 4.
TEST(X64_TRANSPOSE__2X1_SCALAR_FLOAT, bh_2_bw_1_os_4) {
  TransposeMicrokernelTester tester;
  tester.input_stride(1).output_stride(4).block_width(1).block_height(2).iterations(1);
  tester.Test(xnn_x64_transpose_ukernel__2x1_scalar_float);
}
961 
// Single configuration: block_height 2, block_width 1, input_stride 2, output_stride 4.
TEST(X64_TRANSPOSE__2X1_SCALAR_FLOAT, bh_2_bw_1_is_2_os_4) {
  TransposeMicrokernelTester tester;
  tester.input_stride(2).output_stride(4).block_width(1).block_height(2).iterations(1);
  tester.Test(xnn_x64_transpose_ukernel__2x1_scalar_float);
}
971 
// Single configuration: block_height 2, block_width 2, input_stride 2, output_stride 2.
TEST(X64_TRANSPOSE__2X2_SCALAR_FLOAT, bh_2_bw_2) {
  TransposeMicrokernelTester tester;
  tester.input_stride(2).output_stride(2).block_width(2).block_height(2).iterations(1);
  tester.Test(xnn_x64_transpose_ukernel__2x2_scalar_float);
}
981 
TEST(X64_TRANSPOSE__2X2_SCALAR_FLOAT, bh_1_4_bw_1_4) {
  // Exhaustive sweep over block heights 1..4 and block widths 1..4 (both
  // inclusive); strides follow the block dimensions.
  for (size_t height = 1; height <= 4; ++height) {
    for (size_t width = 1; width <= 4; ++width) {
      TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .iterations(1)
        .Test(xnn_x64_transpose_ukernel__2x2_scalar_float);
    }
  }
}
995 
TEST(X64_TRANSPOSE__2X2_SCALAR_FLOAT, bh_2_bw_4) {
  // Block of height 2 and width 4; the input stride equals the width and the
  // output stride equals the height.
  const size_t width = 4;
  const size_t height = 2;
  TransposeMicrokernelTester()
    .input_stride(width)
    .output_stride(height)
    .block_width(width)
    .block_height(height)
    .iterations(1)
    .Test(xnn_x64_transpose_ukernel__2x2_scalar_float);
}
1005 
TEST(X64_TRANSPOSE__2X2_SCALAR_FLOAT, bh_2_bw_3_4) {
  // Sweeps block widths 3 <= width < 4 at a fixed block height of 2; the
  // input stride follows the width and the output stride equals the height.
  const size_t height = 2;
  for (size_t width = 3; width < 4; ++width) {
    TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .iterations(1)
      .Test(xnn_x64_transpose_ukernel__2x2_scalar_float);
  }
}
1017 
TEST(X64_TRANSPOSE__2X2_SCALAR_FLOAT, bh_4_bw_3_4) {
  // Sweeps block widths 3 <= width < 4 at a fixed block height of 4; the
  // input stride follows the width and the output stride equals the height.
  const size_t height = 4;
  for (size_t width = 3; width < 4; ++width) {
    TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .iterations(1)
      .Test(xnn_x64_transpose_ukernel__2x2_scalar_float);
  }
}
1029 
TEST(X64_TRANSPOSE__2X2_SCALAR_FLOAT, bh_4_bw_2) {
  // Block of height 4 and width 2; the input stride equals the width and the
  // output stride equals the height.
  const size_t width = 2;
  const size_t height = 4;
  TransposeMicrokernelTester()
    .input_stride(width)
    .output_stride(height)
    .block_width(width)
    .block_height(height)
    .iterations(1)
    .Test(xnn_x64_transpose_ukernel__2x2_scalar_float);
}
1039 
TEST(X64_TRANSPOSE__2X2_SCALAR_FLOAT, bh_3_4_bw_2) {
  // Sweeps block heights 3 <= height < 4 at a fixed block width of 2; the
  // input stride equals the width and the output stride follows the height.
  const size_t width = 2;
  for (size_t height = 3; height < 4; ++height) {
    TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .iterations(1)
      .Test(xnn_x64_transpose_ukernel__2x2_scalar_float);
  }
}
1051 
TEST(X64_TRANSPOSE__2X2_SCALAR_FLOAT, bh_3_4_bw_4) {
  // Sweeps block heights 3 <= height < 4 at a fixed block width of 4; the
  // input stride equals the width and the output stride follows the height.
  const size_t width = 4;
  for (size_t height = 3; height < 4; ++height) {
    TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .iterations(1)
      .Test(xnn_x64_transpose_ukernel__2x2_scalar_float);
  }
}
1063 
TEST(X64_TRANSPOSE__2X2_SCALAR_FLOAT, bh_3_4_bw_3_4) {
  // Sweeps block heights 3 <= height < 4 and block widths 3 <= width < 4;
  // strides follow the block dimensions.
  for (size_t height = 3; height < 4; ++height) {
    for (size_t width = 3; width < 4; ++width) {
      TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .iterations(1)
        .Test(xnn_x64_transpose_ukernel__2x2_scalar_float);
    }
  }
}
1077 
TEST(X64_TRANSPOSE__2X2_SCALAR_FLOAT, bh_2_bw_2_is_4) {
  // Block of height 2 and width 2 with an input stride of 4 (larger than the
  // block width); the output stride equals the block height.
  const size_t width = 2;
  const size_t height = 2;
  TransposeMicrokernelTester()
    .input_stride(4)
    .output_stride(height)
    .block_width(width)
    .block_height(height)
    .iterations(1)
    .Test(xnn_x64_transpose_ukernel__2x2_scalar_float);
}
1087 
TEST(X64_TRANSPOSE__2X2_SCALAR_FLOAT, bh_2_bw_2_os_4) {
  // Block of height 2 and width 2 with an output stride of 4 (larger than the
  // block height); the input stride equals the block width.
  const size_t width = 2;
  const size_t height = 2;
  TransposeMicrokernelTester()
    .input_stride(width)
    .output_stride(4)
    .block_width(width)
    .block_height(height)
    .iterations(1)
    .Test(xnn_x64_transpose_ukernel__2x2_scalar_float);
}
1097 
TEST(X64_TRANSPOSE__2X2_SCALAR_FLOAT, bh_2_bw_2_is_4_os_4) {
  // Block of height 2 and width 2 where both strides exceed the block
  // dimensions: input stride 4 and output stride 4.
  const size_t width = 2;
  const size_t height = 2;
  TransposeMicrokernelTester()
    .input_stride(4)
    .output_stride(4)
    .block_width(width)
    .block_height(height)
    .iterations(1)
    .Test(xnn_x64_transpose_ukernel__2x2_scalar_float);
}
1107 
TEST(X64_TRANSPOSE__4X1_SCALAR_FLOAT, bh_4_bw_1) {
  // Block of height 4 and width 1; the input stride equals the width and the
  // output stride equals the height.
  const size_t width = 1;
  const size_t height = 4;
  TransposeMicrokernelTester()
    .input_stride(width)
    .output_stride(height)
    .block_width(width)
    .block_height(height)
    .iterations(1)
    .Test(xnn_x64_transpose_ukernel__4x1_scalar_float);
}
1117 
TEST(X64_TRANSPOSE__4X1_SCALAR_FLOAT, bh_1_8_bw_1_2) {
  // Exhaustive sweep over block heights 1..8 and block widths 1..2 (both
  // inclusive); strides follow the block dimensions.
  for (size_t height = 1; height <= 8; ++height) {
    for (size_t width = 1; width <= 2; ++width) {
      TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .iterations(1)
        .Test(xnn_x64_transpose_ukernel__4x1_scalar_float);
    }
  }
}
1131 
TEST(X64_TRANSPOSE__4X1_SCALAR_FLOAT, bh_4_bw_2) {
  // Block of height 4 and width 2; the input stride equals the width and the
  // output stride equals the height.
  const size_t width = 2;
  const size_t height = 4;
  TransposeMicrokernelTester()
    .input_stride(width)
    .output_stride(height)
    .block_width(width)
    .block_height(height)
    .iterations(1)
    .Test(xnn_x64_transpose_ukernel__4x1_scalar_float);
}
1141 
TEST(X64_TRANSPOSE__4X1_SCALAR_FLOAT, bh_4_bw_2_2) {
  // Sweeps block widths 2 <= width < 2 at a fixed block height of 4; the
  // input stride follows the width and the output stride equals the height.
  // NOTE: the width range [2, 2) is empty, so the loop body never runs and
  // this test does not invoke the microkernel.
  const size_t height = 4;
  for (size_t width = 2; width < 2; ++width) {
    TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .iterations(1)
      .Test(xnn_x64_transpose_ukernel__4x1_scalar_float);
  }
}
1153 
TEST(X64_TRANSPOSE__4X1_SCALAR_FLOAT, bh_8_bw_2_2) {
  // Sweeps block widths 2 <= width < 2 at a fixed block height of 8; the
  // input stride follows the width and the output stride equals the height.
  // NOTE: the width range [2, 2) is empty, so the loop body never runs and
  // this test does not invoke the microkernel.
  const size_t height = 8;
  for (size_t width = 2; width < 2; ++width) {
    TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .iterations(1)
      .Test(xnn_x64_transpose_ukernel__4x1_scalar_float);
  }
}
1165 
TEST(X64_TRANSPOSE__4X1_SCALAR_FLOAT, bh_8_bw_1) {
  // Block of height 8 and width 1; the input stride equals the width and the
  // output stride equals the height.
  const size_t width = 1;
  const size_t height = 8;
  TransposeMicrokernelTester()
    .input_stride(width)
    .output_stride(height)
    .block_width(width)
    .block_height(height)
    .iterations(1)
    .Test(xnn_x64_transpose_ukernel__4x1_scalar_float);
}
1175 
TEST(X64_TRANSPOSE__4X1_SCALAR_FLOAT, bh_5_8_bw_1) {
  // Sweeps block heights 5 <= height < 8 at a fixed block width of 1; the
  // input stride equals the width and the output stride follows the height.
  const size_t width = 1;
  for (size_t height = 5; height < 8; ++height) {
    TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .iterations(1)
      .Test(xnn_x64_transpose_ukernel__4x1_scalar_float);
  }
}
1187 
TEST(X64_TRANSPOSE__4X1_SCALAR_FLOAT, bh_5_8_bw_2) {
  // Sweeps block heights 5 <= height < 8 at a fixed block width of 2; the
  // input stride equals the width and the output stride follows the height.
  const size_t width = 2;
  for (size_t height = 5; height < 8; ++height) {
    TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .iterations(1)
      .Test(xnn_x64_transpose_ukernel__4x1_scalar_float);
  }
}
1199 
TEST(X64_TRANSPOSE__4X1_SCALAR_FLOAT, bh_5_8_bw_2_2) {
  // Sweeps block heights 5 <= height < 8 and block widths 2 <= width < 2;
  // strides follow the block dimensions.
  // NOTE: the width range [2, 2) is empty, so the inner loop body never runs
  // and this test does not invoke the microkernel.
  for (size_t height = 5; height < 8; ++height) {
    for (size_t width = 2; width < 2; ++width) {
      TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .iterations(1)
        .Test(xnn_x64_transpose_ukernel__4x1_scalar_float);
    }
  }
}
1213 
TEST(X64_TRANSPOSE__4X1_SCALAR_FLOAT, bh_4_bw_1_is_2) {
  // Block of height 4 and width 1 with an input stride of 2 (larger than the
  // block width); the output stride equals the block height.
  const size_t width = 1;
  const size_t height = 4;
  TransposeMicrokernelTester()
    .input_stride(2)
    .output_stride(height)
    .block_width(width)
    .block_height(height)
    .iterations(1)
    .Test(xnn_x64_transpose_ukernel__4x1_scalar_float);
}
1223 
TEST(X64_TRANSPOSE__4X1_SCALAR_FLOAT, bh_4_bw_1_os_8) {
  // Block of height 4 and width 1 with an output stride of 8 (larger than the
  // block height); the input stride equals the block width.
  const size_t width = 1;
  const size_t height = 4;
  TransposeMicrokernelTester()
    .input_stride(width)
    .output_stride(8)
    .block_width(width)
    .block_height(height)
    .iterations(1)
    .Test(xnn_x64_transpose_ukernel__4x1_scalar_float);
}
1233 
TEST(X64_TRANSPOSE__4X1_SCALAR_FLOAT, bh_4_bw_1_is_2_os_8) {
  // Block of height 4 and width 1 where both strides exceed the block
  // dimensions: input stride 2 and output stride 8.
  const size_t width = 1;
  const size_t height = 4;
  TransposeMicrokernelTester()
    .input_stride(2)
    .output_stride(8)
    .block_width(width)
    .block_height(height)
    .iterations(1)
    .Test(xnn_x64_transpose_ukernel__4x1_scalar_float);
}
1243 
TEST(X64_TRANSPOSE__4X2_SCALAR_FLOAT, bh_4_bw_2) {
  // Block of height 4 and width 2; the input stride equals the width and the
  // output stride equals the height.
  const size_t width = 2;
  const size_t height = 4;
  TransposeMicrokernelTester()
    .input_stride(width)
    .output_stride(height)
    .block_width(width)
    .block_height(height)
    .iterations(1)
    .Test(xnn_x64_transpose_ukernel__4x2_scalar_float);
}
1253 
TEST(X64_TRANSPOSE__4X2_SCALAR_FLOAT, bh_1_8_bw_1_4) {
  // Exhaustive sweep over block heights 1..8 and block widths 1..4 (both
  // inclusive); strides follow the block dimensions.
  for (size_t height = 1; height <= 8; ++height) {
    for (size_t width = 1; width <= 4; ++width) {
      TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .iterations(1)
        .Test(xnn_x64_transpose_ukernel__4x2_scalar_float);
    }
  }
}
1267 
TEST(X64_TRANSPOSE__4X2_SCALAR_FLOAT, bh_4_bw_4) {
  // Block of height 4 and width 4; the input stride equals the width and the
  // output stride equals the height.
  const size_t width = 4;
  const size_t height = 4;
  TransposeMicrokernelTester()
    .input_stride(width)
    .output_stride(height)
    .block_width(width)
    .block_height(height)
    .iterations(1)
    .Test(xnn_x64_transpose_ukernel__4x2_scalar_float);
}
1277 
TEST(X64_TRANSPOSE__4X2_SCALAR_FLOAT, bh_4_bw_3_4) {
  // Sweeps block widths 3 <= width < 4 at a fixed block height of 4; the
  // input stride follows the width and the output stride equals the height.
  const size_t height = 4;
  for (size_t width = 3; width < 4; ++width) {
    TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .iterations(1)
      .Test(xnn_x64_transpose_ukernel__4x2_scalar_float);
  }
}
1289 
TEST(X64_TRANSPOSE__4X2_SCALAR_FLOAT, bh_8_bw_3_4) {
  // Sweeps block widths 3 <= width < 4 at a fixed block height of 8; the
  // input stride follows the width and the output stride equals the height.
  const size_t height = 8;
  for (size_t width = 3; width < 4; ++width) {
    TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .iterations(1)
      .Test(xnn_x64_transpose_ukernel__4x2_scalar_float);
  }
}
1301 
TEST(X64_TRANSPOSE__4X2_SCALAR_FLOAT, bh_8_bw_2) {
  // Block of height 8 and width 2; the input stride equals the width and the
  // output stride equals the height.
  const size_t width = 2;
  const size_t height = 8;
  TransposeMicrokernelTester()
    .input_stride(width)
    .output_stride(height)
    .block_width(width)
    .block_height(height)
    .iterations(1)
    .Test(xnn_x64_transpose_ukernel__4x2_scalar_float);
}
1311 
TEST(X64_TRANSPOSE__4X2_SCALAR_FLOAT, bh_5_8_bw_2) {
  // Sweeps block heights 5 <= height < 8 at a fixed block width of 2; the
  // input stride equals the width and the output stride follows the height.
  const size_t width = 2;
  for (size_t height = 5; height < 8; ++height) {
    TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .iterations(1)
      .Test(xnn_x64_transpose_ukernel__4x2_scalar_float);
  }
}
1323 
TEST(X64_TRANSPOSE__4X2_SCALAR_FLOAT, bh_5_8_bw_4) {
  // Sweeps block heights 5 <= height < 8 at a fixed block width of 4; the
  // input stride equals the width and the output stride follows the height.
  const size_t width = 4;
  for (size_t height = 5; height < 8; ++height) {
    TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .iterations(1)
      .Test(xnn_x64_transpose_ukernel__4x2_scalar_float);
  }
}
1335 
TEST(X64_TRANSPOSE__4X2_SCALAR_FLOAT, bh_5_8_bw_3_4) {
  // Sweeps block heights 5 <= height < 8 and block widths 3 <= width < 4;
  // strides follow the block dimensions.
  for (size_t height = 5; height < 8; ++height) {
    for (size_t width = 3; width < 4; ++width) {
      TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .iterations(1)
        .Test(xnn_x64_transpose_ukernel__4x2_scalar_float);
    }
  }
}
1349 
TEST(X64_TRANSPOSE__4X2_SCALAR_FLOAT, bh_4_bw_2_is_4) {
  // Block of height 4 and width 2 with an input stride of 4 (larger than the
  // block width); the output stride equals the block height.
  const size_t width = 2;
  const size_t height = 4;
  TransposeMicrokernelTester()
    .input_stride(4)
    .output_stride(height)
    .block_width(width)
    .block_height(height)
    .iterations(1)
    .Test(xnn_x64_transpose_ukernel__4x2_scalar_float);
}
1359 
TEST(X64_TRANSPOSE__4X2_SCALAR_FLOAT, bh_4_bw_2_os_8) {
  // Block of height 4 and width 2 with an output stride of 8 (larger than the
  // block height); the input stride equals the block width.
  const size_t width = 2;
  const size_t height = 4;
  TransposeMicrokernelTester()
    .input_stride(width)
    .output_stride(8)
    .block_width(width)
    .block_height(height)
    .iterations(1)
    .Test(xnn_x64_transpose_ukernel__4x2_scalar_float);
}
1369 
TEST(X64_TRANSPOSE__4X2_SCALAR_FLOAT, bh_4_bw_2_is_4_os_8) {
  // Block of height 4 and width 2 where both strides exceed the block
  // dimensions: input stride 4 and output stride 8.
  const size_t width = 2;
  const size_t height = 4;
  TransposeMicrokernelTester()
    .input_stride(4)
    .output_stride(8)
    .block_width(width)
    .block_height(height)
    .iterations(1)
    .Test(xnn_x64_transpose_ukernel__4x2_scalar_float);
}
1379 
1380 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(X64_TRANSPOSE__2X2_MULTI_MOV_SSE2, bh_2_bw_2) {
  TEST_REQUIRES_X86_SSE2;
  // Block of height 2 and width 2; the input stride equals the width and the
  // output stride equals the height.
  const size_t width = 2;
  const size_t height = 2;
  TransposeMicrokernelTester()
    .input_stride(width)
    .output_stride(height)
    .block_width(width)
    .block_height(height)
    .iterations(1)
    .Test(xnn_x64_transpose_ukernel__2x2_multi_mov_sse2);
}
1391 
TEST(X64_TRANSPOSE__2X2_MULTI_MOV_SSE2, bh_1_4_bw_1_4) {
  TEST_REQUIRES_X86_SSE2;
  // Exhaustive sweep over block heights 1..4 and block widths 1..4 (both
  // inclusive); strides follow the block dimensions.
  for (size_t height = 1; height <= 4; ++height) {
    for (size_t width = 1; width <= 4; ++width) {
      TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .iterations(1)
        .Test(xnn_x64_transpose_ukernel__2x2_multi_mov_sse2);
    }
  }
}
1406 
TEST(X64_TRANSPOSE__2X2_MULTI_MOV_SSE2, bh_2_bw_4) {
  TEST_REQUIRES_X86_SSE2;
  // Block of height 2 and width 4; the input stride equals the width and the
  // output stride equals the height.
  const size_t width = 4;
  const size_t height = 2;
  TransposeMicrokernelTester()
    .input_stride(width)
    .output_stride(height)
    .block_width(width)
    .block_height(height)
    .iterations(1)
    .Test(xnn_x64_transpose_ukernel__2x2_multi_mov_sse2);
}
1417 
TEST(X64_TRANSPOSE__2X2_MULTI_MOV_SSE2, bh_2_bw_3_4) {
  TEST_REQUIRES_X86_SSE2;
  // Sweeps block widths 3 <= width < 4 at a fixed block height of 2; the
  // input stride follows the width and the output stride equals the height.
  const size_t height = 2;
  for (size_t width = 3; width < 4; ++width) {
    TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .iterations(1)
      .Test(xnn_x64_transpose_ukernel__2x2_multi_mov_sse2);
  }
}
1430 
TEST(X64_TRANSPOSE__2X2_MULTI_MOV_SSE2, bh_4_bw_3_4) {
  TEST_REQUIRES_X86_SSE2;
  // Sweeps block widths 3 <= width < 4 at a fixed block height of 4; the
  // input stride follows the width and the output stride equals the height.
  const size_t height = 4;
  for (size_t width = 3; width < 4; ++width) {
    TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .iterations(1)
      .Test(xnn_x64_transpose_ukernel__2x2_multi_mov_sse2);
  }
}
1443 
TEST(X64_TRANSPOSE__2X2_MULTI_MOV_SSE2, bh_4_bw_2) {
  TEST_REQUIRES_X86_SSE2;
  // Block of height 4 and width 2; the input stride equals the width and the
  // output stride equals the height.
  const size_t width = 2;
  const size_t height = 4;
  TransposeMicrokernelTester()
    .input_stride(width)
    .output_stride(height)
    .block_width(width)
    .block_height(height)
    .iterations(1)
    .Test(xnn_x64_transpose_ukernel__2x2_multi_mov_sse2);
}
1454 
TEST(X64_TRANSPOSE__2X2_MULTI_MOV_SSE2, bh_3_4_bw_2) {
  TEST_REQUIRES_X86_SSE2;
  // Sweeps block heights 3 <= height < 4 at a fixed block width of 2; the
  // input stride equals the width and the output stride follows the height.
  const size_t width = 2;
  for (size_t height = 3; height < 4; ++height) {
    TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .iterations(1)
      .Test(xnn_x64_transpose_ukernel__2x2_multi_mov_sse2);
  }
}
1467 
TEST(X64_TRANSPOSE__2X2_MULTI_MOV_SSE2, bh_3_4_bw_4) {
  TEST_REQUIRES_X86_SSE2;
  // Sweeps block heights 3 <= height < 4 at a fixed block width of 4; the
  // input stride equals the width and the output stride follows the height.
  const size_t width = 4;
  for (size_t height = 3; height < 4; ++height) {
    TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .iterations(1)
      .Test(xnn_x64_transpose_ukernel__2x2_multi_mov_sse2);
  }
}
1480 
TEST(X64_TRANSPOSE__2X2_MULTI_MOV_SSE2, bh_3_4_bw_3_4) {
  TEST_REQUIRES_X86_SSE2;
  // Sweeps block heights 3 <= height < 4 and block widths 3 <= width < 4;
  // strides follow the block dimensions.
  for (size_t height = 3; height < 4; ++height) {
    for (size_t width = 3; width < 4; ++width) {
      TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .iterations(1)
        .Test(xnn_x64_transpose_ukernel__2x2_multi_mov_sse2);
    }
  }
}
1495 
TEST(X64_TRANSPOSE__2X2_MULTI_MOV_SSE2, bh_2_bw_2_is_4) {
  TEST_REQUIRES_X86_SSE2;
  // Block of height 2 and width 2 with an input stride of 4 (larger than the
  // block width); the output stride equals the block height.
  const size_t width = 2;
  const size_t height = 2;
  TransposeMicrokernelTester()
    .input_stride(4)
    .output_stride(height)
    .block_width(width)
    .block_height(height)
    .iterations(1)
    .Test(xnn_x64_transpose_ukernel__2x2_multi_mov_sse2);
}
1506 
TEST(X64_TRANSPOSE__2X2_MULTI_MOV_SSE2, bh_2_bw_2_os_4) {
  TEST_REQUIRES_X86_SSE2;
  // Block of height 2 and width 2 with an output stride of 4 (larger than the
  // block height); the input stride equals the block width.
  const size_t width = 2;
  const size_t height = 2;
  TransposeMicrokernelTester()
    .input_stride(width)
    .output_stride(4)
    .block_width(width)
    .block_height(height)
    .iterations(1)
    .Test(xnn_x64_transpose_ukernel__2x2_multi_mov_sse2);
}
1517 
TEST(X64_TRANSPOSE__2X2_MULTI_MOV_SSE2, bh_2_bw_2_is_4_os_4) {
  TEST_REQUIRES_X86_SSE2;
  // Block of height 2 and width 2 where both strides exceed the block
  // dimensions: input stride 4 and output stride 4.
  const size_t width = 2;
  const size_t height = 2;
  TransposeMicrokernelTester()
    .input_stride(4)
    .output_stride(4)
    .block_width(width)
    .block_height(height)
    .iterations(1)
    .Test(xnn_x64_transpose_ukernel__2x2_multi_mov_sse2);
}
1528 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
1529 
1530 
1531 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(X64_TRANSPOSE__2X2_MULTI_MULTI_SSE2, bh_2_bw_2) {
  TEST_REQUIRES_X86_SSE2;
  // Block of height 2 and width 2; the input stride equals the width and the
  // output stride equals the height.
  const size_t width = 2;
  const size_t height = 2;
  TransposeMicrokernelTester()
    .input_stride(width)
    .output_stride(height)
    .block_width(width)
    .block_height(height)
    .iterations(1)
    .Test(xnn_x64_transpose_ukernel__2x2_multi_multi_sse2);
}
1542 
TEST(X64_TRANSPOSE__2X2_MULTI_MULTI_SSE2, bh_1_4_bw_1_4) {
  TEST_REQUIRES_X86_SSE2;
  // Exhaustive sweep over block heights 1..4 and block widths 1..4 (both
  // inclusive); strides follow the block dimensions.
  for (size_t height = 1; height <= 4; ++height) {
    for (size_t width = 1; width <= 4; ++width) {
      TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .iterations(1)
        .Test(xnn_x64_transpose_ukernel__2x2_multi_multi_sse2);
    }
  }
}
1557 
TEST(X64_TRANSPOSE__2X2_MULTI_MULTI_SSE2, bh_2_bw_4) {
  TEST_REQUIRES_X86_SSE2;
  // Block of height 2 and width 4; the input stride equals the width and the
  // output stride equals the height.
  const size_t width = 4;
  const size_t height = 2;
  TransposeMicrokernelTester()
    .input_stride(width)
    .output_stride(height)
    .block_width(width)
    .block_height(height)
    .iterations(1)
    .Test(xnn_x64_transpose_ukernel__2x2_multi_multi_sse2);
}
1568 
TEST(X64_TRANSPOSE__2X2_MULTI_MULTI_SSE2, bh_2_bw_3_4) {
  TEST_REQUIRES_X86_SSE2;
  // Sweeps block widths 3 <= width < 4 at a fixed block height of 2; the
  // input stride follows the width and the output stride equals the height.
  const size_t height = 2;
  for (size_t width = 3; width < 4; ++width) {
    TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .iterations(1)
      .Test(xnn_x64_transpose_ukernel__2x2_multi_multi_sse2);
  }
}
1581 
TEST(X64_TRANSPOSE__2X2_MULTI_MULTI_SSE2, bh_4_bw_3_4) {
  TEST_REQUIRES_X86_SSE2;
  // Sweeps block widths 3 <= width < 4 at a fixed block height of 4; the
  // input stride follows the width and the output stride equals the height.
  const size_t height = 4;
  for (size_t width = 3; width < 4; ++width) {
    TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .iterations(1)
      .Test(xnn_x64_transpose_ukernel__2x2_multi_multi_sse2);
  }
}
1594 
TEST(X64_TRANSPOSE__2X2_MULTI_MULTI_SSE2, bh_4_bw_2) {
  TEST_REQUIRES_X86_SSE2;
  // Block of height 4 and width 2; the input stride equals the width and the
  // output stride equals the height.
  const size_t width = 2;
  const size_t height = 4;
  TransposeMicrokernelTester()
    .input_stride(width)
    .output_stride(height)
    .block_width(width)
    .block_height(height)
    .iterations(1)
    .Test(xnn_x64_transpose_ukernel__2x2_multi_multi_sse2);
}
1605 
TEST(X64_TRANSPOSE__2X2_MULTI_MULTI_SSE2, bh_3_4_bw_2) {
  TEST_REQUIRES_X86_SSE2;
  // Sweeps block heights 3 <= height < 4 at a fixed block width of 2; the
  // input stride equals the width and the output stride follows the height.
  const size_t width = 2;
  for (size_t height = 3; height < 4; ++height) {
    TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .iterations(1)
      .Test(xnn_x64_transpose_ukernel__2x2_multi_multi_sse2);
  }
}
1618 
TEST(X64_TRANSPOSE__2X2_MULTI_MULTI_SSE2, bh_3_4_bw_4) {
  TEST_REQUIRES_X86_SSE2;
  // Sweeps block heights 3 <= height < 4 at a fixed block width of 4; the
  // input stride equals the width and the output stride follows the height.
  const size_t width = 4;
  for (size_t height = 3; height < 4; ++height) {
    TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .iterations(1)
      .Test(xnn_x64_transpose_ukernel__2x2_multi_multi_sse2);
  }
}
1631 
TEST(X64_TRANSPOSE__2X2_MULTI_MULTI_SSE2, bh_3_4_bw_3_4) {
  TEST_REQUIRES_X86_SSE2;
  // Sweeps block heights 3 <= height < 4 and block widths 3 <= width < 4;
  // strides follow the block dimensions.
  for (size_t height = 3; height < 4; ++height) {
    for (size_t width = 3; width < 4; ++width) {
      TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .iterations(1)
        .Test(xnn_x64_transpose_ukernel__2x2_multi_multi_sse2);
    }
  }
}
1646 
TEST(X64_TRANSPOSE__2X2_MULTI_MULTI_SSE2, bh_2_bw_2_is_4) {
  TEST_REQUIRES_X86_SSE2;
  // Block of height 2 and width 2 with an input stride of 4 (larger than the
  // block width); the output stride equals the block height.
  const size_t width = 2;
  const size_t height = 2;
  TransposeMicrokernelTester()
    .input_stride(4)
    .output_stride(height)
    .block_width(width)
    .block_height(height)
    .iterations(1)
    .Test(xnn_x64_transpose_ukernel__2x2_multi_multi_sse2);
}
1657 
TEST(X64_TRANSPOSE__2X2_MULTI_MULTI_SSE2, bh_2_bw_2_os_4) {
  TEST_REQUIRES_X86_SSE2;
  // Block of height 2 and width 2 with an output stride of 4 (larger than the
  // block height); the input stride equals the block width.
  const size_t width = 2;
  const size_t height = 2;
  TransposeMicrokernelTester()
    .input_stride(width)
    .output_stride(4)
    .block_width(width)
    .block_height(height)
    .iterations(1)
    .Test(xnn_x64_transpose_ukernel__2x2_multi_multi_sse2);
}
1668 
TEST(X64_TRANSPOSE__2X2_MULTI_MULTI_SSE2, bh_2_bw_2_is_4_os_4) {
  TEST_REQUIRES_X86_SSE2;
  // Block of height 2 and width 2 where both strides exceed the block
  // dimensions: input stride 4 and output stride 4.
  const size_t width = 2;
  const size_t height = 2;
  TransposeMicrokernelTester()
    .input_stride(4)
    .output_stride(4)
    .block_width(width)
    .block_height(height)
    .iterations(1)
    .Test(xnn_x64_transpose_ukernel__2x2_multi_multi_sse2);
}
1679 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
1680 
1681 
1682 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(X64_TRANSPOSE__2X2_MULTI_SWITCH_SSE2, bh_2_bw_2) {
  TEST_REQUIRES_X86_SSE2;
  // Block of height 2 and width 2; the input stride equals the width and the
  // output stride equals the height.
  const size_t width = 2;
  const size_t height = 2;
  TransposeMicrokernelTester()
    .input_stride(width)
    .output_stride(height)
    .block_width(width)
    .block_height(height)
    .iterations(1)
    .Test(xnn_x64_transpose_ukernel__2x2_multi_switch_sse2);
}
1693 
TEST(X64_TRANSPOSE__2X2_MULTI_SWITCH_SSE2, bh_1_4_bw_1_4) {
  TEST_REQUIRES_X86_SSE2;
  // Exhaustive sweep over block heights 1..4 and block widths 1..4 (both
  // inclusive); strides follow the block dimensions.
  for (size_t height = 1; height <= 4; ++height) {
    for (size_t width = 1; width <= 4; ++width) {
      TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .iterations(1)
        .Test(xnn_x64_transpose_ukernel__2x2_multi_switch_sse2);
    }
  }
}
1708 
TEST(X64_TRANSPOSE__2X2_MULTI_SWITCH_SSE2, bh_2_bw_4) {
  TEST_REQUIRES_X86_SSE2;
  // Block of height 2 and width 4; the input stride equals the width and the
  // output stride equals the height.
  const size_t width = 4;
  const size_t height = 2;
  TransposeMicrokernelTester()
    .input_stride(width)
    .output_stride(height)
    .block_width(width)
    .block_height(height)
    .iterations(1)
    .Test(xnn_x64_transpose_ukernel__2x2_multi_switch_sse2);
}
1719 
TEST(X64_TRANSPOSE__2X2_MULTI_SWITCH_SSE2, bh_2_bw_3_4) {
  TEST_REQUIRES_X86_SSE2;
  // Sweeps block widths 3 <= width < 4 at a fixed block height of 2; the
  // input stride follows the width and the output stride equals the height.
  const size_t height = 2;
  for (size_t width = 3; width < 4; ++width) {
    TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .iterations(1)
      .Test(xnn_x64_transpose_ukernel__2x2_multi_switch_sse2);
  }
}
1732 
TEST(X64_TRANSPOSE__2X2_MULTI_SWITCH_SSE2, bh_4_bw_3_4) {
  TEST_REQUIRES_X86_SSE2;
  // Sweeps block widths 3 <= width < 4 at a fixed block height of 4; the
  // input stride follows the width and the output stride equals the height.
  const size_t height = 4;
  for (size_t width = 3; width < 4; ++width) {
    TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .iterations(1)
      .Test(xnn_x64_transpose_ukernel__2x2_multi_switch_sse2);
  }
}
1745 
TEST(X64_TRANSPOSE__2X2_MULTI_SWITCH_SSE2, bh_4_bw_2) {
  TEST_REQUIRES_X86_SSE2;
  // Block of height 4 and width 2; the input stride equals the width and the
  // output stride equals the height.
  const size_t width = 2;
  const size_t height = 4;
  TransposeMicrokernelTester()
    .input_stride(width)
    .output_stride(height)
    .block_width(width)
    .block_height(height)
    .iterations(1)
    .Test(xnn_x64_transpose_ukernel__2x2_multi_switch_sse2);
}
1756 
TEST(X64_TRANSPOSE__2X2_MULTI_SWITCH_SSE2, bh_3_4_bw_2) {
  TEST_REQUIRES_X86_SSE2;
  // Sweeps block heights 3 <= height < 4 at a fixed block width of 2; the
  // input stride equals the width and the output stride follows the height.
  const size_t width = 2;
  for (size_t height = 3; height < 4; ++height) {
    TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .iterations(1)
      .Test(xnn_x64_transpose_ukernel__2x2_multi_switch_sse2);
  }
}
1769 
TEST(X64_TRANSPOSE__2X2_MULTI_SWITCH_SSE2, bh_3_4_bw_4) {
  TEST_REQUIRES_X86_SSE2;
  // Sweeps block heights 3 <= height < 4 at a fixed block width of 4; the
  // input stride equals the width and the output stride follows the height.
  const size_t width = 4;
  for (size_t height = 3; height < 4; ++height) {
    TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .iterations(1)
      .Test(xnn_x64_transpose_ukernel__2x2_multi_switch_sse2);
  }
}
1782 
TEST(X64_TRANSPOSE__2X2_MULTI_SWITCH_SSE2, bh_3_4_bw_3_4) {
  TEST_REQUIRES_X86_SSE2;
  // Sweeps block heights 3 <= height < 4 and block widths 3 <= width < 4;
  // strides follow the block dimensions.
  for (size_t height = 3; height < 4; ++height) {
    for (size_t width = 3; width < 4; ++width) {
      TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .iterations(1)
        .Test(xnn_x64_transpose_ukernel__2x2_multi_switch_sse2);
    }
  }
}
1797 
TEST(X64_TRANSPOSE__2X2_MULTI_SWITCH_SSE2, bh_2_bw_2_is_4) {
  TEST_REQUIRES_X86_SSE2;
  // Block of height 2 and width 2 with an input stride of 4 (larger than the
  // block width); the output stride equals the block height.
  const size_t width = 2;
  const size_t height = 2;
  TransposeMicrokernelTester()
    .input_stride(4)
    .output_stride(height)
    .block_width(width)
    .block_height(height)
    .iterations(1)
    .Test(xnn_x64_transpose_ukernel__2x2_multi_switch_sse2);
}
1808 
TEST(X64_TRANSPOSE__2X2_MULTI_SWITCH_SSE2, bh_2_bw_2_os_4) {
  TEST_REQUIRES_X86_SSE2;
  // Block of height 2 and width 2 with an output stride of 4 (larger than the
  // block height); the input stride equals the block width.
  const size_t width = 2;
  const size_t height = 2;
  TransposeMicrokernelTester()
    .input_stride(width)
    .output_stride(4)
    .block_width(width)
    .block_height(height)
    .iterations(1)
    .Test(xnn_x64_transpose_ukernel__2x2_multi_switch_sse2);
}
1819 
TEST(X64_TRANSPOSE__2X2_MULTI_SWITCH_SSE2, bh_2_bw_2_is_4_os_4) {
  TEST_REQUIRES_X86_SSE2;
  // Block of height 2 and width 2 where both strides exceed the block
  // dimensions: input stride 4 and output stride 4.
  const size_t width = 2;
  const size_t height = 2;
  TransposeMicrokernelTester()
    .input_stride(4)
    .output_stride(4)
    .block_width(width)
    .block_height(height)
    .iterations(1)
    .Test(xnn_x64_transpose_ukernel__2x2_multi_switch_sse2);
}
1830 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
1831 
1832 
1833 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(X64_TRANSPOSE__2X2_REUSE_MOV_SSE2, bh_2_bw_2) {
  TEST_REQUIRES_X86_SSE2;
  // Block of height 2 and width 2; the input stride equals the width and the
  // output stride equals the height.
  const size_t width = 2;
  const size_t height = 2;
  TransposeMicrokernelTester()
    .input_stride(width)
    .output_stride(height)
    .block_width(width)
    .block_height(height)
    .iterations(1)
    .Test(xnn_x64_transpose_ukernel__2x2_reuse_mov_sse2);
}
1844 
TEST(X64_TRANSPOSE__2X2_REUSE_MOV_SSE2, bh_1_4_bw_1_4) {
  TEST_REQUIRES_X86_SSE2;
  // Exhaustive sweep over block heights 1..4 and block widths 1..4 (both
  // inclusive); strides follow the block dimensions.
  for (size_t height = 1; height <= 4; ++height) {
    for (size_t width = 1; width <= 4; ++width) {
      TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .iterations(1)
        .Test(xnn_x64_transpose_ukernel__2x2_reuse_mov_sse2);
    }
  }
}
1859 
TEST(X64_TRANSPOSE__2X2_REUSE_MOV_SSE2, bh_2_bw_4) {
  TEST_REQUIRES_X86_SSE2;
  // Block of height 2 and width 4; the input stride equals the width and the
  // output stride equals the height.
  const size_t width = 4;
  const size_t height = 2;
  TransposeMicrokernelTester()
    .input_stride(width)
    .output_stride(height)
    .block_width(width)
    .block_height(height)
    .iterations(1)
    .Test(xnn_x64_transpose_ukernel__2x2_reuse_mov_sse2);
}
1870 
TEST(X64_TRANSPOSE__2X2_REUSE_MOV_SSE2, bh_2_bw_3_4) {
  TEST_REQUIRES_X86_SSE2;
  // Sweeps block widths 3 <= width < 4 at a fixed block height of 2; the
  // input stride follows the width and the output stride equals the height.
  const size_t height = 2;
  for (size_t width = 3; width < 4; ++width) {
    TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .iterations(1)
      .Test(xnn_x64_transpose_ukernel__2x2_reuse_mov_sse2);
  }
}
1883 
// Block height 4 with block widths taken from the half-open range [3, 4), so
// only width 3 is run; the input stride follows the width.
TEST(X64_TRANSPOSE__2X2_REUSE_MOV_SSE2, bh_4_bw_3_4) {
  TEST_REQUIRES_X86_SSE2;
  const size_t height = 4;
  for (size_t width = 3; width < 4; width++) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .iterations(1)
        .Test(xnn_x64_transpose_ukernel__2x2_reuse_mov_sse2);
  }
}
1896 
// Block height 4 and block width 2; the input stride equals the width and the
// output stride equals the height.
TEST(X64_TRANSPOSE__2X2_REUSE_MOV_SSE2, bh_4_bw_2) {
  TEST_REQUIRES_X86_SSE2;
  const size_t height = 4;
  const size_t width = 2;
  TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .iterations(1)
      .Test(xnn_x64_transpose_ukernel__2x2_reuse_mov_sse2);
}
1907 
// Block width 2 with block heights taken from the half-open range [3, 4), so
// only height 3 is run; the output stride follows the height.
TEST(X64_TRANSPOSE__2X2_REUSE_MOV_SSE2, bh_3_4_bw_2) {
  TEST_REQUIRES_X86_SSE2;
  const size_t width = 2;
  for (size_t height = 3; height < 4; height++) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .iterations(1)
        .Test(xnn_x64_transpose_ukernel__2x2_reuse_mov_sse2);
  }
}
1920 
// Block width 4 with block heights taken from the half-open range [3, 4), so
// only height 3 is run; the output stride follows the height.
TEST(X64_TRANSPOSE__2X2_REUSE_MOV_SSE2, bh_3_4_bw_4) {
  TEST_REQUIRES_X86_SSE2;
  const size_t width = 4;
  for (size_t height = 3; height < 4; height++) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .iterations(1)
        .Test(xnn_x64_transpose_ukernel__2x2_reuse_mov_sse2);
  }
}
1933 
// Block heights and block widths both taken from the half-open range [3, 4),
// so only the 3x3 combination is run; strides follow the block extents.
TEST(X64_TRANSPOSE__2X2_REUSE_MOV_SSE2, bh_3_4_bw_3_4) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t height = 3; height < 4; height++) {
    for (size_t width = 3; width < 4; width++) {
      TransposeMicrokernelTester()
          .input_stride(width)
          .output_stride(height)
          .block_width(width)
          .block_height(height)
          .iterations(1)
          .Test(xnn_x64_transpose_ukernel__2x2_reuse_mov_sse2);
    }
  }
}
1948 
// Block height 2 and block width 2 with an input stride of 4 (larger than the
// width); the output stride equals the height.
TEST(X64_TRANSPOSE__2X2_REUSE_MOV_SSE2, bh_2_bw_2_is_4) {
  TEST_REQUIRES_X86_SSE2;
  const size_t height = 2;
  const size_t width = 2;
  const size_t wide_input_stride = 4;
  TransposeMicrokernelTester()
      .input_stride(wide_input_stride)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .iterations(1)
      .Test(xnn_x64_transpose_ukernel__2x2_reuse_mov_sse2);
}
1959 
// Block height 2 and block width 2 with an output stride of 4 (larger than
// the height); the input stride equals the width.
TEST(X64_TRANSPOSE__2X2_REUSE_MOV_SSE2, bh_2_bw_2_os_4) {
  TEST_REQUIRES_X86_SSE2;
  const size_t height = 2;
  const size_t width = 2;
  const size_t wide_output_stride = 4;
  TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(wide_output_stride)
      .block_width(width)
      .block_height(height)
      .iterations(1)
      .Test(xnn_x64_transpose_ukernel__2x2_reuse_mov_sse2);
}
1970 
// Block height 2 and block width 2 with both the input stride and the output
// stride set to 4 (larger than the block extents).
TEST(X64_TRANSPOSE__2X2_REUSE_MOV_SSE2, bh_2_bw_2_is_4_os_4) {
  TEST_REQUIRES_X86_SSE2;
  const size_t height = 2;
  const size_t width = 2;
  const size_t wide_stride = 4;
  TransposeMicrokernelTester()
      .input_stride(wide_stride)
      .output_stride(wide_stride)
      .block_width(width)
      .block_height(height)
      .iterations(1)
      .Test(xnn_x64_transpose_ukernel__2x2_reuse_mov_sse2);
}
1981 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
1982 
1983 
1984 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Block height 2 and block width 2; the input stride equals the width and the
// output stride equals the height.
TEST(X64_TRANSPOSE__2X2_REUSE_MULTI_SSE2, bh_2_bw_2) {
  TEST_REQUIRES_X86_SSE2;
  const size_t height = 2;
  const size_t width = 2;
  TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .iterations(1)
      .Test(xnn_x64_transpose_ukernel__2x2_reuse_multi_sse2);
}
1995 
// Sweeps every combination of block height and block width from 1 through 4
// (inclusive); the input stride follows the width and the output stride
// follows the height.
TEST(X64_TRANSPOSE__2X2_REUSE_MULTI_SSE2, bh_1_4_bw_1_4) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t height = 1; height <= 4; height++) {
    for (size_t width = 1; width <= 4; width++) {
      TransposeMicrokernelTester()
          .input_stride(width)
          .output_stride(height)
          .block_width(width)
          .block_height(height)
          .iterations(1)
          .Test(xnn_x64_transpose_ukernel__2x2_reuse_multi_sse2);
    }
  }
}
2010 
// Block height 2 and block width 4; the input stride equals the width and the
// output stride equals the height.
TEST(X64_TRANSPOSE__2X2_REUSE_MULTI_SSE2, bh_2_bw_4) {
  TEST_REQUIRES_X86_SSE2;
  const size_t height = 2;
  const size_t width = 4;
  TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .iterations(1)
      .Test(xnn_x64_transpose_ukernel__2x2_reuse_multi_sse2);
}
2021 
// Block height 2 with block widths taken from the half-open range [3, 4), so
// only width 3 is run; the input stride follows the width.
TEST(X64_TRANSPOSE__2X2_REUSE_MULTI_SSE2, bh_2_bw_3_4) {
  TEST_REQUIRES_X86_SSE2;
  const size_t height = 2;
  for (size_t width = 3; width < 4; width++) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .iterations(1)
        .Test(xnn_x64_transpose_ukernel__2x2_reuse_multi_sse2);
  }
}
2034 
// Block height 4 with block widths taken from the half-open range [3, 4), so
// only width 3 is run; the input stride follows the width.
TEST(X64_TRANSPOSE__2X2_REUSE_MULTI_SSE2, bh_4_bw_3_4) {
  TEST_REQUIRES_X86_SSE2;
  const size_t height = 4;
  for (size_t width = 3; width < 4; width++) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .iterations(1)
        .Test(xnn_x64_transpose_ukernel__2x2_reuse_multi_sse2);
  }
}
2047 
// Block height 4 and block width 2; the input stride equals the width and the
// output stride equals the height.
TEST(X64_TRANSPOSE__2X2_REUSE_MULTI_SSE2, bh_4_bw_2) {
  TEST_REQUIRES_X86_SSE2;
  const size_t height = 4;
  const size_t width = 2;
  TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .iterations(1)
      .Test(xnn_x64_transpose_ukernel__2x2_reuse_multi_sse2);
}
2058 
// Block width 2 with block heights taken from the half-open range [3, 4), so
// only height 3 is run; the output stride follows the height.
TEST(X64_TRANSPOSE__2X2_REUSE_MULTI_SSE2, bh_3_4_bw_2) {
  TEST_REQUIRES_X86_SSE2;
  const size_t width = 2;
  for (size_t height = 3; height < 4; height++) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .iterations(1)
        .Test(xnn_x64_transpose_ukernel__2x2_reuse_multi_sse2);
  }
}
2071 
// Block width 4 with block heights taken from the half-open range [3, 4), so
// only height 3 is run; the output stride follows the height.
TEST(X64_TRANSPOSE__2X2_REUSE_MULTI_SSE2, bh_3_4_bw_4) {
  TEST_REQUIRES_X86_SSE2;
  const size_t width = 4;
  for (size_t height = 3; height < 4; height++) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .iterations(1)
        .Test(xnn_x64_transpose_ukernel__2x2_reuse_multi_sse2);
  }
}
2084 
// Block heights and block widths both taken from the half-open range [3, 4),
// so only the 3x3 combination is run; strides follow the block extents.
TEST(X64_TRANSPOSE__2X2_REUSE_MULTI_SSE2, bh_3_4_bw_3_4) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t height = 3; height < 4; height++) {
    for (size_t width = 3; width < 4; width++) {
      TransposeMicrokernelTester()
          .input_stride(width)
          .output_stride(height)
          .block_width(width)
          .block_height(height)
          .iterations(1)
          .Test(xnn_x64_transpose_ukernel__2x2_reuse_multi_sse2);
    }
  }
}
2099 
// Block height 2 and block width 2 with an input stride of 4 (larger than the
// width); the output stride equals the height.
TEST(X64_TRANSPOSE__2X2_REUSE_MULTI_SSE2, bh_2_bw_2_is_4) {
  TEST_REQUIRES_X86_SSE2;
  const size_t height = 2;
  const size_t width = 2;
  const size_t wide_input_stride = 4;
  TransposeMicrokernelTester()
      .input_stride(wide_input_stride)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .iterations(1)
      .Test(xnn_x64_transpose_ukernel__2x2_reuse_multi_sse2);
}
2110 
// Block height 2 and block width 2 with an output stride of 4 (larger than
// the height); the input stride equals the width.
TEST(X64_TRANSPOSE__2X2_REUSE_MULTI_SSE2, bh_2_bw_2_os_4) {
  TEST_REQUIRES_X86_SSE2;
  const size_t height = 2;
  const size_t width = 2;
  const size_t wide_output_stride = 4;
  TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(wide_output_stride)
      .block_width(width)
      .block_height(height)
      .iterations(1)
      .Test(xnn_x64_transpose_ukernel__2x2_reuse_multi_sse2);
}
2121 
// Block height 2 and block width 2 with both the input stride and the output
// stride set to 4 (larger than the block extents).
TEST(X64_TRANSPOSE__2X2_REUSE_MULTI_SSE2, bh_2_bw_2_is_4_os_4) {
  TEST_REQUIRES_X86_SSE2;
  const size_t height = 2;
  const size_t width = 2;
  const size_t wide_stride = 4;
  TransposeMicrokernelTester()
      .input_stride(wide_stride)
      .output_stride(wide_stride)
      .block_width(width)
      .block_height(height)
      .iterations(1)
      .Test(xnn_x64_transpose_ukernel__2x2_reuse_multi_sse2);
}
2132 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
2133 
2134 
2135 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Block height 2 and block width 2; the input stride equals the width and the
// output stride equals the height.
TEST(X64_TRANSPOSE__2X2_REUSE_SWITCH_SSE2, bh_2_bw_2) {
  TEST_REQUIRES_X86_SSE2;
  const size_t height = 2;
  const size_t width = 2;
  TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .iterations(1)
      .Test(xnn_x64_transpose_ukernel__2x2_reuse_switch_sse2);
}
2146 
// Sweeps every combination of block height and block width from 1 through 4
// (inclusive); the input stride follows the width and the output stride
// follows the height.
TEST(X64_TRANSPOSE__2X2_REUSE_SWITCH_SSE2, bh_1_4_bw_1_4) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t height = 1; height <= 4; height++) {
    for (size_t width = 1; width <= 4; width++) {
      TransposeMicrokernelTester()
          .input_stride(width)
          .output_stride(height)
          .block_width(width)
          .block_height(height)
          .iterations(1)
          .Test(xnn_x64_transpose_ukernel__2x2_reuse_switch_sse2);
    }
  }
}
2161 
// Block height 2 and block width 4; the input stride equals the width and the
// output stride equals the height.
TEST(X64_TRANSPOSE__2X2_REUSE_SWITCH_SSE2, bh_2_bw_4) {
  TEST_REQUIRES_X86_SSE2;
  const size_t height = 2;
  const size_t width = 4;
  TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .iterations(1)
      .Test(xnn_x64_transpose_ukernel__2x2_reuse_switch_sse2);
}
2172 
// Block height 2 with block widths taken from the half-open range [3, 4), so
// only width 3 is run; the input stride follows the width.
TEST(X64_TRANSPOSE__2X2_REUSE_SWITCH_SSE2, bh_2_bw_3_4) {
  TEST_REQUIRES_X86_SSE2;
  const size_t height = 2;
  for (size_t width = 3; width < 4; width++) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .iterations(1)
        .Test(xnn_x64_transpose_ukernel__2x2_reuse_switch_sse2);
  }
}
2185 
// Block height 4 with block widths taken from the half-open range [3, 4), so
// only width 3 is run; the input stride follows the width.
TEST(X64_TRANSPOSE__2X2_REUSE_SWITCH_SSE2, bh_4_bw_3_4) {
  TEST_REQUIRES_X86_SSE2;
  const size_t height = 4;
  for (size_t width = 3; width < 4; width++) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .iterations(1)
        .Test(xnn_x64_transpose_ukernel__2x2_reuse_switch_sse2);
  }
}
2198 
// Block height 4 and block width 2; the input stride equals the width and the
// output stride equals the height.
TEST(X64_TRANSPOSE__2X2_REUSE_SWITCH_SSE2, bh_4_bw_2) {
  TEST_REQUIRES_X86_SSE2;
  const size_t height = 4;
  const size_t width = 2;
  TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .iterations(1)
      .Test(xnn_x64_transpose_ukernel__2x2_reuse_switch_sse2);
}
2209 
// Block width 2 with block heights taken from the half-open range [3, 4), so
// only height 3 is run; the output stride follows the height.
TEST(X64_TRANSPOSE__2X2_REUSE_SWITCH_SSE2, bh_3_4_bw_2) {
  TEST_REQUIRES_X86_SSE2;
  const size_t width = 2;
  for (size_t height = 3; height < 4; height++) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .iterations(1)
        .Test(xnn_x64_transpose_ukernel__2x2_reuse_switch_sse2);
  }
}
2222 
// Block width 4 with block heights taken from the half-open range [3, 4), so
// only height 3 is run; the output stride follows the height.
TEST(X64_TRANSPOSE__2X2_REUSE_SWITCH_SSE2, bh_3_4_bw_4) {
  TEST_REQUIRES_X86_SSE2;
  const size_t width = 4;
  for (size_t height = 3; height < 4; height++) {
    TransposeMicrokernelTester()
        .input_stride(width)
        .output_stride(height)
        .block_width(width)
        .block_height(height)
        .iterations(1)
        .Test(xnn_x64_transpose_ukernel__2x2_reuse_switch_sse2);
  }
}
2235 
// Block heights and block widths both taken from the half-open range [3, 4),
// so only the 3x3 combination is run; strides follow the block extents.
TEST(X64_TRANSPOSE__2X2_REUSE_SWITCH_SSE2, bh_3_4_bw_3_4) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t height = 3; height < 4; height++) {
    for (size_t width = 3; width < 4; width++) {
      TransposeMicrokernelTester()
          .input_stride(width)
          .output_stride(height)
          .block_width(width)
          .block_height(height)
          .iterations(1)
          .Test(xnn_x64_transpose_ukernel__2x2_reuse_switch_sse2);
    }
  }
}
2250 
// Block height 2 and block width 2 with an input stride of 4 (larger than the
// width); the output stride equals the height.
TEST(X64_TRANSPOSE__2X2_REUSE_SWITCH_SSE2, bh_2_bw_2_is_4) {
  TEST_REQUIRES_X86_SSE2;
  const size_t height = 2;
  const size_t width = 2;
  const size_t wide_input_stride = 4;
  TransposeMicrokernelTester()
      .input_stride(wide_input_stride)
      .output_stride(height)
      .block_width(width)
      .block_height(height)
      .iterations(1)
      .Test(xnn_x64_transpose_ukernel__2x2_reuse_switch_sse2);
}
2261 
// Block height 2 and block width 2 with an output stride of 4 (larger than
// the height); the input stride equals the width.
TEST(X64_TRANSPOSE__2X2_REUSE_SWITCH_SSE2, bh_2_bw_2_os_4) {
  TEST_REQUIRES_X86_SSE2;
  const size_t height = 2;
  const size_t width = 2;
  const size_t wide_output_stride = 4;
  TransposeMicrokernelTester()
      .input_stride(width)
      .output_stride(wide_output_stride)
      .block_width(width)
      .block_height(height)
      .iterations(1)
      .Test(xnn_x64_transpose_ukernel__2x2_reuse_switch_sse2);
}
2272 
// Block height 2 and block width 2 with both the input stride and the output
// stride set to 4 (larger than the block extents).
TEST(X64_TRANSPOSE__2X2_REUSE_SWITCH_SSE2, bh_2_bw_2_is_4_os_4) {
  TEST_REQUIRES_X86_SSE2;
  const size_t height = 2;
  const size_t width = 2;
  const size_t wide_stride = 4;
  TransposeMicrokernelTester()
      .input_stride(wide_stride)
      .output_stride(wide_stride)
      .block_width(width)
      .block_height(height)
      .iterations(1)
      .Test(xnn_x64_transpose_ukernel__2x2_reuse_switch_sse2);
}
2283 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
2284