1 // Copyright 2019 Google LLC
2 //
3 // This source code is licensed under the BSD-style license found in the
4 // LICENSE file in the root directory of this source tree.
5 //
6 // Auto-generated file. Do not edit!
7 // Specification: test/x32-packx.yaml
8 // Generator: tools/generate-pack-test.py
9
10
11 #include <gtest/gtest.h>
12
13 #include <xnnpack/common.h>
14 #include <xnnpack/isa-checks.h>
15
16 #include <xnnpack/packx.h>
17 #include "pack-microkernel-tester.h"
18
19
20 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single configuration: mr=4, m=4, k=4 run through
// xnn_x32_packx_ukernel_4x__neon_st4.
TEST(X32_PACKX_4X__NEON_ST4, k_eq_4) {
  TEST_REQUIRES_ARM_NEON;
  PackMicrokernelTester().mr(4).m(4).k(4).Test(xnn_x32_packx_ukernel_4x__neon_st4);
}
29
// Holds k at 4 and mr at 4 while sweeping m over 1..4.
TEST(X32_PACKX_4X__NEON_ST4, k_eq_4_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t m_value = 1; m_value <= 4; ++m_value) {
    PackMicrokernelTester()
        .mr(4)
        .m(m_value)
        .k(4)
        .Test(xnn_x32_packx_ukernel_4x__neon_st4);
  }
}
40
// Keeps mr=4 and m=4 while sweeping k over 1..3.
TEST(X32_PACKX_4X__NEON_ST4, k_lt_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_value = 1; k_value < 4; ++k_value) {
    PackMicrokernelTester()
        .mr(4)
        .m(4)
        .k(k_value)
        .Test(xnn_x32_packx_ukernel_4x__neon_st4);
  }
}
51
// For each k in 1..3 (outer loop), sweeps m over 1..4 (inner loop) with mr=4.
TEST(X32_PACKX_4X__NEON_ST4, k_lt_4_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_value = 1; k_value < 4; ++k_value) {
    for (size_t m_value = 1; m_value <= 4; ++m_value) {
      PackMicrokernelTester()
          .mr(4)
          .m(m_value)
          .k(k_value)
          .Test(xnn_x32_packx_ukernel_4x__neon_st4);
    }
  }
}
64
// Keeps mr=4 and m=4 while sweeping k over 5..7.
TEST(X32_PACKX_4X__NEON_ST4, k_gt_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_value = 5; k_value < 8; ++k_value) {
    PackMicrokernelTester()
        .mr(4)
        .m(4)
        .k(k_value)
        .Test(xnn_x32_packx_ukernel_4x__neon_st4);
  }
}
75
// For each k in 5..7 (outer loop), sweeps m over 1..4 (inner loop) with mr=4.
TEST(X32_PACKX_4X__NEON_ST4, k_gt_4_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_value = 5; k_value < 8; ++k_value) {
    for (size_t m_value = 1; m_value <= 4; ++m_value) {
      PackMicrokernelTester()
          .mr(4)
          .m(m_value)
          .k(k_value)
          .Test(xnn_x32_packx_ukernel_4x__neon_st4);
    }
  }
}
88
// Keeps mr=4 and m=4 while stepping k through 8, 12, ..., 36.
TEST(X32_PACKX_4X__NEON_ST4, k_div_4) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_value = 8; k_value < 40; k_value += 4) {
    PackMicrokernelTester()
        .mr(4)
        .m(4)
        .k(k_value)
        .Test(xnn_x32_packx_ukernel_4x__neon_st4);
  }
}
99
// For each k in 8, 12, ..., 36 (outer loop), sweeps m over 1..4 (inner loop)
// with mr=4.
TEST(X32_PACKX_4X__NEON_ST4, k_div_4_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_value = 8; k_value < 40; k_value += 4) {
    for (size_t m_value = 1; m_value <= 4; ++m_value) {
      PackMicrokernelTester()
          .mr(4)
          .m(m_value)
          .k(k_value)
          .Test(xnn_x32_packx_ukernel_4x__neon_st4);
    }
  }
}
112
// Sets x_stride(23) with mr=4 and m=4 while k takes the values 1, 6, 11, 16.
TEST(X32_PACKX_4X__NEON_ST4, strided_x) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_value = 1; k_value <= 20; k_value += 5) {
    PackMicrokernelTester()
        .mr(4)
        .m(4)
        .k(k_value)
        .x_stride(23)
        .Test(xnn_x32_packx_ukernel_4x__neon_st4);
  }
}
124 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
125
126
127 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single configuration: mr=4, m=4, k=4 run through
// xnn_x32_packx_ukernel_4x__sse.
TEST(X32_PACKX_4X__SSE, k_eq_4) {
  TEST_REQUIRES_X86_SSE;
  PackMicrokernelTester().mr(4).m(4).k(4).Test(xnn_x32_packx_ukernel_4x__sse);
}
136
// Holds k at 4 and mr at 4 while sweeping m over 1..4.
TEST(X32_PACKX_4X__SSE, k_eq_4_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (size_t m_value = 1; m_value <= 4; ++m_value) {
    PackMicrokernelTester()
        .mr(4)
        .m(m_value)
        .k(4)
        .Test(xnn_x32_packx_ukernel_4x__sse);
  }
}
147
// Keeps mr=4 and m=4 while sweeping k over 1..3.
TEST(X32_PACKX_4X__SSE, k_lt_4) {
  TEST_REQUIRES_X86_SSE;
  for (size_t k_value = 1; k_value < 4; ++k_value) {
    PackMicrokernelTester()
        .mr(4)
        .m(4)
        .k(k_value)
        .Test(xnn_x32_packx_ukernel_4x__sse);
  }
}
158
// For each k in 1..3 (outer loop), sweeps m over 1..4 (inner loop) with mr=4.
TEST(X32_PACKX_4X__SSE, k_lt_4_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (size_t k_value = 1; k_value < 4; ++k_value) {
    for (size_t m_value = 1; m_value <= 4; ++m_value) {
      PackMicrokernelTester()
          .mr(4)
          .m(m_value)
          .k(k_value)
          .Test(xnn_x32_packx_ukernel_4x__sse);
    }
  }
}
171
// Keeps mr=4 and m=4 while sweeping k over 5..7.
TEST(X32_PACKX_4X__SSE, k_gt_4) {
  TEST_REQUIRES_X86_SSE;
  for (size_t k_value = 5; k_value < 8; ++k_value) {
    PackMicrokernelTester()
        .mr(4)
        .m(4)
        .k(k_value)
        .Test(xnn_x32_packx_ukernel_4x__sse);
  }
}
182
// For each k in 5..7 (outer loop), sweeps m over 1..4 (inner loop) with mr=4.
TEST(X32_PACKX_4X__SSE, k_gt_4_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (size_t k_value = 5; k_value < 8; ++k_value) {
    for (size_t m_value = 1; m_value <= 4; ++m_value) {
      PackMicrokernelTester()
          .mr(4)
          .m(m_value)
          .k(k_value)
          .Test(xnn_x32_packx_ukernel_4x__sse);
    }
  }
}
195
// Keeps mr=4 and m=4 while stepping k through 8, 12, ..., 36.
TEST(X32_PACKX_4X__SSE, k_div_4) {
  TEST_REQUIRES_X86_SSE;
  for (size_t k_value = 8; k_value < 40; k_value += 4) {
    PackMicrokernelTester()
        .mr(4)
        .m(4)
        .k(k_value)
        .Test(xnn_x32_packx_ukernel_4x__sse);
  }
}
206
// For each k in 8, 12, ..., 36 (outer loop), sweeps m over 1..4 (inner loop)
// with mr=4.
TEST(X32_PACKX_4X__SSE, k_div_4_subtile) {
  TEST_REQUIRES_X86_SSE;
  for (size_t k_value = 8; k_value < 40; k_value += 4) {
    for (size_t m_value = 1; m_value <= 4; ++m_value) {
      PackMicrokernelTester()
          .mr(4)
          .m(m_value)
          .k(k_value)
          .Test(xnn_x32_packx_ukernel_4x__sse);
    }
  }
}
219
// Sets x_stride(23) with mr=4 and m=4 while k takes the values 1, 6, 11, 16.
TEST(X32_PACKX_4X__SSE, strided_x) {
  TEST_REQUIRES_X86_SSE;
  for (size_t k_value = 1; k_value <= 20; k_value += 5) {
    PackMicrokernelTester()
        .mr(4)
        .m(4)
        .k(k_value)
        .x_stride(23)
        .Test(xnn_x32_packx_ukernel_4x__sse);
  }
}
231 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
232
233
234 #if !XNN_ARCH_ASMJS && !XNN_ARCH_WASM
// Single configuration: mr=4, m=4, k=4 run through
// xnn_x32_packx_ukernel_4x__psimd.
TEST(X32_PACKX_4X__PSIMD, k_eq_4) {
  TEST_REQUIRES_PSIMD;
  PackMicrokernelTester().mr(4).m(4).k(4).Test(xnn_x32_packx_ukernel_4x__psimd);
}
243
// Holds k at 4 and mr at 4 while sweeping m over 1..4.
TEST(X32_PACKX_4X__PSIMD, k_eq_4_subtile) {
  TEST_REQUIRES_PSIMD;
  for (size_t m_value = 1; m_value <= 4; ++m_value) {
    PackMicrokernelTester()
        .mr(4)
        .m(m_value)
        .k(4)
        .Test(xnn_x32_packx_ukernel_4x__psimd);
  }
}
254
// Keeps mr=4 and m=4 while sweeping k over 1..3.
TEST(X32_PACKX_4X__PSIMD, k_lt_4) {
  TEST_REQUIRES_PSIMD;
  for (size_t k_value = 1; k_value < 4; ++k_value) {
    PackMicrokernelTester()
        .mr(4)
        .m(4)
        .k(k_value)
        .Test(xnn_x32_packx_ukernel_4x__psimd);
  }
}
265
// For each k in 1..3 (outer loop), sweeps m over 1..4 (inner loop) with mr=4.
TEST(X32_PACKX_4X__PSIMD, k_lt_4_subtile) {
  TEST_REQUIRES_PSIMD;
  for (size_t k_value = 1; k_value < 4; ++k_value) {
    for (size_t m_value = 1; m_value <= 4; ++m_value) {
      PackMicrokernelTester()
          .mr(4)
          .m(m_value)
          .k(k_value)
          .Test(xnn_x32_packx_ukernel_4x__psimd);
    }
  }
}
278
// Keeps mr=4 and m=4 while sweeping k over 5..7.
TEST(X32_PACKX_4X__PSIMD, k_gt_4) {
  TEST_REQUIRES_PSIMD;
  for (size_t k_value = 5; k_value < 8; ++k_value) {
    PackMicrokernelTester()
        .mr(4)
        .m(4)
        .k(k_value)
        .Test(xnn_x32_packx_ukernel_4x__psimd);
  }
}
289
// For each k in 5..7 (outer loop), sweeps m over 1..4 (inner loop) with mr=4.
TEST(X32_PACKX_4X__PSIMD, k_gt_4_subtile) {
  TEST_REQUIRES_PSIMD;
  for (size_t k_value = 5; k_value < 8; ++k_value) {
    for (size_t m_value = 1; m_value <= 4; ++m_value) {
      PackMicrokernelTester()
          .mr(4)
          .m(m_value)
          .k(k_value)
          .Test(xnn_x32_packx_ukernel_4x__psimd);
    }
  }
}
302
// Keeps mr=4 and m=4 while stepping k through 8, 12, ..., 36.
TEST(X32_PACKX_4X__PSIMD, k_div_4) {
  TEST_REQUIRES_PSIMD;
  for (size_t k_value = 8; k_value < 40; k_value += 4) {
    PackMicrokernelTester()
        .mr(4)
        .m(4)
        .k(k_value)
        .Test(xnn_x32_packx_ukernel_4x__psimd);
  }
}
313
// For each k in 8, 12, ..., 36 (outer loop), sweeps m over 1..4 (inner loop)
// with mr=4.
TEST(X32_PACKX_4X__PSIMD, k_div_4_subtile) {
  TEST_REQUIRES_PSIMD;
  for (size_t k_value = 8; k_value < 40; k_value += 4) {
    for (size_t m_value = 1; m_value <= 4; ++m_value) {
      PackMicrokernelTester()
          .mr(4)
          .m(m_value)
          .k(k_value)
          .Test(xnn_x32_packx_ukernel_4x__psimd);
    }
  }
}
326
// Sets x_stride(23) with mr=4 and m=4 while k takes the values 1, 6, 11, 16.
TEST(X32_PACKX_4X__PSIMD, strided_x) {
  TEST_REQUIRES_PSIMD;
  for (size_t k_value = 1; k_value <= 20; k_value += 5) {
    PackMicrokernelTester()
        .mr(4)
        .m(4)
        .k(k_value)
        .x_stride(23)
        .Test(xnn_x32_packx_ukernel_4x__psimd);
  }
}
338 #endif // !XNN_ARCH_ASMJS && !XNN_ARCH_WASM
339
340
// Single configuration: mr=2, m=2, k=1 run through
// xnn_x32_packx_ukernel_2x__scalar. No ISA guard is applied.
TEST(X32_PACKX_2X__SCALAR, k_eq_1) {
  PackMicrokernelTester().mr(2).m(2).k(1).Test(xnn_x32_packx_ukernel_2x__scalar);
}
348
// Holds k at 1 and mr at 2 while sweeping m over 1..2.
TEST(X32_PACKX_2X__SCALAR, k_eq_1_subtile) {
  for (size_t m_value = 1; m_value <= 2; ++m_value) {
    PackMicrokernelTester()
        .mr(2)
        .m(m_value)
        .k(1)
        .Test(xnn_x32_packx_ukernel_2x__scalar);
  }
}
358
// Keeps mr=2 and m=2 while sweeping k over 2..9.
TEST(X32_PACKX_2X__SCALAR, k_gt_1) {
  for (size_t k_value = 2; k_value < 10; ++k_value) {
    PackMicrokernelTester()
        .mr(2)
        .m(2)
        .k(k_value)
        .Test(xnn_x32_packx_ukernel_2x__scalar);
  }
}
368
// For each k in 2..9 (outer loop), sweeps m over 1..2 (inner loop) with mr=2.
TEST(X32_PACKX_2X__SCALAR, k_gt_1_subtile) {
  for (size_t k_value = 2; k_value < 10; ++k_value) {
    for (size_t m_value = 1; m_value <= 2; ++m_value) {
      PackMicrokernelTester()
          .mr(2)
          .m(m_value)
          .k(k_value)
          .Test(xnn_x32_packx_ukernel_2x__scalar);
    }
  }
}
380
// Sets x_stride(7) with mr=2 and m=2 while k takes the values 1, 3, 5.
TEST(X32_PACKX_2X__SCALAR, strided_x) {
  for (size_t k_value = 1; k_value <= 5; k_value += 2) {
    PackMicrokernelTester()
        .mr(2)
        .m(2)
        .k(k_value)
        .x_stride(7)
        .Test(xnn_x32_packx_ukernel_2x__scalar);
  }
}
391
// Single configuration: mr=3, m=3, k=1 run through
// xnn_x32_packx_ukernel_3x__scalar. No ISA guard is applied.
TEST(X32_PACKX_3X__SCALAR, k_eq_1) {
  PackMicrokernelTester().mr(3).m(3).k(1).Test(xnn_x32_packx_ukernel_3x__scalar);
}
399
// Holds k at 1 and mr at 3 while sweeping m over 1..3.
TEST(X32_PACKX_3X__SCALAR, k_eq_1_subtile) {
  for (size_t m_value = 1; m_value <= 3; ++m_value) {
    PackMicrokernelTester()
        .mr(3)
        .m(m_value)
        .k(1)
        .Test(xnn_x32_packx_ukernel_3x__scalar);
  }
}
409
// Keeps mr=3 and m=3 while sweeping k over 2..9.
TEST(X32_PACKX_3X__SCALAR, k_gt_1) {
  for (size_t k_value = 2; k_value < 10; ++k_value) {
    PackMicrokernelTester()
        .mr(3)
        .m(3)
        .k(k_value)
        .Test(xnn_x32_packx_ukernel_3x__scalar);
  }
}
419
// For each k in 2..9 (outer loop), sweeps m over 1..3 (inner loop) with mr=3.
TEST(X32_PACKX_3X__SCALAR, k_gt_1_subtile) {
  for (size_t k_value = 2; k_value < 10; ++k_value) {
    for (size_t m_value = 1; m_value <= 3; ++m_value) {
      PackMicrokernelTester()
          .mr(3)
          .m(m_value)
          .k(k_value)
          .Test(xnn_x32_packx_ukernel_3x__scalar);
    }
  }
}
431
// Sets x_stride(7) with mr=3 and m=3 while k takes the values 1, 3, 5.
TEST(X32_PACKX_3X__SCALAR, strided_x) {
  for (size_t k_value = 1; k_value <= 5; k_value += 2) {
    PackMicrokernelTester()
        .mr(3)
        .m(3)
        .k(k_value)
        .x_stride(7)
        .Test(xnn_x32_packx_ukernel_3x__scalar);
  }
}
442
// Single configuration: mr=4, m=4, k=1 run through
// xnn_x32_packx_ukernel_4x__scalar. No ISA guard is applied.
TEST(X32_PACKX_4X__SCALAR, k_eq_1) {
  PackMicrokernelTester().mr(4).m(4).k(1).Test(xnn_x32_packx_ukernel_4x__scalar);
}
450
// Holds k at 1 and mr at 4 while sweeping m over 1..4.
TEST(X32_PACKX_4X__SCALAR, k_eq_1_subtile) {
  for (size_t m_value = 1; m_value <= 4; ++m_value) {
    PackMicrokernelTester()
        .mr(4)
        .m(m_value)
        .k(1)
        .Test(xnn_x32_packx_ukernel_4x__scalar);
  }
}
460
// Keeps mr=4 and m=4 while sweeping k over 2..9.
TEST(X32_PACKX_4X__SCALAR, k_gt_1) {
  for (size_t k_value = 2; k_value < 10; ++k_value) {
    PackMicrokernelTester()
        .mr(4)
        .m(4)
        .k(k_value)
        .Test(xnn_x32_packx_ukernel_4x__scalar);
  }
}
470
// For each k in 2..9 (outer loop), sweeps m over 1..4 (inner loop) with mr=4.
TEST(X32_PACKX_4X__SCALAR, k_gt_1_subtile) {
  for (size_t k_value = 2; k_value < 10; ++k_value) {
    for (size_t m_value = 1; m_value <= 4; ++m_value) {
      PackMicrokernelTester()
          .mr(4)
          .m(m_value)
          .k(k_value)
          .Test(xnn_x32_packx_ukernel_4x__scalar);
    }
  }
}
482
// Sets x_stride(7) with mr=4 and m=4 while k takes the values 1, 3, 5.
TEST(X32_PACKX_4X__SCALAR, strided_x) {
  for (size_t k_value = 1; k_value <= 5; k_value += 2) {
    PackMicrokernelTester()
        .mr(4)
        .m(4)
        .k(k_value)
        .x_stride(7)
        .Test(xnn_x32_packx_ukernel_4x__scalar);
  }
}