// Copyright (c) Facebook, Inc. and its affiliates.
// All rights reserved.
//
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#include <gtest/gtest.h>

#include <xnnpack/common.h>
#include <xnnpack/isa-checks.h>

#include <xnnpack/vadd.h>
#include "vadd-microkernel-tester.h"


#if XNN_ARCH_X86 || XNN_ARCH_X86_64
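  // Tests for the SSE2 Q8 VADD microkernel: compiled only on x86/x86-64 and
  // skipped at runtime (via TEST_REQUIRES_X86_SSE2) if SSE2 is unavailable.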
  TEST(Q8_VADD__SSE2, n_eq_8) {
    TEST_REQUIRES_X86_SSE2;
    VAddMicrokernelTester()
      .n(8)
      .Test(xnn_q8_vadd_ukernel__sse2);
  }

  TEST(Q8_VADD__SSE2, n_div_8) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t n = 8; n < 128; n += 24) {
      VAddMicrokernelTester()
        .n(n)
        .Test(xnn_q8_vadd_ukernel__sse2);
    }
  }

  TEST(Q8_VADD__SSE2, n_gt_8) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t n = 9; n < 16; n++) {
      VAddMicrokernelTester()
        .n(n)
        .Test(xnn_q8_vadd_ukernel__sse2);
    }
  }

  TEST(Q8_VADD__SSE2, n_lt_8) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t n = 1; n < 8; n++) {
      VAddMicrokernelTester()
        .n(n)
        .Test(xnn_q8_vadd_ukernel__sse2);
    }
  }

  TEST(Q8_VADD__SSE2, inplace_a) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t n = 1; n < 128; n += 11) {
      VAddMicrokernelTester()
        .iterations(1)
        .n(n)
        .inplace_a(true)
        .Test(xnn_q8_vadd_ukernel__sse2);
    }
  }

  TEST(Q8_VADD__SSE2, inplace_b) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t n = 1; n < 128; n += 11) {
      VAddMicrokernelTester()
        .iterations(1)
        .n(n)
        .inplace_b(true)
        .Test(xnn_q8_vadd_ukernel__sse2);
    }
  }

  TEST(Q8_VADD__SSE2, inplace_a_and_b) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t n = 1; n < 128; n += 11) {
      VAddMicrokernelTester()
        .iterations(1)
        .n(n)
        .inplace_a(true)
        .inplace_b(true)
        .Test(xnn_q8_vadd_ukernel__sse2);
    }
  }

  TEST(Q8_VADD__SSE2, a_scale) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t n = 1; n < 128; n += 11) {
      for (float a_scale = 1.0e-2; a_scale < 1.0e+2; a_scale *= 1.7f) {
        VAddMicrokernelTester()
          .iterations(1)
          .n(n)
          .a_scale(a_scale)
          .Test(xnn_q8_vadd_ukernel__sse2);
      }
    }
  }

  TEST(Q8_VADD__SSE2, b_scale) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t n = 1; n < 128; n += 11) {
      for (float b_scale = 1.0e-2; b_scale < 1.0e+2; b_scale *= 1.7f) {
        VAddMicrokernelTester()
          .iterations(1)
          .n(n)
          .b_scale(b_scale)
          .Test(xnn_q8_vadd_ukernel__sse2);
      }
    }
  }

  TEST(Q8_VADD__SSE2, y_scale) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t n = 1; n < 128; n += 11) {
      for (float y_scale = 1.0e-2; y_scale < 1.0e+2; y_scale *= 1.7f) {
        VAddMicrokernelTester()
          .iterations(1)
          .n(n)
          .y_scale(y_scale)
          .Test(xnn_q8_vadd_ukernel__sse2);
      }
    }
  }

  TEST(Q8_VADD__SSE2, a_zero_point) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t n = 1; n < 128; n += 11) {
      for (int32_t a_zero_point = 0; a_zero_point <= 255; a_zero_point += 51) {
        VAddMicrokernelTester()
          .iterations(1)
          .n(n)
          .a_zero_point(uint8_t(a_zero_point))
          .Test(xnn_q8_vadd_ukernel__sse2);
      }
    }
  }

  TEST(Q8_VADD__SSE2, b_zero_point) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t n = 1; n < 128; n += 11) {
      for (int32_t b_zero_point = 0; b_zero_point <= 255; b_zero_point += 51) {
        VAddMicrokernelTester()
          .iterations(1)
          .n(n)
          .b_zero_point(uint8_t(b_zero_point))
          .Test(xnn_q8_vadd_ukernel__sse2);
      }
    }
  }

  TEST(Q8_VADD__SSE2, y_zero_point) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t n = 1; n < 128; n += 11) {
      for (int32_t y_zero_point = 0; y_zero_point <= 255; y_zero_point += 51) {
        VAddMicrokernelTester()
          .iterations(1)
          .n(n)
          .y_zero_point(uint8_t(y_zero_point))
          .Test(xnn_q8_vadd_ukernel__sse2);
      }
    }
  }

  TEST(Q8_VADD__SSE2, qmin) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t n = 1; n < 128; n += 11) {
      VAddMicrokernelTester()
        .iterations(1)
        .n(n)
        .qmin(128)
        .Test(xnn_q8_vadd_ukernel__sse2);
    }
  }

  TEST(Q8_VADD__SSE2, qmax) {
    TEST_REQUIRES_X86_SSE2;
    for (size_t n = 1; n < 128; n += 11) {
      VAddMicrokernelTester()
        .iterations(1)
        .n(n)
        .qmax(128)
        .Test(xnn_q8_vadd_ukernel__sse2);
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64

#if XNN_ARCH_ARM || XNN_ARCH_ARM64
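  // Tests for the NEON Q8 VADD microkernel: compiled only on ARM/ARM64 and
  // skipped at runtime (via TEST_REQUIRES_ARM_NEON) if NEON is unavailable.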
  TEST(Q8_VADD__NEON, n_eq_8) {
    TEST_REQUIRES_ARM_NEON;
    VAddMicrokernelTester()
      .n(8)
      .Test(xnn_q8_vadd_ukernel__neon);
  }

  TEST(Q8_VADD__NEON, n_div_8) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t n = 8; n < 128; n += 24) {
      VAddMicrokernelTester()
        .n(n)
        .Test(xnn_q8_vadd_ukernel__neon);
    }
  }

  TEST(Q8_VADD__NEON, n_gt_8) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t n = 9; n < 16; n++) {
      VAddMicrokernelTester()
        .n(n)
        .Test(xnn_q8_vadd_ukernel__neon);
    }
  }

  TEST(Q8_VADD__NEON, n_lt_8) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t n = 1; n < 8; n++) {
      VAddMicrokernelTester()
        .n(n)
        .Test(xnn_q8_vadd_ukernel__neon);
    }
  }

  TEST(Q8_VADD__NEON, inplace_a) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t n = 1; n < 128; n += 11) {
      VAddMicrokernelTester()
        .iterations(1)
        .n(n)
        .inplace_a(true)
        .Test(xnn_q8_vadd_ukernel__neon);
    }
  }

  TEST(Q8_VADD__NEON, inplace_b) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t n = 1; n < 128; n += 11) {
      VAddMicrokernelTester()
        .iterations(1)
        .n(n)
        .inplace_b(true)
        .Test(xnn_q8_vadd_ukernel__neon);
    }
  }

  TEST(Q8_VADD__NEON, inplace_a_and_b) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t n = 1; n < 128; n += 11) {
      VAddMicrokernelTester()
        .iterations(1)
        .n(n)
        .inplace_a(true)
        .inplace_b(true)
        .Test(xnn_q8_vadd_ukernel__neon);
    }
  }

  TEST(Q8_VADD__NEON, a_scale) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t n = 1; n < 128; n += 11) {
      for (float a_scale = 1.0e-2; a_scale < 1.0e+2; a_scale *= 1.7f) {
        VAddMicrokernelTester()
          .iterations(1)
          .n(n)
          .a_scale(a_scale)
          .Test(xnn_q8_vadd_ukernel__neon);
      }
    }
  }

  TEST(Q8_VADD__NEON, b_scale) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t n = 1; n < 128; n += 11) {
      for (float b_scale = 1.0e-2; b_scale < 1.0e+2; b_scale *= 1.7f) {
        VAddMicrokernelTester()
          .iterations(1)
          .n(n)
          .b_scale(b_scale)
          .Test(xnn_q8_vadd_ukernel__neon);
      }
    }
  }

  TEST(Q8_VADD__NEON, y_scale) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t n = 1; n < 128; n += 11) {
      for (float y_scale = 1.0e-2; y_scale < 1.0e+2; y_scale *= 1.7f) {
        VAddMicrokernelTester()
          .iterations(1)
          .n(n)
          .y_scale(y_scale)
          .Test(xnn_q8_vadd_ukernel__neon);
      }
    }
  }

  TEST(Q8_VADD__NEON, a_zero_point) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t n = 1; n < 128; n += 11) {
      for (int32_t a_zero_point = 0; a_zero_point <= 255; a_zero_point += 51) {
        VAddMicrokernelTester()
          .iterations(1)
          .n(n)
          .a_zero_point(uint8_t(a_zero_point))
          .Test(xnn_q8_vadd_ukernel__neon);
      }
    }
  }

  TEST(Q8_VADD__NEON, b_zero_point) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t n = 1; n < 128; n += 11) {
      for (int32_t b_zero_point = 0; b_zero_point <= 255; b_zero_point += 51) {
        VAddMicrokernelTester()
          .iterations(1)
          .n(n)
          .b_zero_point(uint8_t(b_zero_point))
          .Test(xnn_q8_vadd_ukernel__neon);
      }
    }
  }

  TEST(Q8_VADD__NEON, y_zero_point) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t n = 1; n < 128; n += 11) {
      for (int32_t y_zero_point = 0; y_zero_point <= 255; y_zero_point += 51) {
        VAddMicrokernelTester()
          .iterations(1)
          .n(n)
          .y_zero_point(uint8_t(y_zero_point))
          .Test(xnn_q8_vadd_ukernel__neon);
      }
    }
  }

  TEST(Q8_VADD__NEON, qmin) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t n = 1; n < 128; n += 11) {
      VAddMicrokernelTester()
        .iterations(1)
        .n(n)
        .qmin(128)
        .Test(xnn_q8_vadd_ukernel__neon);
    }
  }

  TEST(Q8_VADD__NEON, qmax) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t n = 1; n < 128; n += 11) {
      VAddMicrokernelTester()
        .iterations(1)
        .n(n)
        .qmax(128)
        .Test(xnn_q8_vadd_ukernel__neon);
    }
  }
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64

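// Tests for the portable scalar Q8 VADD microkernel; these run on every architecture.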
TEST(Q8_VADD__SCALAR, n_eq_1) {
  VAddMicrokernelTester()
    .n(1)
    .Test(xnn_q8_vadd_ukernel__scalar, VAddMicrokernelTester::Variant::Scalar);
}

TEST(Q8_VADD__SCALAR, n_gt_1) {
  for (size_t n = 2; n < 8; n++) {
    VAddMicrokernelTester()
      .n(n)
      .Test(xnn_q8_vadd_ukernel__scalar, VAddMicrokernelTester::Variant::Scalar);
  }
}

TEST(Q8_VADD__SCALAR, inplace_a) {
  for (size_t n = 1; n < 16; n += 3) {
    VAddMicrokernelTester()
      .iterations(1)
      .n(n)
      .inplace_a(true)
      .Test(xnn_q8_vadd_ukernel__scalar, VAddMicrokernelTester::Variant::Scalar);
  }
}

TEST(Q8_VADD__SCALAR, inplace_b) {
  for (size_t n = 1; n < 16; n += 3) {
    VAddMicrokernelTester()
      .iterations(1)
      .n(n)
      .inplace_b(true)
      .Test(xnn_q8_vadd_ukernel__scalar, VAddMicrokernelTester::Variant::Scalar);
  }
}

TEST(Q8_VADD__SCALAR, inplace_a_and_b) {
  for (size_t n = 1; n < 16; n += 3) {
    VAddMicrokernelTester()
      .iterations(1)
      .n(n)
      .inplace_a(true)
      .inplace_b(true)
      .Test(xnn_q8_vadd_ukernel__scalar, VAddMicrokernelTester::Variant::Scalar);
  }
}

TEST(Q8_VADD__SCALAR, a_scale) {
  for (size_t n = 1; n < 16; n += 3) {
    for (float a_scale = 1.0e-2; a_scale < 1.0e+2; a_scale *= 1.7f) {
      VAddMicrokernelTester()
        .iterations(1)
        .n(n)
        .a_scale(a_scale)
        .Test(xnn_q8_vadd_ukernel__scalar, VAddMicrokernelTester::Variant::Scalar);
    }
  }
}

TEST(Q8_VADD__SCALAR, b_scale) {
  for (size_t n = 1; n < 16; n += 3) {
    for (float b_scale = 1.0e-2; b_scale < 1.0e+2; b_scale *= 1.7f) {
      VAddMicrokernelTester()
        .iterations(1)
        .n(n)
        .b_scale(b_scale)
        .Test(xnn_q8_vadd_ukernel__scalar, VAddMicrokernelTester::Variant::Scalar);
    }
  }
}

TEST(Q8_VADD__SCALAR, y_scale) {
  for (size_t n = 1; n < 16; n += 3) {
    for (float y_scale = 1.0e-2; y_scale < 1.0e+2; y_scale *= 1.7f) {
      VAddMicrokernelTester()
        .iterations(1)
        .n(n)
        .y_scale(y_scale)
        .Test(xnn_q8_vadd_ukernel__scalar, VAddMicrokernelTester::Variant::Scalar);
    }
  }
}

TEST(Q8_VADD__SCALAR, a_zero_point) {
  for (size_t n = 1; n < 16; n += 3) {
    for (int32_t a_zero_point = 0; a_zero_point <= 255; a_zero_point += 51) {
      VAddMicrokernelTester()
        .iterations(1)
        .n(n)
        .a_zero_point(uint8_t(a_zero_point))
        .Test(xnn_q8_vadd_ukernel__scalar, VAddMicrokernelTester::Variant::Scalar);
    }
  }
}

TEST(Q8_VADD__SCALAR, b_zero_point) {
  for (size_t n = 1; n < 16; n += 3) {
    for (int32_t b_zero_point = 0; b_zero_point <= 255; b_zero_point += 51) {
      VAddMicrokernelTester()
        .iterations(1)
        .n(n)
        .b_zero_point(uint8_t(b_zero_point))
        .Test(xnn_q8_vadd_ukernel__scalar, VAddMicrokernelTester::Variant::Scalar);
    }
  }
}

TEST(Q8_VADD__SCALAR, y_zero_point) {
  for (size_t n = 1; n < 16; n += 3) {
    for (int32_t y_zero_point = 0; y_zero_point <= 255; y_zero_point += 51) {
      VAddMicrokernelTester()
        .iterations(1)
        .n(n)
        .y_zero_point(uint8_t(y_zero_point))
        .Test(xnn_q8_vadd_ukernel__scalar, VAddMicrokernelTester::Variant::Scalar);
    }
  }
}

TEST(Q8_VADD__SCALAR, qmin) {
  for (size_t n = 1; n < 16; n += 3) {
    VAddMicrokernelTester()
      .iterations(1)
      .n(n)
      .qmin(128)
      .Test(xnn_q8_vadd_ukernel__scalar, VAddMicrokernelTester::Variant::Scalar);
  }
}

TEST(Q8_VADD__SCALAR, qmax) {
  for (size_t n = 1; n < 16; n += 3) {
    VAddMicrokernelTester()
      .iterations(1)
      .n(n)
      .qmax(128)
      .Test(xnn_q8_vadd_ukernel__scalar, VAddMicrokernelTester::Variant::Scalar);
  }
}