1 // Copyright 2015, VIXL authors
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are met:
6 //
7 // * Redistributions of source code must retain the above copyright notice,
8 // this list of conditions and the following disclaimer.
9 // * Redistributions in binary form must reproduce the above copyright notice,
10 // this list of conditions and the following disclaimer in the documentation
11 // and/or other materials provided with the distribution.
12 // * Neither the name of ARM Limited nor the names of its contributors may be
13 // used to endorse or promote products derived from this software without
14 // specific prior written permission.
15 //
16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17 // ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 // WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
20 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26
27 #include <cfloat>
28 #include <cstdio>
29
30 #include "test-runner.h"
31 #include "test-utils.h"
32
33 #include "aarch64/test-simulator-inputs-aarch64.h"
34 #include "aarch64/test-simulator-traces-aarch64.h"
35 #include "aarch64/test-utils-aarch64.h"
36
37 #include "aarch64/macro-assembler-aarch64.h"
38 #include "aarch64/simulator-aarch64.h"
39
40 namespace vixl {
41 namespace aarch64 {
42
43 // ==== Simulator Tests ====
44 //
45 // These simulator tests check instruction behaviour against a trace taken from
46 // real AArch64 hardware. The same test code is used to generate the trace; the
47 // results are printed to stdout when the test is run with
48 // --generate_test_trace.
49 //
50 // The input lists and expected results are stored in test/traces. The expected
51 // results can be regenerated using tools/generate_simulator_traces.py. Adding a
52 // test for a new instruction is described at the top of
53 // test-simulator-traces-aarch64.h.
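//
// As a rough illustration only (the input and expected-value array names below
// are hypothetical, not the ones declared in the trace headers), a two-operand
// FP test built on the TEST macro and dispatchers defined below would look
// something like:
//
//   TEST(fadd_float) {
//     Test2Op("fadd_float",
//             &MacroAssembler::Fadd,
//             kHypotheticalFloatInputs,
//             kHypotheticalFloatInputCount,
//             kHypotheticalExpected_fadd_float,
//             kHypotheticalExpectedCount_fadd_float);
//   }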
54
55 #define __ masm.
56 #define TEST(name) TEST_(AARCH64_SIM_##name)
57
58 #ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
59
60 #define SETUP() \
61 MacroAssembler masm; \
62 Decoder decoder; \
63 Simulator* simulator = \
64 Test::run_debugger() ? new Debugger(&decoder) : new Simulator(&decoder); \
65 simulator->SetColouredTrace(Test::coloured_trace()); \
66 simulator->SetInstructionStats(Test::instruction_stats());
67
68 #define START() \
69 masm.Reset(); \
70 simulator->ResetState(); \
71 __ PushCalleeSavedRegisters(); \
72 if (Test::trace_reg()) { \
73 __ Trace(LOG_STATE, TRACE_ENABLE); \
74 } \
75 if (Test::trace_write()) { \
76 __ Trace(LOG_WRITE, TRACE_ENABLE); \
77 } \
78 if (Test::trace_sim()) { \
79 __ Trace(LOG_DISASM, TRACE_ENABLE); \
80 } \
81 if (Test::instruction_stats()) { \
82 __ EnableInstrumentation(); \
83 }
84
85 #define END() \
86 if (Test::instruction_stats()) { \
87 __ DisableInstrumentation(); \
88 } \
89 __ Trace(LOG_ALL, TRACE_DISABLE); \
90 __ PopCalleeSavedRegisters(); \
91 __ Ret(); \
92 masm.FinalizeCode()
93
94 #define RUN() \
95 simulator->RunFrom(masm.GetBuffer()->GetStartAddress<Instruction*>())
96
97 #define TEARDOWN() delete simulator;
98
99 #else // VIXL_INCLUDE_SIMULATOR_AARCH64
100
101 #define SETUP() \
102 MacroAssembler masm; \
103 CPU::SetUp()
104
105 #define START() \
106 masm.Reset(); \
107 __ PushCalleeSavedRegisters()
108
109 #define END() \
110 __ PopCalleeSavedRegisters(); \
111 __ Ret(); \
112 masm.FinalizeCode()
113
114 #define RUN() \
115 { \
116 masm.GetBuffer()->SetExecutable(); \
117 ExecuteMemory(masm.GetBuffer()->GetStartAddress<byte*>(), \
118 masm.GetSizeOfCodeGenerated()); \
119 masm.GetBuffer()->SetWritable(); \
120 }
121
122 #define TEARDOWN()
123
124 #endif // VIXL_INCLUDE_SIMULATOR_AARCH64
125
126
127 // The maximum number of errors to report in detail for each test.
128 static const unsigned kErrorReportLimit = 8;
129
130
131 // Overloaded versions of RawbitsToDouble and RawbitsToFloat for use in the
132 // templated test functions.
133 static float rawbits_to_fp(uint32_t bits) { return RawbitsToFloat(bits); }
134
135 static double rawbits_to_fp(uint64_t bits) { return RawbitsToDouble(bits); }
136
137
138 // MacroAssembler member function pointers to pass to the test dispatchers.
139 typedef void (MacroAssembler::*Test1OpFPHelper_t)(const FPRegister& fd,
140 const FPRegister& fn);
141 typedef void (MacroAssembler::*Test2OpFPHelper_t)(const FPRegister& fd,
142 const FPRegister& fn,
143 const FPRegister& fm);
144 typedef void (MacroAssembler::*Test3OpFPHelper_t)(const FPRegister& fd,
145 const FPRegister& fn,
146 const FPRegister& fm,
147 const FPRegister& fa);
148 typedef void (MacroAssembler::*TestFPCmpHelper_t)(const FPRegister& fn,
149 const FPRegister& fm);
150 typedef void (MacroAssembler::*TestFPCmpZeroHelper_t)(const FPRegister& fn,
151 double value);
152 typedef void (MacroAssembler::*TestFPToIntHelper_t)(const Register& rd,
153 const FPRegister& fn);
154 typedef void (MacroAssembler::*TestFPToFixedHelper_t)(const Register& rd,
155 const FPRegister& fn,
156 int fbits);
157 typedef void (MacroAssembler::*TestFixedToFPHelper_t)(const FPRegister& fd,
158 const Register& rn,
159 int fbits);
160 // TODO: 'Test2OpNEONHelper_t' and 'Test2OpFPHelper_t' can be
161 // consolidated into one routine.
162 typedef void (MacroAssembler::*Test1OpNEONHelper_t)(const VRegister& vd,
163 const VRegister& vn);
164 typedef void (MacroAssembler::*Test2OpNEONHelper_t)(const VRegister& vd,
165 const VRegister& vn,
166 const VRegister& vm);
167 typedef void (MacroAssembler::*TestByElementNEONHelper_t)(const VRegister& vd,
168 const VRegister& vn,
169 const VRegister& vm,
170 int vm_index);
171 typedef void (MacroAssembler::*TestOpImmOpImmVdUpdateNEONHelper_t)(
172 const VRegister& vd, int imm1, const VRegister& vn, int imm2);
173
174 // This allows the same typename to be used for both the function pointer
175 // and the array of immediates passed to helper routines.
176 template <typename T>
177 class Test2OpImmediateNEONHelper_t {
178 public:
179 typedef void (MacroAssembler::*mnemonic)(const VRegister& vd,
180 const VRegister& vn,
181 T imm);
182 };
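// For example, Test2OpImmediateNEONHelper_t<int>::mnemonic is a pointer to a
// MacroAssembler member taking (const VRegister& vd, const VRegister& vn,
// int imm).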
183
184
185 // Maximum number of hex characters required to represent values of either
186 // templated type.
187 template <typename Ta, typename Tb>
188 static unsigned MaxHexCharCount() {
189 unsigned count = static_cast<unsigned>(std::max(sizeof(Ta), sizeof(Tb)));
190 return (count * 8) / 4;
191 }
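// For example, MaxHexCharCount<uint32_t, uint64_t>() is 16: the wider type is
// eight bytes, and each byte needs two hex characters.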
192
193
194 // Standard test dispatchers.
195
196
197 static void Test1Op_Helper(Test1OpFPHelper_t helper,
198 uintptr_t inputs,
199 unsigned inputs_length,
200 uintptr_t results,
201 unsigned d_size,
202 unsigned n_size) {
203 VIXL_ASSERT((d_size == kDRegSize) || (d_size == kSRegSize));
204 VIXL_ASSERT((n_size == kDRegSize) || (n_size == kSRegSize));
205
206 SETUP();
207 START();
208
209 // Roll up the loop to keep the code size down.
210 Label loop_n;
211
212 Register out = x0;
213 Register inputs_base = x1;
214 Register length = w2;
215 Register index_n = w3;
216
217 const int n_index_shift =
218 (n_size == kDRegSize) ? kDRegSizeInBytesLog2 : kSRegSizeInBytesLog2;
219
220 FPRegister fd = (d_size == kDRegSize) ? d0 : s0;
221 FPRegister fn = (n_size == kDRegSize) ? d1 : s1;
222
223 __ Mov(out, results);
224 __ Mov(inputs_base, inputs);
225 __ Mov(length, inputs_length);
226
227 __ Mov(index_n, 0);
228 __ Bind(&loop_n);
229 __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, n_index_shift));
230
231 {
232 SingleEmissionCheckScope guard(&masm);
233 (masm.*helper)(fd, fn);
234 }
235 __ Str(fd, MemOperand(out, fd.GetSizeInBytes(), PostIndex));
236
237 __ Add(index_n, index_n, 1);
238 __ Cmp(index_n, inputs_length);
239 __ B(lo, &loop_n);
240
241 END();
242 RUN();
243 TEARDOWN();
244 }
245
246
247 // Test FP instructions. The inputs[] and expected[] arrays should be arrays of
248 // rawbits representations of doubles or floats. This ensures that exact bit
249 // comparisons can be performed.
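// The layout mirrors the inputs: expected[i] holds the rawbits result of
// applying the instruction to inputs[i], so expected_length must equal
// inputs_length.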
250 template <typename Tn, typename Td>
251 static void Test1Op(const char* name,
252 Test1OpFPHelper_t helper,
253 const Tn inputs[],
254 unsigned inputs_length,
255 const Td expected[],
256 unsigned expected_length) {
257 VIXL_ASSERT(inputs_length > 0);
258
259 const unsigned results_length = inputs_length;
260 Td* results = new Td[results_length];
261
262 const unsigned d_bits = sizeof(Td) * 8;
263 const unsigned n_bits = sizeof(Tn) * 8;
264
265 Test1Op_Helper(helper,
266 reinterpret_cast<uintptr_t>(inputs),
267 inputs_length,
268 reinterpret_cast<uintptr_t>(results),
269 d_bits,
270 n_bits);
271
272 if (Test::generate_test_trace()) {
273 // Print the results.
274 printf("const uint%u_t kExpected_%s[] = {\n", d_bits, name);
275 for (unsigned d = 0; d < results_length; d++) {
276 printf(" 0x%0*" PRIx64 ",\n",
277 d_bits / 4,
278 static_cast<uint64_t>(results[d]));
279 }
280 printf("};\n");
281 printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
282 } else {
283 // Check the results.
284 VIXL_CHECK(expected_length == results_length);
285 unsigned error_count = 0;
286 unsigned d = 0;
287 for (unsigned n = 0; n < inputs_length; n++, d++) {
288 if (results[d] != expected[d]) {
289 if (++error_count > kErrorReportLimit) continue;
290
291 printf("%s 0x%0*" PRIx64 " (%s %g):\n",
292 name,
293 n_bits / 4,
294 static_cast<uint64_t>(inputs[n]),
295 name,
296 rawbits_to_fp(inputs[n]));
297 printf(" Expected: 0x%0*" PRIx64 " (%g)\n",
298 d_bits / 4,
299 static_cast<uint64_t>(expected[d]),
300 rawbits_to_fp(expected[d]));
301 printf(" Found: 0x%0*" PRIx64 " (%g)\n",
302 d_bits / 4,
303 static_cast<uint64_t>(results[d]),
304 rawbits_to_fp(results[d]));
305 printf("\n");
306 }
307 }
308 VIXL_ASSERT(d == expected_length);
309 if (error_count > kErrorReportLimit) {
310 printf("%u other errors follow.\n", error_count - kErrorReportLimit);
311 }
312 VIXL_CHECK(error_count == 0);
313 }
314 delete[] results;
315 }
316
317
318 static void Test2Op_Helper(Test2OpFPHelper_t helper,
319 uintptr_t inputs,
320 unsigned inputs_length,
321 uintptr_t results,
322 unsigned reg_size) {
323 VIXL_ASSERT((reg_size == kDRegSize) || (reg_size == kSRegSize));
324
325 SETUP();
326 START();
327
328 // Roll up the loop to keep the code size down.
329 Label loop_n, loop_m;
330
331 Register out = x0;
332 Register inputs_base = x1;
333 Register length = w2;
334 Register index_n = w3;
335 Register index_m = w4;
336
337 bool double_op = reg_size == kDRegSize;
338 const int index_shift =
339 double_op ? kDRegSizeInBytesLog2 : kSRegSizeInBytesLog2;
340
341 FPRegister fd = double_op ? d0 : s0;
342 FPRegister fn = double_op ? d1 : s1;
343 FPRegister fm = double_op ? d2 : s2;
344
345 __ Mov(out, results);
346 __ Mov(inputs_base, inputs);
347 __ Mov(length, inputs_length);
348
349 __ Mov(index_n, 0);
350 __ Bind(&loop_n);
351 __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, index_shift));
352
353 __ Mov(index_m, 0);
354 __ Bind(&loop_m);
355 __ Ldr(fm, MemOperand(inputs_base, index_m, UXTW, index_shift));
356
357 {
358 SingleEmissionCheckScope guard(&masm);
359 (masm.*helper)(fd, fn, fm);
360 }
361 __ Str(fd, MemOperand(out, fd.GetSizeInBytes(), PostIndex));
362
363 __ Add(index_m, index_m, 1);
364 __ Cmp(index_m, inputs_length);
365 __ B(lo, &loop_m);
366
367 __ Add(index_n, index_n, 1);
368 __ Cmp(index_n, inputs_length);
369 __ B(lo, &loop_n);
370
371 END();
372 RUN();
373 TEARDOWN();
374 }
375
376
377 // Test FP instructions. The inputs[] and expected[] arrays should be arrays of
378 // rawbits representations of doubles or floats. This ensures that exact bit
379 // comparisons can be performed.
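// A result is generated for every (n, m) pair of inputs, with m varying
// fastest, so expected[] must hold inputs_length * inputs_length entries laid
// out as expected[(n * inputs_length) + m].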
380 template <typename T>
381 static void Test2Op(const char* name,
382 Test2OpFPHelper_t helper,
383 const T inputs[],
384 unsigned inputs_length,
385 const T expected[],
386 unsigned expected_length) {
387 VIXL_ASSERT(inputs_length > 0);
388
389 const unsigned results_length = inputs_length * inputs_length;
390 T* results = new T[results_length];
391
392 const unsigned bits = sizeof(T) * 8;
393
394 Test2Op_Helper(helper,
395 reinterpret_cast<uintptr_t>(inputs),
396 inputs_length,
397 reinterpret_cast<uintptr_t>(results),
398 bits);
399
400 if (Test::generate_test_trace()) {
401 // Print the results.
402 printf("const uint%u_t kExpected_%s[] = {\n", bits, name);
403 for (unsigned d = 0; d < results_length; d++) {
404 printf(" 0x%0*" PRIx64 ",\n",
405 bits / 4,
406 static_cast<uint64_t>(results[d]));
407 }
408 printf("};\n");
409 printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
410 } else {
411 // Check the results.
412 VIXL_CHECK(expected_length == results_length);
413 unsigned error_count = 0;
414 unsigned d = 0;
415 for (unsigned n = 0; n < inputs_length; n++) {
416 for (unsigned m = 0; m < inputs_length; m++, d++) {
417 if (results[d] != expected[d]) {
418 if (++error_count > kErrorReportLimit) continue;
419
420 printf("%s 0x%0*" PRIx64 ", 0x%0*" PRIx64 " (%s %g %g):\n",
421 name,
422 bits / 4,
423 static_cast<uint64_t>(inputs[n]),
424 bits / 4,
425 static_cast<uint64_t>(inputs[m]),
426 name,
427 rawbits_to_fp(inputs[n]),
428 rawbits_to_fp(inputs[m]));
429 printf(" Expected: 0x%0*" PRIx64 " (%g)\n",
430 bits / 4,
431 static_cast<uint64_t>(expected[d]),
432 rawbits_to_fp(expected[d]));
433 printf(" Found: 0x%0*" PRIx64 " (%g)\n",
434 bits / 4,
435 static_cast<uint64_t>(results[d]),
436 rawbits_to_fp(results[d]));
437 printf("\n");
438 }
439 }
440 }
441 VIXL_ASSERT(d == expected_length);
442 if (error_count > kErrorReportLimit) {
443 printf("%u other errors follow.\n", error_count - kErrorReportLimit);
444 }
445 VIXL_CHECK(error_count == 0);
446 }
447 delete[] results;
448 }
449
450
451 static void Test3Op_Helper(Test3OpFPHelper_t helper,
452 uintptr_t inputs,
453 unsigned inputs_length,
454 uintptr_t results,
455 unsigned reg_size) {
456 VIXL_ASSERT((reg_size == kDRegSize) || (reg_size == kSRegSize));
457
458 SETUP();
459 START();
460
461 // Roll up the loop to keep the code size down.
462 Label loop_n, loop_m, loop_a;
463
464 Register out = x0;
465 Register inputs_base = x1;
466 Register length = w2;
467 Register index_n = w3;
468 Register index_m = w4;
469 Register index_a = w5;
470
471 bool double_op = reg_size == kDRegSize;
472 const int index_shift =
473 double_op ? kDRegSizeInBytesLog2 : kSRegSizeInBytesLog2;
474
475 FPRegister fd = double_op ? d0 : s0;
476 FPRegister fn = double_op ? d1 : s1;
477 FPRegister fm = double_op ? d2 : s2;
478 FPRegister fa = double_op ? d3 : s3;
479
480 __ Mov(out, results);
481 __ Mov(inputs_base, inputs);
482 __ Mov(length, inputs_length);
483
484 __ Mov(index_n, 0);
485 __ Bind(&loop_n);
486 __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, index_shift));
487
488 __ Mov(index_m, 0);
489 __ Bind(&loop_m);
490 __ Ldr(fm, MemOperand(inputs_base, index_m, UXTW, index_shift));
491
492 __ Mov(index_a, 0);
493 __ Bind(&loop_a);
494 __ Ldr(fa, MemOperand(inputs_base, index_a, UXTW, index_shift));
495
496 {
497 SingleEmissionCheckScope guard(&masm);
498 (masm.*helper)(fd, fn, fm, fa);
499 }
500 __ Str(fd, MemOperand(out, fd.GetSizeInBytes(), PostIndex));
501
502 __ Add(index_a, index_a, 1);
503 __ Cmp(index_a, inputs_length);
504 __ B(lo, &loop_a);
505
506 __ Add(index_m, index_m, 1);
507 __ Cmp(index_m, inputs_length);
508 __ B(lo, &loop_m);
509
510 __ Add(index_n, index_n, 1);
511 __ Cmp(index_n, inputs_length);
512 __ B(lo, &loop_n);
513
514 END();
515 RUN();
516 TEARDOWN();
517 }
518
519
520 // Test FP instructions. The inputs[] and expected[] arrays should be arrays of
521 // rawbits representations of doubles or floats. This ensures that exact bit
522 // comparisons can be performed.
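// Every (n, m, a) combination of inputs is tested, with 'a' varying fastest,
// giving inputs_length^3 expected values.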
523 template <typename T>
524 static void Test3Op(const char* name,
525 Test3OpFPHelper_t helper,
526 const T inputs[],
527 unsigned inputs_length,
528 const T expected[],
529 unsigned expected_length) {
530 VIXL_ASSERT(inputs_length > 0);
531
532 const unsigned results_length = inputs_length * inputs_length * inputs_length;
533 T* results = new T[results_length];
534
535 const unsigned bits = sizeof(T) * 8;
536
537 Test3Op_Helper(helper,
538 reinterpret_cast<uintptr_t>(inputs),
539 inputs_length,
540 reinterpret_cast<uintptr_t>(results),
541 bits);
542
543 if (Test::generate_test_trace()) {
544 // Print the results.
545 printf("const uint%u_t kExpected_%s[] = {\n", bits, name);
546 for (unsigned d = 0; d < results_length; d++) {
547 printf(" 0x%0*" PRIx64 ",\n",
548 bits / 4,
549 static_cast<uint64_t>(results[d]));
550 }
551 printf("};\n");
552 printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
553 } else {
554 // Check the results.
555 VIXL_CHECK(expected_length == results_length);
556 unsigned error_count = 0;
557 unsigned d = 0;
558 for (unsigned n = 0; n < inputs_length; n++) {
559 for (unsigned m = 0; m < inputs_length; m++) {
560 for (unsigned a = 0; a < inputs_length; a++, d++) {
561 if (results[d] != expected[d]) {
562 if (++error_count > kErrorReportLimit) continue;
563
564 printf("%s 0x%0*" PRIx64 ", 0x%0*" PRIx64 ", 0x%0*" PRIx64
565 " (%s %g %g %g):\n",
566 name,
567 bits / 4,
568 static_cast<uint64_t>(inputs[n]),
569 bits / 4,
570 static_cast<uint64_t>(inputs[m]),
571 bits / 4,
572 static_cast<uint64_t>(inputs[a]),
573 name,
574 rawbits_to_fp(inputs[n]),
575 rawbits_to_fp(inputs[m]),
576 rawbits_to_fp(inputs[a]));
577 printf(" Expected: 0x%0*" PRIx64 " (%g)\n",
578 bits / 4,
579 static_cast<uint64_t>(expected[d]),
580 rawbits_to_fp(expected[d]));
581 printf(" Found: 0x%0*" PRIx64 " (%g)\n",
582 bits / 4,
583 static_cast<uint64_t>(results[d]),
584 rawbits_to_fp(results[d]));
585 printf("\n");
586 }
587 }
588 }
589 }
590 VIXL_ASSERT(d == expected_length);
591 if (error_count > kErrorReportLimit) {
592 printf("%u other errors follow.\n", error_count - kErrorReportLimit);
593 }
594 VIXL_CHECK(error_count == 0);
595 }
596 delete[] results;
597 }
598
599
600 static void TestCmp_Helper(TestFPCmpHelper_t helper,
601 uintptr_t inputs,
602 unsigned inputs_length,
603 uintptr_t results,
604 unsigned reg_size) {
605 VIXL_ASSERT((reg_size == kDRegSize) || (reg_size == kSRegSize));
606
607 SETUP();
608 START();
609
610 // Roll up the loop to keep the code size down.
611 Label loop_n, loop_m;
612
613 Register out = x0;
614 Register inputs_base = x1;
615 Register length = w2;
616 Register index_n = w3;
617 Register index_m = w4;
618 Register flags = x5;
619
620 bool double_op = reg_size == kDRegSize;
621 const int index_shift =
622 double_op ? kDRegSizeInBytesLog2 : kSRegSizeInBytesLog2;
623
624 FPRegister fn = double_op ? d1 : s1;
625 FPRegister fm = double_op ? d2 : s2;
626
627 __ Mov(out, results);
628 __ Mov(inputs_base, inputs);
629 __ Mov(length, inputs_length);
630
631 __ Mov(index_n, 0);
632 __ Bind(&loop_n);
633 __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, index_shift));
634
635 __ Mov(index_m, 0);
636 __ Bind(&loop_m);
637 __ Ldr(fm, MemOperand(inputs_base, index_m, UXTW, index_shift));
638
639 {
640 SingleEmissionCheckScope guard(&masm);
641 (masm.*helper)(fn, fm);
642 }
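// N, Z, C and V live in bits 31:28 of the NZCV system register; extract them
// into the low nibble so that each result fits in a single byte.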
643 __ Mrs(flags, NZCV);
644 __ Ubfx(flags, flags, 28, 4);
645 __ Strb(flags, MemOperand(out, 1, PostIndex));
646
647 __ Add(index_m, index_m, 1);
648 __ Cmp(index_m, inputs_length);
649 __ B(lo, &loop_m);
650
651 __ Add(index_n, index_n, 1);
652 __ Cmp(index_n, inputs_length);
653 __ B(lo, &loop_n);
654
655 END();
656 RUN();
657 TEARDOWN();
658 }
659
660
661 // Test FP instructions. The inputs[] and expected[] arrays should be arrays of
662 // rawbits representations of doubles or floats. This ensures that exact bit
663 // comparisons can be performed.
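// Each expected[] entry is the 4-bit NZCV nibble produced by comparing
// inputs[n] with inputs[m], with m varying fastest.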
664 template <typename T>
665 static void TestCmp(const char* name,
666 TestFPCmpHelper_t helper,
667 const T inputs[],
668 unsigned inputs_length,
669 const uint8_t expected[],
670 unsigned expected_length) {
671 VIXL_ASSERT(inputs_length > 0);
672
673 const unsigned results_length = inputs_length * inputs_length;
674 uint8_t* results = new uint8_t[results_length];
675
676 const unsigned bits = sizeof(T) * 8;
677
678 TestCmp_Helper(helper,
679 reinterpret_cast<uintptr_t>(inputs),
680 inputs_length,
681 reinterpret_cast<uintptr_t>(results),
682 bits);
683
684 if (Test::generate_test_trace()) {
685 // Print the results.
686 printf("const uint8_t kExpected_%s[] = {\n", name);
687 for (unsigned d = 0; d < results_length; d++) {
688 // Each NZCV result only requires 4 bits.
689 VIXL_ASSERT((results[d] & 0xf) == results[d]);
690 printf(" 0x%" PRIx8 ",\n", results[d]);
691 }
692 printf("};\n");
693 printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
694 } else {
695 // Check the results.
696 VIXL_CHECK(expected_length == results_length);
697 unsigned error_count = 0;
698 unsigned d = 0;
699 for (unsigned n = 0; n < inputs_length; n++) {
700 for (unsigned m = 0; m < inputs_length; m++, d++) {
701 if (results[d] != expected[d]) {
702 if (++error_count > kErrorReportLimit) continue;
703
704 printf("%s 0x%0*" PRIx64 ", 0x%0*" PRIx64 " (%s %g %g):\n",
705 name,
706 bits / 4,
707 static_cast<uint64_t>(inputs[n]),
708 bits / 4,
709 static_cast<uint64_t>(inputs[m]),
710 name,
711 rawbits_to_fp(inputs[n]),
712 rawbits_to_fp(inputs[m]));
713 printf(" Expected: %c%c%c%c (0x%" PRIx8 ")\n",
714 (expected[d] & 0x8) ? 'N' : 'n',
715 (expected[d] & 0x4) ? 'Z' : 'z',
716 (expected[d] & 0x2) ? 'C' : 'c',
717 (expected[d] & 0x1) ? 'V' : 'v',
718 expected[d]);
719 printf(" Found: %c%c%c%c (0x%" PRIx8 ")\n",
720 (results[d] & 0x8) ? 'N' : 'n',
721 (results[d] & 0x4) ? 'Z' : 'z',
722 (results[d] & 0x2) ? 'C' : 'c',
723 (results[d] & 0x1) ? 'V' : 'v',
724 results[d]);
725 printf("\n");
726 }
727 }
728 }
729 VIXL_ASSERT(d == expected_length);
730 if (error_count > kErrorReportLimit) {
731 printf("%u other errors follow.\n", error_count - kErrorReportLimit);
732 }
733 VIXL_CHECK(error_count == 0);
734 }
735 delete[] results;
736 }
737
738
739 static void TestCmpZero_Helper(TestFPCmpZeroHelper_t helper,
740 uintptr_t inputs,
741 unsigned inputs_length,
742 uintptr_t results,
743 unsigned reg_size) {
744 VIXL_ASSERT((reg_size == kDRegSize) || (reg_size == kSRegSize));
745
746 SETUP();
747 START();
748
749 // Roll up the loop to keep the code size down.
750 Label loop_n, loop_m;
751
752 Register out = x0;
753 Register inputs_base = x1;
754 Register length = w2;
755 Register index_n = w3;
756 Register flags = x4;
757
758 bool double_op = reg_size == kDRegSize;
759 const int index_shift =
760 double_op ? kDRegSizeInBytesLog2 : kSRegSizeInBytesLog2;
761
762 FPRegister fn = double_op ? d1 : s1;
763
764 __ Mov(out, results);
765 __ Mov(inputs_base, inputs);
766 __ Mov(length, inputs_length);
767
768 __ Mov(index_n, 0);
769 __ Bind(&loop_n);
770 __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, index_shift));
771
772 {
773 SingleEmissionCheckScope guard(&masm);
774 (masm.*helper)(fn, 0.0);
775 }
776 __ Mrs(flags, NZCV);
777 __ Ubfx(flags, flags, 28, 4);
778 __ Strb(flags, MemOperand(out, 1, PostIndex));
779
780 __ Add(index_n, index_n, 1);
781 __ Cmp(index_n, inputs_length);
782 __ B(lo, &loop_n);
783
784 END();
785 RUN();
786 TEARDOWN();
787 }
788
789
790 // Test FP instructions. The inputs[] and expected[] arrays should be arrays of
791 // rawbits representations of doubles or floats. This ensures that exact bit
792 // comparisons can be performed.
793 template <typename T>
794 static void TestCmpZero(const char* name,
795 TestFPCmpZeroHelper_t helper,
796 const T inputs[],
797 unsigned inputs_length,
798 const uint8_t expected[],
799 unsigned expected_length) {
800 VIXL_ASSERT(inputs_length > 0);
801
802 const unsigned results_length = inputs_length;
803 uint8_t* results = new uint8_t[results_length];
804
805 const unsigned bits = sizeof(T) * 8;
806
807 TestCmpZero_Helper(helper,
808 reinterpret_cast<uintptr_t>(inputs),
809 inputs_length,
810 reinterpret_cast<uintptr_t>(results),
811 bits);
812
813 if (Test::generate_test_trace()) {
814 // Print the results.
815 printf("const uint8_t kExpected_%s[] = {\n", name);
816 for (unsigned d = 0; d < results_length; d++) {
817 // Each NZCV result only requires 4 bits.
818 VIXL_ASSERT((results[d] & 0xf) == results[d]);
819 printf(" 0x%" PRIx8 ",\n", results[d]);
820 }
821 printf("};\n");
822 printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
823 } else {
824 // Check the results.
825 VIXL_CHECK(expected_length == results_length);
826 unsigned error_count = 0;
827 unsigned d = 0;
828 for (unsigned n = 0; n < inputs_length; n++, d++) {
829 if (results[d] != expected[d]) {
830 if (++error_count > kErrorReportLimit) continue;
831
832 printf("%s 0x%0*" PRIx64 ", 0x%0*u (%s %g #0.0):\n",
833 name,
834 bits / 4,
835 static_cast<uint64_t>(inputs[n]),
836 bits / 4,
837 0,
838 name,
839 rawbits_to_fp(inputs[n]));
840 printf(" Expected: %c%c%c%c (0x%" PRIx8 ")\n",
841 (expected[d] & 0x8) ? 'N' : 'n',
842 (expected[d] & 0x4) ? 'Z' : 'z',
843 (expected[d] & 0x2) ? 'C' : 'c',
844 (expected[d] & 0x1) ? 'V' : 'v',
845 expected[d]);
846 printf(" Found: %c%c%c%c (0x%" PRIx8 ")\n",
847 (results[d] & 0x8) ? 'N' : 'n',
848 (results[d] & 0x4) ? 'Z' : 'z',
849 (results[d] & 0x2) ? 'C' : 'c',
850 (results[d] & 0x1) ? 'V' : 'v',
851 results[d]);
852 printf("\n");
853 }
854 }
855 VIXL_ASSERT(d == expected_length);
856 if (error_count > kErrorReportLimit) {
857 printf("%u other errors follow.\n", error_count - kErrorReportLimit);
858 }
859 VIXL_CHECK(error_count == 0);
860 }
861 delete[] results;
862 }
863
864
865 static void TestFPToFixed_Helper(TestFPToFixedHelper_t helper,
866 uintptr_t inputs,
867 unsigned inputs_length,
868 uintptr_t results,
869 unsigned d_size,
870 unsigned n_size) {
871 VIXL_ASSERT((d_size == kXRegSize) || (d_size == kWRegSize));
872 VIXL_ASSERT((n_size == kDRegSize) || (n_size == kSRegSize));
873
874 SETUP();
875 START();
876
877 // Roll up the loop to keep the code size down.
878 Label loop_n;
879
880 Register out = x0;
881 Register inputs_base = x1;
882 Register length = w2;
883 Register index_n = w3;
884
885 const int n_index_shift =
886 (n_size == kDRegSize) ? kDRegSizeInBytesLog2 : kSRegSizeInBytesLog2;
887
888 Register rd = (d_size == kXRegSize) ? Register(x10) : Register(w10);
889 FPRegister fn = (n_size == kDRegSize) ? d1 : s1;
890
891 __ Mov(out, results);
892 __ Mov(inputs_base, inputs);
893 __ Mov(length, inputs_length);
894
895 __ Mov(index_n, 0);
896 __ Bind(&loop_n);
897 __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, n_index_shift));
898
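// Convert each input once for every valid fbits value (0 up to the destination
// register width), so each input produces (d_size + 1) results.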
899 for (unsigned fbits = 0; fbits <= d_size; ++fbits) {
900 {
901 SingleEmissionCheckScope guard(&masm);
902 (masm.*helper)(rd, fn, fbits);
903 }
904 __ Str(rd, MemOperand(out, rd.GetSizeInBytes(), PostIndex));
905 }
906
907 __ Add(index_n, index_n, 1);
908 __ Cmp(index_n, inputs_length);
909 __ B(lo, &loop_n);
910
911 END();
912 RUN();
913 TEARDOWN();
914 }
915
916
917 static void TestFPToInt_Helper(TestFPToIntHelper_t helper,
918 uintptr_t inputs,
919 unsigned inputs_length,
920 uintptr_t results,
921 unsigned d_size,
922 unsigned n_size) {
923 VIXL_ASSERT((d_size == kXRegSize) || (d_size == kWRegSize));
924 VIXL_ASSERT((n_size == kDRegSize) || (n_size == kSRegSize));
925
926 SETUP();
927 START();
928
929 // Roll up the loop to keep the code size down.
930 Label loop_n;
931
932 Register out = x0;
933 Register inputs_base = x1;
934 Register length = w2;
935 Register index_n = w3;
936
937 const int n_index_shift =
938 (n_size == kDRegSize) ? kDRegSizeInBytesLog2 : kSRegSizeInBytesLog2;
939
940 Register rd = (d_size == kXRegSize) ? Register(x10) : Register(w10);
941 FPRegister fn = (n_size == kDRegSize) ? d1 : s1;
942
943 __ Mov(out, results);
944 __ Mov(inputs_base, inputs);
945 __ Mov(length, inputs_length);
946
947 __ Mov(index_n, 0);
948 __ Bind(&loop_n);
949 __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, n_index_shift));
950
951 {
952 SingleEmissionCheckScope guard(&masm);
953 (masm.*helper)(rd, fn);
954 }
955 __ Str(rd, MemOperand(out, rd.GetSizeInBytes(), PostIndex));
956
957 __ Add(index_n, index_n, 1);
958 __ Cmp(index_n, inputs_length);
959 __ B(lo, &loop_n);
960
961 END();
962 RUN();
963 TEARDOWN();
964 }
965
966
967 // Test FP instructions.
968 // - The inputs[] array should be an array of rawbits representations of
969 // doubles or floats. This ensures that exact bit comparisons can be
970 // performed.
971 // - The expected[] array should be an array of signed integers.
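//
// A hypothetical invocation (the array names are illustrative only):
//
//   TestFPToS("fcvtns_xd",
//             &MacroAssembler::Fcvtns,
//             kHypotheticalDoubleInputs,
//             kHypotheticalDoubleInputCount,
//             kHypotheticalExpected_fcvtns_xd,
//             kHypotheticalExpectedCount_fcvtns_xd);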
972 template <typename Tn, typename Td>
973 static void TestFPToS(const char* name,
974 TestFPToIntHelper_t helper,
975 const Tn inputs[],
976 unsigned inputs_length,
977 const Td expected[],
978 unsigned expected_length) {
979 VIXL_ASSERT(inputs_length > 0);
980
981 const unsigned results_length = inputs_length;
982 Td* results = new Td[results_length];
983
984 const unsigned d_bits = sizeof(Td) * 8;
985 const unsigned n_bits = sizeof(Tn) * 8;
986
987 TestFPToInt_Helper(helper,
988 reinterpret_cast<uintptr_t>(inputs),
989 inputs_length,
990 reinterpret_cast<uintptr_t>(results),
991 d_bits,
992 n_bits);
993
994 if (Test::generate_test_trace()) {
995 // Print the results.
996 printf("const int%u_t kExpected_%s[] = {\n", d_bits, name);
997 // There is no simple C++ literal for INT*_MIN that doesn't produce
998 // warnings, so we use an appropriate constant in that case instead.
999 // Deriving int_d_min in this way (rather than just checking INT64_MIN and
1000 // the like) avoids warnings about comparing values with differing ranges.
1001 const int64_t int_d_max = (UINT64_C(1) << (d_bits - 1)) - 1;
1002 const int64_t int_d_min = -(int_d_max)-1;
1003 for (unsigned d = 0; d < results_length; d++) {
1004 if (results[d] == int_d_min) {
1005 printf(" -INT%u_C(%" PRId64 ") - 1,\n", d_bits, int_d_max);
1006 } else {
1007 // Some constants (such as those between INT32_MAX and UINT32_MAX)
1008 // trigger compiler warnings. To avoid these warnings, use an
1009 // appropriate macro to make the type explicit.
1010 int64_t result_int64 = static_cast<int64_t>(results[d]);
1011 if (result_int64 >= 0) {
1012 printf(" INT%u_C(%" PRId64 "),\n", d_bits, result_int64);
1013 } else {
1014 printf(" -INT%u_C(%" PRId64 "),\n", d_bits, -result_int64);
1015 }
1016 }
1017 }
1018 printf("};\n");
1019 printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
1020 } else {
1021 // Check the results.
1022 VIXL_CHECK(expected_length == results_length);
1023 unsigned error_count = 0;
1024 unsigned d = 0;
1025 for (unsigned n = 0; n < inputs_length; n++, d++) {
1026 if (results[d] != expected[d]) {
1027 if (++error_count > kErrorReportLimit) continue;
1028
1029 printf("%s 0x%0*" PRIx64 " (%s %g):\n",
1030 name,
1031 n_bits / 4,
1032 static_cast<uint64_t>(inputs[n]),
1033 name,
1034 rawbits_to_fp(inputs[n]));
1035 printf(" Expected: 0x%0*" PRIx64 " (%" PRId64 ")\n",
1036 d_bits / 4,
1037 static_cast<uint64_t>(expected[d]),
1038 static_cast<int64_t>(expected[d]));
1039 printf(" Found: 0x%0*" PRIx64 " (%" PRId64 ")\n",
1040 d_bits / 4,
1041 static_cast<uint64_t>(results[d]),
1042 static_cast<int64_t>(results[d]));
1043 printf("\n");
1044 }
1045 }
1046 VIXL_ASSERT(d == expected_length);
1047 if (error_count > kErrorReportLimit) {
1048 printf("%u other errors follow.\n", error_count - kErrorReportLimit);
1049 }
1050 VIXL_CHECK(error_count == 0);
1051 }
1052 delete[] results;
1053 }
1054
1055
1056 // Test FP instructions.
1057 // - The inputs[] array should be an array of rawbits representations of
1058 // doubles or floats. This ensures that exact bit comparisons can be
1059 // performed.
1060 // - The expected[] array should be an array of unsigned integers.
1061 template <typename Tn, typename Td>
1062 static void TestFPToU(const char* name,
1063 TestFPToIntHelper_t helper,
1064 const Tn inputs[],
1065 unsigned inputs_length,
1066 const Td expected[],
1067 unsigned expected_length) {
1068 VIXL_ASSERT(inputs_length > 0);
1069
1070 const unsigned results_length = inputs_length;
1071 Td* results = new Td[results_length];
1072
1073 const unsigned d_bits = sizeof(Td) * 8;
1074 const unsigned n_bits = sizeof(Tn) * 8;
1075
1076 TestFPToInt_Helper(helper,
1077 reinterpret_cast<uintptr_t>(inputs),
1078 inputs_length,
1079 reinterpret_cast<uintptr_t>(results),
1080 d_bits,
1081 n_bits);
1082
1083 if (Test::generate_test_trace()) {
1084 // Print the results.
1085 printf("const uint%u_t kExpected_%s[] = {\n", d_bits, name);
1086 for (unsigned d = 0; d < results_length; d++) {
1087 printf(" %" PRIu64 "u,\n", static_cast<uint64_t>(results[d]));
1088 }
1089 printf("};\n");
1090 printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
1091 } else {
1092 // Check the results.
1093 VIXL_CHECK(expected_length == results_length);
1094 unsigned error_count = 0;
1095 unsigned d = 0;
1096 for (unsigned n = 0; n < inputs_length; n++, d++) {
1097 if (results[d] != expected[d]) {
1098 if (++error_count > kErrorReportLimit) continue;
1099
1100 printf("%s 0x%0*" PRIx64 " (%s %g):\n",
1101 name,
1102 n_bits / 4,
1103 static_cast<uint64_t>(inputs[n]),
1104 name,
1105 rawbits_to_fp(inputs[n]));
1106 printf(" Expected: 0x%0*" PRIx64 " (%" PRIu64 ")\n",
1107 d_bits / 4,
1108 static_cast<uint64_t>(expected[d]),
1109 static_cast<uint64_t>(expected[d]));
1110 printf(" Found: 0x%0*" PRIx64 " (%" PRIu64 ")\n",
1111 d_bits / 4,
1112 static_cast<uint64_t>(results[d]),
1113 static_cast<uint64_t>(results[d]));
1114 printf("\n");
1115 }
1116 }
1117 VIXL_ASSERT(d == expected_length);
1118 if (error_count > kErrorReportLimit) {
1119 printf("%u other errors follow.\n", error_count - kErrorReportLimit);
1120 }
1121 VIXL_CHECK(error_count == 0);
1122 }
1123 delete[] results;
1124 }
1125
1126
1127 // Test FP instructions.
1128 // - The inputs[] array should be an array of rawbits representations of
1129 // doubles or floats. This ensures that exact bit comparisons can be
1130 // performed.
1131 // - The expected[] array should be an array of signed integers.
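// - For each input, expected[] must hold one value per fbits setting (0 up to
//   the destination register width inclusive), with fbits varying fastest.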
1132 template <typename Tn, typename Td>
1133 static void TestFPToFixedS(const char* name,
1134 TestFPToFixedHelper_t helper,
1135 const Tn inputs[],
1136 unsigned inputs_length,
1137 const Td expected[],
1138 unsigned expected_length) {
1139 VIXL_ASSERT(inputs_length > 0);
1140
1141 const unsigned d_bits = sizeof(Td) * 8;
1142 const unsigned n_bits = sizeof(Tn) * 8;
1143
1144 const unsigned results_length = inputs_length * (d_bits + 1);
1145 Td* results = new Td[results_length];
1146
1147 TestFPToFixed_Helper(helper,
1148 reinterpret_cast<uintptr_t>(inputs),
1149 inputs_length,
1150 reinterpret_cast<uintptr_t>(results),
1151 d_bits,
1152 n_bits);
1153
1154 if (Test::generate_test_trace()) {
1155 // Print the results.
1156 printf("const int%u_t kExpected_%s[] = {\n", d_bits, name);
1157 // There is no simple C++ literal for INT*_MIN that doesn't produce
1158 // warnings, so we use an appropriate constant in that case instead.
1159 // Deriving int_d_min in this way (rather than just checking INT64_MIN and
1160 // the like) avoids warnings about comparing values with differing ranges.
1161 const int64_t int_d_max = (UINT64_C(1) << (d_bits - 1)) - 1;
1162 const int64_t int_d_min = -(int_d_max)-1;
1163 for (unsigned d = 0; d < results_length; d++) {
1164 if (results[d] == int_d_min) {
1165 printf(" -INT%u_C(%" PRId64 ") - 1,\n", d_bits, int_d_max);
1166 } else {
1167 // Some constants (such as those between INT32_MAX and UINT32_MAX)
1168 // trigger compiler warnings. To avoid these warnings, use an
1169 // appropriate macro to make the type explicit.
1170 int64_t result_int64 = static_cast<int64_t>(results[d]);
1171 if (result_int64 >= 0) {
1172 printf(" INT%u_C(%" PRId64 "),\n", d_bits, result_int64);
1173 } else {
1174 printf(" -INT%u_C(%" PRId64 "),\n", d_bits, -result_int64);
1175 }
1176 }
1177 }
1178 printf("};\n");
1179 printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
1180 } else {
1181 // Check the results.
1182 VIXL_CHECK(expected_length == results_length);
1183 unsigned error_count = 0;
1184 unsigned d = 0;
1185 for (unsigned n = 0; n < inputs_length; n++) {
1186 for (unsigned fbits = 0; fbits <= d_bits; ++fbits, d++) {
1187 if (results[d] != expected[d]) {
1188 if (++error_count > kErrorReportLimit) continue;
1189
1190 printf("%s 0x%0*" PRIx64 " #%d (%s %g #%d):\n",
1191 name,
1192 n_bits / 4,
1193 static_cast<uint64_t>(inputs[n]),
1194 fbits,
1195 name,
1196 rawbits_to_fp(inputs[n]),
1197 fbits);
1198 printf(" Expected: 0x%0*" PRIx64 " (%" PRId64 ")\n",
1199 d_bits / 4,
1200 static_cast<uint64_t>(expected[d]),
1201 static_cast<int64_t>(expected[d]));
1202 printf(" Found: 0x%0*" PRIx64 " (%" PRId64 ")\n",
1203 d_bits / 4,
1204 static_cast<uint64_t>(results[d]),
1205 static_cast<int64_t>(results[d]));
1206 printf("\n");
1207 }
1208 }
1209 }
1210 VIXL_ASSERT(d == expected_length);
1211 if (error_count > kErrorReportLimit) {
1212 printf("%u other errors follow.\n", error_count - kErrorReportLimit);
1213 }
1214 VIXL_CHECK(error_count == 0);
1215 }
1216 delete[] results;
1217 }
1218
1219
1220 // Test FP instructions.
1221 // - The inputs[] array should be an array of rawbits representations of
1222 // doubles or floats. This ensures that exact bit comparisons can be
1223 // performed.
1224 // - The expected[] array should be an array of unsigned integers.
1225 template <typename Tn, typename Td>
1226 static void TestFPToFixedU(const char* name,
1227 TestFPToFixedHelper_t helper,
1228 const Tn inputs[],
1229 unsigned inputs_length,
1230 const Td expected[],
1231 unsigned expected_length) {
1232 VIXL_ASSERT(inputs_length > 0);
1233
1234 const unsigned d_bits = sizeof(Td) * 8;
1235 const unsigned n_bits = sizeof(Tn) * 8;
1236
1237 const unsigned results_length = inputs_length * (d_bits + 1);
1238 Td* results = new Td[results_length];
1239
1240 TestFPToFixed_Helper(helper,
1241 reinterpret_cast<uintptr_t>(inputs),
1242 inputs_length,
1243 reinterpret_cast<uintptr_t>(results),
1244 d_bits,
1245 n_bits);
1246
1247 if (Test::generate_test_trace()) {
1248 // Print the results.
1249 printf("const uint%u_t kExpected_%s[] = {\n", d_bits, name);
1250 for (unsigned d = 0; d < results_length; d++) {
1251 printf(" %" PRIu64 "u,\n", static_cast<uint64_t>(results[d]));
1252 }
1253 printf("};\n");
1254 printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
1255 } else {
1256 // Check the results.
1257 VIXL_CHECK(expected_length == results_length);
1258 unsigned error_count = 0;
1259 unsigned d = 0;
1260 for (unsigned n = 0; n < inputs_length; n++) {
1261 for (unsigned fbits = 0; fbits <= d_bits; ++fbits, d++) {
1262 if (results[d] != expected[d]) {
1263 if (++error_count > kErrorReportLimit) continue;
1264
1265 printf("%s 0x%0*" PRIx64 " #%d (%s %g #%d):\n",
1266 name,
1267 n_bits / 4,
1268 static_cast<uint64_t>(inputs[n]),
1269 fbits,
1270 name,
1271 rawbits_to_fp(inputs[n]),
1272 fbits);
1273 printf(" Expected: 0x%0*" PRIx64 " (%" PRIu64 ")\n",
1274 d_bits / 4,
1275 static_cast<uint64_t>(expected[d]),
1276 static_cast<uint64_t>(expected[d]));
1277 printf(" Found: 0x%0*" PRIx64 " (%" PRIu64 ")\n",
1278 d_bits / 4,
1279 static_cast<uint64_t>(results[d]),
1280 static_cast<uint64_t>(results[d]));
1281 printf("\n");
1282 }
1283 }
1284 }
1285 VIXL_ASSERT(d == expected_length);
1286 if (error_count > kErrorReportLimit) {
1287 printf("%u other errors follow.\n", error_count - kErrorReportLimit);
1288 }
1289 VIXL_CHECK(error_count == 0);
1290 }
1291 delete[] results;
1292 }
1293
1294
1295 // ==== Tests for instructions of the form <INST> VReg, VReg. ====
1296
1297
1298 static void Test1OpNEON_Helper(Test1OpNEONHelper_t helper,
1299 uintptr_t inputs_n,
1300 unsigned inputs_n_length,
1301 uintptr_t results,
1302 VectorFormat vd_form,
1303 VectorFormat vn_form) {
1304 VIXL_ASSERT(vd_form != kFormatUndefined);
1305 VIXL_ASSERT(vn_form != kFormatUndefined);
1306
1307 SETUP();
1308 START();
1309
1310 // Roll up the loop to keep the code size down.
1311 Label loop_n;
1312
1313 Register out = x0;
1314 Register inputs_n_base = x1;
1315 Register inputs_n_last_16bytes = x3;
1316 Register index_n = x5;
1317
1318 // TODO: Refactor duplicate definitions below with a VRegister::As() routine.
1319 const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form);
1320 const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
1321
1322 const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form);
1323 const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
1324 const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
1325 const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form);
1326 const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form);
1327
1328
1329 // These will be either a D- or a Q-register form, with a single lane
1330 // (for use in scalar load and store operations).
1331 VRegister vd = VRegister(0, vd_bits);
1332 VRegister vn = v1.V16B();
1333 VRegister vntmp = v3.V16B();
1334
1335 // These will have the correct format for use when calling 'helper'.
1336 VRegister vd_helper = VRegister(0, vd_bits, vd_lane_count);
1337 VRegister vn_helper = VRegister(1, vn_bits, vn_lane_count);
1338
1339 // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'.
1340 VRegister vntmp_single = VRegister(3, vn_lane_bits);
1341
1342 __ Mov(out, results);
1343
1344 __ Mov(inputs_n_base, inputs_n);
1345 __ Mov(inputs_n_last_16bytes,
1346 inputs_n + (vn_lane_bytes * inputs_n_length) - 16);
1347
1348 __ Ldr(vn, MemOperand(inputs_n_last_16bytes));
1349
1350 __ Mov(index_n, 0);
1351 __ Bind(&loop_n);
1352
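// Shift one new lane into vn each iteration: load the next scalar input and
// use Ext to rotate it in, so vn always holds a sliding window over inputs_n.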
1353 __ Ldr(vntmp_single,
1354 MemOperand(inputs_n_base, index_n, LSL, vn_lane_bytes_log2));
1355 __ Ext(vn, vn, vntmp, vn_lane_bytes);
1356
1357 // Set the destination to zero.
1358 // TODO: Setting the destination to values other than zero
1359 // might be a better test for instructions such as sqxtn2
1360 // which may leave parts of V registers unchanged.
1361 __ Movi(vd.V16B(), 0);
1362
1363 {
1364 SingleEmissionCheckScope guard(&masm);
1365 (masm.*helper)(vd_helper, vn_helper);
1366 }
1367 __ Str(vd, MemOperand(out, vd.GetSizeInBytes(), PostIndex));
1368
1369 __ Add(index_n, index_n, 1);
1370 __ Cmp(index_n, inputs_n_length);
1371 __ B(lo, &loop_n);
1372
1373 END();
1374 RUN();
1375 TEARDOWN();
1376 }
1377
1378
1379 // Test NEON instructions. The inputs_*[] and expected[] arrays should be
1380 // arrays of rawbits representations of the input values. This ensures that
1381 // exact bit comparisons can be performed.
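// Note that expected_length counts input iterations rather than lanes; the
// expected[] array itself holds vd_lane_count values per iteration.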
1382 template <typename Td, typename Tn>
1383 static void Test1OpNEON(const char* name,
1384 Test1OpNEONHelper_t helper,
1385 const Tn inputs_n[],
1386 unsigned inputs_n_length,
1387 const Td expected[],
1388 unsigned expected_length,
1389 VectorFormat vd_form,
1390 VectorFormat vn_form) {
1391 VIXL_ASSERT(inputs_n_length > 0);
1392
1393 const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
1394 const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
1395 const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
1396
1397 const unsigned results_length = inputs_n_length;
1398 Td* results = new Td[results_length * vd_lane_count];
1399 const unsigned lane_bit = sizeof(Td) * 8;
1400 const unsigned lane_len_in_hex = MaxHexCharCount<Td, Tn>();
1401
1402 Test1OpNEON_Helper(helper,
1403 reinterpret_cast<uintptr_t>(inputs_n),
1404 inputs_n_length,
1405 reinterpret_cast<uintptr_t>(results),
1406 vd_form,
1407 vn_form);
1408
1409 if (Test::generate_test_trace()) {
1410 // Print the results.
1411 printf("const uint%u_t kExpected_NEON_%s[] = {\n", lane_bit, name);
1412 for (unsigned iteration = 0; iteration < results_length; iteration++) {
1413 printf(" ");
1414 // Output a separate result for each element of the result vector.
1415 for (unsigned lane = 0; lane < vd_lane_count; lane++) {
1416 unsigned index = lane + (iteration * vd_lane_count);
1417 printf(" 0x%0*" PRIx64 ",",
1418 lane_len_in_hex,
1419 static_cast<uint64_t>(results[index]));
1420 }
1421 printf("\n");
1422 }
1423
1424 printf("};\n");
1425 printf("const unsigned kExpectedCount_NEON_%s = %u;\n",
1426 name,
1427 results_length);
1428 } else {
1429 // Check the results.
1430 VIXL_CHECK(expected_length == results_length);
1431 unsigned error_count = 0;
1432 unsigned d = 0;
1433 const char* padding = " ";
1434 VIXL_ASSERT(strlen(padding) >= (lane_len_in_hex + 1));
1435 for (unsigned n = 0; n < inputs_n_length; n++, d++) {
1436 bool error_in_vector = false;
1437
1438 for (unsigned lane = 0; lane < vd_lane_count; lane++) {
1439 unsigned output_index = (n * vd_lane_count) + lane;
1440
1441 if (results[output_index] != expected[output_index]) {
1442 error_in_vector = true;
1443 break;
1444 }
1445 }
1446
1447 if (error_in_vector && (++error_count <= kErrorReportLimit)) {
1448 printf("%s\n", name);
1449 printf(" Vn%.*s| Vd%.*s| Expected\n",
1450 lane_len_in_hex + 1,
1451 padding,
1452 lane_len_in_hex + 1,
1453 padding);
1454
1455 const unsigned first_index_n =
1456 inputs_n_length - (16 / vn_lane_bytes) + n + 1;
1457
1458 for (unsigned lane = 0; lane < std::max(vd_lane_count, vn_lane_count);
1459 lane++) {
1460 unsigned output_index = (n * vd_lane_count) + lane;
1461 unsigned input_index_n = (first_index_n + lane) % inputs_n_length;
1462
1463 printf("%c0x%0*" PRIx64 " | 0x%0*" PRIx64
1464 " "
1465 "| 0x%0*" PRIx64 "\n",
1466 results[output_index] != expected[output_index] ? '*' : ' ',
1467 lane_len_in_hex,
1468 static_cast<uint64_t>(inputs_n[input_index_n]),
1469 lane_len_in_hex,
1470 static_cast<uint64_t>(results[output_index]),
1471 lane_len_in_hex,
1472 static_cast<uint64_t>(expected[output_index]));
1473 }
1474 }
1475 }
1476 VIXL_ASSERT(d == expected_length);
1477 if (error_count > kErrorReportLimit) {
1478 printf("%u other errors follow.\n", error_count - kErrorReportLimit);
1479 }
1480 VIXL_CHECK(error_count == 0);
1481 }
1482 delete[] results;
1483 }
1484
1485
1486 // ==== Tests for instructions of the form <mnemonic> <V><d>, <Vn>.<T> ====
1487 // where <V> is one of B, H, S or D registers.
1488 // e.g. saddlv H1, v0.8B
1489
1490 // TODO: Change tests to store all lanes of the resulting V register.
1491 // Some tests store all 128 bits of the resulting V register to
1492 // check the simulator's behaviour on the rest of the register.
1493 // This is better than storing the affected lanes only.
1494 // Change any tests such as the 'Across' template to do the same.
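//
// The 'Across' helper below already stores the whole Q register (via 'vdstr'),
// and the checking code verifies that the lanes beyond the result are zero.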
1495
1496 static void Test1OpAcrossNEON_Helper(Test1OpNEONHelper_t helper,
1497 uintptr_t inputs_n,
1498 unsigned inputs_n_length,
1499 uintptr_t results,
1500 VectorFormat vd_form,
1501 VectorFormat vn_form) {
1502 VIXL_ASSERT(vd_form != kFormatUndefined);
1503 VIXL_ASSERT(vn_form != kFormatUndefined);
1504
1505 SETUP();
1506 START();
1507
1508 // Roll up the loop to keep the code size down.
1509 Label loop_n;
1510
1511 Register out = x0;
1512 Register inputs_n_base = x1;
1513 Register inputs_n_last_vector = x3;
1514 Register index_n = x5;
1515
1516 // TODO: Refactor duplicate definitions below with a VRegister::As() routine.
1517 const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form);
1518 const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form);
1519 const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
1520 const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
1521 const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form);
1522 const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form);
1523
1524 // Test destructive operations by (arbitrarily) using the same register for
1525 // B and S lane sizes.
1526 bool destructive = (vd_bits == kBRegSize) || (vd_bits == kSRegSize);
1527
1528 // Create two aliases for v0; the first is the destination for the tested
1529 // instruction, the second, the whole Q register to check the results.
1530 VRegister vd = VRegister(0, vd_bits);
1531 VRegister vdstr = VRegister(0, kQRegSize);
1532
1533 VRegister vn = VRegister(1, vn_bits);
1534 VRegister vntmp = VRegister(3, vn_bits);
1535
1536 // These will have the correct format for use when calling 'helper'.
1537 VRegister vd_helper = VRegister(0, vn_bits, vn_lane_count);
1538 VRegister vn_helper = VRegister(1, vn_bits, vn_lane_count);
1539
1540 // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'.
1541 VRegister vntmp_single = VRegister(3, vn_lane_bits);
1542
1543 // Same registers for use in the 'ext' instructions.
1544 VRegister vn_ext = (kDRegSize == vn_bits) ? vn.V8B() : vn.V16B();
1545 VRegister vntmp_ext = (kDRegSize == vn_bits) ? vntmp.V8B() : vntmp.V16B();
1546
1547 __ Mov(out, results);
1548
1549 __ Mov(inputs_n_base, inputs_n);
1550 __ Mov(inputs_n_last_vector,
1551 inputs_n + vn_lane_bytes * (inputs_n_length - vn_lane_count));
1552
1553 __ Ldr(vn, MemOperand(inputs_n_last_vector));
1554
1555 __ Mov(index_n, 0);
1556 __ Bind(&loop_n);
1557
1558 __ Ldr(vntmp_single,
1559 MemOperand(inputs_n_base, index_n, LSL, vn_lane_bytes_log2));
1560 __ Ext(vn_ext, vn_ext, vntmp_ext, vn_lane_bytes);
1561
1562 if (destructive) {
1563 __ Mov(vd_helper, vn_helper);
1564 SingleEmissionCheckScope guard(&masm);
1565 (masm.*helper)(vd, vd_helper);
1566 } else {
1567 SingleEmissionCheckScope guard(&masm);
1568 (masm.*helper)(vd, vn_helper);
1569 }
1570
1571 __ Str(vdstr, MemOperand(out, kQRegSizeInBytes, PostIndex));
1572
1573 __ Add(index_n, index_n, 1);
1574 __ Cmp(index_n, inputs_n_length);
1575 __ B(lo, &loop_n);
1576
1577 END();
1578 RUN();
1579 TEARDOWN();
1580 }
1581
1582 // Test NEON instructions. The inputs_*[] and expected[] arrays should be
1583 // arrays of rawbits representations of the input values. This ensures that
1584 // exact bit comparisons can be performed.
1585 template <typename Td, typename Tn>
1586 static void Test1OpAcrossNEON(const char* name,
1587 Test1OpNEONHelper_t helper,
1588 const Tn inputs_n[],
1589 unsigned inputs_n_length,
1590 const Td expected[],
1591 unsigned expected_length,
1592 VectorFormat vd_form,
1593 VectorFormat vn_form) {
1594 VIXL_ASSERT(inputs_n_length > 0);
1595
1596 const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
1597 const unsigned vd_lanes_per_q = MaxLaneCountFromFormat(vd_form);
1598
1599 const unsigned results_length = inputs_n_length;
1600 Td* results = new Td[results_length * vd_lanes_per_q];
1601 const unsigned lane_bit = sizeof(Td) * 8;
1602 const unsigned lane_len_in_hex = MaxHexCharCount<Td, Tn>();
1603
1604 Test1OpAcrossNEON_Helper(helper,
1605 reinterpret_cast<uintptr_t>(inputs_n),
1606 inputs_n_length,
1607 reinterpret_cast<uintptr_t>(results),
1608 vd_form,
1609 vn_form);
1610
1611 if (Test::generate_test_trace()) {
1612 // Print the results.
1613 printf("const uint%u_t kExpected_NEON_%s[] = {\n", lane_bit, name);
1614 for (unsigned iteration = 0; iteration < results_length; iteration++) {
1615 printf(" ");
1616 // Output a separate result for each element of the result vector.
1617 for (unsigned lane = 0; lane < vd_lane_count; lane++) {
1618 unsigned index = lane + (iteration * vd_lane_count);
1619 printf(" 0x%0*" PRIx64 ",",
1620 lane_len_in_hex,
1621 static_cast<uint64_t>(results[index]));
1622 }
1623 printf("\n");
1624 }
1625
1626 printf("};\n");
1627 printf("const unsigned kExpectedCount_NEON_%s = %u;\n",
1628 name,
1629 results_length);
1630 } else {
1631 // Check the results.
1632 VIXL_CHECK(expected_length == results_length);
1633 unsigned error_count = 0;
1634 unsigned d = 0;
1635 const char* padding = " ";
1636 VIXL_ASSERT(strlen(padding) >= (lane_len_in_hex + 1));
1637 for (unsigned n = 0; n < inputs_n_length; n++, d++) {
1638 bool error_in_vector = false;
1639
1640 for (unsigned lane = 0; lane < vd_lane_count; lane++) {
1641 unsigned expected_index = (n * vd_lane_count) + lane;
1642 unsigned results_index = (n * vd_lanes_per_q) + lane;
1643
1644 if (results[results_index] != expected[expected_index]) {
1645 error_in_vector = true;
1646 break;
1647 }
1648 }
1649
1650 // For across operations, the remaining lanes should be zero.
1651 for (unsigned lane = vd_lane_count; lane < vd_lanes_per_q; lane++) {
1652 unsigned results_index = (n * vd_lanes_per_q) + lane;
1653 if (results[results_index] != 0) {
1654 error_in_vector = true;
1655 break;
1656 }
1657 }
1658
1659 if (error_in_vector && (++error_count <= kErrorReportLimit)) {
1660 const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
1661
1662 printf("%s\n", name);
1663 printf(" Vn%.*s| Vd%.*s| Expected\n",
1664 lane_len_in_hex + 1,
1665 padding,
1666 lane_len_in_hex + 1,
1667 padding);
1668
1669 // TODO: In case of an error, all tests print out as many elements as
1670 // there are lanes in the output or input vectors. This way
1671 // the viewer can read all the values that were needed for the
1672 // operation but the output contains also unnecessary values.
1673 // These prints can be improved according to the arguments
1674 // passed to test functions.
1675 // This output for the 'Across' category has the required
1676 // modifications.
1677 for (unsigned lane = 0; lane < vn_lane_count; lane++) {
1678 unsigned results_index =
1679 (n * vd_lanes_per_q) + ((vn_lane_count - 1) - lane);
1680 unsigned input_index_n =
1681 (inputs_n_length - vn_lane_count + n + 1 + lane) %
1682 inputs_n_length;
1683
1684 Td expect = 0;
1685 if ((vn_lane_count - 1) == lane) {
1686 // This is the last lane to be printed, ie. the least-significant
1687 // lane, so use the expected value; any other lane should be zero.
1688 unsigned expected_index = n * vd_lane_count;
1689 expect = expected[expected_index];
1690 }
1691 printf("%c0x%0*" PRIx64 " | 0x%0*" PRIx64 " | 0x%0*" PRIx64 "\n",
1692 results[results_index] != expect ? '*' : ' ',
1693 lane_len_in_hex,
1694 static_cast<uint64_t>(inputs_n[input_index_n]),
1695 lane_len_in_hex,
1696 static_cast<uint64_t>(results[results_index]),
1697 lane_len_in_hex,
1698 static_cast<uint64_t>(expect));
1699 }
1700 }
1701 }
1702 VIXL_ASSERT(d == expected_length);
1703 if (error_count > kErrorReportLimit) {
1704 printf("%u other errors follow.\n", error_count - kErrorReportLimit);
1705 }
1706 VIXL_CHECK(error_count == 0);
1707 }
1708 delete[] results;
1709 }
1710
1711
1712 // ==== Tests for instructions of the form <INST> VReg, VReg, VReg. ====
1713
1714 // TODO: Iterate over inputs_d once the traces file is split.
1715
1716 static void Test2OpNEON_Helper(Test2OpNEONHelper_t helper,
1717 uintptr_t inputs_d,
1718 uintptr_t inputs_n,
1719 unsigned inputs_n_length,
1720 uintptr_t inputs_m,
1721 unsigned inputs_m_length,
1722 uintptr_t results,
1723 VectorFormat vd_form,
1724 VectorFormat vn_form,
1725 VectorFormat vm_form) {
1726 VIXL_ASSERT(vd_form != kFormatUndefined);
1727 VIXL_ASSERT(vn_form != kFormatUndefined);
1728 VIXL_ASSERT(vm_form != kFormatUndefined);
1729
1730 SETUP();
1731 START();
1732
1733 // Roll up the loop to keep the code size down.
1734 Label loop_n, loop_m;
1735
1736 Register out = x0;
1737 Register inputs_n_base = x1;
1738 Register inputs_m_base = x2;
1739 Register inputs_d_base = x3;
1740 Register inputs_n_last_16bytes = x4;
1741 Register inputs_m_last_16bytes = x5;
1742 Register index_n = x6;
1743 Register index_m = x7;
1744
1745 // TODO: Refactor duplicate definitions below with a VRegister::As() routine.
1746 const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form);
1747 const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
1748
1749 const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form);
1750 const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
1751 const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
1752 const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form);
1753 const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form);
1754
1755 const unsigned vm_bits = RegisterSizeInBitsFromFormat(vm_form);
1756 const unsigned vm_lane_count = LaneCountFromFormat(vm_form);
1757 const unsigned vm_lane_bytes = LaneSizeInBytesFromFormat(vm_form);
1758 const unsigned vm_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vm_form);
1759 const unsigned vm_lane_bits = LaneSizeInBitsFromFormat(vm_form);
1760
1761
1762 // Always load and store 128 bits regardless of the format.
1763 VRegister vd = v0.V16B();
1764 VRegister vn = v1.V16B();
1765 VRegister vm = v2.V16B();
1766 VRegister vntmp = v3.V16B();
1767 VRegister vmtmp = v4.V16B();
1768 VRegister vres = v5.V16B();
1769
1770 // These will have the correct format for calling the 'helper'.
1771 VRegister vn_helper = VRegister(1, vn_bits, vn_lane_count);
1772 VRegister vm_helper = VRegister(2, vm_bits, vm_lane_count);
1773 VRegister vres_helper = VRegister(5, vd_bits, vd_lane_count);
1774
1775 // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'.
1776 VRegister vntmp_single = VRegister(3, vn_lane_bits);
1777 VRegister vmtmp_single = VRegister(4, vm_lane_bits);
1778
1779 __ Mov(out, results);
1780
1781 __ Mov(inputs_d_base, inputs_d);
1782
1783 __ Mov(inputs_n_base, inputs_n);
1784 __ Mov(inputs_n_last_16bytes, inputs_n + (inputs_n_length - 16));
1785 __ Mov(inputs_m_base, inputs_m);
1786 __ Mov(inputs_m_last_16bytes, inputs_m + (inputs_m_length - 16));
1787
1788 __ Ldr(vd, MemOperand(inputs_d_base));
1789 __ Ldr(vn, MemOperand(inputs_n_last_16bytes));
1790 __ Ldr(vm, MemOperand(inputs_m_last_16bytes));
1791
1792 __ Mov(index_n, 0);
1793 __ Bind(&loop_n);
1794
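// Each iteration of loop_n loads one new lane from inputs_n into vntmp and
// uses Ext to shift it into vn, so vn always holds a 16-byte window that
// advances by one lane per iteration. loop_m does the same for vm below, so
// every combination of input windows is passed to the helper.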
1795 __ Ldr(vntmp_single,
1796 MemOperand(inputs_n_base, index_n, LSL, vn_lane_bytes_log2));
1797 __ Ext(vn, vn, vntmp, vn_lane_bytes);
1798
1799 __ Mov(index_m, 0);
1800 __ Bind(&loop_m);
1801
1802 __ Ldr(vmtmp_single,
1803 MemOperand(inputs_m_base, index_m, LSL, vm_lane_bytes_log2));
1804 __ Ext(vm, vm, vmtmp, vm_lane_bytes);
1805
1806 __ Mov(vres, vd);
1807 {
1808 SingleEmissionCheckScope guard(&masm);
1809 (masm.*helper)(vres_helper, vn_helper, vm_helper);
1810 }
1811 __ Str(vres, MemOperand(out, vd.GetSizeInBytes(), PostIndex));
1812
1813 __ Add(index_m, index_m, 1);
1814 __ Cmp(index_m, inputs_m_length);
1815 __ B(lo, &loop_m);
1816
1817 __ Add(index_n, index_n, 1);
1818 __ Cmp(index_n, inputs_n_length);
1819 __ B(lo, &loop_n);
1820
1821 END();
1822 RUN();
1823 TEARDOWN();
1824 }
1825
1826
1827 // Test NEON instructions. The inputs_*[] and expected[] arrays should
1828 // contain the raw-bit representations of the input values, so that exact
1829 // bit comparisons can be performed.
1830 template <typename Td, typename Tn, typename Tm>
1831 static void Test2OpNEON(const char* name,
1832 Test2OpNEONHelper_t helper,
1833 const Td inputs_d[],
1834 const Tn inputs_n[],
1835 unsigned inputs_n_length,
1836 const Tm inputs_m[],
1837 unsigned inputs_m_length,
1838 const Td expected[],
1839 unsigned expected_length,
1840 VectorFormat vd_form,
1841 VectorFormat vn_form,
1842 VectorFormat vm_form) {
1843 VIXL_ASSERT(inputs_n_length > 0 && inputs_m_length > 0);
1844
1845 const unsigned vd_lane_count = MaxLaneCountFromFormat(vd_form);
1846
1847 const unsigned results_length = inputs_n_length * inputs_m_length;
1848 Td* results = new Td[results_length * vd_lane_count];
1849 const unsigned lane_bit = sizeof(Td) * 8;
1850 const unsigned lane_len_in_hex = MaxHexCharCount<Td, Tm>();
1851
1852 Test2OpNEON_Helper(helper,
1853 reinterpret_cast<uintptr_t>(inputs_d),
1854 reinterpret_cast<uintptr_t>(inputs_n),
1855 inputs_n_length,
1856 reinterpret_cast<uintptr_t>(inputs_m),
1857 inputs_m_length,
1858 reinterpret_cast<uintptr_t>(results),
1859 vd_form,
1860 vn_form,
1861 vm_form);
1862
1863 if (Test::generate_test_trace()) {
1864 // Print the results.
1865 printf("const uint%u_t kExpected_NEON_%s[] = {\n", lane_bit, name);
1866 for (unsigned iteration = 0; iteration < results_length; iteration++) {
1867 printf(" ");
1868 // Output a separate result for each element of the result vector.
1869 for (unsigned lane = 0; lane < vd_lane_count; lane++) {
1870 unsigned index = lane + (iteration * vd_lane_count);
1871 printf(" 0x%0*" PRIx64 ",",
1872 lane_len_in_hex,
1873 static_cast<uint64_t>(results[index]));
1874 }
1875 printf("\n");
1876 }
1877
1878 printf("};\n");
1879 printf("const unsigned kExpectedCount_NEON_%s = %u;\n",
1880 name,
1881 results_length);
1882 } else {
1883 // Check the results.
1884 VIXL_CHECK(expected_length == results_length);
1885 unsigned error_count = 0;
1886 unsigned d = 0;
1887 const char* padding = " ";
1888 VIXL_ASSERT(strlen(padding) >= (lane_len_in_hex + 1));
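// Results and expected values share the same flat layout: vd_lane_count
// lanes per (n, m) pair, so the index of a lane is
// ((n * inputs_m_length) + m) * vd_lane_count + lane, as computed below.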
1889 for (unsigned n = 0; n < inputs_n_length; n++) {
1890 for (unsigned m = 0; m < inputs_m_length; m++, d++) {
1891 bool error_in_vector = false;
1892
1893 for (unsigned lane = 0; lane < vd_lane_count; lane++) {
1894 unsigned output_index = (n * inputs_m_length * vd_lane_count) +
1895 (m * vd_lane_count) + lane;
1896
1897 if (results[output_index] != expected[output_index]) {
1898 error_in_vector = true;
1899 break;
1900 }
1901 }
1902
1903 if (error_in_vector && (++error_count <= kErrorReportLimit)) {
1904 printf("%s\n", name);
1905 printf(" Vd%.*s| Vn%.*s| Vm%.*s| Vd%.*s| Expected\n",
1906 lane_len_in_hex + 1,
1907 padding,
1908 lane_len_in_hex + 1,
1909 padding,
1910 lane_len_in_hex + 1,
1911 padding,
1912 lane_len_in_hex + 1,
1913 padding);
1914
1915 for (unsigned lane = 0; lane < vd_lane_count; lane++) {
1916 unsigned output_index = (n * inputs_m_length * vd_lane_count) +
1917 (m * vd_lane_count) + lane;
1918 unsigned input_index_n =
1919 (inputs_n_length - vd_lane_count + n + 1 + lane) %
1920 inputs_n_length;
1921 unsigned input_index_m =
1922 (inputs_m_length - vd_lane_count + m + 1 + lane) %
1923 inputs_m_length;
1924
1925 printf("%c0x%0*" PRIx64 " | 0x%0*" PRIx64 " | 0x%0*" PRIx64
1926 " "
1927 "| 0x%0*" PRIx64 " | 0x%0*" PRIx64 "\n",
1928 results[output_index] != expected[output_index] ? '*' : ' ',
1929 lane_len_in_hex,
1930 static_cast<uint64_t>(inputs_d[lane]),
1931 lane_len_in_hex,
1932 static_cast<uint64_t>(inputs_n[input_index_n]),
1933 lane_len_in_hex,
1934 static_cast<uint64_t>(inputs_m[input_index_m]),
1935 lane_len_in_hex,
1936 static_cast<uint64_t>(results[output_index]),
1937 lane_len_in_hex,
1938 static_cast<uint64_t>(expected[output_index]));
1939 }
1940 }
1941 }
1942 }
1943 VIXL_ASSERT(d == expected_length);
1944 if (error_count > kErrorReportLimit) {
1945 printf("%u other errors follow.\n", error_count - kErrorReportLimit);
1946 }
1947 VIXL_CHECK(error_count == 0);
1948 }
1949 delete[] results;
1950 }
1951
1952
1953 // ==== Tests for instructions of the form <INST> Vd, Vn, Vm[<#index>]. ====
1954
1955 static void TestByElementNEON_Helper(TestByElementNEONHelper_t helper,
1956 uintptr_t inputs_d,
1957 uintptr_t inputs_n,
1958 unsigned inputs_n_length,
1959 uintptr_t inputs_m,
1960 unsigned inputs_m_length,
1961 const int indices[],
1962 unsigned indices_length,
1963 uintptr_t results,
1964 VectorFormat vd_form,
1965 VectorFormat vn_form,
1966 VectorFormat vm_form) {
1967 VIXL_ASSERT(vd_form != kFormatUndefined);
1968 VIXL_ASSERT(vn_form != kFormatUndefined);
1969 VIXL_ASSERT(vm_form != kFormatUndefined);
1970
1971 SETUP();
1972 START();
1973
1974 // Roll up the loop to keep the code size down.
1975 Label loop_n, loop_m;
1976
1977 Register out = x0;
1978 Register inputs_n_base = x1;
1979 Register inputs_m_base = x2;
1980 Register inputs_d_base = x3;
1981 Register inputs_n_last_16bytes = x4;
1982 Register inputs_m_last_16bytes = x5;
1983 Register index_n = x6;
1984 Register index_m = x7;
1985
1986 // TODO: Refactor duplicate definitions below with a VRegister::As() routine.
1987 const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form);
1988 const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
1989
1990 const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form);
1991 const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
1992 const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
1993 const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form);
1994 const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form);
1995
1996 const unsigned vm_bits = RegisterSizeInBitsFromFormat(vm_form);
1997 const unsigned vm_lane_count = LaneCountFromFormat(vm_form);
1998 const unsigned vm_lane_bytes = LaneSizeInBytesFromFormat(vm_form);
1999 const unsigned vm_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vm_form);
2000 const unsigned vm_lane_bits = LaneSizeInBitsFromFormat(vm_form);
2001
2002
2003 // Always load and store 128 bits regardless of the format.
2004 VRegister vd = v0.V16B();
2005 VRegister vn = v1.V16B();
2006 VRegister vm = v2.V16B();
2007 VRegister vntmp = v3.V16B();
2008 VRegister vmtmp = v4.V16B();
2009 VRegister vres = v5.V16B();
2010
2011 // These will have the correct format for calling the 'helper'.
2012 VRegister vn_helper = VRegister(1, vn_bits, vn_lane_count);
2013 VRegister vm_helper = VRegister(2, vm_bits, vm_lane_count);
2014 VRegister vres_helper = VRegister(5, vd_bits, vd_lane_count);
2015
2016 // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'.
2017 VRegister vntmp_single = VRegister(3, vn_lane_bits);
2018 VRegister vmtmp_single = VRegister(4, vm_lane_bits);
2019
2020 __ Mov(out, results);
2021
2022 __ Mov(inputs_d_base, inputs_d);
2023
2024 __ Mov(inputs_n_base, inputs_n);
2025 __ Mov(inputs_n_last_16bytes, inputs_n + (inputs_n_length - 16));
2026 __ Mov(inputs_m_base, inputs_m);
2027 __ Mov(inputs_m_last_16bytes, inputs_m + (inputs_m_length - 16));
2028
2029 __ Ldr(vd, MemOperand(inputs_d_base));
2030 __ Ldr(vn, MemOperand(inputs_n_last_16bytes));
2031 __ Ldr(vm, MemOperand(inputs_m_last_16bytes));
2032
2033 __ Mov(index_n, 0);
2034 __ Bind(&loop_n);
2035
2036 __ Ldr(vntmp_single,
2037 MemOperand(inputs_n_base, index_n, LSL, vn_lane_bytes_log2));
2038 __ Ext(vn, vn, vntmp, vn_lane_bytes);
2039
2040 __ Mov(index_m, 0);
2041 __ Bind(&loop_m);
2042
2043 __ Ldr(vmtmp_single,
2044 MemOperand(inputs_m_base, index_m, LSL, vm_lane_bytes_log2));
2045 __ Ext(vm, vm, vmtmp, vm_lane_bytes);
2046
2047 __ Mov(vres, vd);
2048 {
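// For each (n, m) combination of input windows, emit the instruction once
// per element index and store a separate 128-bit result vector per index.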
2049 for (unsigned i = 0; i < indices_length; i++) {
2050 {
2051 SingleEmissionCheckScope guard(&masm);
2052 (masm.*helper)(vres_helper, vn_helper, vm_helper, indices[i]);
2053 }
2054 __ Str(vres, MemOperand(out, vd.GetSizeInBytes(), PostIndex));
2055 }
2056 }
2057
2058 __ Add(index_m, index_m, 1);
2059 __ Cmp(index_m, inputs_m_length);
2060 __ B(lo, &loop_m);
2061
2062 __ Add(index_n, index_n, 1);
2063 __ Cmp(index_n, inputs_n_length);
2064 __ B(lo, &loop_n);
2065
2066 END();
2067 RUN();
2068 TEARDOWN();
2069 }
2070
2071
2072 // Test NEON instructions. The inputs_*[] and expected[] arrays should
2073 // contain the raw-bit representations of the input values, so that exact
2074 // bit comparisons can be performed.
2075 template <typename Td, typename Tn, typename Tm>
2076 static void TestByElementNEON(const char* name,
2077 TestByElementNEONHelper_t helper,
2078 const Td inputs_d[],
2079 const Tn inputs_n[],
2080 unsigned inputs_n_length,
2081 const Tm inputs_m[],
2082 unsigned inputs_m_length,
2083 const int indices[],
2084 unsigned indices_length,
2085 const Td expected[],
2086 unsigned expected_length,
2087 VectorFormat vd_form,
2088 VectorFormat vn_form,
2089 VectorFormat vm_form) {
2090 VIXL_ASSERT(inputs_n_length > 0);
2091 VIXL_ASSERT(inputs_m_length > 0);
2092 VIXL_ASSERT(indices_length > 0);
2093
2094 const unsigned vd_lane_count = MaxLaneCountFromFormat(vd_form);
2095
2096 const unsigned results_length =
2097 inputs_n_length * inputs_m_length * indices_length;
2098 Td* results = new Td[results_length * vd_lane_count];
2099 const unsigned lane_bit = sizeof(Td) * 8;
2100 const unsigned lane_len_in_hex = MaxHexCharCount<Td, Tm>();
2101
2102 TestByElementNEON_Helper(helper,
2103 reinterpret_cast<uintptr_t>(inputs_d),
2104 reinterpret_cast<uintptr_t>(inputs_n),
2105 inputs_n_length,
2106 reinterpret_cast<uintptr_t>(inputs_m),
2107 inputs_m_length,
2108 indices,
2109 indices_length,
2110 reinterpret_cast<uintptr_t>(results),
2111 vd_form,
2112 vn_form,
2113 vm_form);
2114
2115 if (Test::generate_test_trace()) {
2116 // Print the results.
2117 printf("const uint%u_t kExpected_NEON_%s[] = {\n", lane_bit, name);
2118 for (unsigned iteration = 0; iteration < results_length; iteration++) {
2119 printf(" ");
2120 // Output a separate result for each element of the result vector.
2121 for (unsigned lane = 0; lane < vd_lane_count; lane++) {
2122 unsigned index = lane + (iteration * vd_lane_count);
2123 printf(" 0x%0*" PRIx64 ",",
2124 lane_len_in_hex,
2125 static_cast<uint64_t>(results[index]));
2126 }
2127 printf("\n");
2128 }
2129
2130 printf("};\n");
2131 printf("const unsigned kExpectedCount_NEON_%s = %u;\n",
2132 name,
2133 results_length);
2134 } else {
2135 // Check the results.
2136 VIXL_CHECK(expected_length == results_length);
2137 unsigned error_count = 0;
2138 unsigned d = 0;
2139 const char* padding = " ";
2140 VIXL_ASSERT(strlen(padding) >= (lane_len_in_hex + 1));
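// Results are laid out as [n][m][index][lane], so the flat index of a lane
// is (((n * inputs_m_length) + m) * indices_length + index) * vd_lane_count
// + lane, matching output_index below.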
2141 for (unsigned n = 0; n < inputs_n_length; n++) {
2142 for (unsigned m = 0; m < inputs_m_length; m++) {
2143 for (unsigned index = 0; index < indices_length; index++, d++) {
2144 bool error_in_vector = false;
2145
2146 for (unsigned lane = 0; lane < vd_lane_count; lane++) {
2147 unsigned output_index =
2148 (n * inputs_m_length * indices_length * vd_lane_count) +
2149 (m * indices_length * vd_lane_count) + (index * vd_lane_count) +
2150 lane;
2151
2152 if (results[output_index] != expected[output_index]) {
2153 error_in_vector = true;
2154 break;
2155 }
2156 }
2157
2158 if (error_in_vector && (++error_count <= kErrorReportLimit)) {
2159 printf("%s\n", name);
2160 printf(" Vd%.*s| Vn%.*s| Vm%.*s| Index | Vd%.*s| Expected\n",
2161 lane_len_in_hex + 1,
2162 padding,
2163 lane_len_in_hex + 1,
2164 padding,
2165 lane_len_in_hex + 1,
2166 padding,
2167 lane_len_in_hex + 1,
2168 padding);
2169
2170 for (unsigned lane = 0; lane < vd_lane_count; lane++) {
2171 unsigned output_index =
2172 (n * inputs_m_length * indices_length * vd_lane_count) +
2173 (m * indices_length * vd_lane_count) +
2174 (index * vd_lane_count) + lane;
2175 unsigned input_index_n =
2176 (inputs_n_length - vd_lane_count + n + 1 + lane) %
2177 inputs_n_length;
2178 unsigned input_index_m =
2179 (inputs_m_length - vd_lane_count + m + 1 + lane) %
2180 inputs_m_length;
2181
2182 printf("%c0x%0*" PRIx64 " | 0x%0*" PRIx64 " | 0x%0*" PRIx64
2183 " "
2184 "| [%3d] | 0x%0*" PRIx64 " | 0x%0*" PRIx64 "\n",
2185 results[output_index] != expected[output_index] ? '*'
2186 : ' ',
2187 lane_len_in_hex,
2188 static_cast<uint64_t>(inputs_d[lane]),
2189 lane_len_in_hex,
2190 static_cast<uint64_t>(inputs_n[input_index_n]),
2191 lane_len_in_hex,
2192 static_cast<uint64_t>(inputs_m[input_index_m]),
2193 indices[index],
2194 lane_len_in_hex,
2195 static_cast<uint64_t>(results[output_index]),
2196 lane_len_in_hex,
2197 static_cast<uint64_t>(expected[output_index]));
2198 }
2199 }
2200 }
2201 }
2202 }
2203 VIXL_ASSERT(d == expected_length);
2204 if (error_count > kErrorReportLimit) {
2205 printf("%u other errors follow.\n", error_count - kErrorReportLimit);
2206 }
2207 VIXL_CHECK(error_count == 0);
2208 }
2209 delete[] results;
2210 }
2211
2212
2213 // ==== Tests for instructions of the form <INST> VReg, VReg, #Immediate. ====
2214
2215
2216 template <typename Tm>
2217 void Test2OpImmNEON_Helper(
2218 typename Test2OpImmediateNEONHelper_t<Tm>::mnemonic helper,
2219 uintptr_t inputs_n,
2220 unsigned inputs_n_length,
2221 const Tm inputs_m[],
2222 unsigned inputs_m_length,
2223 uintptr_t results,
2224 VectorFormat vd_form,
2225 VectorFormat vn_form) {
2226 VIXL_ASSERT(vd_form != kFormatUndefined && vn_form != kFormatUndefined);
2227
2228 SETUP();
2229 START();
2230
2231 // Roll up the loop to keep the code size down.
2232 Label loop_n;
2233
2234 Register out = x0;
2235 Register inputs_n_base = x1;
2236 Register inputs_n_last_16bytes = x3;
2237 Register index_n = x5;
2238
2239 // TODO: Refactor duplicate definitions below with a VRegister::As() routine.
2240 const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form);
2241 const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
2242
2243 const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form);
2244 const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
2245 const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
2246 const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form);
2247 const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form);
2248
2249
2250 // These will be either a D- or a Q-register form, with a single lane
2251 // (for use in scalar load and store operations).
2252 VRegister vd = VRegister(0, vd_bits);
2253 VRegister vn = v1.V16B();
2254 VRegister vntmp = v3.V16B();
2255
2256 // These will have the correct format for use when calling 'helper'.
2257 VRegister vd_helper = VRegister(0, vd_bits, vd_lane_count);
2258 VRegister vn_helper = VRegister(1, vn_bits, vn_lane_count);
2259
2260 // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'.
2261 VRegister vntmp_single = VRegister(3, vn_lane_bits);
2262
2263 __ Mov(out, results);
2264
2265 __ Mov(inputs_n_base, inputs_n);
2266 __ Mov(inputs_n_last_16bytes,
2267 inputs_n + (vn_lane_bytes * inputs_n_length) - 16);
2268
2269 __ Ldr(vn, MemOperand(inputs_n_last_16bytes));
2270
2271 __ Mov(index_n, 0);
2272 __ Bind(&loop_n);
2273
2274 __ Ldr(vntmp_single,
2275 MemOperand(inputs_n_base, index_n, LSL, vn_lane_bytes_log2));
2276 __ Ext(vn, vn, vntmp, vn_lane_bytes);
2277
2278 // Set the destination to zero for tests such as '[r]shrn2'.
2279 // TODO: Setting the destination to values other than zero might be a better
2280 // test for shift and accumulate instructions (srsra/ssra/usra/ursra).
2281 __ Movi(vd.V16B(), 0);
2282
2283 {
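// Apply every immediate to the current vn window; each application stores
// its own result vector, so the immediates vary fastest in the output.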
2284 for (unsigned i = 0; i < inputs_m_length; i++) {
2285 {
2286 SingleEmissionCheckScope guard(&masm);
2287 (masm.*helper)(vd_helper, vn_helper, inputs_m[i]);
2288 }
2289 __ Str(vd, MemOperand(out, vd.GetSizeInBytes(), PostIndex));
2290 }
2291 }
2292
2293 __ Add(index_n, index_n, 1);
2294 __ Cmp(index_n, inputs_n_length);
2295 __ B(lo, &loop_n);
2296
2297 END();
2298 RUN();
2299 TEARDOWN();
2300 }
2301
2302
2303 // Test NEON instructions. The inputs_*[] and expected[] arrays should
2304 // contain the raw-bit representations of the input values, so that exact
2305 // bit comparisons can be performed.
2306 template <typename Td, typename Tn, typename Tm>
2307 static void Test2OpImmNEON(
2308 const char* name,
2309 typename Test2OpImmediateNEONHelper_t<Tm>::mnemonic helper,
2310 const Tn inputs_n[],
2311 unsigned inputs_n_length,
2312 const Tm inputs_m[],
2313 unsigned inputs_m_length,
2314 const Td expected[],
2315 unsigned expected_length,
2316 VectorFormat vd_form,
2317 VectorFormat vn_form) {
2318 VIXL_ASSERT(inputs_n_length > 0 && inputs_m_length > 0);
2319
2320 const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
2321 const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
2322 const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
2323
2324 const unsigned results_length = inputs_n_length * inputs_m_length;
2325 Td* results = new Td[results_length * vd_lane_count];
2326 const unsigned lane_bit = sizeof(Td) * 8;
2327 const unsigned lane_len_in_hex = MaxHexCharCount<Td, Tn>();
2328
2329 Test2OpImmNEON_Helper(helper,
2330 reinterpret_cast<uintptr_t>(inputs_n),
2331 inputs_n_length,
2332 inputs_m,
2333 inputs_m_length,
2334 reinterpret_cast<uintptr_t>(results),
2335 vd_form,
2336 vn_form);
2337
2338 if (Test::generate_test_trace()) {
2339 // Print the results.
2340 printf("const uint%u_t kExpected_NEON_%s[] = {\n", lane_bit, name);
2341 for (unsigned iteration = 0; iteration < results_length; iteration++) {
2342 printf(" ");
2343 // Output a separate result for each element of the result vector.
2344 for (unsigned lane = 0; lane < vd_lane_count; lane++) {
2345 unsigned index = lane + (iteration * vd_lane_count);
2346 printf(" 0x%0*" PRIx64 ",",
2347 lane_len_in_hex,
2348 static_cast<uint64_t>(results[index]));
2349 }
2350 printf("\n");
2351 }
2352
2353 printf("};\n");
2354 printf("const unsigned kExpectedCount_NEON_%s = %u;\n",
2355 name,
2356 results_length);
2357 } else {
2358 // Check the results.
2359 VIXL_CHECK(expected_length == results_length);
2360 unsigned error_count = 0;
2361 unsigned d = 0;
2362 const char* padding = " ";
2363 VIXL_ASSERT(strlen(padding) >= (lane_len_in_hex + 1));
2364 for (unsigned n = 0; n < inputs_n_length; n++) {
2365 for (unsigned m = 0; m < inputs_m_length; m++, d++) {
2366 bool error_in_vector = false;
2367
2368 for (unsigned lane = 0; lane < vd_lane_count; lane++) {
2369 unsigned output_index = (n * inputs_m_length * vd_lane_count) +
2370 (m * vd_lane_count) + lane;
2371
2372 if (results[output_index] != expected[output_index]) {
2373 error_in_vector = true;
2374 break;
2375 }
2376 }
2377
2378 if (error_in_vector && (++error_count <= kErrorReportLimit)) {
2379 printf("%s\n", name);
2380 printf(" Vn%.*s| Imm%.*s| Vd%.*s| Expected\n",
2381 lane_len_in_hex + 1,
2382 padding,
2383 lane_len_in_hex,
2384 padding,
2385 lane_len_in_hex + 1,
2386 padding);
2387
2388 const unsigned first_index_n =
2389 inputs_n_length - (16 / vn_lane_bytes) + n + 1;
2390
2391 for (unsigned lane = 0; lane < std::max(vd_lane_count, vn_lane_count);
2392 lane++) {
2393 unsigned output_index = (n * inputs_m_length * vd_lane_count) +
2394 (m * vd_lane_count) + lane;
2395 unsigned input_index_n = (first_index_n + lane) % inputs_n_length;
2396 unsigned input_index_m = m;
2397
2398 printf("%c0x%0*" PRIx64 " | 0x%0*" PRIx64
2399 " "
2400 "| 0x%0*" PRIx64 " | 0x%0*" PRIx64 "\n",
2401 results[output_index] != expected[output_index] ? '*' : ' ',
2402 lane_len_in_hex,
2403 static_cast<uint64_t>(inputs_n[input_index_n]),
2404 lane_len_in_hex,
2405 static_cast<uint64_t>(inputs_m[input_index_m]),
2406 lane_len_in_hex,
2407 static_cast<uint64_t>(results[output_index]),
2408 lane_len_in_hex,
2409 static_cast<uint64_t>(expected[output_index]));
2410 }
2411 }
2412 }
2413 }
2414 VIXL_ASSERT(d == expected_length);
2415 if (error_count > kErrorReportLimit) {
2416 printf("%u other errors follow.\n", error_count - kErrorReportLimit);
2417 }
2418 VIXL_CHECK(error_count == 0);
2419 }
2420 delete[] results;
2421 }
2422
2423
2424 // ==== Tests for instructions of the form <INST> VReg, #Imm, VReg, #Imm. ====
2425
2426
2427 static void TestOpImmOpImmNEON_Helper(TestOpImmOpImmVdUpdateNEONHelper_t helper,
2428 uintptr_t inputs_d,
2429 const int inputs_imm1[],
2430 unsigned inputs_imm1_length,
2431 uintptr_t inputs_n,
2432 unsigned inputs_n_length,
2433 const int inputs_imm2[],
2434 unsigned inputs_imm2_length,
2435 uintptr_t results,
2436 VectorFormat vd_form,
2437 VectorFormat vn_form) {
2438 VIXL_ASSERT(vd_form != kFormatUndefined);
2439 VIXL_ASSERT(vn_form != kFormatUndefined);
2440
2441 SETUP();
2442 START();
2443
2444 // Roll up the loop to keep the code size down.
2445 Label loop_n;
2446
2447 Register out = x0;
2448 Register inputs_d_base = x1;
2449 Register inputs_n_base = x2;
2450 Register inputs_n_last_vector = x4;
2451 Register index_n = x6;
2452
2453 // TODO: Refactor duplicate definitions below with a VRegister::As() routine.
2454 const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form);
2455 const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
2456
2457 const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form);
2458 const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
2459 const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
2460 const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form);
2461 const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form);
2462
2463
2464 // These will be either a D- or a Q-register form, with a single lane
2465 // (for use in scalar load and store operations).
2466 VRegister vd = VRegister(0, vd_bits);
2467 VRegister vn = VRegister(1, vn_bits);
2468 VRegister vntmp = VRegister(4, vn_bits);
2469 VRegister vres = VRegister(5, vn_bits);
2470
2471 VRegister vn_helper = VRegister(1, vn_bits, vn_lane_count);
2472 VRegister vres_helper = VRegister(5, vd_bits, vd_lane_count);
2473
2474 // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'.
2475 VRegister vntmp_single = VRegister(4, vn_lane_bits);
2476
2477 // The same registers, reinterpreted in the form required by the 'ext'
2477 // instructions.
2478 VRegister vn_ext = (kDRegSize == vn_bits) ? vn.V8B() : vn.V16B();
2479 VRegister vntmp_ext = (kDRegSize == vn_bits) ? vntmp.V8B() : vntmp.V16B();
2480
2481 __ Mov(out, results);
2482
2483 __ Mov(inputs_d_base, inputs_d);
2484
2485 __ Mov(inputs_n_base, inputs_n);
2486 __ Mov(inputs_n_last_vector,
2487 inputs_n + vn_lane_bytes * (inputs_n_length - vn_lane_count));
2488
2489 __ Ldr(vd, MemOperand(inputs_d_base));
2490
2491 __ Ldr(vn, MemOperand(inputs_n_last_vector));
2492
2493 __ Mov(index_n, 0);
2494 __ Bind(&loop_n);
2495
2496 __ Ldr(vntmp_single,
2497 MemOperand(inputs_n_base, index_n, LSL, vn_lane_bytes_log2));
2498 __ Ext(vn_ext, vn_ext, vntmp_ext, vn_lane_bytes);
2499
2500 {
2501 EmissionCheckScope guard(&masm,
2502 kInstructionSize * inputs_imm1_length *
2503 inputs_imm2_length * 3);
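// The scope size above allows up to three instructions per (imm1, imm2)
// pair: the Mov, the instruction under test, and the Str below.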
2504 for (unsigned i = 0; i < inputs_imm1_length; i++) {
2505 for (unsigned j = 0; j < inputs_imm2_length; j++) {
2506 __ Mov(vres, vd);
2507 (masm.*helper)(vres_helper, inputs_imm1[i], vn_helper, inputs_imm2[j]);
2508 __ Str(vres, MemOperand(out, vd.GetSizeInBytes(), PostIndex));
2509 }
2510 }
2511 }
2512
2513 __ Add(index_n, index_n, 1);
2514 __ Cmp(index_n, inputs_n_length);
2515 __ B(lo, &loop_n);
2516
2517 END();
2518 RUN();
2519 TEARDOWN();
2520 }
2521
2522
2523 // Test NEON instructions. The inputs_*[] and expected[] arrays should
2524 // contain the raw-bit representations of the input values, so that exact
2525 // bit comparisons can be performed.
2526 template <typename Td, typename Tn>
2527 static void TestOpImmOpImmNEON(const char* name,
2528 TestOpImmOpImmVdUpdateNEONHelper_t helper,
2529 const Td inputs_d[],
2530 const int inputs_imm1[],
2531 unsigned inputs_imm1_length,
2532 const Tn inputs_n[],
2533 unsigned inputs_n_length,
2534 const int inputs_imm2[],
2535 unsigned inputs_imm2_length,
2536 const Td expected[],
2537 unsigned expected_length,
2538 VectorFormat vd_form,
2539 VectorFormat vn_form) {
2540 VIXL_ASSERT(inputs_n_length > 0);
2541 VIXL_ASSERT(inputs_imm1_length > 0);
2542 VIXL_ASSERT(inputs_imm2_length > 0);
2543
2544 const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
2545
2546 const unsigned results_length =
2547 inputs_n_length * inputs_imm1_length * inputs_imm2_length;
2548
2549 Td* results = new Td[results_length * vd_lane_count];
2550 const unsigned lane_bit = sizeof(Td) * 8;
2551 const unsigned lane_len_in_hex = MaxHexCharCount<Td, Tn>();
2552
2553 TestOpImmOpImmNEON_Helper(helper,
2554 reinterpret_cast<uintptr_t>(inputs_d),
2555 inputs_imm1,
2556 inputs_imm1_length,
2557 reinterpret_cast<uintptr_t>(inputs_n),
2558 inputs_n_length,
2559 inputs_imm2,
2560 inputs_imm2_length,
2561 reinterpret_cast<uintptr_t>(results),
2562 vd_form,
2563 vn_form);
2564
2565 if (Test::generate_test_trace()) {
2566 // Print the results.
2567 printf("const uint%u_t kExpected_NEON_%s[] = {\n", lane_bit, name);
2568 for (unsigned iteration = 0; iteration < results_length; iteration++) {
2569 printf(" ");
2570 // Output a separate result for each element of the result vector.
2571 for (unsigned lane = 0; lane < vd_lane_count; lane++) {
2572 unsigned index = lane + (iteration * vd_lane_count);
2573 printf(" 0x%0*" PRIx64 ",",
2574 lane_len_in_hex,
2575 static_cast<uint64_t>(results[index]));
2576 }
2577 printf("\n");
2578 }
2579
2580 printf("};\n");
2581 printf("const unsigned kExpectedCount_NEON_%s = %u;\n",
2582 name,
2583 results_length);
2584 } else {
2585 // Check the results.
2586 VIXL_CHECK(expected_length == results_length);
2587 unsigned error_count = 0;
2588 unsigned counted_length = 0;
2589 const char* padding = " ";
2590 VIXL_ASSERT(strlen(padding) >= (lane_len_in_hex + 1));
2591 for (unsigned n = 0; n < inputs_n_length; n++) {
2592 for (unsigned imm1 = 0; imm1 < inputs_imm1_length; imm1++) {
2593 for (unsigned imm2 = 0; imm2 < inputs_imm2_length; imm2++) {
2594 bool error_in_vector = false;
2595
2596 counted_length++;
2597
2598 for (unsigned lane = 0; lane < vd_lane_count; lane++) {
2599 unsigned output_index =
2600 (n * inputs_imm1_length * inputs_imm2_length * vd_lane_count) +
2601 (imm1 * inputs_imm2_length * vd_lane_count) +
2602 (imm2 * vd_lane_count) + lane;
2603
2604 if (results[output_index] != expected[output_index]) {
2605 error_in_vector = true;
2606 break;
2607 }
2608 }
2609
2610 if (error_in_vector && (++error_count <= kErrorReportLimit)) {
2611 printf("%s\n", name);
2612 printf(" Vd%.*s| Imm%.*s| Vn%.*s| Imm%.*s| Vd%.*s| Expected\n",
2613 lane_len_in_hex + 1,
2614 padding,
2615 lane_len_in_hex,
2616 padding,
2617 lane_len_in_hex + 1,
2618 padding,
2619 lane_len_in_hex,
2620 padding,
2621 lane_len_in_hex + 1,
2622 padding);
2623
2624 for (unsigned lane = 0; lane < vd_lane_count; lane++) {
2625 unsigned output_index =
2626 (n * inputs_imm1_length * inputs_imm2_length *
2627 vd_lane_count) +
2628 (imm1 * inputs_imm2_length * vd_lane_count) +
2629 (imm2 * vd_lane_count) + lane;
2630 unsigned input_index_n =
2631 (inputs_n_length - vd_lane_count + n + 1 + lane) %
2632 inputs_n_length;
2633 unsigned input_index_imm1 = imm1;
2634 unsigned input_index_imm2 = imm2;
2635
2636 printf("%c0x%0*" PRIx64 " | 0x%0*" PRIx64 " | 0x%0*" PRIx64
2637 " "
2638 "| 0x%0*" PRIx64 " | 0x%0*" PRIx64 " | 0x%0*" PRIx64 "\n",
2639 results[output_index] != expected[output_index] ? '*'
2640 : ' ',
2641 lane_len_in_hex,
2642 static_cast<uint64_t>(inputs_d[lane]),
2643 lane_len_in_hex,
2644 static_cast<uint64_t>(inputs_imm1[input_index_imm1]),
2645 lane_len_in_hex,
2646 static_cast<uint64_t>(inputs_n[input_index_n]),
2647 lane_len_in_hex,
2648 static_cast<uint64_t>(inputs_imm2[input_index_imm2]),
2649 lane_len_in_hex,
2650 static_cast<uint64_t>(results[output_index]),
2651 lane_len_in_hex,
2652 static_cast<uint64_t>(expected[output_index]));
2653 }
2654 }
2655 }
2656 }
2657 }
2658 VIXL_ASSERT(counted_length == expected_length);
2659 if (error_count > kErrorReportLimit) {
2660 printf("%u other errors follow.\n", error_count - kErrorReportLimit);
2661 }
2662 VIXL_CHECK(error_count == 0);
2663 }
2664 delete[] results;
2665 }
2666
2667
2668 // ==== Floating-point tests. ====
2669
2670
2671 // Standard floating-point test expansion for both double- and single-precision
2672 // operations.
2673 #define STRINGIFY(s) #s
2674
2675 #define CALL_TEST_FP_HELPER(mnemonic, variant, type, input) \
2676 Test##type(STRINGIFY(mnemonic) "_" STRINGIFY(variant), \
2677 &MacroAssembler::mnemonic, \
2678 input, \
2679 sizeof(input) / sizeof(input[0]), \
2680 kExpected_##mnemonic##_##variant, \
2681 kExpectedCount_##mnemonic##_##variant)
2682
2683 #define DEFINE_TEST_FP(mnemonic, type, input) \
2684 TEST(mnemonic##_d) { \
2685 CALL_TEST_FP_HELPER(mnemonic, d, type, kInputDouble##input); \
2686 } \
2687 TEST(mnemonic##_s) { \
2688 CALL_TEST_FP_HELPER(mnemonic, s, type, kInputFloat##input); \
2689 }
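// For illustration, DEFINE_TEST_FP(fadd, 2Op, Basic) (used below) expands
// to roughly:
//
//   TEST(fadd_d) {
//     Test2Op("fadd_d",
//             &MacroAssembler::fadd,
//             kInputDoubleBasic,
//             sizeof(kInputDoubleBasic) / sizeof(kInputDoubleBasic[0]),
//             kExpected_fadd_d,
//             kExpectedCount_fadd_d);
//   }
//
// plus the equivalent single-precision test using kInputFloatBasic and the
// kExpected_fadd_s trace.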
2690
2691 // TODO: Test with a newer version of valgrind.
2692 //
2693 // Note: valgrind-3.10.0 does not properly interpret libm's fma() on x86_64.
2694 // Therefore this test will exit through an ASSERT and thus leak
2695 // memory.
2696 DEFINE_TEST_FP(fmadd, 3Op, Basic)
2697 DEFINE_TEST_FP(fmsub, 3Op, Basic)
2698 DEFINE_TEST_FP(fnmadd, 3Op, Basic)
2699 DEFINE_TEST_FP(fnmsub, 3Op, Basic)
2700
2701 DEFINE_TEST_FP(fadd, 2Op, Basic)
2702 DEFINE_TEST_FP(fdiv, 2Op, Basic)
2703 DEFINE_TEST_FP(fmax, 2Op, Basic)
2704 DEFINE_TEST_FP(fmaxnm, 2Op, Basic)
2705 DEFINE_TEST_FP(fmin, 2Op, Basic)
2706 DEFINE_TEST_FP(fminnm, 2Op, Basic)
2707 DEFINE_TEST_FP(fmul, 2Op, Basic)
2708 DEFINE_TEST_FP(fsub, 2Op, Basic)
2709 DEFINE_TEST_FP(fnmul, 2Op, Basic)
2710
2711 DEFINE_TEST_FP(fabs, 1Op, Basic)
2712 DEFINE_TEST_FP(fmov, 1Op, Basic)
2713 DEFINE_TEST_FP(fneg, 1Op, Basic)
2714 DEFINE_TEST_FP(fsqrt, 1Op, Basic)
2715 DEFINE_TEST_FP(frinta, 1Op, Conversions)
2716 DEFINE_TEST_FP(frinti, 1Op, Conversions)
2717 DEFINE_TEST_FP(frintm, 1Op, Conversions)
2718 DEFINE_TEST_FP(frintn, 1Op, Conversions)
2719 DEFINE_TEST_FP(frintp, 1Op, Conversions)
2720 DEFINE_TEST_FP(frintx, 1Op, Conversions)
2721 DEFINE_TEST_FP(frintz, 1Op, Conversions)
2722
2723 TEST(fcmp_d) { CALL_TEST_FP_HELPER(fcmp, d, Cmp, kInputDoubleBasic); }
2724 TEST(fcmp_s) { CALL_TEST_FP_HELPER(fcmp, s, Cmp, kInputFloatBasic); }
2725 TEST(fcmp_dz) { CALL_TEST_FP_HELPER(fcmp, dz, CmpZero, kInputDoubleBasic); }
2726 TEST(fcmp_sz) { CALL_TEST_FP_HELPER(fcmp, sz, CmpZero, kInputFloatBasic); }
2727
2728 TEST(fcvt_sd) { CALL_TEST_FP_HELPER(fcvt, sd, 1Op, kInputDoubleConversions); }
2729 TEST(fcvt_ds) { CALL_TEST_FP_HELPER(fcvt, ds, 1Op, kInputFloatConversions); }
2730
2731 #define DEFINE_TEST_FP_TO_INT(mnemonic, type, input) \
2732 TEST(mnemonic##_xd) { \
2733 CALL_TEST_FP_HELPER(mnemonic, xd, type, kInputDouble##input); \
2734 } \
2735 TEST(mnemonic##_xs) { \
2736 CALL_TEST_FP_HELPER(mnemonic, xs, type, kInputFloat##input); \
2737 } \
2738 TEST(mnemonic##_wd) { \
2739 CALL_TEST_FP_HELPER(mnemonic, wd, type, kInputDouble##input); \
2740 } \
2741 TEST(mnemonic##_ws) { \
2742 CALL_TEST_FP_HELPER(mnemonic, ws, type, kInputFloat##input); \
2743 }
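// In the variant names, the first letter is the integer destination register
// ('x' or 'w') and the second is the FP source register ('d' or 's'); for
// example, the xd variant tests conversions from a double to an X register.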
2744
2745 DEFINE_TEST_FP_TO_INT(fcvtas, FPToS, Conversions)
2746 DEFINE_TEST_FP_TO_INT(fcvtau, FPToU, Conversions)
2747 DEFINE_TEST_FP_TO_INT(fcvtms, FPToS, Conversions)
2748 DEFINE_TEST_FP_TO_INT(fcvtmu, FPToU, Conversions)
2749 DEFINE_TEST_FP_TO_INT(fcvtns, FPToS, Conversions)
2750 DEFINE_TEST_FP_TO_INT(fcvtnu, FPToU, Conversions)
2751 DEFINE_TEST_FP_TO_INT(fcvtzs, FPToFixedS, Conversions)
2752 DEFINE_TEST_FP_TO_INT(fcvtzu, FPToFixedU, Conversions)
2753
2754 // TODO: Scvtf-fixed-point
2755 // TODO: Scvtf-integer
2756 // TODO: Ucvtf-fixed-point
2757 // TODO: Ucvtf-integer
2758
2759 // TODO: Fccmp
2760 // TODO: Fcsel
2761
2762
2763 // ==== NEON Tests. ====
2764
2765 #define CALL_TEST_NEON_HELPER_1Op(mnemonic, vdform, vnform, input_n) \
2766 Test1OpNEON(STRINGIFY(mnemonic) "_" STRINGIFY(vdform), \
2767 &MacroAssembler::mnemonic, \
2768 input_n, \
2769 (sizeof(input_n) / sizeof(input_n[0])), \
2770 kExpected_NEON_##mnemonic##_##vdform, \
2771 kExpectedCount_NEON_##mnemonic##_##vdform, \
2772 kFormat##vdform, \
2773 kFormat##vnform)
2774
2775 #define CALL_TEST_NEON_HELPER_1OpAcross(mnemonic, vdform, vnform, input_n) \
2776 Test1OpAcrossNEON(STRINGIFY(mnemonic) "_" STRINGIFY(vdform) "_" STRINGIFY( \
2777 vnform), \
2778 &MacroAssembler::mnemonic, \
2779 input_n, \
2780 (sizeof(input_n) / sizeof(input_n[0])), \
2781 kExpected_NEON_##mnemonic##_##vdform##_##vnform, \
2782 kExpectedCount_NEON_##mnemonic##_##vdform##_##vnform, \
2783 kFormat##vdform, \
2784 kFormat##vnform)
2785
2786 #define CALL_TEST_NEON_HELPER_2Op( \
2787 mnemonic, vdform, vnform, vmform, input_d, input_n, input_m) \
2788 Test2OpNEON(STRINGIFY(mnemonic) "_" STRINGIFY(vdform), \
2789 &MacroAssembler::mnemonic, \
2790 input_d, \
2791 input_n, \
2792 (sizeof(input_n) / sizeof(input_n[0])), \
2793 input_m, \
2794 (sizeof(input_m) / sizeof(input_m[0])), \
2795 kExpected_NEON_##mnemonic##_##vdform, \
2796 kExpectedCount_NEON_##mnemonic##_##vdform, \
2797 kFormat##vdform, \
2798 kFormat##vnform, \
2799 kFormat##vmform)
2800
2801 #define CALL_TEST_NEON_HELPER_2OpImm( \
2802 mnemonic, vdform, vnform, input_n, input_m) \
2803 Test2OpImmNEON(STRINGIFY(mnemonic) "_" STRINGIFY(vdform) "_2OPIMM", \
2804 &MacroAssembler::mnemonic, \
2805 input_n, \
2806 (sizeof(input_n) / sizeof(input_n[0])), \
2807 input_m, \
2808 (sizeof(input_m) / sizeof(input_m[0])), \
2809 kExpected_NEON_##mnemonic##_##vdform##_2OPIMM, \
2810 kExpectedCount_NEON_##mnemonic##_##vdform##_2OPIMM, \
2811 kFormat##vdform, \
2812 kFormat##vnform)
2813
2814 #define CALL_TEST_NEON_HELPER_ByElement( \
2815 mnemonic, vdform, vnform, vmform, input_d, input_n, input_m, indices) \
2816 TestByElementNEON( \
2817 STRINGIFY(mnemonic) "_" STRINGIFY(vdform) "_" STRINGIFY( \
2818 vnform) "_" STRINGIFY(vmform), \
2819 &MacroAssembler::mnemonic, \
2820 input_d, \
2821 input_n, \
2822 (sizeof(input_n) / sizeof(input_n[0])), \
2823 input_m, \
2824 (sizeof(input_m) / sizeof(input_m[0])), \
2825 indices, \
2826 (sizeof(indices) / sizeof(indices[0])), \
2827 kExpected_NEON_##mnemonic##_##vdform##_##vnform##_##vmform, \
2828 kExpectedCount_NEON_##mnemonic##_##vdform##_##vnform##_##vmform, \
2829 kFormat##vdform, \
2830 kFormat##vnform, \
2831 kFormat##vmform)
2832
2833 #define CALL_TEST_NEON_HELPER_OpImmOpImm(helper, \
2834 mnemonic, \
2835 vdform, \
2836 vnform, \
2837 input_d, \
2838 input_imm1, \
2839 input_n, \
2840 input_imm2) \
2841 TestOpImmOpImmNEON(STRINGIFY(mnemonic) "_" STRINGIFY(vdform), \
2842 helper, \
2843 input_d, \
2844 input_imm1, \
2845 (sizeof(input_imm1) / sizeof(input_imm1[0])), \
2846 input_n, \
2847 (sizeof(input_n) / sizeof(input_n[0])), \
2848 input_imm2, \
2849 (sizeof(input_imm2) / sizeof(input_imm2[0])), \
2850 kExpected_NEON_##mnemonic##_##vdform, \
2851 kExpectedCount_NEON_##mnemonic##_##vdform, \
2852 kFormat##vdform, \
2853 kFormat##vnform)
2854
2855 #define CALL_TEST_NEON_HELPER_2SAME(mnemonic, variant, input) \
2856 CALL_TEST_NEON_HELPER_1Op(mnemonic, variant, variant, input)
2857
2858 #define DEFINE_TEST_NEON_2SAME_8B_16B(mnemonic, input) \
2859 TEST(mnemonic##_8B) { \
2860 CALL_TEST_NEON_HELPER_2SAME(mnemonic, 8B, kInput8bits##input); \
2861 } \
2862 TEST(mnemonic##_16B) { \
2863 CALL_TEST_NEON_HELPER_2SAME(mnemonic, 16B, kInput8bits##input); \
2864 }
2865
2866 #define DEFINE_TEST_NEON_2SAME_4H_8H(mnemonic, input) \
2867 TEST(mnemonic##_4H) { \
2868 CALL_TEST_NEON_HELPER_2SAME(mnemonic, 4H, kInput16bits##input); \
2869 } \
2870 TEST(mnemonic##_8H) { \
2871 CALL_TEST_NEON_HELPER_2SAME(mnemonic, 8H, kInput16bits##input); \
2872 }
2873
2874 #define DEFINE_TEST_NEON_2SAME_2S_4S(mnemonic, input) \
2875 TEST(mnemonic##_2S) { \
2876 CALL_TEST_NEON_HELPER_2SAME(mnemonic, 2S, kInput32bits##input); \
2877 } \
2878 TEST(mnemonic##_4S) { \
2879 CALL_TEST_NEON_HELPER_2SAME(mnemonic, 4S, kInput32bits##input); \
2880 }
2881
2882 #define DEFINE_TEST_NEON_2SAME_BH(mnemonic, input) \
2883 DEFINE_TEST_NEON_2SAME_8B_16B(mnemonic, input) \
2884 DEFINE_TEST_NEON_2SAME_4H_8H(mnemonic, input)
2885
2886 #define DEFINE_TEST_NEON_2SAME_NO2D(mnemonic, input) \
2887 DEFINE_TEST_NEON_2SAME_BH(mnemonic, input) \
2888 DEFINE_TEST_NEON_2SAME_2S_4S(mnemonic, input)
2889
2890 #define DEFINE_TEST_NEON_2SAME(mnemonic, input) \
2891 DEFINE_TEST_NEON_2SAME_NO2D(mnemonic, input) \
2892 TEST(mnemonic##_2D) { \
2893 CALL_TEST_NEON_HELPER_2SAME(mnemonic, 2D, kInput64bits##input); \
2894 }
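// Roughly, a use such as DEFINE_TEST_NEON_2SAME(<mnemonic>, Basic) defines
// the tests <mnemonic>_8B, _16B, _4H, _8H, _2S, _4S and _2D, each calling
// Test1OpNEON with the matching kInput<lane size>bitsBasic array and the
// kExpected_NEON_<mnemonic>_<form> / kExpectedCount_NEON_<mnemonic>_<form>
// trace data.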
2895 #define DEFINE_TEST_NEON_2SAME_SD(mnemonic, input) \
2896 DEFINE_TEST_NEON_2SAME_2S_4S(mnemonic, input) \
2897 TEST(mnemonic##_2D) { \
2898 CALL_TEST_NEON_HELPER_2SAME(mnemonic, 2D, kInput64bits##input); \
2899 }
2900
2901 #define DEFINE_TEST_NEON_2SAME_FP(mnemonic, input) \
2902 TEST(mnemonic##_2S) { \
2903 CALL_TEST_NEON_HELPER_2SAME(mnemonic, 2S, kInputFloat##input); \
2904 } \
2905 TEST(mnemonic##_4S) { \
2906 CALL_TEST_NEON_HELPER_2SAME(mnemonic, 4S, kInputFloat##input); \
2907 } \
2908 TEST(mnemonic##_2D) { \
2909 CALL_TEST_NEON_HELPER_2SAME(mnemonic, 2D, kInputDouble##input); \
2910 }
2911
2912 #define DEFINE_TEST_NEON_2SAME_FP_SCALAR(mnemonic, input) \
2913 TEST(mnemonic##_S) { \
2914 CALL_TEST_NEON_HELPER_2SAME(mnemonic, S, kInputFloat##input); \
2915 } \
2916 TEST(mnemonic##_D) { \
2917 CALL_TEST_NEON_HELPER_2SAME(mnemonic, D, kInputDouble##input); \
2918 }
2919
2920 #define DEFINE_TEST_NEON_2SAME_SCALAR_B(mnemonic, input) \
2921 TEST(mnemonic##_B) { \
2922 CALL_TEST_NEON_HELPER_2SAME(mnemonic, B, kInput8bits##input); \
2923 }
2924 #define DEFINE_TEST_NEON_2SAME_SCALAR_H(mnemonic, input) \
2925 TEST(mnemonic##_H) { \
2926 CALL_TEST_NEON_HELPER_2SAME(mnemonic, H, kInput16bits##input); \
2927 }
2928 #define DEFINE_TEST_NEON_2SAME_SCALAR_S(mnemonic, input) \
2929 TEST(mnemonic##_S) { \
2930 CALL_TEST_NEON_HELPER_2SAME(mnemonic, S, kInput32bits##input); \
2931 }
2932 #define DEFINE_TEST_NEON_2SAME_SCALAR_D(mnemonic, input) \
2933 TEST(mnemonic##_D) { \
2934 CALL_TEST_NEON_HELPER_2SAME(mnemonic, D, kInput64bits##input); \
2935 }
2936
2937 #define DEFINE_TEST_NEON_2SAME_SCALAR(mnemonic, input) \
2938 DEFINE_TEST_NEON_2SAME_SCALAR_B(mnemonic, input) \
2939 DEFINE_TEST_NEON_2SAME_SCALAR_H(mnemonic, input) \
2940 DEFINE_TEST_NEON_2SAME_SCALAR_S(mnemonic, input) \
2941 DEFINE_TEST_NEON_2SAME_SCALAR_D(mnemonic, input)
2942
2943 #define DEFINE_TEST_NEON_2SAME_SCALAR_SD(mnemonic, input) \
2944 DEFINE_TEST_NEON_2SAME_SCALAR_S(mnemonic, input) \
2945 DEFINE_TEST_NEON_2SAME_SCALAR_D(mnemonic, input)
2946
2947
2948 #define CALL_TEST_NEON_HELPER_ACROSS(mnemonic, vd_form, vn_form, input_n) \
2949 CALL_TEST_NEON_HELPER_1OpAcross(mnemonic, vd_form, vn_form, input_n)
2950
2951 #define DEFINE_TEST_NEON_ACROSS(mnemonic, input) \
2952 TEST(mnemonic##_B_8B) { \
2953 CALL_TEST_NEON_HELPER_ACROSS(mnemonic, B, 8B, kInput8bits##input); \
2954 } \
2955 TEST(mnemonic##_B_16B) { \
2956 CALL_TEST_NEON_HELPER_ACROSS(mnemonic, B, 16B, kInput8bits##input); \
2957 } \
2958 TEST(mnemonic##_H_4H) { \
2959 CALL_TEST_NEON_HELPER_ACROSS(mnemonic, H, 4H, kInput16bits##input); \
2960 } \
2961 TEST(mnemonic##_H_8H) { \
2962 CALL_TEST_NEON_HELPER_ACROSS(mnemonic, H, 8H, kInput16bits##input); \
2963 } \
2964 TEST(mnemonic##_S_4S) { \
2965 CALL_TEST_NEON_HELPER_ACROSS(mnemonic, S, 4S, kInput32bits##input); \
2966 }
2967
2968 #define DEFINE_TEST_NEON_ACROSS_LONG(mnemonic, input) \
2969 TEST(mnemonic##_H_8B) { \
2970 CALL_TEST_NEON_HELPER_ACROSS(mnemonic, H, 8B, kInput8bits##input); \
2971 } \
2972 TEST(mnemonic##_H_16B) { \
2973 CALL_TEST_NEON_HELPER_ACROSS(mnemonic, H, 16B, kInput8bits##input); \
2974 } \
2975 TEST(mnemonic##_S_4H) { \
2976 CALL_TEST_NEON_HELPER_ACROSS(mnemonic, S, 4H, kInput16bits##input); \
2977 } \
2978 TEST(mnemonic##_S_8H) { \
2979 CALL_TEST_NEON_HELPER_ACROSS(mnemonic, S, 8H, kInput16bits##input); \
2980 } \
2981 TEST(mnemonic##_D_4S) { \
2982 CALL_TEST_NEON_HELPER_ACROSS(mnemonic, D, 4S, kInput32bits##input); \
2983 }
2984
2985 #define DEFINE_TEST_NEON_ACROSS_FP(mnemonic, input) \
2986 TEST(mnemonic##_S_4S) { \
2987 CALL_TEST_NEON_HELPER_ACROSS(mnemonic, S, 4S, kInputFloat##input); \
2988 }
2989
2990 #define CALL_TEST_NEON_HELPER_2DIFF(mnemonic, vdform, vnform, input_n) \
2991 CALL_TEST_NEON_HELPER_1Op(mnemonic, vdform, vnform, input_n)
2992
2993 #define DEFINE_TEST_NEON_2DIFF_LONG(mnemonic, input) \
2994 TEST(mnemonic##_4H) { \
2995 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 4H, 8B, kInput8bits##input); \
2996 } \
2997 TEST(mnemonic##_8H) { \
2998 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 8H, 16B, kInput8bits##input); \
2999 } \
3000 TEST(mnemonic##_2S) { \
3001 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 2S, 4H, kInput16bits##input); \
3002 } \
3003 TEST(mnemonic##_4S) { \
3004 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 4S, 8H, kInput16bits##input); \
3005 } \
3006 TEST(mnemonic##_1D) { \
3007 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 1D, 2S, kInput32bits##input); \
3008 } \
3009 TEST(mnemonic##_2D) { \
3010 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 2D, 4S, kInput32bits##input); \
3011 }
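// In these 'long' 2DIFF forms the destination lanes are twice as wide and
// half as numerous as the source lanes (e.g. 4H from 8B), as used by
// pairwise lengthening operations.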
3012
3013 #define DEFINE_TEST_NEON_2DIFF_NARROW(mnemonic, input) \
3014 TEST(mnemonic##_8B) { \
3015 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 8B, 8H, kInput16bits##input); \
3016 } \
3017 TEST(mnemonic##_4H) { \
3018 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 4H, 4S, kInput32bits##input); \
3019 } \
3020 TEST(mnemonic##_2S) { \
3021 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 2S, 2D, kInput64bits##input); \
3022 } \
3023 TEST(mnemonic##2_16B) { \
3024 CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 16B, 8H, kInput16bits##input); \
3025 } \
3026 TEST(mnemonic##2_8H) { \
3027 CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 8H, 4S, kInput32bits##input); \
3028 } \
3029 TEST(mnemonic##2_4S) { \
3030 CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 4S, 2D, kInput64bits##input); \
3031 }
3032
3033 #define DEFINE_TEST_NEON_2DIFF_FP_LONG(mnemonic, input) \
3034 TEST(mnemonic##_4S) { \
3035 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 4S, 4H, kInputFloat16##input); \
3036 } \
3037 TEST(mnemonic##_2D) { \
3038 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 2D, 2S, kInputFloat##input); \
3039 } \
3040 TEST(mnemonic##2_4S) { \
3041 CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 4S, 8H, kInputFloat16##input); \
3042 } \
3043 TEST(mnemonic##2_2D) { \
3044 CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 2D, 4S, kInputFloat##input); \
3045 }
3046
3047 #define DEFINE_TEST_NEON_2DIFF_FP_NARROW(mnemonic, input) \
3048 TEST(mnemonic##_4H) { \
3049 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 4H, 4S, kInputFloat##input); \
3050 } \
3051 TEST(mnemonic##_2S) { \
3052 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 2S, 2D, kInputDouble##input); \
3053 } \
3054 TEST(mnemonic##2_8H) { \
3055 CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 8H, 4S, kInputFloat##input); \
3056 } \
3057 TEST(mnemonic##2_4S) { \
3058 CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 4S, 2D, kInputDouble##input); \
3059 }
3060
3061 #define DEFINE_TEST_NEON_2DIFF_FP_NARROW_2S(mnemonic, input) \
3062 TEST(mnemonic##_2S) { \
3063 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 2S, 2D, kInputDouble##input); \
3064 } \
3065 TEST(mnemonic##2_4S) { \
3066 CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 4S, 2D, kInputDouble##input); \
3067 }
3068
3069 #define DEFINE_TEST_NEON_2DIFF_SCALAR_NARROW(mnemonic, input) \
3070 TEST(mnemonic##_B) { \
3071 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, B, H, kInput16bits##input); \
3072 } \
3073 TEST(mnemonic##_H) { \
3074 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, H, S, kInput32bits##input); \
3075 } \
3076 TEST(mnemonic##_S) { \
3077 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, S, D, kInput64bits##input); \
3078 }
3079
3080 #define DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(mnemonic, input) \
3081 TEST(mnemonic##_S) { \
3082 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, S, 2S, kInputFloat##input); \
3083 } \
3084 TEST(mnemonic##_D) { \
3085 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, D, 2D, kInputDouble##input); \
3086 }
3087
3088 #define CALL_TEST_NEON_HELPER_3SAME(mnemonic, variant, input_d, input_nm) \
3089 { \
3090 CALL_TEST_NEON_HELPER_2Op(mnemonic, \
3091 variant, \
3092 variant, \
3093 variant, \
3094 input_d, \
3095 input_nm, \
3096 input_nm); \
3097 }
3098
3099 #define DEFINE_TEST_NEON_3SAME_8B_16B(mnemonic, input) \
3100 TEST(mnemonic##_8B) { \
3101 CALL_TEST_NEON_HELPER_3SAME(mnemonic, \
3102 8B, \
3103 kInput8bitsAccDestination, \
3104 kInput8bits##input); \
3105 } \
3106 TEST(mnemonic##_16B) { \
3107 CALL_TEST_NEON_HELPER_3SAME(mnemonic, \
3108 16B, \
3109 kInput8bitsAccDestination, \
3110 kInput8bits##input); \
3111 }
3112
3113 #define DEFINE_TEST_NEON_3SAME_HS(mnemonic, input) \
3114 TEST(mnemonic##_4H) { \
3115 CALL_TEST_NEON_HELPER_3SAME(mnemonic, \
3116 4H, \
3117 kInput16bitsAccDestination, \
3118 kInput16bits##input); \
3119 } \
3120 TEST(mnemonic##_8H) { \
3121 CALL_TEST_NEON_HELPER_3SAME(mnemonic, \
3122 8H, \
3123 kInput16bitsAccDestination, \
3124 kInput16bits##input); \
3125 } \
3126 TEST(mnemonic##_2S) { \
3127 CALL_TEST_NEON_HELPER_3SAME(mnemonic, \
3128 2S, \
3129 kInput32bitsAccDestination, \
3130 kInput32bits##input); \
3131 } \
3132 TEST(mnemonic##_4S) { \
3133 CALL_TEST_NEON_HELPER_3SAME(mnemonic, \
3134 4S, \
3135 kInput32bitsAccDestination, \
3136 kInput32bits##input); \
3137 }
3138
3139 #define DEFINE_TEST_NEON_3SAME_NO2D(mnemonic, input) \
3140 DEFINE_TEST_NEON_3SAME_8B_16B(mnemonic, input) \
3141 DEFINE_TEST_NEON_3SAME_HS(mnemonic, input)
3142
3143 #define DEFINE_TEST_NEON_3SAME(mnemonic, input) \
3144 DEFINE_TEST_NEON_3SAME_NO2D(mnemonic, input) \
3145 TEST(mnemonic##_2D) { \
3146 CALL_TEST_NEON_HELPER_3SAME(mnemonic, \
3147 2D, \
3148 kInput64bitsAccDestination, \
3149 kInput64bits##input); \
3150 }
3151
3152 #define DEFINE_TEST_NEON_3SAME_FP(mnemonic, input) \
3153 TEST(mnemonic##_2S) { \
3154 CALL_TEST_NEON_HELPER_3SAME(mnemonic, \
3155 2S, \
3156 kInputFloatAccDestination, \
3157 kInputFloat##input); \
3158 } \
3159 TEST(mnemonic##_4S) { \
3160 CALL_TEST_NEON_HELPER_3SAME(mnemonic, \
3161 4S, \
3162 kInputFloatAccDestination, \
3163 kInputFloat##input); \
3164 } \
3165 TEST(mnemonic##_2D) { \
3166 CALL_TEST_NEON_HELPER_3SAME(mnemonic, \
3167 2D, \
3168 kInputDoubleAccDestination, \
3169 kInputDouble##input); \
3170 }
3171
3172 #define DEFINE_TEST_NEON_3SAME_SCALAR_D(mnemonic, input) \
3173 TEST(mnemonic##_D) { \
3174 CALL_TEST_NEON_HELPER_3SAME(mnemonic, \
3175 D, \
3176 kInput64bitsAccDestination, \
3177 kInput64bits##input); \
3178 }
3179
3180 #define DEFINE_TEST_NEON_3SAME_SCALAR_HS(mnemonic, input) \
3181 TEST(mnemonic##_H) { \
3182 CALL_TEST_NEON_HELPER_3SAME(mnemonic, \
3183 H, \
3184 kInput16bitsAccDestination, \
3185 kInput16bits##input); \
3186 } \
3187 TEST(mnemonic##_S) { \
3188 CALL_TEST_NEON_HELPER_3SAME(mnemonic, \
3189 S, \
3190 kInput32bitsAccDestination, \
3191 kInput32bits##input); \
3192 }
3193
3194 #define DEFINE_TEST_NEON_3SAME_SCALAR(mnemonic, input) \
3195 TEST(mnemonic##_B) { \
3196 CALL_TEST_NEON_HELPER_3SAME(mnemonic, \
3197 B, \
3198 kInput8bitsAccDestination, \
3199 kInput8bits##input); \
3200 } \
3201 TEST(mnemonic##_H) { \
3202 CALL_TEST_NEON_HELPER_3SAME(mnemonic, \
3203 H, \
3204 kInput16bitsAccDestination, \
3205 kInput16bits##input); \
3206 } \
3207 TEST(mnemonic##_S) { \
3208 CALL_TEST_NEON_HELPER_3SAME(mnemonic, \
3209 S, \
3210 kInput32bitsAccDestination, \
3211 kInput32bits##input); \
3212 } \
3213 TEST(mnemonic##_D) { \
3214 CALL_TEST_NEON_HELPER_3SAME(mnemonic, \
3215 D, \
3216 kInput64bitsAccDestination, \
3217 kInput64bits##input); \
3218 }
3219
3220 #define DEFINE_TEST_NEON_3SAME_FP_SCALAR(mnemonic, input) \
3221 TEST(mnemonic##_S) { \
3222 CALL_TEST_NEON_HELPER_3SAME(mnemonic, \
3223 S, \
3224 kInputFloatAccDestination, \
3225 kInputFloat##input); \
3226 } \
3227 TEST(mnemonic##_D) { \
3228 CALL_TEST_NEON_HELPER_3SAME(mnemonic, \
3229 D, \
3230 kInputDoubleAccDestination, \
3231 kInputDouble##input); \
3232 }
3233
3234 #define CALL_TEST_NEON_HELPER_3DIFF( \
3235 mnemonic, vdform, vnform, vmform, input_d, input_n, input_m) \
3236 { \
3237 CALL_TEST_NEON_HELPER_2Op(mnemonic, \
3238 vdform, \
3239 vnform, \
3240 vmform, \
3241 input_d, \
3242 input_n, \
3243 input_m); \
3244 }
3245
3246 #define DEFINE_TEST_NEON_3DIFF_LONG_8H(mnemonic, input) \
3247 TEST(mnemonic##_8H) { \
3248 CALL_TEST_NEON_HELPER_3DIFF(mnemonic, \
3249 8H, \
3250 8B, \
3251 8B, \
3252 kInput16bitsAccDestination, \
3253 kInput8bits##input, \
3254 kInput8bits##input); \
3255 } \
3256 TEST(mnemonic##2_8H) { \
3257 CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, \
3258 8H, \
3259 16B, \
3260 16B, \
3261 kInput16bitsAccDestination, \
3262 kInput8bits##input, \
3263 kInput8bits##input); \
3264 }
3265
3266 #define DEFINE_TEST_NEON_3DIFF_LONG_4S(mnemonic, input) \
3267 TEST(mnemonic##_4S) { \
3268 CALL_TEST_NEON_HELPER_3DIFF(mnemonic, \
3269 4S, \
3270 4H, \
3271 4H, \
3272 kInput32bitsAccDestination, \
3273 kInput16bits##input, \
3274 kInput16bits##input); \
3275 } \
3276 TEST(mnemonic##2_4S) { \
3277 CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, \
3278 4S, \
3279 8H, \
3280 8H, \
3281 kInput32bitsAccDestination, \
3282 kInput16bits##input, \
3283 kInput16bits##input); \
3284 }
3285
3286 #define DEFINE_TEST_NEON_3DIFF_LONG_2D(mnemonic, input) \
3287 TEST(mnemonic##_2D) { \
3288 CALL_TEST_NEON_HELPER_3DIFF(mnemonic, \
3289 2D, \
3290 2S, \
3291 2S, \
3292 kInput64bitsAccDestination, \
3293 kInput32bits##input, \
3294 kInput32bits##input); \
3295 } \
3296 TEST(mnemonic##2_2D) { \
3297 CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, \
3298 2D, \
3299 4S, \
3300 4S, \
3301 kInput64bitsAccDestination, \
3302 kInput32bits##input, \
3303 kInput32bits##input); \
3304 }
3305
3306 #define DEFINE_TEST_NEON_3DIFF_LONG_SD(mnemonic, input) \
3307 DEFINE_TEST_NEON_3DIFF_LONG_4S(mnemonic, input) \
3308 DEFINE_TEST_NEON_3DIFF_LONG_2D(mnemonic, input)
3309
3310 #define DEFINE_TEST_NEON_3DIFF_LONG(mnemonic, input) \
3311 DEFINE_TEST_NEON_3DIFF_LONG_8H(mnemonic, input) \
3312 DEFINE_TEST_NEON_3DIFF_LONG_4S(mnemonic, input) \
3313 DEFINE_TEST_NEON_3DIFF_LONG_2D(mnemonic, input)
3314
3315 #define DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_S(mnemonic, input) \
3316 TEST(mnemonic##_S) { \
3317 CALL_TEST_NEON_HELPER_3DIFF(mnemonic, \
3318 S, \
3319 H, \
3320 H, \
3321 kInput32bitsAccDestination, \
3322 kInput16bits##input, \
3323 kInput16bits##input); \
3324 }
3325
3326 #define DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_D(mnemonic, input) \
3327 TEST(mnemonic##_D) { \
3328 CALL_TEST_NEON_HELPER_3DIFF(mnemonic, \
3329 D, \
3330 S, \
3331 S, \
3332 kInput64bitsAccDestination, \
3333 kInput32bits##input, \
3334 kInput32bits##input); \
3335 }
3336
3337 #define DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_SD(mnemonic, input) \
3338 DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_S(mnemonic, input) \
3339 DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_D(mnemonic, input)
3340
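// Wide variants: the destination and first source use the wide arrangement and
// the second source uses the narrow one; the mnemonic##2 tests take the second
// source from a 128-bit arrangement (upper half).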
3341 #define DEFINE_TEST_NEON_3DIFF_WIDE(mnemonic, input) \
3342 TEST(mnemonic##_8H) { \
3343 CALL_TEST_NEON_HELPER_3DIFF(mnemonic, \
3344 8H, \
3345 8H, \
3346 8B, \
3347 kInput16bitsAccDestination, \
3348 kInput16bits##input, \
3349 kInput8bits##input); \
3350 } \
3351 TEST(mnemonic##_4S) { \
3352 CALL_TEST_NEON_HELPER_3DIFF(mnemonic, \
3353 4S, \
3354 4S, \
3355 4H, \
3356 kInput32bitsAccDestination, \
3357 kInput32bits##input, \
3358 kInput16bits##input); \
3359 } \
3360 TEST(mnemonic##_2D) { \
3361 CALL_TEST_NEON_HELPER_3DIFF(mnemonic, \
3362 2D, \
3363 2D, \
3364 2S, \
3365 kInput64bitsAccDestination, \
3366 kInput64bits##input, \
3367 kInput32bits##input); \
3368 } \
3369 TEST(mnemonic##2_8H) { \
3370 CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, \
3371 8H, \
3372 8H, \
3373 16B, \
3374 kInput16bitsAccDestination, \
3375 kInput16bits##input, \
3376 kInput8bits##input); \
3377 } \
3378 TEST(mnemonic##2_4S) { \
3379 CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, \
3380 4S, \
3381 4S, \
3382 8H, \
3383 kInput32bitsAccDestination, \
3384 kInput32bits##input, \
3385 kInput16bits##input); \
3386 } \
3387 TEST(mnemonic##2_2D) { \
3388 CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, \
3389 2D, \
3390 2D, \
3391 4S, \
3392 kInput64bitsAccDestination, \
3393 kInput64bits##input, \
3394 kInput32bits##input); \
3395 }
3396
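// Narrowing variants: the destination elements are half the width of the source
// elements; the mnemonic##2 tests use the 128-bit destination arrangement,
// which writes the upper half of the destination.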
3397 #define DEFINE_TEST_NEON_3DIFF_NARROW(mnemonic, input) \
3398 TEST(mnemonic##_8B) { \
3399 CALL_TEST_NEON_HELPER_3DIFF(mnemonic, \
3400 8B, \
3401 8H, \
3402 8H, \
3403 kInput8bitsAccDestination, \
3404 kInput16bits##input, \
3405 kInput16bits##input); \
3406 } \
3407 TEST(mnemonic##_4H) { \
3408 CALL_TEST_NEON_HELPER_3DIFF(mnemonic, \
3409 4H, \
3410 4S, \
3411 4S, \
3412 kInput16bitsAccDestination, \
3413 kInput32bits##input, \
3414 kInput32bits##input); \
3415 } \
3416 TEST(mnemonic##_2S) { \
3417 CALL_TEST_NEON_HELPER_3DIFF(mnemonic, \
3418 2S, \
3419 2D, \
3420 2D, \
3421 kInput32bitsAccDestination, \
3422 kInput64bits##input, \
3423 kInput64bits##input); \
3424 } \
3425 TEST(mnemonic##2_16B) { \
3426 CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, \
3427 16B, \
3428 8H, \
3429 8H, \
3430 kInput8bitsAccDestination, \
3431 kInput16bits##input, \
3432 kInput16bits##input); \
3433 } \
3434 TEST(mnemonic##2_8H) { \
3435 CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, \
3436 8H, \
3437 4S, \
3438 4S, \
3439 kInput16bitsAccDestination, \
3440 kInput32bits##input, \
3441 kInput32bits##input); \
3442 } \
3443 TEST(mnemonic##2_4S) { \
3444 CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, \
3445 4S, \
3446 2D, \
3447 2D, \
3448 kInput32bitsAccDestination, \
3449 kInput64bits##input, \
3450 kInput64bits##input); \
3451 }
3452
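// 'Two operands plus immediate' helper: one source register arrangement and a
// list of immediates (shift amounts, lane indices or zero, depending on the
// instruction under test).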
3453 #define CALL_TEST_NEON_HELPER_2OPIMM( \
3454 mnemonic, vdform, vnform, input_n, input_imm) \
3455 { \
3456 CALL_TEST_NEON_HELPER_2OpImm(mnemonic, \
3457 vdform, \
3458 vnform, \
3459 input_n, \
3460 input_imm); \
3461 }
3462
3463 #define DEFINE_TEST_NEON_2OPIMM(mnemonic, input, input_imm) \
3464 TEST(mnemonic##_8B_2OPIMM) { \
3465 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
3466 8B, \
3467 8B, \
3468 kInput8bits##input, \
3469 kInput8bitsImm##input_imm); \
3470 } \
3471 TEST(mnemonic##_16B_2OPIMM) { \
3472 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
3473 16B, \
3474 16B, \
3475 kInput8bits##input, \
3476 kInput8bitsImm##input_imm); \
3477 } \
3478 TEST(mnemonic##_4H_2OPIMM) { \
3479 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
3480 4H, \
3481 4H, \
3482 kInput16bits##input, \
3483 kInput16bitsImm##input_imm); \
3484 } \
3485 TEST(mnemonic##_8H_2OPIMM) { \
3486 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
3487 8H, \
3488 8H, \
3489 kInput16bits##input, \
3490 kInput16bitsImm##input_imm); \
3491 } \
3492 TEST(mnemonic##_2S_2OPIMM) { \
3493 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
3494 2S, \
3495 2S, \
3496 kInput32bits##input, \
3497 kInput32bitsImm##input_imm); \
3498 } \
3499 TEST(mnemonic##_4S_2OPIMM) { \
3500 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
3501 4S, \
3502 4S, \
3503 kInput32bits##input, \
3504 kInput32bitsImm##input_imm); \
3505 } \
3506 TEST(mnemonic##_2D_2OPIMM) { \
3507 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
3508 2D, \
3509 2D, \
3510 kInput64bits##input, \
3511 kInput64bitsImm##input_imm); \
3512 }
3513
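// Copy forms (e.g. DUP): the source form is a single scalar lane (B, H, S or D)
// and the immediate list supplies the lane indices.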
3514 #define DEFINE_TEST_NEON_2OPIMM_COPY(mnemonic, input, input_imm) \
3515 TEST(mnemonic##_8B_2OPIMM) { \
3516 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
3517 8B, \
3518 B, \
3519 kInput8bits##input, \
3520 kInput8bitsImm##input_imm); \
3521 } \
3522 TEST(mnemonic##_16B_2OPIMM) { \
3523 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
3524 16B, \
3525 B, \
3526 kInput8bits##input, \
3527 kInput8bitsImm##input_imm); \
3528 } \
3529 TEST(mnemonic##_4H_2OPIMM) { \
3530 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
3531 4H, \
3532 H, \
3533 kInput16bits##input, \
3534 kInput16bitsImm##input_imm); \
3535 } \
3536 TEST(mnemonic##_8H_2OPIMM) { \
3537 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
3538 8H, \
3539 H, \
3540 kInput16bits##input, \
3541 kInput16bitsImm##input_imm); \
3542 } \
3543 TEST(mnemonic##_2S_2OPIMM) { \
3544 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
3545 2S, \
3546 S, \
3547 kInput32bits##input, \
3548 kInput32bitsImm##input_imm); \
3549 } \
3550 TEST(mnemonic##_4S_2OPIMM) { \
3551 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
3552 4S, \
3553 S, \
3554 kInput32bits##input, \
3555 kInput32bitsImm##input_imm); \
3556 } \
3557 TEST(mnemonic##_2D_2OPIMM) { \
3558 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
3559 2D, \
3560 D, \
3561 kInput64bits##input, \
3562 kInput64bitsImm##input_imm); \
3563 }
3564
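// Shift-narrow forms: the destination elements are half the width of the source
// elements; the mnemonic##2 tests write the upper half of the destination.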
3565 #define DEFINE_TEST_NEON_2OPIMM_NARROW(mnemonic, input, input_imm) \
3566 TEST(mnemonic##_8B_2OPIMM) { \
3567 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
3568 8B, \
3569 8H, \
3570 kInput16bits##input, \
3571 kInput8bitsImm##input_imm); \
3572 } \
3573 TEST(mnemonic##_4H_2OPIMM) { \
3574 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
3575 4H, \
3576 4S, \
3577 kInput32bits##input, \
3578 kInput16bitsImm##input_imm); \
3579 } \
3580 TEST(mnemonic##_2S_2OPIMM) { \
3581 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
3582 2S, \
3583 2D, \
3584 kInput64bits##input, \
3585 kInput32bitsImm##input_imm); \
3586 } \
3587 TEST(mnemonic##2_16B_2OPIMM) { \
3588 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic##2, \
3589 16B, \
3590 8H, \
3591 kInput16bits##input, \
3592 kInput8bitsImm##input_imm); \
3593 } \
3594 TEST(mnemonic##2_8H_2OPIMM) { \
3595 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic##2, \
3596 8H, \
3597 4S, \
3598 kInput32bits##input, \
3599 kInput16bitsImm##input_imm); \
3600 } \
3601 TEST(mnemonic##2_4S_2OPIMM) { \
3602 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic##2, \
3603 4S, \
3604 2D, \
3605 kInput64bits##input, \
3606 kInput32bitsImm##input_imm); \
3607 }
3608
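// Scalar narrowing forms: B from H, H from S and S from D.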
3609 #define DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(mnemonic, input, input_imm) \
3610 TEST(mnemonic##_B_2OPIMM) { \
3611 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
3612 B, \
3613 H, \
3614 kInput16bits##input, \
3615 kInput8bitsImm##input_imm); \
3616 } \
3617 TEST(mnemonic##_H_2OPIMM) { \
3618 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
3619 H, \
3620 S, \
3621 kInput32bits##input, \
3622 kInput16bitsImm##input_imm); \
3623 } \
3624 TEST(mnemonic##_S_2OPIMM) { \
3625 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
3626 S, \
3627 D, \
3628 kInput64bits##input, \
3629 kInput32bitsImm##input_imm); \
3630 }
3631
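// Floating-point compare-against-zero forms. Note that the 2S test uses
// kInputFloatBasic directly rather than the 'input' argument.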
3632 #define DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(mnemonic, input, input_imm) \
3633 TEST(mnemonic##_2S_2OPIMM) { \
3634 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
3635 2S, \
3636 2S, \
3637 kInputFloat##Basic, \
3638 kInputDoubleImm##input_imm); \
3639 } \
3640 TEST(mnemonic##_4S_2OPIMM) { \
3641 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
3642 4S, \
3643 4S, \
3644 kInputFloat##input, \
3645 kInputDoubleImm##input_imm); \
3646 } \
3647 TEST(mnemonic##_2D_2OPIMM) { \
3648 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
3649 2D, \
3650 2D, \
3651 kInputDouble##input, \
3652 kInputDoubleImm##input_imm); \
3653 }
3654
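// Floating-point inputs combined with an integer immediate, used here for the
// FCVTZS/FCVTZU fixed-point conversions. As above, the 2S test uses
// kInputFloatBasic directly rather than the 'input' argument.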
3655 #define DEFINE_TEST_NEON_2OPIMM_FP(mnemonic, input, input_imm) \
3656 TEST(mnemonic##_2S_2OPIMM) { \
3657 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
3658 2S, \
3659 2S, \
3660 kInputFloat##Basic, \
3661 kInput32bitsImm##input_imm); \
3662 } \
3663 TEST(mnemonic##_4S_2OPIMM) { \
3664 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
3665 4S, \
3666 4S, \
3667 kInputFloat##input, \
3668 kInput32bitsImm##input_imm); \
3669 } \
3670 TEST(mnemonic##_2D_2OPIMM) { \
3671 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
3672 2D, \
3673 2D, \
3674 kInputDouble##input, \
3675 kInput64bitsImm##input_imm); \
3676 }
3677
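// Scalar equivalents of the FP forms above; the S test likewise uses
// kInputFloatBasic directly.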
3678 #define DEFINE_TEST_NEON_2OPIMM_FP_SCALAR(mnemonic, input, input_imm) \
3679 TEST(mnemonic##_S_2OPIMM) { \
3680 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
3681 S, \
3682 S, \
3683 kInputFloat##Basic, \
3684 kInput32bitsImm##input_imm); \
3685 } \
3686 TEST(mnemonic##_D_2OPIMM) { \
3687 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
3688 D, \
3689 D, \
3690 kInputDouble##input, \
3691 kInput64bitsImm##input_imm); \
3692 }
3693
3694 #define DEFINE_TEST_NEON_2OPIMM_SD(mnemonic, input, input_imm) \
3695 TEST(mnemonic##_2S_2OPIMM) { \
3696 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
3697 2S, \
3698 2S, \
3699 kInput32bits##input, \
3700 kInput32bitsImm##input_imm); \
3701 } \
3702 TEST(mnemonic##_4S_2OPIMM) { \
3703 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
3704 4S, \
3705 4S, \
3706 kInput32bits##input, \
3707 kInput32bitsImm##input_imm); \
3708 } \
3709 TEST(mnemonic##_2D_2OPIMM) { \
3710 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
3711 2D, \
3712 2D, \
3713 kInput64bits##input, \
3714 kInput64bitsImm##input_imm); \
3715 }
3716
3717 #define DEFINE_TEST_NEON_2OPIMM_SCALAR_D(mnemonic, input, input_imm) \
3718 TEST(mnemonic##_D_2OPIMM) { \
3719 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
3720 D, \
3721 D, \
3722 kInput64bits##input, \
3723 kInput64bitsImm##input_imm); \
3724 }
3725
3726 #define DEFINE_TEST_NEON_2OPIMM_SCALAR_SD(mnemonic, input, input_imm) \
3727 TEST(mnemonic##_S_2OPIMM) { \
3728 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
3729 S, \
3730 S, \
3731 kInput32bits##input, \
3732 kInput32bitsImm##input_imm); \
3733 } \
3734 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(mnemonic, input, input_imm)
3735
3736 #define DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_D(mnemonic, input, input_imm) \
3737 TEST(mnemonic##_D_2OPIMM) { \
3738 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
3739 D, \
3740 D, \
3741 kInputDouble##input, \
3742 kInputDoubleImm##input_imm); \
3743 }
3744
3745 #define DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_SD(mnemonic, input, input_imm) \
3746 TEST(mnemonic##_S_2OPIMM) { \
3747 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
3748 S, \
3749 S, \
3750 kInputFloat##input, \
3751 kInputDoubleImm##input_imm); \
3752 } \
3753 DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_D(mnemonic, input, input_imm)
3754
3755 #define DEFINE_TEST_NEON_2OPIMM_SCALAR(mnemonic, input, input_imm) \
3756 TEST(mnemonic##_B_2OPIMM) { \
3757 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
3758 B, \
3759 B, \
3760 kInput8bits##input, \
3761 kInput8bitsImm##input_imm); \
3762 } \
3763 TEST(mnemonic##_H_2OPIMM) { \
3764 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
3765 H, \
3766 H, \
3767 kInput16bits##input, \
3768 kInput16bitsImm##input_imm); \
3769 } \
3770 DEFINE_TEST_NEON_2OPIMM_SCALAR_SD(mnemonic, input, input_imm)
3771
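// Shift-long forms: the destination elements are twice the width of the source
// elements; the mnemonic##2 tests read from the 128-bit source arrangement
// (upper half).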
3772 #define DEFINE_TEST_NEON_2OPIMM_LONG(mnemonic, input, input_imm) \
3773 TEST(mnemonic##_8H_2OPIMM) { \
3774 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
3775 8H, \
3776 8B, \
3777 kInput8bits##input, \
3778 kInput8bitsImm##input_imm); \
3779 } \
3780 TEST(mnemonic##_4S_2OPIMM) { \
3781 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
3782 4S, \
3783 4H, \
3784 kInput16bits##input, \
3785 kInput16bitsImm##input_imm); \
3786 } \
3787 TEST(mnemonic##_2D_2OPIMM) { \
3788 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
3789 2D, \
3790 2S, \
3791 kInput32bits##input, \
3792 kInput32bitsImm##input_imm); \
3793 } \
3794 TEST(mnemonic##2_8H_2OPIMM) { \
3795 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic##2, \
3796 8H, \
3797 16B, \
3798 kInput8bits##input, \
3799 kInput8bitsImm##input_imm); \
3800 } \
3801 TEST(mnemonic##2_4S_2OPIMM) { \
3802 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic##2, \
3803 4S, \
3804 8H, \
3805 kInput16bits##input, \
3806 kInput16bitsImm##input_imm); \
3807 } \
3808 TEST(mnemonic##2_2D_2OPIMM) { \
3809 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic##2, \
3810 2D, \
3811 4S, \
3812 kInput32bits##input, \
3813 kInput32bitsImm##input_imm); \
3814 }
3815
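// By-element (indexed) forms: the second source operand is a single lane of a
// vector, and 'indices' lists the lane numbers to exercise.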
3816 #define CALL_TEST_NEON_HELPER_BYELEMENT( \
3817 mnemonic, vdform, vnform, vmform, input_d, input_n, input_m, indices) \
3818 { \
3819 CALL_TEST_NEON_HELPER_ByElement(mnemonic, \
3820 vdform, \
3821 vnform, \
3822 vmform, \
3823 input_d, \
3824 input_n, \
3825 input_m, \
3826 indices); \
3827 }
3828
3829 #define DEFINE_TEST_NEON_BYELEMENT(mnemonic, input_d, input_n, input_m) \
3830 TEST(mnemonic##_4H_4H_H) { \
3831 CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \
3832 4H, \
3833 4H, \
3834 H, \
3835 kInput16bits##input_d, \
3836 kInput16bits##input_n, \
3837 kInput16bits##input_m, \
3838 kInputHIndices); \
3839 } \
3840 TEST(mnemonic##_8H_8H_H) { \
3841 CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \
3842 8H, \
3843 8H, \
3844 H, \
3845 kInput16bits##input_d, \
3846 kInput16bits##input_n, \
3847 kInput16bits##input_m, \
3848 kInputHIndices); \
3849 } \
3850 TEST(mnemonic##_2S_2S_S) { \
3851 CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \
3852 2S, \
3853 2S, \
3854 S, \
3855 kInput32bits##input_d, \
3856 kInput32bits##input_n, \
3857 kInput32bits##input_m, \
3858 kInputSIndices); \
3859 } \
3860 TEST(mnemonic##_4S_4S_S) { \
3861 CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \
3862 4S, \
3863 4S, \
3864 S, \
3865 kInput32bits##input_d, \
3866 kInput32bits##input_n, \
3867 kInput32bits##input_m, \
3868 kInputSIndices); \
3869 }
3870
3871 #define DEFINE_TEST_NEON_BYELEMENT_SCALAR(mnemonic, input_d, input_n, input_m) \
3872 TEST(mnemonic##_H_H_H) { \
3873 CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \
3874 H, \
3875 H, \
3876 H, \
3877 kInput16bits##input_d, \
3878 kInput16bits##input_n, \
3879 kInput16bits##input_m, \
3880 kInputHIndices); \
3881 } \
3882 TEST(mnemonic##_S_S_S) { \
3883 CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \
3884 S, \
3885 S, \
3886 S, \
3887 kInput32bits##input_d, \
3888 kInput32bits##input_n, \
3889 kInput32bits##input_m, \
3890 kInputSIndices); \
3891 }
3892
3893 #define DEFINE_TEST_NEON_FP_BYELEMENT(mnemonic, input_d, input_n, input_m) \
3894 TEST(mnemonic##_2S_2S_S) { \
3895 CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \
3896 2S, \
3897 2S, \
3898 S, \
3899 kInputFloat##input_d, \
3900 kInputFloat##input_n, \
3901 kInputFloat##input_m, \
3902 kInputSIndices); \
3903 } \
3904 TEST(mnemonic##_4S_4S_S) { \
3905 CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \
3906 4S, \
3907 4S, \
3908 S, \
3909 kInputFloat##input_d, \
3910 kInputFloat##input_n, \
3911 kInputFloat##input_m, \
3912 kInputSIndices); \
3913 } \
3914 TEST(mnemonic##_2D_2D_D) { \
3915 CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \
3916 2D, \
3917 2D, \
3918 D, \
3919 kInputDouble##input_d, \
3920 kInputDouble##input_n, \
3921 kInputDouble##input_m, \
3922 kInputDIndices); \
3923 }
3924
3925 #define DEFINE_TEST_NEON_FP_BYELEMENT_SCALAR(mnemonic, inp_d, inp_n, inp_m) \
3926 TEST(mnemonic##_S_S_S) { \
3927 CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \
3928 S, \
3929 S, \
3930 S, \
3931 kInputFloat##inp_d, \
3932 kInputFloat##inp_n, \
3933 kInputFloat##inp_m, \
3934 kInputSIndices); \
3935 } \
3936 TEST(mnemonic##_D_D_D) { \
3937 CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \
3938 D, \
3939 D, \
3940 D, \
3941 kInputDouble##inp_d, \
3942 kInputDouble##inp_n, \
3943 kInputDouble##inp_m, \
3944 kInputDIndices); \
3945 }
3946
3947
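// Long by-element forms: the destination elements are twice the width of the
// source elements; the mnemonic##2 tests read the vector source from the
// 128-bit arrangement (upper half).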
3948 #define DEFINE_TEST_NEON_BYELEMENT_DIFF(mnemonic, input_d, input_n, input_m) \
3949 TEST(mnemonic##_4S_4H_H) { \
3950 CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \
3951 4S, \
3952 4H, \
3953 H, \
3954 kInput32bits##input_d, \
3955 kInput16bits##input_n, \
3956 kInput16bits##input_m, \
3957 kInputHIndices); \
3958 } \
3959 TEST(mnemonic##2_4S_8H_H) { \
3960 CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic##2, \
3961 4S, \
3962 8H, \
3963 H, \
3964 kInput32bits##input_d, \
3965 kInput16bits##input_n, \
3966 kInput16bits##input_m, \
3967 kInputHIndices); \
3968 } \
3969 TEST(mnemonic##_2D_2S_S) { \
3970 CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \
3971 2D, \
3972 2S, \
3973 S, \
3974 kInput64bits##input_d, \
3975 kInput32bits##input_n, \
3976 kInput32bits##input_m, \
3977 kInputSIndices); \
3978 } \
3979 TEST(mnemonic##2_2D_4S_S) { \
3980 CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic##2, \
3981 2D, \
3982 4S, \
3983 S, \
3984 kInput64bits##input_d, \
3985 kInput32bits##input_n, \
3986 kInput32bits##input_m, \
3987 kInputSIndices); \
3988 }
3989
3990 #define DEFINE_TEST_NEON_BYELEMENT_DIFF_SCALAR( \
3991 mnemonic, input_d, input_n, input_m) \
3992 TEST(mnemonic##_S_H_H) { \
3993 CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \
3994 S, \
3995 H, \
3996 H, \
3997 kInput32bits##input_d, \
3998 kInput16bits##input_n, \
3999 kInput16bits##input_m, \
4000 kInputHIndices); \
4001 } \
4002 TEST(mnemonic##_D_S_S) { \
4003 CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \
4004 D, \
4005 S, \
4006 S, \
4007 kInput64bits##input_d, \
4008 kInput32bits##input_n, \
4009 kInput32bits##input_m, \
4010 kInputSIndices); \
4011 }
4012
4013
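// Instructions with two register operands and two immediates, used for INS
// (element to element), where the immediates select the destination and source
// lanes.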
4014 #define CALL_TEST_NEON_HELPER_2OP2IMM( \
4015 mnemonic, variant, input_d, input_imm1, input_n, input_imm2) \
4016 { \
4017 CALL_TEST_NEON_HELPER_OpImmOpImm(&MacroAssembler::mnemonic, \
4018 mnemonic, \
4019 variant, \
4020 variant, \
4021 input_d, \
4022 input_imm1, \
4023 input_n, \
4024 input_imm2); \
4025 }
4026
4027 #define DEFINE_TEST_NEON_2OP2IMM( \
4028 mnemonic, input_d, input_imm1, input_n, input_imm2) \
4029 TEST(mnemonic##_B) { \
4030 CALL_TEST_NEON_HELPER_2OP2IMM(mnemonic, \
4031 16B, \
4032 kInput8bits##input_d, \
4033 kInput8bitsImm##input_imm1, \
4034 kInput8bits##input_n, \
4035 kInput8bitsImm##input_imm2); \
4036 } \
4037 TEST(mnemonic##_H) { \
4038 CALL_TEST_NEON_HELPER_2OP2IMM(mnemonic, \
4039 8H, \
4040 kInput16bits##input_d, \
4041 kInput16bitsImm##input_imm1, \
4042 kInput16bits##input_n, \
4043 kInput16bitsImm##input_imm2); \
4044 } \
4045 TEST(mnemonic##_S) { \
4046 CALL_TEST_NEON_HELPER_2OP2IMM(mnemonic, \
4047 4S, \
4048 kInput32bits##input_d, \
4049 kInput32bitsImm##input_imm1, \
4050 kInput32bits##input_n, \
4051 kInput32bitsImm##input_imm2); \
4052 } \
4053 TEST(mnemonic##_D) { \
4054 CALL_TEST_NEON_HELPER_2OP2IMM(mnemonic, \
4055 2D, \
4056 kInput64bits##input_d, \
4057 kInput64bitsImm##input_imm1, \
4058 kInput64bits##input_n, \
4059 kInput64bitsImm##input_imm2); \
4060 }
4061
4062
4063 // Advanced SIMD copy.
4064 DEFINE_TEST_NEON_2OP2IMM(
4065 ins, Basic, LaneCountFromZero, Basic, LaneCountFromZero)
4066 DEFINE_TEST_NEON_2OPIMM_COPY(dup, Basic, LaneCountFromZero)
4067
4068
4069 // Advanced SIMD scalar copy.
4070 DEFINE_TEST_NEON_2OPIMM_SCALAR(dup, Basic, LaneCountFromZero)
4071
4072
4073 // Advanced SIMD three same.
4074 DEFINE_TEST_NEON_3SAME_NO2D(shadd, Basic)
4075 DEFINE_TEST_NEON_3SAME(sqadd, Basic)
4076 DEFINE_TEST_NEON_3SAME_NO2D(srhadd, Basic)
4077 DEFINE_TEST_NEON_3SAME_NO2D(shsub, Basic)
4078 DEFINE_TEST_NEON_3SAME(sqsub, Basic)
4079 DEFINE_TEST_NEON_3SAME(cmgt, Basic)
4080 DEFINE_TEST_NEON_3SAME(cmge, Basic)
4081 DEFINE_TEST_NEON_3SAME(sshl, Basic)
4082 DEFINE_TEST_NEON_3SAME(sqshl, Basic)
4083 DEFINE_TEST_NEON_3SAME(srshl, Basic)
4084 DEFINE_TEST_NEON_3SAME(sqrshl, Basic)
4085 DEFINE_TEST_NEON_3SAME_NO2D(smax, Basic)
4086 DEFINE_TEST_NEON_3SAME_NO2D(smin, Basic)
4087 DEFINE_TEST_NEON_3SAME_NO2D(sabd, Basic)
4088 DEFINE_TEST_NEON_3SAME_NO2D(saba, Basic)
4089 DEFINE_TEST_NEON_3SAME(add, Basic)
4090 DEFINE_TEST_NEON_3SAME(cmtst, Basic)
4091 DEFINE_TEST_NEON_3SAME_NO2D(mla, Basic)
4092 DEFINE_TEST_NEON_3SAME_NO2D(mul, Basic)
4093 DEFINE_TEST_NEON_3SAME_NO2D(smaxp, Basic)
4094 DEFINE_TEST_NEON_3SAME_NO2D(sminp, Basic)
4095 DEFINE_TEST_NEON_3SAME_HS(sqdmulh, Basic)
4096 DEFINE_TEST_NEON_3SAME(addp, Basic)
4097 DEFINE_TEST_NEON_3SAME_FP(fmaxnm, Basic)
4098 DEFINE_TEST_NEON_3SAME_FP(fmla, Basic)
4099 DEFINE_TEST_NEON_3SAME_FP(fadd, Basic)
4100 DEFINE_TEST_NEON_3SAME_FP(fmulx, Basic)
4101 DEFINE_TEST_NEON_3SAME_FP(fcmeq, Basic)
4102 DEFINE_TEST_NEON_3SAME_FP(fmax, Basic)
4103 DEFINE_TEST_NEON_3SAME_FP(frecps, Basic)
4104 DEFINE_TEST_NEON_3SAME_8B_16B(and_, Basic)
4105 DEFINE_TEST_NEON_3SAME_8B_16B(bic, Basic)
4106 DEFINE_TEST_NEON_3SAME_FP(fminnm, Basic)
4107 DEFINE_TEST_NEON_3SAME_FP(fmls, Basic)
4108 DEFINE_TEST_NEON_3SAME_FP(fsub, Basic)
4109 DEFINE_TEST_NEON_3SAME_FP(fmin, Basic)
4110 DEFINE_TEST_NEON_3SAME_FP(frsqrts, Basic)
4111 DEFINE_TEST_NEON_3SAME_8B_16B(orr, Basic)
4112 DEFINE_TEST_NEON_3SAME_8B_16B(orn, Basic)
4113 DEFINE_TEST_NEON_3SAME_NO2D(uhadd, Basic)
4114 DEFINE_TEST_NEON_3SAME(uqadd, Basic)
4115 DEFINE_TEST_NEON_3SAME_NO2D(urhadd, Basic)
4116 DEFINE_TEST_NEON_3SAME_NO2D(uhsub, Basic)
4117 DEFINE_TEST_NEON_3SAME(uqsub, Basic)
4118 DEFINE_TEST_NEON_3SAME(cmhi, Basic)
4119 DEFINE_TEST_NEON_3SAME(cmhs, Basic)
4120 DEFINE_TEST_NEON_3SAME(ushl, Basic)
4121 DEFINE_TEST_NEON_3SAME(uqshl, Basic)
4122 DEFINE_TEST_NEON_3SAME(urshl, Basic)
4123 DEFINE_TEST_NEON_3SAME(uqrshl, Basic)
4124 DEFINE_TEST_NEON_3SAME_NO2D(umax, Basic)
4125 DEFINE_TEST_NEON_3SAME_NO2D(umin, Basic)
4126 DEFINE_TEST_NEON_3SAME_NO2D(uabd, Basic)
4127 DEFINE_TEST_NEON_3SAME_NO2D(uaba, Basic)
4128 DEFINE_TEST_NEON_3SAME(sub, Basic)
4129 DEFINE_TEST_NEON_3SAME(cmeq, Basic)
4130 DEFINE_TEST_NEON_3SAME_NO2D(mls, Basic)
4131 DEFINE_TEST_NEON_3SAME_8B_16B(pmul, Basic)
4132 DEFINE_TEST_NEON_3SAME_NO2D(uminp, Basic)
4133 DEFINE_TEST_NEON_3SAME_NO2D(umaxp, Basic)
4134 DEFINE_TEST_NEON_3SAME_HS(sqrdmulh, Basic)
4135 DEFINE_TEST_NEON_3SAME_FP(fmaxnmp, Basic)
4136 DEFINE_TEST_NEON_3SAME_FP(faddp, Basic)
4137 DEFINE_TEST_NEON_3SAME_FP(fmul, Basic)
4138 DEFINE_TEST_NEON_3SAME_FP(fcmge, Basic)
4139 DEFINE_TEST_NEON_3SAME_FP(facge, Basic)
4140 DEFINE_TEST_NEON_3SAME_FP(fmaxp, Basic)
4141 DEFINE_TEST_NEON_3SAME_FP(fdiv, Basic)
4142 DEFINE_TEST_NEON_3SAME_8B_16B(eor, Basic)
4143 DEFINE_TEST_NEON_3SAME_8B_16B(bsl, Basic)
4144 DEFINE_TEST_NEON_3SAME_FP(fminnmp, Basic)
4145 DEFINE_TEST_NEON_3SAME_FP(fabd, Basic)
4146 DEFINE_TEST_NEON_3SAME_FP(fcmgt, Basic)
4147 DEFINE_TEST_NEON_3SAME_FP(facgt, Basic)
4148 DEFINE_TEST_NEON_3SAME_FP(fminp, Basic)
4149 DEFINE_TEST_NEON_3SAME_8B_16B(bit, Basic)
4150 DEFINE_TEST_NEON_3SAME_8B_16B(bif, Basic)
4151
4152
4153 // Advanced SIMD scalar three same.
4154 DEFINE_TEST_NEON_3SAME_SCALAR(sqadd, Basic)
4155 DEFINE_TEST_NEON_3SAME_SCALAR(sqsub, Basic)
4156 DEFINE_TEST_NEON_3SAME_SCALAR_D(cmgt, Basic)
4157 DEFINE_TEST_NEON_3SAME_SCALAR_D(cmge, Basic)
4158 DEFINE_TEST_NEON_3SAME_SCALAR_D(sshl, Basic)
4159 DEFINE_TEST_NEON_3SAME_SCALAR(sqshl, Basic)
4160 DEFINE_TEST_NEON_3SAME_SCALAR_D(srshl, Basic)
4161 DEFINE_TEST_NEON_3SAME_SCALAR(sqrshl, Basic)
4162 DEFINE_TEST_NEON_3SAME_SCALAR_D(add, Basic)
4163 DEFINE_TEST_NEON_3SAME_SCALAR_D(cmtst, Basic)
4164 DEFINE_TEST_NEON_3SAME_SCALAR_HS(sqdmulh, Basic)
4165 DEFINE_TEST_NEON_3SAME_FP_SCALAR(fmulx, Basic)
4166 DEFINE_TEST_NEON_3SAME_FP_SCALAR(fcmeq, Basic)
4167 DEFINE_TEST_NEON_3SAME_FP_SCALAR(frecps, Basic)
4168 DEFINE_TEST_NEON_3SAME_FP_SCALAR(frsqrts, Basic)
4169 DEFINE_TEST_NEON_3SAME_SCALAR_D(uqadd, Basic)
4170 DEFINE_TEST_NEON_3SAME_SCALAR_D(uqsub, Basic)
4171 DEFINE_TEST_NEON_3SAME_SCALAR_D(cmhi, Basic)
4172 DEFINE_TEST_NEON_3SAME_SCALAR_D(cmhs, Basic)
4173 DEFINE_TEST_NEON_3SAME_SCALAR_D(ushl, Basic)
4174 DEFINE_TEST_NEON_3SAME_SCALAR(uqshl, Basic)
4175 DEFINE_TEST_NEON_3SAME_SCALAR_D(urshl, Basic)
4176 DEFINE_TEST_NEON_3SAME_SCALAR(uqrshl, Basic)
4177 DEFINE_TEST_NEON_3SAME_SCALAR_D(sub, Basic)
4178 DEFINE_TEST_NEON_3SAME_SCALAR_D(cmeq, Basic)
4179 DEFINE_TEST_NEON_3SAME_SCALAR_HS(sqrdmulh, Basic)
4180 DEFINE_TEST_NEON_3SAME_FP_SCALAR(fcmge, Basic)
4181 DEFINE_TEST_NEON_3SAME_FP_SCALAR(facge, Basic)
4182 DEFINE_TEST_NEON_3SAME_FP_SCALAR(fabd, Basic)
4183 DEFINE_TEST_NEON_3SAME_FP_SCALAR(fcmgt, Basic)
4184 DEFINE_TEST_NEON_3SAME_FP_SCALAR(facgt, Basic)
4185
4186
4187 // Advanced SIMD three different.
4188 DEFINE_TEST_NEON_3DIFF_LONG(saddl, Basic)
4189 DEFINE_TEST_NEON_3DIFF_WIDE(saddw, Basic)
4190 DEFINE_TEST_NEON_3DIFF_LONG(ssubl, Basic)
4191 DEFINE_TEST_NEON_3DIFF_WIDE(ssubw, Basic)
4192 DEFINE_TEST_NEON_3DIFF_NARROW(addhn, Basic)
4193 DEFINE_TEST_NEON_3DIFF_LONG(sabal, Basic)
4194 DEFINE_TEST_NEON_3DIFF_NARROW(subhn, Basic)
4195 DEFINE_TEST_NEON_3DIFF_LONG(sabdl, Basic)
4196 DEFINE_TEST_NEON_3DIFF_LONG(smlal, Basic)
4197 DEFINE_TEST_NEON_3DIFF_LONG_SD(sqdmlal, Basic)
4198 DEFINE_TEST_NEON_3DIFF_LONG(smlsl, Basic)
4199 DEFINE_TEST_NEON_3DIFF_LONG_SD(sqdmlsl, Basic)
4200 DEFINE_TEST_NEON_3DIFF_LONG(smull, Basic)
4201 DEFINE_TEST_NEON_3DIFF_LONG_SD(sqdmull, Basic)
4202 DEFINE_TEST_NEON_3DIFF_LONG_8H(pmull, Basic)
4203 DEFINE_TEST_NEON_3DIFF_LONG(uaddl, Basic)
4204 DEFINE_TEST_NEON_3DIFF_WIDE(uaddw, Basic)
4205 DEFINE_TEST_NEON_3DIFF_LONG(usubl, Basic)
4206 DEFINE_TEST_NEON_3DIFF_WIDE(usubw, Basic)
4207 DEFINE_TEST_NEON_3DIFF_NARROW(raddhn, Basic)
4208 DEFINE_TEST_NEON_3DIFF_LONG(uabal, Basic)
4209 DEFINE_TEST_NEON_3DIFF_NARROW(rsubhn, Basic)
4210 DEFINE_TEST_NEON_3DIFF_LONG(uabdl, Basic)
4211 DEFINE_TEST_NEON_3DIFF_LONG(umlal, Basic)
4212 DEFINE_TEST_NEON_3DIFF_LONG(umlsl, Basic)
4213 DEFINE_TEST_NEON_3DIFF_LONG(umull, Basic)
4214
4215
4216 // Advanced SIMD scalar three different.
4217 DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_SD(sqdmlal, Basic)
4218 DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_SD(sqdmlsl, Basic)
4219 DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_SD(sqdmull, Basic)
4220
4221
4222 // Advanced SIMD scalar pairwise.
4223 TEST(addp_SCALAR) {
4224 CALL_TEST_NEON_HELPER_2DIFF(addp, D, 2D, kInput64bitsBasic);
4225 }
4226 DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(fmaxnmp, Basic)
4227 DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(faddp, Basic)
4228 DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(fmaxp, Basic)
4229 DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(fminnmp, Basic)
4230 DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(fminp, Basic)
4231
4232
4233 // Advanced SIMD shift by immediate.
4234 DEFINE_TEST_NEON_2OPIMM(sshr, Basic, TypeWidth)
4235 DEFINE_TEST_NEON_2OPIMM(ssra, Basic, TypeWidth)
4236 DEFINE_TEST_NEON_2OPIMM(srshr, Basic, TypeWidth)
4237 DEFINE_TEST_NEON_2OPIMM(srsra, Basic, TypeWidth)
4238 DEFINE_TEST_NEON_2OPIMM(shl, Basic, TypeWidthFromZero)
4239 DEFINE_TEST_NEON_2OPIMM(sqshl, Basic, TypeWidthFromZero)
4240 DEFINE_TEST_NEON_2OPIMM_NARROW(shrn, Basic, TypeWidth)
4241 DEFINE_TEST_NEON_2OPIMM_NARROW(rshrn, Basic, TypeWidth)
4242 DEFINE_TEST_NEON_2OPIMM_NARROW(sqshrn, Basic, TypeWidth)
4243 DEFINE_TEST_NEON_2OPIMM_NARROW(sqrshrn, Basic, TypeWidth)
4244 DEFINE_TEST_NEON_2OPIMM_LONG(sshll, Basic, TypeWidthFromZero)
4245 DEFINE_TEST_NEON_2OPIMM_SD(scvtf,
4246 FixedPointConversions,
4247 TypeWidthFromZeroToWidth)
4248 DEFINE_TEST_NEON_2OPIMM_FP(fcvtzs, Conversions, TypeWidthFromZeroToWidth)
4249 DEFINE_TEST_NEON_2OPIMM(ushr, Basic, TypeWidth)
4250 DEFINE_TEST_NEON_2OPIMM(usra, Basic, TypeWidth)
4251 DEFINE_TEST_NEON_2OPIMM(urshr, Basic, TypeWidth)
4252 DEFINE_TEST_NEON_2OPIMM(ursra, Basic, TypeWidth)
4253 DEFINE_TEST_NEON_2OPIMM(sri, Basic, TypeWidth)
4254 DEFINE_TEST_NEON_2OPIMM(sli, Basic, TypeWidthFromZero)
4255 DEFINE_TEST_NEON_2OPIMM(sqshlu, Basic, TypeWidthFromZero)
4256 DEFINE_TEST_NEON_2OPIMM(uqshl, Basic, TypeWidthFromZero)
4257 DEFINE_TEST_NEON_2OPIMM_NARROW(sqshrun, Basic, TypeWidth)
4258 DEFINE_TEST_NEON_2OPIMM_NARROW(sqrshrun, Basic, TypeWidth)
4259 DEFINE_TEST_NEON_2OPIMM_NARROW(uqshrn, Basic, TypeWidth)
4260 DEFINE_TEST_NEON_2OPIMM_NARROW(uqrshrn, Basic, TypeWidth)
4261 DEFINE_TEST_NEON_2OPIMM_LONG(ushll, Basic, TypeWidthFromZero)
4262 DEFINE_TEST_NEON_2OPIMM_SD(ucvtf,
4263 FixedPointConversions,
4264 TypeWidthFromZeroToWidth)
4265 DEFINE_TEST_NEON_2OPIMM_FP(fcvtzu, Conversions, TypeWidthFromZeroToWidth)
4266
4267
4268 // Advanced SIMD scalar shift by immediate.
4269 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(sshr, Basic, TypeWidth)
4270 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(ssra, Basic, TypeWidth)
4271 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(srshr, Basic, TypeWidth)
4272 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(srsra, Basic, TypeWidth)
4273 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(shl, Basic, TypeWidthFromZero)
4274 DEFINE_TEST_NEON_2OPIMM_SCALAR(sqshl, Basic, TypeWidthFromZero)
4275 DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(sqshrn, Basic, TypeWidth)
4276 DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(sqrshrn, Basic, TypeWidth)
4277 DEFINE_TEST_NEON_2OPIMM_SCALAR_SD(scvtf,
4278 FixedPointConversions,
4279 TypeWidthFromZeroToWidth)
4280 DEFINE_TEST_NEON_2OPIMM_FP_SCALAR(fcvtzs, Conversions, TypeWidthFromZeroToWidth)
4281 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(ushr, Basic, TypeWidth)
4282 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(usra, Basic, TypeWidth)
4283 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(urshr, Basic, TypeWidth)
4284 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(ursra, Basic, TypeWidth)
4285 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(sri, Basic, TypeWidth)
4286 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(sli, Basic, TypeWidthFromZero)
4287 DEFINE_TEST_NEON_2OPIMM_SCALAR(sqshlu, Basic, TypeWidthFromZero)
4288 DEFINE_TEST_NEON_2OPIMM_SCALAR(uqshl, Basic, TypeWidthFromZero)
4289 DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(sqshrun, Basic, TypeWidth)
4290 DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(sqrshrun, Basic, TypeWidth)
4291 DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(uqshrn, Basic, TypeWidth)
4292 DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(uqrshrn, Basic, TypeWidth)
4293 DEFINE_TEST_NEON_2OPIMM_SCALAR_SD(ucvtf,
4294 FixedPointConversions,
4295 TypeWidthFromZeroToWidth)
4296 DEFINE_TEST_NEON_2OPIMM_FP_SCALAR(fcvtzu, Conversions, TypeWidthFromZeroToWidth)
4297
4298
4299 // Advanced SIMD two-register miscellaneous.
4300 DEFINE_TEST_NEON_2SAME_NO2D(rev64, Basic)
4301 DEFINE_TEST_NEON_2SAME_8B_16B(rev16, Basic)
4302 DEFINE_TEST_NEON_2DIFF_LONG(saddlp, Basic)
4303 DEFINE_TEST_NEON_2SAME(suqadd, Basic)
4304 DEFINE_TEST_NEON_2SAME_NO2D(cls, Basic)
4305 DEFINE_TEST_NEON_2SAME_8B_16B(cnt, Basic)
4306 DEFINE_TEST_NEON_2DIFF_LONG(sadalp, Basic)
4307 DEFINE_TEST_NEON_2SAME(sqabs, Basic)
4308 DEFINE_TEST_NEON_2OPIMM(cmgt, Basic, Zero)
4309 DEFINE_TEST_NEON_2OPIMM(cmeq, Basic, Zero)
4310 DEFINE_TEST_NEON_2OPIMM(cmlt, Basic, Zero)
4311 DEFINE_TEST_NEON_2SAME(abs, Basic)
4312 DEFINE_TEST_NEON_2DIFF_NARROW(xtn, Basic)
4313 DEFINE_TEST_NEON_2DIFF_NARROW(sqxtn, Basic)
4314 DEFINE_TEST_NEON_2DIFF_FP_NARROW(fcvtn, Conversions)
4315 DEFINE_TEST_NEON_2DIFF_FP_LONG(fcvtl, Conversions)
4316 DEFINE_TEST_NEON_2SAME_FP(frintn, Conversions)
4317 DEFINE_TEST_NEON_2SAME_FP(frintm, Conversions)
4318 DEFINE_TEST_NEON_2SAME_FP(fcvtns, Conversions)
4319 DEFINE_TEST_NEON_2SAME_FP(fcvtms, Conversions)
4320 DEFINE_TEST_NEON_2SAME_FP(fcvtas, Conversions)
4321 // SCVTF (vector, integer) covered by SCVTF (vector, fixed point) with fbits 0.
4322 DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(fcmgt, Basic, Zero)
4323 DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(fcmeq, Basic, Zero)
4324 DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(fcmlt, Basic, Zero)
4325 DEFINE_TEST_NEON_2SAME_FP(fabs, Basic)
4326 DEFINE_TEST_NEON_2SAME_FP(frintp, Conversions)
4327 DEFINE_TEST_NEON_2SAME_FP(frintz, Conversions)
4328 DEFINE_TEST_NEON_2SAME_FP(fcvtps, Conversions)
4329 // FCVTZS(vector, integer) covered by FCVTZS(vector, fixed point) with fbits 0.
4330 DEFINE_TEST_NEON_2SAME_2S_4S(urecpe, Basic)
4331 DEFINE_TEST_NEON_2SAME_FP(frecpe, Basic)
4332 DEFINE_TEST_NEON_2SAME_BH(rev32, Basic)
4333 DEFINE_TEST_NEON_2DIFF_LONG(uaddlp, Basic)
4334 DEFINE_TEST_NEON_2SAME(usqadd, Basic)
4335 DEFINE_TEST_NEON_2SAME_NO2D(clz, Basic)
4336 DEFINE_TEST_NEON_2DIFF_LONG(uadalp, Basic)
4337 DEFINE_TEST_NEON_2SAME(sqneg, Basic)
4338 DEFINE_TEST_NEON_2OPIMM(cmge, Basic, Zero)
4339 DEFINE_TEST_NEON_2OPIMM(cmle, Basic, Zero)
4340 DEFINE_TEST_NEON_2SAME(neg, Basic)
4341 DEFINE_TEST_NEON_2DIFF_NARROW(sqxtun, Basic)
4342 DEFINE_TEST_NEON_2OPIMM_LONG(shll, Basic, SHLL)
4343 DEFINE_TEST_NEON_2DIFF_NARROW(uqxtn, Basic)
4344 DEFINE_TEST_NEON_2DIFF_FP_NARROW_2S(fcvtxn, Conversions)
4345 DEFINE_TEST_NEON_2SAME_FP(frinta, Conversions)
4346 DEFINE_TEST_NEON_2SAME_FP(frintx, Conversions)
4347 DEFINE_TEST_NEON_2SAME_FP(fcvtnu, Conversions)
4348 DEFINE_TEST_NEON_2SAME_FP(fcvtmu, Conversions)
4349 DEFINE_TEST_NEON_2SAME_FP(fcvtau, Conversions)
4350 // UCVTF (vector, integer) covered by UCVTF (vector, fixed point) with fbits 0.
4351 DEFINE_TEST_NEON_2SAME_8B_16B(not_, Basic)
4352 DEFINE_TEST_NEON_2SAME_8B_16B(rbit, Basic)
4353 DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(fcmge, Basic, Zero)
4354 DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(fcmle, Basic, Zero)
4355 DEFINE_TEST_NEON_2SAME_FP(fneg, Basic)
4356 DEFINE_TEST_NEON_2SAME_FP(frinti, Conversions)
4357 DEFINE_TEST_NEON_2SAME_FP(fcvtpu, Conversions)
4358 // FCVTZU(vector, integer) covered by FCVTZU(vector, fixed point) with fbits 0.
4359 DEFINE_TEST_NEON_2SAME_2S_4S(ursqrte, Basic)
4360 DEFINE_TEST_NEON_2SAME_FP(frsqrte, Basic)
4361 DEFINE_TEST_NEON_2SAME_FP(fsqrt, Basic)
4362
4363
4364 // Advanced SIMD scalar two-register miscellaneous.
4365 DEFINE_TEST_NEON_2SAME_SCALAR(suqadd, Basic)
4366 DEFINE_TEST_NEON_2SAME_SCALAR(sqabs, Basic)
4367 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(cmgt, Basic, Zero)
4368 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(cmeq, Basic, Zero)
4369 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(cmlt, Basic, Zero)
4370 DEFINE_TEST_NEON_2SAME_SCALAR_D(abs, Basic)
4371 DEFINE_TEST_NEON_2DIFF_SCALAR_NARROW(sqxtn, Basic)
4372 DEFINE_TEST_NEON_2SAME_FP_SCALAR(fcvtns, Conversions)
4373 DEFINE_TEST_NEON_2SAME_FP_SCALAR(fcvtms, Conversions)
4374 DEFINE_TEST_NEON_2SAME_FP_SCALAR(fcvtas, Conversions)
4375 // SCVTF (vector, integer) covered by SCVTF (vector, fixed point) with fbits 0.
4376 DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_SD(fcmgt, Basic, Zero)
4377 DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_SD(fcmeq, Basic, Zero)
4378 DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_SD(fcmlt, Basic, Zero)
4379 DEFINE_TEST_NEON_2SAME_FP_SCALAR(fcvtps, Conversions)
4380 // FCVTZS(vector, integer) covered by FCVTZS(vector, fixed point) with fbits 0.
4381 DEFINE_TEST_NEON_2SAME_FP_SCALAR(frecpe, Basic)
4382 DEFINE_TEST_NEON_2SAME_FP_SCALAR(frecpx, Basic)
4383 DEFINE_TEST_NEON_2SAME_SCALAR(usqadd, Basic)
4384 DEFINE_TEST_NEON_2SAME_SCALAR(sqneg, Basic)
4385 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(cmge, Basic, Zero)
4386 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(cmle, Basic, Zero)
4387 DEFINE_TEST_NEON_2SAME_SCALAR_D(neg, Basic)
4388 DEFINE_TEST_NEON_2DIFF_SCALAR_NARROW(sqxtun, Basic)
4389 DEFINE_TEST_NEON_2DIFF_SCALAR_NARROW(uqxtn, Basic)
4390 TEST(fcvtxn_SCALAR) {
4391 CALL_TEST_NEON_HELPER_2DIFF(fcvtxn, S, D, kInputDoubleConversions);
4392 }
4393 DEFINE_TEST_NEON_2SAME_FP_SCALAR(fcvtnu, Conversions)
4394 DEFINE_TEST_NEON_2SAME_FP_SCALAR(fcvtmu, Conversions)
4395 DEFINE_TEST_NEON_2SAME_FP_SCALAR(fcvtau, Conversions)
4396 // UCVTF (vector, integer) covered by UCVTF (vector, fixed point) with fbits 0.
4397 DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_SD(fcmge, Basic, Zero)
4398 DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_SD(fcmle, Basic, Zero)
4399 DEFINE_TEST_NEON_2SAME_FP_SCALAR(fcvtpu, Conversions)
4400 // FCVTZU(vector, integer) covered by FCVTZU(vector, fixed point) with fbits 0.
4401 DEFINE_TEST_NEON_2SAME_FP_SCALAR(frsqrte, Basic)
4402
4403
4404 // Advanced SIMD across lanes.
4405 DEFINE_TEST_NEON_ACROSS_LONG(saddlv, Basic)
4406 DEFINE_TEST_NEON_ACROSS(smaxv, Basic)
4407 DEFINE_TEST_NEON_ACROSS(sminv, Basic)
4408 DEFINE_TEST_NEON_ACROSS(addv, Basic)
4409 DEFINE_TEST_NEON_ACROSS_LONG(uaddlv, Basic)
4410 DEFINE_TEST_NEON_ACROSS(umaxv, Basic)
4411 DEFINE_TEST_NEON_ACROSS(uminv, Basic)
4412 DEFINE_TEST_NEON_ACROSS_FP(fmaxnmv, Basic)
4413 DEFINE_TEST_NEON_ACROSS_FP(fmaxv, Basic)
4414 DEFINE_TEST_NEON_ACROSS_FP(fminnmv, Basic)
4415 DEFINE_TEST_NEON_ACROSS_FP(fminv, Basic)
4416
4417
4418 // Advanced SIMD permute.
4419 DEFINE_TEST_NEON_3SAME(uzp1, Basic)
4420 DEFINE_TEST_NEON_3SAME(trn1, Basic)
4421 DEFINE_TEST_NEON_3SAME(zip1, Basic)
4422 DEFINE_TEST_NEON_3SAME(uzp2, Basic)
4423 DEFINE_TEST_NEON_3SAME(trn2, Basic)
4424 DEFINE_TEST_NEON_3SAME(zip2, Basic)
4425
4426
4427 // Advanced SIMD vector x indexed element.
4428 DEFINE_TEST_NEON_BYELEMENT_DIFF(smlal, Basic, Basic, Basic)
4429 DEFINE_TEST_NEON_BYELEMENT_DIFF(sqdmlal, Basic, Basic, Basic)
4430 DEFINE_TEST_NEON_BYELEMENT_DIFF(smlsl, Basic, Basic, Basic)
4431 DEFINE_TEST_NEON_BYELEMENT_DIFF(sqdmlsl, Basic, Basic, Basic)
4432 DEFINE_TEST_NEON_BYELEMENT(mul, Basic, Basic, Basic)
4433 DEFINE_TEST_NEON_BYELEMENT_DIFF(smull, Basic, Basic, Basic)
4434 DEFINE_TEST_NEON_BYELEMENT_DIFF(sqdmull, Basic, Basic, Basic)
4435 DEFINE_TEST_NEON_BYELEMENT(sqdmulh, Basic, Basic, Basic)
4436 DEFINE_TEST_NEON_BYELEMENT(sqrdmulh, Basic, Basic, Basic)
4437 DEFINE_TEST_NEON_FP_BYELEMENT(fmla, Basic, Basic, Basic)
4438 DEFINE_TEST_NEON_FP_BYELEMENT(fmls, Basic, Basic, Basic)
4439 DEFINE_TEST_NEON_FP_BYELEMENT(fmul, Basic, Basic, Basic)
4440 DEFINE_TEST_NEON_BYELEMENT(mla, Basic, Basic, Basic)
4441 DEFINE_TEST_NEON_BYELEMENT_DIFF(umlal, Basic, Basic, Basic)
4442 DEFINE_TEST_NEON_BYELEMENT(mls, Basic, Basic, Basic)
4443 DEFINE_TEST_NEON_BYELEMENT_DIFF(umlsl, Basic, Basic, Basic)
4444 DEFINE_TEST_NEON_BYELEMENT_DIFF(umull, Basic, Basic, Basic)
4445 DEFINE_TEST_NEON_FP_BYELEMENT(fmulx, Basic, Basic, Basic)
4446
4447
4448 // Advanced SIMD scalar x indexed element.
4449 DEFINE_TEST_NEON_BYELEMENT_DIFF_SCALAR(sqdmlal, Basic, Basic, Basic)
4450 DEFINE_TEST_NEON_BYELEMENT_DIFF_SCALAR(sqdmlsl, Basic, Basic, Basic)
4451 DEFINE_TEST_NEON_BYELEMENT_DIFF_SCALAR(sqdmull, Basic, Basic, Basic)
4452 DEFINE_TEST_NEON_BYELEMENT_SCALAR(sqdmulh, Basic, Basic, Basic)
4453 DEFINE_TEST_NEON_BYELEMENT_SCALAR(sqrdmulh, Basic, Basic, Basic)
4454 DEFINE_TEST_NEON_FP_BYELEMENT_SCALAR(fmla, Basic, Basic, Basic)
4455 DEFINE_TEST_NEON_FP_BYELEMENT_SCALAR(fmls, Basic, Basic, Basic)
4456 DEFINE_TEST_NEON_FP_BYELEMENT_SCALAR(fmul, Basic, Basic, Basic)
4457 DEFINE_TEST_NEON_FP_BYELEMENT_SCALAR(fmulx, Basic, Basic, Basic)
4458
4459 } // namespace aarch64
4460 } // namespace vixl
4461