1 // Copyright 2015, VIXL authors
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are met:
6 //
7 // * Redistributions of source code must retain the above copyright notice,
8 // this list of conditions and the following disclaimer.
9 // * Redistributions in binary form must reproduce the above copyright notice,
10 // this list of conditions and the following disclaimer in the documentation
11 // and/or other materials provided with the distribution.
12 // * Neither the name of ARM Limited nor the names of its contributors may be
13 // used to endorse or promote products derived from this software without
14 // specific prior written permission.
15 //
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17 // ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 // WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
20 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26
#include <cfloat>
#include <cstdio>

#include <sstream>
#include <vector>

#include "test-runner.h"
#include "test-utils.h"

#include "aarch64/test-simulator-inputs-aarch64.h"
#include "aarch64/test-simulator-traces-aarch64.h"
#include "aarch64/test-utils-aarch64.h"

#include "aarch64/cpu-features-auditor-aarch64.h"
#include "aarch64/macro-assembler-aarch64.h"
#include "aarch64/simulator-aarch64.h"
42
43 namespace vixl {
44 namespace aarch64 {
45
46 // ==== Simulator Tests ====
47 //
48 // These simulator tests check instruction behaviour against a trace taken from
49 // real AArch64 hardware. The same test code is used to generate the trace; the
50 // results are printed to stdout when the test is run with
51 // --generate_test_trace.
52 //
53 // The input lists and expected results are stored in test/traces. The expected
54 // results can be regenerated using tools/generate_simulator_traces.py. Adding a
55 // test for a new instruction is described at the top of
56 // test-simulator-traces-aarch64.h.
57
// Convenience: '__' emits code through the MacroAssembler declared by SETUP().
#define __ masm.
// Every simulator test is registered with an AARCH64_SIM_ prefix.
#define TEST(name) TEST_(AARCH64_SIM_##name)

// By default, request no optional CPU features.
#define SETUP() SETUP_WITH_FEATURES(CPUFeatures())

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64

// Simulator builds: the generated code runs on the VIXL simulator, so any
// requested combination of CPU features can be handled and tests are never
// skipped (see TRY_RUN below).
#define SETUP_WITH_FEATURES(...)                 \
  MacroAssembler masm;                           \
  masm.SetCPUFeatures(CPUFeatures(__VA_ARGS__)); \
  Decoder decoder;                               \
  Simulator simulator(&decoder);                 \
  simulator.SetColouredTrace(Test::coloured_trace());

// Reset the assembler and the simulator, save the callee-saved registers, and
// enable any tracing requested on the command line.
#define START()                         \
  masm.Reset();                         \
  simulator.ResetState();               \
  __ PushCalleeSavedRegisters();        \
  if (Test::trace_reg()) {              \
    __ Trace(LOG_STATE, TRACE_ENABLE);  \
  }                                     \
  if (Test::trace_write()) {            \
    __ Trace(LOG_WRITE, TRACE_ENABLE);  \
  }                                     \
  if (Test::trace_sim()) {              \
    __ Trace(LOG_DISASM, TRACE_ENABLE); \
  }

// Disable tracing, restore the callee-saved registers, return, and finalize
// the generated code.
#define END()                       \
  __ Trace(LOG_ALL, TRACE_DISABLE); \
  __ PopCalleeSavedRegisters();     \
  __ Ret();                         \
  masm.FinalizeCode()

#define TRY_RUN(skipped)                                                \
  DISASSEMBLE();                                                        \
  simulator.RunFrom(masm.GetBuffer()->GetStartAddress<Instruction*>()); \
  /* The simulator can run every test. */                               \
  *skipped = false


#else  // VIXL_INCLUDE_SIMULATOR_AARCH64

// Native builds: the generated code executes directly on the host, so the
// host CPU must support every feature the test requires.
#define SETUP_WITH_FEATURES(...)                 \
  MacroAssembler masm;                           \
  masm.SetCPUFeatures(CPUFeatures(__VA_ARGS__)); \
  CPU::SetUp()

#define START()     \
  masm.Reset();     \
  __ PushCalleeSavedRegisters()

#define END()                   \
  __ PopCalleeSavedRegisters(); \
  __ Ret();                     \
  masm.FinalizeCode()

// Audit the generated code for required CPU features; run it natively only if
// the host supports them all, otherwise print a SKIPPED message and set
// *skipped.
#define TRY_RUN(skipped)                                                      \
  DISASSEMBLE();                                                              \
  /* If the test uses features that the current CPU doesn't support, don't */ \
  /* attempt to run it natively. */                                           \
  {                                                                           \
    Decoder decoder;                                                          \
    /* TODO: Once available, use runtime feature detection. The use of */     \
    /* AArch64LegacyBaseline is a stopgap. */                                 \
    const CPUFeatures& this_machine = CPUFeatures::AArch64LegacyBaseline();   \
    CPUFeaturesAuditor auditor(&decoder, this_machine);                       \
    CodeBuffer* buffer = masm.GetBuffer();                                    \
    decoder.Decode(buffer->GetStartAddress<Instruction*>(),                   \
                   buffer->GetEndAddress<Instruction*>());                    \
    const CPUFeatures& requirements = auditor.GetSeenFeatures();              \
    if (this_machine.Has(requirements)) {                                     \
      masm.GetBuffer()->SetExecutable();                                      \
      ExecuteMemory(buffer->GetStartAddress<byte*>(),                         \
                    masm.GetSizeOfCodeGenerated());                           \
      masm.GetBuffer()->SetWritable();                                        \
      *skipped = false;                                                       \
    } else {                                                                  \
      std::stringstream os;                                                   \
      /* Note: This message needs to match REGEXP_MISSING_FEATURES from */    \
      /* tools/threaded_test.py. */                                           \
      os << "SKIPPED: Missing features: { ";                                  \
      os << requirements.Without(this_machine) << " }\n";                     \
      printf("%s", os.str().c_str());                                         \
      *skipped = true;                                                        \
    }                                                                         \
  }


#endif  // VIXL_INCLUDE_SIMULATOR_AARCH64


// Print a disassembly of the generated code when --disassemble is given.
#define DISASSEMBLE()                                             \
  if (Test::disassemble()) {                                      \
    PrintDisassembler disasm(stdout);                             \
    CodeBuffer* buffer = masm.GetBuffer();                        \
    Instruction* start = buffer->GetStartAddress<Instruction*>(); \
    Instruction* end = buffer->GetEndAddress<Instruction*>();     \
    disasm.DisassembleBuffer(start, end);                         \
  }
158
// The maximum number of errors to report in detail for each test.
static const unsigned kErrorReportLimit = 8;


// Overloaded versions of RawbitsToDouble and RawbitsToFloat for use in the
// templated test functions.

// Reinterpret a 32-bit raw value as the float it encodes.
static float rawbits_to_fp(uint32_t bits) { return RawbitsToFloat(bits); }

// Reinterpret a 64-bit raw value as the double it encodes.
static double rawbits_to_fp(uint64_t bits) { return RawbitsToDouble(bits); }

// The rawbits_to_fp functions are only used for printing decimal values so we
// just approximate FP16 as double.
static double rawbits_to_fp(uint16_t bits) {
  return FPToDouble(RawbitsToFloat16(bits), kIgnoreDefaultNaN);
}
174
175
// MacroAssembler member function pointers to pass to the test dispatchers.
// Each typedef names the signature of one family of assembler helpers; the
// dispatchers below iterate over input lists and invoke these via
// (masm.*helper)(...).

// Scalar FP operations with one, two or three source registers.
typedef void (MacroAssembler::*Test1OpFPHelper_t)(const VRegister& fd,
                                                  const VRegister& fn);
typedef void (MacroAssembler::*Test2OpFPHelper_t)(const VRegister& fd,
                                                  const VRegister& fn,
                                                  const VRegister& fm);
typedef void (MacroAssembler::*Test3OpFPHelper_t)(const VRegister& fd,
                                                  const VRegister& fn,
                                                  const VRegister& fm,
                                                  const VRegister& fa);
// FP comparisons: register-register, and register against an immediate value.
typedef void (MacroAssembler::*TestFPCmpHelper_t)(const VRegister& fn,
                                                  const VRegister& fm);
typedef void (MacroAssembler::*TestFPCmpZeroHelper_t)(const VRegister& fn,
                                                      double value);
// FP <-> integer and FP <-> fixed-point conversions.
typedef void (MacroAssembler::*TestFPToIntHelper_t)(const Register& rd,
                                                    const VRegister& fn);
typedef void (MacroAssembler::*TestFPToFixedHelper_t)(const Register& rd,
                                                      const VRegister& fn,
                                                      int fbits);
typedef void (MacroAssembler::*TestFixedToFPHelper_t)(const VRegister& fd,
                                                      const Register& rn,
                                                      int fbits);
// TODO: 'Test2OpNEONHelper_t' and 'Test2OpFPHelper_t' can be
// consolidated into one routine.
typedef void (MacroAssembler::*Test1OpNEONHelper_t)(const VRegister& vd,
                                                    const VRegister& vn);
typedef void (MacroAssembler::*Test2OpNEONHelper_t)(const VRegister& vd,
                                                    const VRegister& vn,
                                                    const VRegister& vm);
typedef void (MacroAssembler::*TestByElementNEONHelper_t)(const VRegister& vd,
                                                          const VRegister& vn,
                                                          const VRegister& vm,
                                                          int vm_index);
typedef void (MacroAssembler::*TestOpImmOpImmVdUpdateNEONHelper_t)(
    const VRegister& vd, int imm1, const VRegister& vn, int imm2);

// This helps using the same typename for both the function pointer
// and the array of immediates passed to helper routines.
template <typename T>
class Test2OpImmediateNEONHelper_t {
 public:
  typedef void (MacroAssembler::*mnemonic)(const VRegister& vd,
                                           const VRegister& vn,
                                           T imm);
};
221
222
// Maximum number of hex characters required to represent values of either
// templated type (two hex digits per byte of the larger type).
template <typename Ta, typename Tb>
static unsigned MaxHexCharCount() {
  const size_t larger = (sizeof(Ta) > sizeof(Tb)) ? sizeof(Ta) : sizeof(Tb);
  return static_cast<unsigned>(larger * 2);
}
230
231
232 // Standard test dispatchers.
233
234
// Generate and run code that applies 'helper' (a one-operand FP instruction)
// to every element of the input list, storing each raw result to the
// 'results' buffer. d_size and n_size select the destination and source
// register widths (D, S or H). *skipped is set by TRY_RUN (always false on
// the simulator; true in native builds when the host lacks required
// features).
static void Test1Op_Helper(Test1OpFPHelper_t helper,
                           uintptr_t inputs,
                           unsigned inputs_length,
                           uintptr_t results,
                           unsigned d_size,
                           unsigned n_size,
                           bool* skipped) {
  VIXL_ASSERT((d_size == kDRegSize) || (d_size == kSRegSize) ||
              (d_size == kHRegSize));
  VIXL_ASSERT((n_size == kDRegSize) || (n_size == kSRegSize) ||
              (n_size == kHRegSize));

  CPUFeatures features;
  features.Combine(CPUFeatures::kFP, CPUFeatures::kFPHalf);
  // For frint{32,64}{x,y} variants.
  features.Combine(CPUFeatures::kFrintToFixedSizedInt);
  SETUP_WITH_FEATURES(features);
  START();

  // Roll up the loop to keep the code size down.
  Label loop_n;

  Register out = x0;
  Register inputs_base = x1;
  Register length = w2;
  Register index_n = w3;

  // Pick the source register and the corresponding input element size.
  int n_index_shift;
  VRegister fd;
  VRegister fn;
  if (n_size == kDRegSize) {
    n_index_shift = kDRegSizeInBytesLog2;
    fn = d1;
  } else if (n_size == kSRegSize) {
    n_index_shift = kSRegSizeInBytesLog2;
    fn = s1;
  } else {
    n_index_shift = kHRegSizeInBytesLog2;
    fn = h1;
  }

  // Pick the destination register.
  if (d_size == kDRegSize) {
    fd = d0;
  } else if (d_size == kSRegSize) {
    fd = s0;
  } else {
    fd = h0;
  }


  __ Mov(out, results);
  __ Mov(inputs_base, inputs);
  __ Mov(length, inputs_length);

  // For each input element: load it, apply the instruction under test, and
  // store the result (post-indexed, so 'out' advances automatically).
  __ Mov(index_n, 0);
  __ Bind(&loop_n);
  __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, n_index_shift));

  {
    // Ensure the helper emits exactly one instruction.
    SingleEmissionCheckScope guard(&masm);
    (masm.*helper)(fd, fn);
  }
  __ Str(fd, MemOperand(out, fd.GetSizeInBytes(), PostIndex));

  __ Add(index_n, index_n, 1);
  __ Cmp(index_n, inputs_length);
  __ B(lo, &loop_n);

  END();
  TRY_RUN(skipped);
}
306
307
308 // Test FP instructions. The inputs[] and expected[] arrays should be arrays of
309 // rawbits representations of doubles or floats. This ensures that exact bit
310 // comparisons can be performed.
311 template <typename Tn, typename Td>
Test1Op(const char * name,Test1OpFPHelper_t helper,const Tn inputs[],unsigned inputs_length,const Td expected[],unsigned expected_length)312 static void Test1Op(const char* name,
313 Test1OpFPHelper_t helper,
314 const Tn inputs[],
315 unsigned inputs_length,
316 const Td expected[],
317 unsigned expected_length) {
318 VIXL_ASSERT(inputs_length > 0);
319
320 const unsigned results_length = inputs_length;
321 Td* results = new Td[results_length];
322
323 const unsigned d_bits = sizeof(Td) * 8;
324 const unsigned n_bits = sizeof(Tn) * 8;
325 bool skipped;
326
327 Test1Op_Helper(helper,
328 reinterpret_cast<uintptr_t>(inputs),
329 inputs_length,
330 reinterpret_cast<uintptr_t>(results),
331 d_bits,
332 n_bits,
333 &skipped);
334
335 if (Test::generate_test_trace()) {
336 // Print the results.
337 printf("const uint%u_t kExpected_%s[] = {\n", d_bits, name);
338 for (unsigned d = 0; d < results_length; d++) {
339 printf(" 0x%0*" PRIx64 ",\n",
340 d_bits / 4,
341 static_cast<uint64_t>(results[d]));
342 }
343 printf("};\n");
344 printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
345 } else if (!skipped) {
346 // Check the results.
347 VIXL_CHECK(expected_length == results_length);
348 unsigned error_count = 0;
349 unsigned d = 0;
350 for (unsigned n = 0; n < inputs_length; n++, d++) {
351 if (results[d] != expected[d]) {
352 if (++error_count > kErrorReportLimit) continue;
353
354 printf("%s 0x%0*" PRIx64 " (%s %g):\n",
355 name,
356 n_bits / 4,
357 static_cast<uint64_t>(inputs[n]),
358 name,
359 rawbits_to_fp(inputs[n]));
360 printf(" Expected: 0x%0*" PRIx64 " (%g)\n",
361 d_bits / 4,
362 static_cast<uint64_t>(expected[d]),
363 rawbits_to_fp(expected[d]));
364 printf(" Found: 0x%0*" PRIx64 " (%g)\n",
365 d_bits / 4,
366 static_cast<uint64_t>(results[d]),
367 rawbits_to_fp(results[d]));
368 printf("\n");
369 }
370 }
371 VIXL_ASSERT(d == expected_length);
372 if (error_count > kErrorReportLimit) {
373 printf("%u other errors follow.\n", error_count - kErrorReportLimit);
374 }
375 VIXL_CHECK(error_count == 0);
376 }
377 delete[] results;
378 }
379
380
// Generate and run code that applies 'helper' (a two-operand FP instruction)
// to every (n, m) pair of input elements, storing each raw result to the
// 'results' buffer in row-major (n-outer, m-inner) order. reg_size selects
// the register width (D, S or H) used for all three operands. *skipped is
// set by TRY_RUN.
static void Test2Op_Helper(Test2OpFPHelper_t helper,
                           uintptr_t inputs,
                           unsigned inputs_length,
                           uintptr_t results,
                           unsigned reg_size,
                           bool* skipped) {
  VIXL_ASSERT((reg_size == kDRegSize) || (reg_size == kSRegSize) ||
              (reg_size == kHRegSize));

  SETUP_WITH_FEATURES(CPUFeatures::kFP, CPUFeatures::kFPHalf);
  START();

  // Roll up the loop to keep the code size down.
  Label loop_n, loop_m;

  Register out = x0;
  Register inputs_base = x1;
  Register length = w2;
  Register index_n = w3;
  Register index_m = w4;

  // Select the input element size from the register width.
  bool double_op = reg_size == kDRegSize;
  bool float_op = reg_size == kSRegSize;
  int index_shift;
  if (double_op) {
    index_shift = kDRegSizeInBytesLog2;
  } else if (float_op) {
    index_shift = kSRegSizeInBytesLog2;
  } else {
    index_shift = kHRegSizeInBytesLog2;
  }

  VRegister fd;
  VRegister fn;
  VRegister fm;

  if (double_op) {
    fd = d0;
    fn = d1;
    fm = d2;
  } else if (float_op) {
    fd = s0;
    fn = s1;
    fm = s2;
  } else {
    fd = h0;
    fn = h1;
    fm = h2;
  }

  __ Mov(out, results);
  __ Mov(inputs_base, inputs);
  __ Mov(length, inputs_length);

  // Outer loop over the first operand.
  __ Mov(index_n, 0);
  __ Bind(&loop_n);
  __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, index_shift));

  // Inner loop over the second operand.
  __ Mov(index_m, 0);
  __ Bind(&loop_m);
  __ Ldr(fm, MemOperand(inputs_base, index_m, UXTW, index_shift));

  {
    // Ensure the helper emits exactly one instruction.
    SingleEmissionCheckScope guard(&masm);
    (masm.*helper)(fd, fn, fm);
  }
  __ Str(fd, MemOperand(out, fd.GetSizeInBytes(), PostIndex));

  __ Add(index_m, index_m, 1);
  __ Cmp(index_m, inputs_length);
  __ B(lo, &loop_m);

  __ Add(index_n, index_n, 1);
  __ Cmp(index_n, inputs_length);
  __ B(lo, &loop_n);

  END();
  TRY_RUN(skipped);
}
460
461
462 // Test FP instructions. The inputs[] and expected[] arrays should be arrays of
463 // rawbits representations of doubles or floats. This ensures that exact bit
464 // comparisons can be performed.
465 template <typename T>
Test2Op(const char * name,Test2OpFPHelper_t helper,const T inputs[],unsigned inputs_length,const T expected[],unsigned expected_length)466 static void Test2Op(const char* name,
467 Test2OpFPHelper_t helper,
468 const T inputs[],
469 unsigned inputs_length,
470 const T expected[],
471 unsigned expected_length) {
472 VIXL_ASSERT(inputs_length > 0);
473
474 const unsigned results_length = inputs_length * inputs_length;
475 T* results = new T[results_length];
476
477 const unsigned bits = sizeof(T) * 8;
478 bool skipped;
479
480 Test2Op_Helper(helper,
481 reinterpret_cast<uintptr_t>(inputs),
482 inputs_length,
483 reinterpret_cast<uintptr_t>(results),
484 bits,
485 &skipped);
486
487 if (Test::generate_test_trace()) {
488 // Print the results.
489 printf("const uint%u_t kExpected_%s[] = {\n", bits, name);
490 for (unsigned d = 0; d < results_length; d++) {
491 printf(" 0x%0*" PRIx64 ",\n",
492 bits / 4,
493 static_cast<uint64_t>(results[d]));
494 }
495 printf("};\n");
496 printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
497 } else if (!skipped) {
498 // Check the results.
499 VIXL_CHECK(expected_length == results_length);
500 unsigned error_count = 0;
501 unsigned d = 0;
502 for (unsigned n = 0; n < inputs_length; n++) {
503 for (unsigned m = 0; m < inputs_length; m++, d++) {
504 if (results[d] != expected[d]) {
505 if (++error_count > kErrorReportLimit) continue;
506
507 printf("%s 0x%0*" PRIx64 ", 0x%0*" PRIx64 " (%s %g %g):\n",
508 name,
509 bits / 4,
510 static_cast<uint64_t>(inputs[n]),
511 bits / 4,
512 static_cast<uint64_t>(inputs[m]),
513 name,
514 rawbits_to_fp(inputs[n]),
515 rawbits_to_fp(inputs[m]));
516 printf(" Expected: 0x%0*" PRIx64 " (%g)\n",
517 bits / 4,
518 static_cast<uint64_t>(expected[d]),
519 rawbits_to_fp(expected[d]));
520 printf(" Found: 0x%0*" PRIx64 " (%g)\n",
521 bits / 4,
522 static_cast<uint64_t>(results[d]),
523 rawbits_to_fp(results[d]));
524 printf("\n");
525 }
526 }
527 }
528 VIXL_ASSERT(d == expected_length);
529 if (error_count > kErrorReportLimit) {
530 printf("%u other errors follow.\n", error_count - kErrorReportLimit);
531 }
532 VIXL_CHECK(error_count == 0);
533 }
534 delete[] results;
535 }
536
537
// Generate and run code that applies 'helper' (a three-operand FP
// instruction, e.g. fused multiply-add) to every (n, m, a) triple of input
// elements, storing each raw result to the 'results' buffer with 'a' as the
// innermost index. reg_size selects the register width (D, S or H) for all
// four operands. *skipped is set by TRY_RUN.
static void Test3Op_Helper(Test3OpFPHelper_t helper,
                           uintptr_t inputs,
                           unsigned inputs_length,
                           uintptr_t results,
                           unsigned reg_size,
                           bool* skipped) {
  VIXL_ASSERT((reg_size == kDRegSize) || (reg_size == kSRegSize) ||
              (reg_size == kHRegSize));

  SETUP_WITH_FEATURES(CPUFeatures::kFP, CPUFeatures::kFPHalf);
  START();

  // Roll up the loop to keep the code size down.
  Label loop_n, loop_m, loop_a;

  Register out = x0;
  Register inputs_base = x1;
  Register length = w2;
  Register index_n = w3;
  Register index_m = w4;
  Register index_a = w5;

  // The operand registers are built directly from (code, size) here, unlike
  // the 1- and 2-op helpers which pick from the named aliases.
  bool double_op = reg_size == kDRegSize;
  bool single_op = reg_size == kSRegSize;
  int index_shift;
  VRegister fd(0, reg_size);
  VRegister fn(1, reg_size);
  VRegister fm(2, reg_size);
  VRegister fa(3, reg_size);
  if (double_op) {
    index_shift = kDRegSizeInBytesLog2;
  } else if (single_op) {
    index_shift = kSRegSizeInBytesLog2;
  } else {
    index_shift = kHRegSizeInBytesLog2;
  }

  __ Mov(out, results);
  __ Mov(inputs_base, inputs);
  __ Mov(length, inputs_length);

  // Three nested loops: n (outer), m, a (inner).
  __ Mov(index_n, 0);
  __ Bind(&loop_n);
  __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, index_shift));

  __ Mov(index_m, 0);
  __ Bind(&loop_m);
  __ Ldr(fm, MemOperand(inputs_base, index_m, UXTW, index_shift));

  __ Mov(index_a, 0);
  __ Bind(&loop_a);
  __ Ldr(fa, MemOperand(inputs_base, index_a, UXTW, index_shift));

  {
    // Ensure the helper emits exactly one instruction.
    SingleEmissionCheckScope guard(&masm);
    (masm.*helper)(fd, fn, fm, fa);
  }
  __ Str(fd, MemOperand(out, fd.GetSizeInBytes(), PostIndex));

  __ Add(index_a, index_a, 1);
  __ Cmp(index_a, inputs_length);
  __ B(lo, &loop_a);

  __ Add(index_m, index_m, 1);
  __ Cmp(index_m, inputs_length);
  __ B(lo, &loop_m);

  __ Add(index_n, index_n, 1);
  __ Cmp(index_n, inputs_length);
  __ B(lo, &loop_n);

  END();
  TRY_RUN(skipped);
}
612
613
614 // Test FP instructions. The inputs[] and expected[] arrays should be arrays of
615 // rawbits representations of doubles or floats. This ensures that exact bit
616 // comparisons can be performed.
617 template <typename T>
Test3Op(const char * name,Test3OpFPHelper_t helper,const T inputs[],unsigned inputs_length,const T expected[],unsigned expected_length)618 static void Test3Op(const char* name,
619 Test3OpFPHelper_t helper,
620 const T inputs[],
621 unsigned inputs_length,
622 const T expected[],
623 unsigned expected_length) {
624 VIXL_ASSERT(inputs_length > 0);
625
626 const unsigned results_length = inputs_length * inputs_length * inputs_length;
627 T* results = new T[results_length];
628
629 const unsigned bits = sizeof(T) * 8;
630 bool skipped;
631
632 Test3Op_Helper(helper,
633 reinterpret_cast<uintptr_t>(inputs),
634 inputs_length,
635 reinterpret_cast<uintptr_t>(results),
636 bits,
637 &skipped);
638
639 if (Test::generate_test_trace()) {
640 // Print the results.
641 printf("const uint%u_t kExpected_%s[] = {\n", bits, name);
642 for (unsigned d = 0; d < results_length; d++) {
643 printf(" 0x%0*" PRIx64 ",\n",
644 bits / 4,
645 static_cast<uint64_t>(results[d]));
646 }
647 printf("};\n");
648 printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
649 } else if (!skipped) {
650 // Check the results.
651 VIXL_CHECK(expected_length == results_length);
652 unsigned error_count = 0;
653 unsigned d = 0;
654 for (unsigned n = 0; n < inputs_length; n++) {
655 for (unsigned m = 0; m < inputs_length; m++) {
656 for (unsigned a = 0; a < inputs_length; a++, d++) {
657 if (results[d] != expected[d]) {
658 if (++error_count > kErrorReportLimit) continue;
659
660 printf("%s 0x%0*" PRIx64 ", 0x%0*" PRIx64 ", 0x%0*" PRIx64
661 " (%s %g %g %g):\n",
662 name,
663 bits / 4,
664 static_cast<uint64_t>(inputs[n]),
665 bits / 4,
666 static_cast<uint64_t>(inputs[m]),
667 bits / 4,
668 static_cast<uint64_t>(inputs[a]),
669 name,
670 rawbits_to_fp(inputs[n]),
671 rawbits_to_fp(inputs[m]),
672 rawbits_to_fp(inputs[a]));
673 printf(" Expected: 0x%0*" PRIx64 " (%g)\n",
674 bits / 4,
675 static_cast<uint64_t>(expected[d]),
676 rawbits_to_fp(expected[d]));
677 printf(" Found: 0x%0*" PRIx64 " (%g)\n",
678 bits / 4,
679 static_cast<uint64_t>(results[d]),
680 rawbits_to_fp(results[d]));
681 printf("\n");
682 }
683 }
684 }
685 }
686 VIXL_ASSERT(d == expected_length);
687 if (error_count > kErrorReportLimit) {
688 printf("%u other errors follow.\n", error_count - kErrorReportLimit);
689 }
690 VIXL_CHECK(error_count == 0);
691 }
692 delete[] results;
693 }
694
695
// Generate and run code that applies 'helper' (an FP compare instruction) to
// every (n, m) pair of input elements, reading back NZCV after each compare
// and storing the top four flag bits as one byte per pair in the 'results'
// buffer. reg_size selects the register width (D or S only). *skipped is set
// by TRY_RUN.
static void TestCmp_Helper(TestFPCmpHelper_t helper,
                           uintptr_t inputs,
                           unsigned inputs_length,
                           uintptr_t results,
                           unsigned reg_size,
                           bool* skipped) {
  VIXL_ASSERT((reg_size == kDRegSize) || (reg_size == kSRegSize));

  SETUP_WITH_FEATURES(CPUFeatures::kFP);
  START();

  // Roll up the loop to keep the code size down.
  Label loop_n, loop_m;

  Register out = x0;
  Register inputs_base = x1;
  Register length = w2;
  Register index_n = w3;
  Register index_m = w4;
  Register flags = x5;

  bool double_op = reg_size == kDRegSize;
  const int index_shift =
      double_op ? kDRegSizeInBytesLog2 : kSRegSizeInBytesLog2;

  VRegister fn = double_op ? d1 : s1;
  VRegister fm = double_op ? d2 : s2;

  __ Mov(out, results);
  __ Mov(inputs_base, inputs);
  __ Mov(length, inputs_length);

  // Outer loop over the first operand, inner loop over the second.
  __ Mov(index_n, 0);
  __ Bind(&loop_n);
  __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, index_shift));

  __ Mov(index_m, 0);
  __ Bind(&loop_m);
  __ Ldr(fm, MemOperand(inputs_base, index_m, UXTW, index_shift));

  {
    // Ensure the helper emits exactly one instruction.
    SingleEmissionCheckScope guard(&masm);
    (masm.*helper)(fn, fm);
  }
  // Extract N, Z, C and V (NZCV bits 31:28) and store them as one byte.
  __ Mrs(flags, NZCV);
  __ Ubfx(flags, flags, 28, 4);
  __ Strb(flags, MemOperand(out, 1, PostIndex));

  __ Add(index_m, index_m, 1);
  __ Cmp(index_m, inputs_length);
  __ B(lo, &loop_m);

  __ Add(index_n, index_n, 1);
  __ Cmp(index_n, inputs_length);
  __ B(lo, &loop_n);

  END();
  TRY_RUN(skipped);
}
755
756
757 // Test FP instructions. The inputs[] and expected[] arrays should be arrays of
758 // rawbits representations of doubles or floats. This ensures that exact bit
759 // comparisons can be performed.
760 template <typename T>
TestCmp(const char * name,TestFPCmpHelper_t helper,const T inputs[],unsigned inputs_length,const uint8_t expected[],unsigned expected_length)761 static void TestCmp(const char* name,
762 TestFPCmpHelper_t helper,
763 const T inputs[],
764 unsigned inputs_length,
765 const uint8_t expected[],
766 unsigned expected_length) {
767 VIXL_ASSERT(inputs_length > 0);
768
769 const unsigned results_length = inputs_length * inputs_length;
770 uint8_t* results = new uint8_t[results_length];
771
772 const unsigned bits = sizeof(T) * 8;
773 bool skipped;
774
775 TestCmp_Helper(helper,
776 reinterpret_cast<uintptr_t>(inputs),
777 inputs_length,
778 reinterpret_cast<uintptr_t>(results),
779 bits,
780 &skipped);
781
782 if (Test::generate_test_trace()) {
783 // Print the results.
784 printf("const uint8_t kExpected_%s[] = {\n", name);
785 for (unsigned d = 0; d < results_length; d++) {
786 // Each NZCV result only requires 4 bits.
787 VIXL_ASSERT((results[d] & 0xf) == results[d]);
788 printf(" 0x%" PRIx8 ",\n", results[d]);
789 }
790 printf("};\n");
791 printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
792 } else if (!skipped) {
793 // Check the results.
794 VIXL_CHECK(expected_length == results_length);
795 unsigned error_count = 0;
796 unsigned d = 0;
797 for (unsigned n = 0; n < inputs_length; n++) {
798 for (unsigned m = 0; m < inputs_length; m++, d++) {
799 if (results[d] != expected[d]) {
800 if (++error_count > kErrorReportLimit) continue;
801
802 printf("%s 0x%0*" PRIx64 ", 0x%0*" PRIx64 " (%s %g %g):\n",
803 name,
804 bits / 4,
805 static_cast<uint64_t>(inputs[n]),
806 bits / 4,
807 static_cast<uint64_t>(inputs[m]),
808 name,
809 rawbits_to_fp(inputs[n]),
810 rawbits_to_fp(inputs[m]));
811 printf(" Expected: %c%c%c%c (0x%" PRIx8 ")\n",
812 (expected[d] & 0x8) ? 'N' : 'n',
813 (expected[d] & 0x4) ? 'Z' : 'z',
814 (expected[d] & 0x2) ? 'C' : 'c',
815 (expected[d] & 0x1) ? 'V' : 'v',
816 expected[d]);
817 printf(" Found: %c%c%c%c (0x%" PRIx8 ")\n",
818 (results[d] & 0x8) ? 'N' : 'n',
819 (results[d] & 0x4) ? 'Z' : 'z',
820 (results[d] & 0x2) ? 'C' : 'c',
821 (results[d] & 0x1) ? 'V' : 'v',
822 results[d]);
823 printf("\n");
824 }
825 }
826 }
827 VIXL_ASSERT(d == expected_length);
828 if (error_count > kErrorReportLimit) {
829 printf("%u other errors follow.\n", error_count - kErrorReportLimit);
830 }
831 VIXL_CHECK(error_count == 0);
832 }
833 delete[] results;
834 }
835
836
TestCmpZero_Helper(TestFPCmpZeroHelper_t helper,uintptr_t inputs,unsigned inputs_length,uintptr_t results,unsigned reg_size,bool * skipped)837 static void TestCmpZero_Helper(TestFPCmpZeroHelper_t helper,
838 uintptr_t inputs,
839 unsigned inputs_length,
840 uintptr_t results,
841 unsigned reg_size,
842 bool* skipped) {
843 VIXL_ASSERT((reg_size == kDRegSize) || (reg_size == kSRegSize));
844
845 SETUP_WITH_FEATURES(CPUFeatures::kFP);
846 START();
847
848 // Roll up the loop to keep the code size down.
849 Label loop_n, loop_m;
850
851 Register out = x0;
852 Register inputs_base = x1;
853 Register length = w2;
854 Register index_n = w3;
855 Register flags = x4;
856
857 bool double_op = reg_size == kDRegSize;
858 const int index_shift =
859 double_op ? kDRegSizeInBytesLog2 : kSRegSizeInBytesLog2;
860
861 VRegister fn = double_op ? d1 : s1;
862
863 __ Mov(out, results);
864 __ Mov(inputs_base, inputs);
865 __ Mov(length, inputs_length);
866
867 __ Mov(index_n, 0);
868 __ Bind(&loop_n);
869 __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, index_shift));
870
871 {
872 SingleEmissionCheckScope guard(&masm);
873 (masm.*helper)(fn, 0.0);
874 }
875 __ Mrs(flags, NZCV);
876 __ Ubfx(flags, flags, 28, 4);
877 __ Strb(flags, MemOperand(out, 1, PostIndex));
878
879 __ Add(index_n, index_n, 1);
880 __ Cmp(index_n, inputs_length);
881 __ B(lo, &loop_n);
882
883 END();
884 TRY_RUN(skipped);
885 }
886
887
888 // Test FP instructions. The inputs[] and expected[] arrays should be arrays of
889 // rawbits representations of doubles or floats. This ensures that exact bit
890 // comparisons can be performed.
891 template <typename T>
TestCmpZero(const char * name,TestFPCmpZeroHelper_t helper,const T inputs[],unsigned inputs_length,const uint8_t expected[],unsigned expected_length)892 static void TestCmpZero(const char* name,
893 TestFPCmpZeroHelper_t helper,
894 const T inputs[],
895 unsigned inputs_length,
896 const uint8_t expected[],
897 unsigned expected_length) {
898 VIXL_ASSERT(inputs_length > 0);
899
900 const unsigned results_length = inputs_length;
901 uint8_t* results = new uint8_t[results_length];
902
903 const unsigned bits = sizeof(T) * 8;
904 bool skipped;
905
906 TestCmpZero_Helper(helper,
907 reinterpret_cast<uintptr_t>(inputs),
908 inputs_length,
909 reinterpret_cast<uintptr_t>(results),
910 bits,
911 &skipped);
912
913 if (Test::generate_test_trace()) {
914 // Print the results.
915 printf("const uint8_t kExpected_%s[] = {\n", name);
916 for (unsigned d = 0; d < results_length; d++) {
917 // Each NZCV result only requires 4 bits.
918 VIXL_ASSERT((results[d] & 0xf) == results[d]);
919 printf(" 0x%" PRIx8 ",\n", results[d]);
920 }
921 printf("};\n");
922 printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
923 } else if (!skipped) {
924 // Check the results.
925 VIXL_CHECK(expected_length == results_length);
926 unsigned error_count = 0;
927 unsigned d = 0;
928 for (unsigned n = 0; n < inputs_length; n++, d++) {
929 if (results[d] != expected[d]) {
930 if (++error_count > kErrorReportLimit) continue;
931
932 printf("%s 0x%0*" PRIx64 ", 0x%0*u (%s %g #0.0):\n",
933 name,
934 bits / 4,
935 static_cast<uint64_t>(inputs[n]),
936 bits / 4,
937 0,
938 name,
939 rawbits_to_fp(inputs[n]));
940 printf(" Expected: %c%c%c%c (0x%" PRIx8 ")\n",
941 (expected[d] & 0x8) ? 'N' : 'n',
942 (expected[d] & 0x4) ? 'Z' : 'z',
943 (expected[d] & 0x2) ? 'C' : 'c',
944 (expected[d] & 0x1) ? 'V' : 'v',
945 expected[d]);
946 printf(" Found: %c%c%c%c (0x%" PRIx8 ")\n",
947 (results[d] & 0x8) ? 'N' : 'n',
948 (results[d] & 0x4) ? 'Z' : 'z',
949 (results[d] & 0x2) ? 'C' : 'c',
950 (results[d] & 0x1) ? 'V' : 'v',
951 results[d]);
952 printf("\n");
953 }
954 }
955 VIXL_ASSERT(d == expected_length);
956 if (error_count > kErrorReportLimit) {
957 printf("%u other errors follow.\n", error_count - kErrorReportLimit);
958 }
959 VIXL_CHECK(error_count == 0);
960 }
961 delete[] results;
962 }
963
964
// Generate and run code that applies 'helper' (an FP-to-fixed-point
// conversion) to every input element, once for each fbits value from 0 to
// d_size inclusive. The (d_size + 1) results for each input are stored
// consecutively in the 'results' buffer. d_size selects the integer
// destination width (X or W); n_size selects the FP source width (D, S or H).
// *skipped is set by TRY_RUN.
static void TestFPToFixed_Helper(TestFPToFixedHelper_t helper,
                                 uintptr_t inputs,
                                 unsigned inputs_length,
                                 uintptr_t results,
                                 unsigned d_size,
                                 unsigned n_size,
                                 bool* skipped) {
  VIXL_ASSERT((d_size == kXRegSize) || (d_size == kWRegSize));
  VIXL_ASSERT((n_size == kDRegSize) || (n_size == kSRegSize) ||
              (n_size == kHRegSize));

  SETUP_WITH_FEATURES(CPUFeatures::kFP, CPUFeatures::kFPHalf);
  START();

  // Roll up the loop to keep the code size down.
  Label loop_n;

  Register out = x0;
  Register inputs_base = x1;
  Register length = w2;
  Register index_n = w3;

  // Select the input element size from the source register width.
  int n_index_shift;
  if (n_size == kDRegSize) {
    n_index_shift = kDRegSizeInBytesLog2;
  } else if (n_size == kSRegSize) {
    n_index_shift = kSRegSizeInBytesLog2;
  } else {
    n_index_shift = kHRegSizeInBytesLog2;
  }

  Register rd = (d_size == kXRegSize) ? Register(x10) : Register(w10);
  VRegister fn;
  if (n_size == kDRegSize) {
    fn = d1;
  } else if (n_size == kSRegSize) {
    fn = s1;
  } else {
    fn = h1;
  }

  __ Mov(out, results);
  __ Mov(inputs_base, inputs);
  __ Mov(length, inputs_length);

  __ Mov(index_n, 0);
  __ Bind(&loop_n);
  __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, n_index_shift));

  // This inner loop is unrolled at code-generation time: one conversion is
  // emitted per fbits value.
  for (unsigned fbits = 0; fbits <= d_size; ++fbits) {
    {
      // Ensure the helper emits exactly one instruction.
      SingleEmissionCheckScope guard(&masm);
      (masm.*helper)(rd, fn, fbits);
    }
    __ Str(rd, MemOperand(out, rd.GetSizeInBytes(), PostIndex));
  }

  __ Add(index_n, index_n, 1);
  __ Cmp(index_n, inputs_length);
  __ B(lo, &loop_n);

  END();
  TRY_RUN(skipped);
}
1029
1030
TestFPToInt_Helper(TestFPToIntHelper_t helper,uintptr_t inputs,unsigned inputs_length,uintptr_t results,unsigned d_size,unsigned n_size,bool * skipped)1031 static void TestFPToInt_Helper(TestFPToIntHelper_t helper,
1032 uintptr_t inputs,
1033 unsigned inputs_length,
1034 uintptr_t results,
1035 unsigned d_size,
1036 unsigned n_size,
1037 bool* skipped) {
1038 VIXL_ASSERT((d_size == kXRegSize) || (d_size == kWRegSize));
1039 VIXL_ASSERT((n_size == kDRegSize) || (n_size == kSRegSize) ||
1040 (n_size == kHRegSize));
1041
1042 SETUP_WITH_FEATURES(CPUFeatures::kFP,
1043 CPUFeatures::kFPHalf,
1044 CPUFeatures::kJSCVT);
1045 START();
1046
1047 // Roll up the loop to keep the code size down.
1048 Label loop_n;
1049
1050 Register out = x0;
1051 Register inputs_base = x1;
1052 Register length = w2;
1053 Register index_n = w3;
1054
1055 int n_index_shift;
1056 if (n_size == kDRegSize) {
1057 n_index_shift = kDRegSizeInBytesLog2;
1058 } else if (n_size == kSRegSize) {
1059 n_index_shift = kSRegSizeInBytesLog2;
1060 } else {
1061 n_index_shift = kHRegSizeInBytesLog2;
1062 }
1063
1064 Register rd = (d_size == kXRegSize) ? Register(x10) : Register(w10);
1065 VRegister fn;
1066 if (n_size == kDRegSize) {
1067 fn = d1;
1068 } else if (n_size == kSRegSize) {
1069 fn = s1;
1070 } else {
1071 fn = h1;
1072 }
1073
1074 __ Mov(out, results);
1075 __ Mov(inputs_base, inputs);
1076 __ Mov(length, inputs_length);
1077
1078 __ Mov(index_n, 0);
1079 __ Bind(&loop_n);
1080 __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, n_index_shift));
1081
1082 {
1083 SingleEmissionCheckScope guard(&masm);
1084 (masm.*helper)(rd, fn);
1085 }
1086 __ Str(rd, MemOperand(out, rd.GetSizeInBytes(), PostIndex));
1087
1088 __ Add(index_n, index_n, 1);
1089 __ Cmp(index_n, inputs_length);
1090 __ B(lo, &loop_n);
1091
1092 END();
1093 TRY_RUN(skipped);
1094 }
1095
1096
1097 // Test FP instructions.
1098 // - The inputs[] array should be an array of rawbits representations of
1099 // doubles or floats. This ensures that exact bit comparisons can be
1100 // performed.
1101 // - The expected[] array should be an array of signed integers.
1102 template <typename Tn, typename Td>
TestFPToS(const char * name,TestFPToIntHelper_t helper,const Tn inputs[],unsigned inputs_length,const Td expected[],unsigned expected_length)1103 static void TestFPToS(const char* name,
1104 TestFPToIntHelper_t helper,
1105 const Tn inputs[],
1106 unsigned inputs_length,
1107 const Td expected[],
1108 unsigned expected_length) {
1109 VIXL_ASSERT(inputs_length > 0);
1110
1111 const unsigned results_length = inputs_length;
1112 Td* results = new Td[results_length];
1113
1114 const unsigned d_bits = sizeof(Td) * 8;
1115 const unsigned n_bits = sizeof(Tn) * 8;
1116 bool skipped;
1117
1118 TestFPToInt_Helper(helper,
1119 reinterpret_cast<uintptr_t>(inputs),
1120 inputs_length,
1121 reinterpret_cast<uintptr_t>(results),
1122 d_bits,
1123 n_bits,
1124 &skipped);
1125
1126 if (Test::generate_test_trace()) {
1127 // Print the results.
1128 printf("const int%u_t kExpected_%s[] = {\n", d_bits, name);
1129 // There is no simple C++ literal for INT*_MIN that doesn't produce
1130 // warnings, so we use an appropriate constant in that case instead.
1131 // Deriving int_d_min in this way (rather than just checking INT64_MIN and
1132 // the like) avoids warnings about comparing values with differing ranges.
1133 const int64_t int_d_max = (UINT64_C(1) << (d_bits - 1)) - 1;
1134 const int64_t int_d_min = -(int_d_max)-1;
1135 for (unsigned d = 0; d < results_length; d++) {
1136 if (results[d] == int_d_min) {
1137 printf(" -INT%u_C(%" PRId64 ") - 1,\n", d_bits, int_d_max);
1138 } else {
1139 // Some constants (such as those between INT32_MAX and UINT32_MAX)
1140 // trigger compiler warnings. To avoid these warnings, use an
1141 // appropriate macro to make the type explicit.
1142 int64_t result_int64 = static_cast<int64_t>(results[d]);
1143 if (result_int64 >= 0) {
1144 printf(" INT%u_C(%" PRId64 "),\n", d_bits, result_int64);
1145 } else {
1146 printf(" -INT%u_C(%" PRId64 "),\n", d_bits, -result_int64);
1147 }
1148 }
1149 }
1150 printf("};\n");
1151 printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
1152 } else if (!skipped) {
1153 // Check the results.
1154 VIXL_CHECK(expected_length == results_length);
1155 unsigned error_count = 0;
1156 unsigned d = 0;
1157 for (unsigned n = 0; n < inputs_length; n++, d++) {
1158 if (results[d] != expected[d]) {
1159 if (++error_count > kErrorReportLimit) continue;
1160
1161 printf("%s 0x%0*" PRIx64 " (%s %g):\n",
1162 name,
1163 n_bits / 4,
1164 static_cast<uint64_t>(inputs[n]),
1165 name,
1166 rawbits_to_fp(inputs[n]));
1167 printf(" Expected: 0x%0*" PRIx64 " (%" PRId64 ")\n",
1168 d_bits / 4,
1169 static_cast<uint64_t>(expected[d]),
1170 static_cast<int64_t>(expected[d]));
1171 printf(" Found: 0x%0*" PRIx64 " (%" PRId64 ")\n",
1172 d_bits / 4,
1173 static_cast<uint64_t>(results[d]),
1174 static_cast<int64_t>(results[d]));
1175 printf("\n");
1176 }
1177 }
1178 VIXL_ASSERT(d == expected_length);
1179 if (error_count > kErrorReportLimit) {
1180 printf("%u other errors follow.\n", error_count - kErrorReportLimit);
1181 }
1182 VIXL_CHECK(error_count == 0);
1183 }
1184 delete[] results;
1185 }
1186
1187
1188 // Test FP instructions.
1189 // - The inputs[] array should be an array of rawbits representations of
1190 // doubles or floats. This ensures that exact bit comparisons can be
1191 // performed.
1192 // - The expected[] array should be an array of unsigned integers.
1193 template <typename Tn, typename Td>
TestFPToU(const char * name,TestFPToIntHelper_t helper,const Tn inputs[],unsigned inputs_length,const Td expected[],unsigned expected_length)1194 static void TestFPToU(const char* name,
1195 TestFPToIntHelper_t helper,
1196 const Tn inputs[],
1197 unsigned inputs_length,
1198 const Td expected[],
1199 unsigned expected_length) {
1200 VIXL_ASSERT(inputs_length > 0);
1201
1202 const unsigned results_length = inputs_length;
1203 Td* results = new Td[results_length];
1204
1205 const unsigned d_bits = sizeof(Td) * 8;
1206 const unsigned n_bits = sizeof(Tn) * 8;
1207 bool skipped;
1208
1209 TestFPToInt_Helper(helper,
1210 reinterpret_cast<uintptr_t>(inputs),
1211 inputs_length,
1212 reinterpret_cast<uintptr_t>(results),
1213 d_bits,
1214 n_bits,
1215 &skipped);
1216
1217 if (Test::generate_test_trace()) {
1218 // Print the results.
1219 printf("const uint%u_t kExpected_%s[] = {\n", d_bits, name);
1220 for (unsigned d = 0; d < results_length; d++) {
1221 printf(" %" PRIu64 "u,\n", static_cast<uint64_t>(results[d]));
1222 }
1223 printf("};\n");
1224 printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
1225 } else if (!skipped) {
1226 // Check the results.
1227 VIXL_CHECK(expected_length == results_length);
1228 unsigned error_count = 0;
1229 unsigned d = 0;
1230 for (unsigned n = 0; n < inputs_length; n++, d++) {
1231 if (results[d] != expected[d]) {
1232 if (++error_count > kErrorReportLimit) continue;
1233
1234 printf("%s 0x%0*" PRIx64 " (%s %g):\n",
1235 name,
1236 n_bits / 4,
1237 static_cast<uint64_t>(inputs[n]),
1238 name,
1239 rawbits_to_fp(inputs[n]));
1240 printf(" Expected: 0x%0*" PRIx64 " (%" PRIu64 ")\n",
1241 d_bits / 4,
1242 static_cast<uint64_t>(expected[d]),
1243 static_cast<uint64_t>(expected[d]));
1244 printf(" Found: 0x%0*" PRIx64 " (%" PRIu64 ")\n",
1245 d_bits / 4,
1246 static_cast<uint64_t>(results[d]),
1247 static_cast<uint64_t>(results[d]));
1248 printf("\n");
1249 }
1250 }
1251 VIXL_ASSERT(d == expected_length);
1252 if (error_count > kErrorReportLimit) {
1253 printf("%u other errors follow.\n", error_count - kErrorReportLimit);
1254 }
1255 VIXL_CHECK(error_count == 0);
1256 }
1257 delete[] results;
1258 }
1259
1260
1261 // Test FP instructions.
1262 // - The inputs[] array should be an array of rawbits representations of
1263 // doubles or floats. This ensures that exact bit comparisons can be
1264 // performed.
1265 // - The expected[] array should be an array of signed integers.
1266 template <typename Tn, typename Td>
TestFPToFixedS(const char * name,TestFPToFixedHelper_t helper,const Tn inputs[],unsigned inputs_length,const Td expected[],unsigned expected_length)1267 static void TestFPToFixedS(const char* name,
1268 TestFPToFixedHelper_t helper,
1269 const Tn inputs[],
1270 unsigned inputs_length,
1271 const Td expected[],
1272 unsigned expected_length) {
1273 VIXL_ASSERT(inputs_length > 0);
1274
1275 const unsigned d_bits = sizeof(Td) * 8;
1276 const unsigned n_bits = sizeof(Tn) * 8;
1277
1278 const unsigned results_length = inputs_length * (d_bits + 1);
1279 Td* results = new Td[results_length];
1280
1281 bool skipped;
1282
1283 TestFPToFixed_Helper(helper,
1284 reinterpret_cast<uintptr_t>(inputs),
1285 inputs_length,
1286 reinterpret_cast<uintptr_t>(results),
1287 d_bits,
1288 n_bits,
1289 &skipped);
1290
1291 if (Test::generate_test_trace()) {
1292 // Print the results.
1293 printf("const int%u_t kExpected_%s[] = {\n", d_bits, name);
1294 // There is no simple C++ literal for INT*_MIN that doesn't produce
1295 // warnings, so we use an appropriate constant in that case instead.
1296 // Deriving int_d_min in this way (rather than just checking INT64_MIN and
1297 // the like) avoids warnings about comparing values with differing ranges.
1298 const int64_t int_d_max = (UINT64_C(1) << (d_bits - 1)) - 1;
1299 const int64_t int_d_min = -(int_d_max)-1;
1300 for (unsigned d = 0; d < results_length; d++) {
1301 if (results[d] == int_d_min) {
1302 printf(" -INT%u_C(%" PRId64 ") - 1,\n", d_bits, int_d_max);
1303 } else {
1304 // Some constants (such as those between INT32_MAX and UINT32_MAX)
1305 // trigger compiler warnings. To avoid these warnings, use an
1306 // appropriate macro to make the type explicit.
1307 int64_t result_int64 = static_cast<int64_t>(results[d]);
1308 if (result_int64 >= 0) {
1309 printf(" INT%u_C(%" PRId64 "),\n", d_bits, result_int64);
1310 } else {
1311 printf(" -INT%u_C(%" PRId64 "),\n", d_bits, -result_int64);
1312 }
1313 }
1314 }
1315 printf("};\n");
1316 printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
1317 } else if (!skipped) {
1318 // Check the results.
1319 VIXL_CHECK(expected_length == results_length);
1320 unsigned error_count = 0;
1321 unsigned d = 0;
1322 for (unsigned n = 0; n < inputs_length; n++) {
1323 for (unsigned fbits = 0; fbits <= d_bits; ++fbits, d++) {
1324 if (results[d] != expected[d]) {
1325 if (++error_count > kErrorReportLimit) continue;
1326
1327 printf("%s 0x%0*" PRIx64 " #%d (%s %g #%d):\n",
1328 name,
1329 n_bits / 4,
1330 static_cast<uint64_t>(inputs[n]),
1331 fbits,
1332 name,
1333 rawbits_to_fp(inputs[n]),
1334 fbits);
1335 printf(" Expected: 0x%0*" PRIx64 " (%" PRId64 ")\n",
1336 d_bits / 4,
1337 static_cast<uint64_t>(expected[d]),
1338 static_cast<int64_t>(expected[d]));
1339 printf(" Found: 0x%0*" PRIx64 " (%" PRId64 ")\n",
1340 d_bits / 4,
1341 static_cast<uint64_t>(results[d]),
1342 static_cast<int64_t>(results[d]));
1343 printf("\n");
1344 }
1345 }
1346 }
1347 VIXL_ASSERT(d == expected_length);
1348 if (error_count > kErrorReportLimit) {
1349 printf("%u other errors follow.\n", error_count - kErrorReportLimit);
1350 }
1351 VIXL_CHECK(error_count == 0);
1352 }
1353 delete[] results;
1354 }
1355
1356
1357 // Test FP instructions.
1358 // - The inputs[] array should be an array of rawbits representations of
1359 // doubles or floats. This ensures that exact bit comparisons can be
1360 // performed.
1361 // - The expected[] array should be an array of unsigned integers.
1362 template <typename Tn, typename Td>
TestFPToFixedU(const char * name,TestFPToFixedHelper_t helper,const Tn inputs[],unsigned inputs_length,const Td expected[],unsigned expected_length)1363 static void TestFPToFixedU(const char* name,
1364 TestFPToFixedHelper_t helper,
1365 const Tn inputs[],
1366 unsigned inputs_length,
1367 const Td expected[],
1368 unsigned expected_length) {
1369 VIXL_ASSERT(inputs_length > 0);
1370
1371 const unsigned d_bits = sizeof(Td) * 8;
1372 const unsigned n_bits = sizeof(Tn) * 8;
1373
1374 const unsigned results_length = inputs_length * (d_bits + 1);
1375 Td* results = new Td[results_length];
1376
1377 bool skipped;
1378
1379 TestFPToFixed_Helper(helper,
1380 reinterpret_cast<uintptr_t>(inputs),
1381 inputs_length,
1382 reinterpret_cast<uintptr_t>(results),
1383 d_bits,
1384 n_bits,
1385 &skipped);
1386
1387 if (Test::generate_test_trace()) {
1388 // Print the results.
1389 printf("const uint%u_t kExpected_%s[] = {\n", d_bits, name);
1390 for (unsigned d = 0; d < results_length; d++) {
1391 printf(" %" PRIu64 "u,\n", static_cast<uint64_t>(results[d]));
1392 }
1393 printf("};\n");
1394 printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
1395 } else if (!skipped) {
1396 // Check the results.
1397 VIXL_CHECK(expected_length == results_length);
1398 unsigned error_count = 0;
1399 unsigned d = 0;
1400 for (unsigned n = 0; n < inputs_length; n++) {
1401 for (unsigned fbits = 0; fbits <= d_bits; ++fbits, d++) {
1402 if (results[d] != expected[d]) {
1403 if (++error_count > kErrorReportLimit) continue;
1404
1405 printf("%s 0x%0*" PRIx64 " #%d (%s %g #%d):\n",
1406 name,
1407 n_bits / 4,
1408 static_cast<uint64_t>(inputs[n]),
1409 fbits,
1410 name,
1411 rawbits_to_fp(inputs[n]),
1412 fbits);
1413 printf(" Expected: 0x%0*" PRIx64 " (%" PRIu64 ")\n",
1414 d_bits / 4,
1415 static_cast<uint64_t>(expected[d]),
1416 static_cast<uint64_t>(expected[d]));
1417 printf(" Found: 0x%0*" PRIx64 " (%" PRIu64 ")\n",
1418 d_bits / 4,
1419 static_cast<uint64_t>(results[d]),
1420 static_cast<uint64_t>(results[d]));
1421 printf("\n");
1422 }
1423 }
1424 }
1425 VIXL_ASSERT(d == expected_length);
1426 if (error_count > kErrorReportLimit) {
1427 printf("%u other errors follow.\n", error_count - kErrorReportLimit);
1428 }
1429 VIXL_CHECK(error_count == 0);
1430 }
1431 delete[] results;
1432 }
1433
1434
1435 // ==== Tests for instructions of the form <INST> VReg, VReg. ====
1436
1437
Test1OpNEON_Helper(Test1OpNEONHelper_t helper,uintptr_t inputs_n,unsigned inputs_n_length,uintptr_t results,VectorFormat vd_form,VectorFormat vn_form,bool * skipped)1438 static void Test1OpNEON_Helper(Test1OpNEONHelper_t helper,
1439 uintptr_t inputs_n,
1440 unsigned inputs_n_length,
1441 uintptr_t results,
1442 VectorFormat vd_form,
1443 VectorFormat vn_form,
1444 bool* skipped) {
1445 VIXL_ASSERT(vd_form != kFormatUndefined);
1446 VIXL_ASSERT(vn_form != kFormatUndefined);
1447
1448 CPUFeatures features;
1449 features.Combine(CPUFeatures::kNEON,
1450 CPUFeatures::kFP,
1451 CPUFeatures::kRDM,
1452 CPUFeatures::kNEONHalf);
1453 // For frint{32,64}{x,y} variants.
1454 features.Combine(CPUFeatures::kFrintToFixedSizedInt);
1455 SETUP_WITH_FEATURES(features);
1456 START();
1457
1458 // Roll up the loop to keep the code size down.
1459 Label loop_n;
1460
1461 Register out = x0;
1462 Register inputs_n_base = x1;
1463 Register inputs_n_last_16bytes = x3;
1464 Register index_n = x5;
1465
1466 // TODO: Refactor duplicate definitions below with a VRegister::As() routine.
1467 const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form);
1468 const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
1469
1470 const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form);
1471 const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
1472 const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
1473 const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form);
1474 const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form);
1475
1476
1477 // These will be either a D- or a Q-register form, with a single lane
1478 // (for use in scalar load and store operations).
1479 VRegister vd = VRegister(0, vd_bits);
1480 VRegister vn = v1.V16B();
1481 VRegister vntmp = v3.V16B();
1482
1483 // These will have the correct format for use when calling 'helper'.
1484 VRegister vd_helper = VRegister(0, vd_bits, vd_lane_count);
1485 VRegister vn_helper = VRegister(1, vn_bits, vn_lane_count);
1486
1487 // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'.
1488 VRegister vntmp_single = VRegister(3, vn_lane_bits);
1489
1490 __ Mov(out, results);
1491
1492 __ Mov(inputs_n_base, inputs_n);
1493 __ Mov(inputs_n_last_16bytes,
1494 inputs_n + (vn_lane_bytes * inputs_n_length) - 16);
1495
1496 __ Ldr(vn, MemOperand(inputs_n_last_16bytes));
1497
1498 __ Mov(index_n, 0);
1499 __ Bind(&loop_n);
1500
1501 __ Ldr(vntmp_single,
1502 MemOperand(inputs_n_base, index_n, LSL, vn_lane_bytes_log2));
1503 __ Ext(vn, vn, vntmp, vn_lane_bytes);
1504
1505 // Set the destination to zero.
1506 // TODO: Setting the destination to values other than zero
1507 // might be a better test for instructions such as sqxtn2
1508 // which may leave parts of V registers unchanged.
1509 __ Movi(vd.V16B(), 0);
1510
1511 {
1512 SingleEmissionCheckScope guard(&masm);
1513 (masm.*helper)(vd_helper, vn_helper);
1514 }
1515 __ Str(vd, MemOperand(out, vd.GetSizeInBytes(), PostIndex));
1516
1517 __ Add(index_n, index_n, 1);
1518 __ Cmp(index_n, inputs_n_length);
1519 __ B(lo, &loop_n);
1520
1521 END();
1522 TRY_RUN(skipped);
1523 }
1524
1525
1526 // Test NEON instructions. The inputs_*[] and expected[] arrays should be
1527 // arrays of rawbit representation of input values. This ensures that
1528 // exact bit comparisons can be performed.
1529 template <typename Td, typename Tn>
Test1OpNEON(const char * name,Test1OpNEONHelper_t helper,const Tn inputs_n[],unsigned inputs_n_length,const Td expected[],unsigned expected_length,VectorFormat vd_form,VectorFormat vn_form)1530 static void Test1OpNEON(const char* name,
1531 Test1OpNEONHelper_t helper,
1532 const Tn inputs_n[],
1533 unsigned inputs_n_length,
1534 const Td expected[],
1535 unsigned expected_length,
1536 VectorFormat vd_form,
1537 VectorFormat vn_form) {
1538 VIXL_ASSERT(inputs_n_length > 0);
1539
1540 const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
1541 const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
1542 const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
1543
1544 const unsigned results_length = inputs_n_length;
1545 Td* results = new Td[results_length * vd_lane_count];
1546 const unsigned lane_bit = sizeof(Td) * 8;
1547 const unsigned lane_len_in_hex = MaxHexCharCount<Td, Tn>();
1548
1549 bool skipped;
1550
1551 Test1OpNEON_Helper(helper,
1552 reinterpret_cast<uintptr_t>(inputs_n),
1553 inputs_n_length,
1554 reinterpret_cast<uintptr_t>(results),
1555 vd_form,
1556 vn_form,
1557 &skipped);
1558
1559 if (Test::generate_test_trace()) {
1560 // Print the results.
1561 printf("const uint%u_t kExpected_NEON_%s[] = {\n", lane_bit, name);
1562 for (unsigned iteration = 0; iteration < results_length; iteration++) {
1563 printf(" ");
1564 // Output a separate result for each element of the result vector.
1565 for (unsigned lane = 0; lane < vd_lane_count; lane++) {
1566 unsigned index = lane + (iteration * vd_lane_count);
1567 printf(" 0x%0*" PRIx64 ",",
1568 lane_len_in_hex,
1569 static_cast<uint64_t>(results[index]));
1570 }
1571 printf("\n");
1572 }
1573
1574 printf("};\n");
1575 printf("const unsigned kExpectedCount_NEON_%s = %u;\n",
1576 name,
1577 results_length);
1578 } else if (!skipped) {
1579 // Check the results.
1580 VIXL_CHECK(expected_length == results_length);
1581 unsigned error_count = 0;
1582 unsigned d = 0;
1583 const char* padding = " ";
1584 VIXL_ASSERT(strlen(padding) >= (lane_len_in_hex + 1));
1585 for (unsigned n = 0; n < inputs_n_length; n++, d++) {
1586 bool error_in_vector = false;
1587
1588 for (unsigned lane = 0; lane < vd_lane_count; lane++) {
1589 unsigned output_index = (n * vd_lane_count) + lane;
1590
1591 if (results[output_index] != expected[output_index]) {
1592 error_in_vector = true;
1593 break;
1594 }
1595 }
1596
1597 if (error_in_vector && (++error_count <= kErrorReportLimit)) {
1598 printf("%s\n", name);
1599 printf(" Vn%.*s| Vd%.*s| Expected\n",
1600 lane_len_in_hex + 1,
1601 padding,
1602 lane_len_in_hex + 1,
1603 padding);
1604
1605 const unsigned first_index_n =
1606 inputs_n_length - (16 / vn_lane_bytes) + n + 1;
1607
1608 for (unsigned lane = 0; lane < std::max(vd_lane_count, vn_lane_count);
1609 lane++) {
1610 unsigned output_index = (n * vd_lane_count) + lane;
1611 unsigned input_index_n = (first_index_n + lane) % inputs_n_length;
1612
1613 printf("%c0x%0*" PRIx64 " | 0x%0*" PRIx64
1614 " "
1615 "| 0x%0*" PRIx64 "\n",
1616 results[output_index] != expected[output_index] ? '*' : ' ',
1617 lane_len_in_hex,
1618 static_cast<uint64_t>(inputs_n[input_index_n]),
1619 lane_len_in_hex,
1620 static_cast<uint64_t>(results[output_index]),
1621 lane_len_in_hex,
1622 static_cast<uint64_t>(expected[output_index]));
1623 }
1624 }
1625 }
1626 VIXL_ASSERT(d == expected_length);
1627 if (error_count > kErrorReportLimit) {
1628 printf("%u other errors follow.\n", error_count - kErrorReportLimit);
1629 }
1630 VIXL_CHECK(error_count == 0);
1631 }
1632 delete[] results;
1633 }
1634
1635
1636 // ==== Tests for instructions of the form <mnemonic> <V><d>, <Vn>.<T> ====
1637 // where <V> is one of B, H, S or D registers.
1638 // e.g. saddlv H1, v0.8B
1639
1640 // TODO: Change tests to store all lanes of the resulting V register.
1641 // Some tests store all 128 bits of the resulting V register to
1642 // check the simulator's behaviour on the rest of the register.
1643 // This is better than storing the affected lanes only.
1644 // Change any tests such as the 'Across' template to do the same.
1645
Test1OpAcrossNEON_Helper(Test1OpNEONHelper_t helper,uintptr_t inputs_n,unsigned inputs_n_length,uintptr_t results,VectorFormat vd_form,VectorFormat vn_form,bool * skipped)1646 static void Test1OpAcrossNEON_Helper(Test1OpNEONHelper_t helper,
1647 uintptr_t inputs_n,
1648 unsigned inputs_n_length,
1649 uintptr_t results,
1650 VectorFormat vd_form,
1651 VectorFormat vn_form,
1652 bool* skipped) {
1653 VIXL_ASSERT(vd_form != kFormatUndefined);
1654 VIXL_ASSERT(vn_form != kFormatUndefined);
1655
1656 SETUP_WITH_FEATURES(CPUFeatures::kNEON,
1657 CPUFeatures::kFP,
1658 CPUFeatures::kNEONHalf);
1659 START();
1660
1661 // Roll up the loop to keep the code size down.
1662 Label loop_n;
1663
1664 Register out = x0;
1665 Register inputs_n_base = x1;
1666 Register inputs_n_last_vector = x3;
1667 Register index_n = x5;
1668
1669 // TODO: Refactor duplicate definitions below with a VRegister::As() routine.
1670 const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form);
1671 const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form);
1672 const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
1673 const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
1674 const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form);
1675 const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form);
1676
1677 // Test destructive operations by (arbitrarily) using the same register for
1678 // B and S lane sizes.
1679 bool destructive = (vd_bits == kBRegSize) || (vd_bits == kSRegSize);
1680
1681 // Create two aliases for v0; the first is the destination for the tested
1682 // instruction, the second, the whole Q register to check the results.
1683 VRegister vd = VRegister(0, vd_bits);
1684 VRegister vdstr = VRegister(0, kQRegSize);
1685
1686 VRegister vn = VRegister(1, vn_bits);
1687 VRegister vntmp = VRegister(3, vn_bits);
1688
1689 // These will have the correct format for use when calling 'helper'.
1690 VRegister vd_helper = VRegister(0, vn_bits, vn_lane_count);
1691 VRegister vn_helper = VRegister(1, vn_bits, vn_lane_count);
1692
1693 // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'.
1694 VRegister vntmp_single = VRegister(3, vn_lane_bits);
1695
1696 // Same registers for use in the 'ext' instructions.
1697 VRegister vn_ext = (kDRegSize == vn_bits) ? vn.V8B() : vn.V16B();
1698 VRegister vntmp_ext = (kDRegSize == vn_bits) ? vntmp.V8B() : vntmp.V16B();
1699
1700 __ Mov(out, results);
1701
1702 __ Mov(inputs_n_base, inputs_n);
1703 __ Mov(inputs_n_last_vector,
1704 inputs_n + vn_lane_bytes * (inputs_n_length - vn_lane_count));
1705
1706 __ Ldr(vn, MemOperand(inputs_n_last_vector));
1707
1708 __ Mov(index_n, 0);
1709 __ Bind(&loop_n);
1710
1711 __ Ldr(vntmp_single,
1712 MemOperand(inputs_n_base, index_n, LSL, vn_lane_bytes_log2));
1713 __ Ext(vn_ext, vn_ext, vntmp_ext, vn_lane_bytes);
1714
1715 if (destructive) {
1716 __ Mov(vd_helper, vn_helper);
1717 SingleEmissionCheckScope guard(&masm);
1718 (masm.*helper)(vd, vd_helper);
1719 } else {
1720 SingleEmissionCheckScope guard(&masm);
1721 (masm.*helper)(vd, vn_helper);
1722 }
1723
1724 __ Str(vdstr, MemOperand(out, kQRegSizeInBytes, PostIndex));
1725
1726 __ Add(index_n, index_n, 1);
1727 __ Cmp(index_n, inputs_n_length);
1728 __ B(lo, &loop_n);
1729
1730 END();
1731 TRY_RUN(skipped);
1732 }
1733
1734 // Test NEON instructions. The inputs_*[] and expected[] arrays should be
1735 // arrays of rawbit representation of input values. This ensures that
1736 // exact bit comparisons can be performed.
1737 template <typename Td, typename Tn>
Test1OpAcrossNEON(const char * name,Test1OpNEONHelper_t helper,const Tn inputs_n[],unsigned inputs_n_length,const Td expected[],unsigned expected_length,VectorFormat vd_form,VectorFormat vn_form)1738 static void Test1OpAcrossNEON(const char* name,
1739 Test1OpNEONHelper_t helper,
1740 const Tn inputs_n[],
1741 unsigned inputs_n_length,
1742 const Td expected[],
1743 unsigned expected_length,
1744 VectorFormat vd_form,
1745 VectorFormat vn_form) {
1746 VIXL_ASSERT(inputs_n_length > 0);
1747
1748 const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
1749 const unsigned vd_lanes_per_q = MaxLaneCountFromFormat(vd_form);
1750
1751 const unsigned results_length = inputs_n_length;
1752 Td* results = new Td[results_length * vd_lanes_per_q];
1753 const unsigned lane_bit = sizeof(Td) * 8;
1754 const unsigned lane_len_in_hex = MaxHexCharCount<Td, Tn>();
1755
1756 bool skipped;
1757
1758 Test1OpAcrossNEON_Helper(helper,
1759 reinterpret_cast<uintptr_t>(inputs_n),
1760 inputs_n_length,
1761 reinterpret_cast<uintptr_t>(results),
1762 vd_form,
1763 vn_form,
1764 &skipped);
1765
1766 if (Test::generate_test_trace()) {
1767 // Print the results.
1768 printf("const uint%u_t kExpected_NEON_%s[] = {\n", lane_bit, name);
1769 for (unsigned iteration = 0; iteration < results_length; iteration++) {
1770 printf(" ");
1771 // Output a separate result for each element of the result vector.
1772 for (unsigned lane = 0; lane < vd_lane_count; lane++) {
1773 unsigned index = lane + (iteration * vd_lanes_per_q);
1774 printf(" 0x%0*" PRIx64 ",",
1775 lane_len_in_hex,
1776 static_cast<uint64_t>(results[index]));
1777 }
1778 printf("\n");
1779 }
1780
1781 printf("};\n");
1782 printf("const unsigned kExpectedCount_NEON_%s = %u;\n",
1783 name,
1784 results_length);
1785 } else if (!skipped) {
1786 // Check the results.
1787 VIXL_CHECK(expected_length == results_length);
1788 unsigned error_count = 0;
1789 unsigned d = 0;
1790 const char* padding = " ";
1791 VIXL_ASSERT(strlen(padding) >= (lane_len_in_hex + 1));
1792 for (unsigned n = 0; n < inputs_n_length; n++, d++) {
1793 bool error_in_vector = false;
1794
1795 for (unsigned lane = 0; lane < vd_lane_count; lane++) {
1796 unsigned expected_index = (n * vd_lane_count) + lane;
1797 unsigned results_index = (n * vd_lanes_per_q) + lane;
1798
1799 if (results[results_index] != expected[expected_index]) {
1800 error_in_vector = true;
1801 break;
1802 }
1803 }
1804
1805 // For across operations, the remaining lanes should be zero.
1806 for (unsigned lane = vd_lane_count; lane < vd_lanes_per_q; lane++) {
1807 unsigned results_index = (n * vd_lanes_per_q) + lane;
1808 if (results[results_index] != 0) {
1809 error_in_vector = true;
1810 break;
1811 }
1812 }
1813
1814 if (error_in_vector && (++error_count <= kErrorReportLimit)) {
1815 const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
1816
1817 printf("%s\n", name);
1818 printf(" Vn%.*s| Vd%.*s| Expected\n",
1819 lane_len_in_hex + 1,
1820 padding,
1821 lane_len_in_hex + 1,
1822 padding);
1823
1824 // TODO: In case of an error, all tests print out as many elements as
1825 // there are lanes in the output or input vectors. This way
1826 // the viewer can read all the values that were needed for the
1827 // operation but the output contains also unnecessary values.
1828 // These prints can be improved according to the arguments
1829 // passed to test functions.
1830 // This output for the 'Across' category has the required
1831 // modifications.
1832 for (unsigned lane = 0; lane < vn_lane_count; lane++) {
1833 unsigned results_index =
1834 (n * vd_lanes_per_q) + ((vn_lane_count - 1) - lane);
1835 unsigned input_index_n =
1836 (inputs_n_length - vn_lane_count + n + 1 + lane) %
1837 inputs_n_length;
1838
1839 Td expect = 0;
1840 if ((vn_lane_count - 1) == lane) {
1841 // This is the last lane to be printed, ie. the least-significant
1842 // lane, so use the expected value; any other lane should be zero.
1843 unsigned expected_index = n * vd_lane_count;
1844 expect = expected[expected_index];
1845 }
1846 printf("%c0x%0*" PRIx64 " | 0x%0*" PRIx64 " | 0x%0*" PRIx64 "\n",
1847 results[results_index] != expect ? '*' : ' ',
1848 lane_len_in_hex,
1849 static_cast<uint64_t>(inputs_n[input_index_n]),
1850 lane_len_in_hex,
1851 static_cast<uint64_t>(results[results_index]),
1852 lane_len_in_hex,
1853 static_cast<uint64_t>(expect));
1854 }
1855 }
1856 }
1857 VIXL_ASSERT(d == expected_length);
1858 if (error_count > kErrorReportLimit) {
1859 printf("%u other errors follow.\n", error_count - kErrorReportLimit);
1860 }
1861 VIXL_CHECK(error_count == 0);
1862 }
1863 delete[] results;
1864 }
1865
1866
1867 // ==== Tests for instructions of the form <INST> VReg, VReg, VReg. ====
1868
1869 // TODO: Iterate over inputs_d once the traces file is split.
1870
// Emits and runs a test sequence for instructions of the form
// <INST> VReg, VReg, VReg.
//
// 'helper' is a MacroAssembler member function that emits one instance of the
// instruction under test. 'inputs_d' points to at least 16 bytes used to seed
// the destination register (relevant for accumulating forms). 'inputs_n' and
// 'inputs_m' are raw input buffers (each at least 16 bytes), iterated as
// 'inputs_n_length' x 'inputs_m_length' lane combinations. One full Q-register
// result is stored to 'results' per combination, in (n, m) order. '*skipped'
// is set by TRY_RUN if the test could not be executed in this environment.
static void Test2OpNEON_Helper(Test2OpNEONHelper_t helper,
                               uintptr_t inputs_d,
                               uintptr_t inputs_n,
                               unsigned inputs_n_length,
                               uintptr_t inputs_m,
                               unsigned inputs_m_length,
                               uintptr_t results,
                               VectorFormat vd_form,
                               VectorFormat vn_form,
                               VectorFormat vm_form,
                               bool* skipped) {
  VIXL_ASSERT(vd_form != kFormatUndefined);
  VIXL_ASSERT(vn_form != kFormatUndefined);
  VIXL_ASSERT(vm_form != kFormatUndefined);

  // Enable the superset of CPU features that any tested three-operand NEON
  // instruction might require.
  CPUFeatures features;
  features.Combine(CPUFeatures::kNEON, CPUFeatures::kNEONHalf);
  features.Combine(CPUFeatures::kFP);
  features.Combine(CPUFeatures::kRDM);
  features.Combine(CPUFeatures::kDotProduct);
  features.Combine(CPUFeatures::kFHM);
  SETUP_WITH_FEATURES(features);
  START();

  // Roll up the loop to keep the code size down.
  Label loop_n, loop_m;

  Register out = x0;
  Register inputs_n_base = x1;
  Register inputs_m_base = x2;
  Register inputs_d_base = x3;
  Register inputs_n_last_16bytes = x4;
  Register inputs_m_last_16bytes = x5;
  Register index_n = x6;
  Register index_m = x7;

  // TODO: Refactor duplicate definitions below with a VRegister::As() routine.
  const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form);
  const unsigned vd_lane_count = LaneCountFromFormat(vd_form);

  const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form);
  const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
  const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
  const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form);
  const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form);

  const unsigned vm_bits = RegisterSizeInBitsFromFormat(vm_form);
  const unsigned vm_lane_count = LaneCountFromFormat(vm_form);
  const unsigned vm_lane_bytes = LaneSizeInBytesFromFormat(vm_form);
  const unsigned vm_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vm_form);
  const unsigned vm_lane_bits = LaneSizeInBitsFromFormat(vm_form);


  // Always load and store 128 bits regardless of the format.
  VRegister vd = v0.V16B();
  VRegister vn = v1.V16B();
  VRegister vm = v2.V16B();
  VRegister vntmp = v3.V16B();
  VRegister vmtmp = v4.V16B();
  VRegister vres = v5.V16B();

  // These will have the correct format for calling the 'helper'.
  // Note: the register codes (1, 2, 5) must match vn, vm and vres above;
  // these are just differently-typed views of the same registers.
  VRegister vn_helper = VRegister(1, vn_bits, vn_lane_count);
  VRegister vm_helper = VRegister(2, vm_bits, vm_lane_count);
  VRegister vres_helper = VRegister(5, vd_bits, vd_lane_count);

  // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'.
  VRegister vntmp_single = VRegister(3, vn_lane_bits);
  VRegister vmtmp_single = VRegister(4, vm_lane_bits);

  __ Mov(out, results);

  __ Mov(inputs_d_base, inputs_d);

  __ Mov(inputs_n_base, inputs_n);
  // NOTE(review): index_n/index_m count lanes (see the Ldr with
  // LSL vn_lane_bytes_log2 below), but the '- 16' seed offsets here are in
  // bytes. The seed only affects the initial register window and matches how
  // the reference traces were generated - confirm against upstream before
  // changing.
  __ Mov(inputs_n_last_16bytes, inputs_n + (inputs_n_length - 16));
  __ Mov(inputs_m_base, inputs_m);
  __ Mov(inputs_m_last_16bytes, inputs_m + (inputs_m_length - 16));

  // Seed the source registers; each loop iteration rotates one new lane in
  // (see Ext below), so vn/vm hold a sliding window over the input buffers.
  __ Ldr(vd, MemOperand(inputs_d_base));
  __ Ldr(vn, MemOperand(inputs_n_last_16bytes));
  __ Ldr(vm, MemOperand(inputs_m_last_16bytes));

  __ Mov(index_n, 0);
  __ Bind(&loop_n);

  // Load the next 'n' lane and shift it into vn.
  __ Ldr(vntmp_single,
         MemOperand(inputs_n_base, index_n, LSL, vn_lane_bytes_log2));
  __ Ext(vn, vn, vntmp, vn_lane_bytes);

  __ Mov(index_m, 0);
  __ Bind(&loop_m);

  // Load the next 'm' lane and shift it into vm.
  __ Ldr(vmtmp_single,
         MemOperand(inputs_m_base, index_m, LSL, vm_lane_bytes_log2));
  __ Ext(vm, vm, vmtmp, vm_lane_bytes);

  // Seed the destination (for accumulating forms), emit exactly one
  // instruction under test, then store the full 128-bit result.
  __ Mov(vres, vd);
  {
    SingleEmissionCheckScope guard(&masm);
    (masm.*helper)(vres_helper, vn_helper, vm_helper);
  }
  __ Str(vres, MemOperand(out, vd.GetSizeInBytes(), PostIndex));

  __ Add(index_m, index_m, 1);
  __ Cmp(index_m, inputs_m_length);
  __ B(lo, &loop_m);

  __ Add(index_n, index_n, 1);
  __ Cmp(index_n, inputs_n_length);
  __ B(lo, &loop_n);

  END();
  TRY_RUN(skipped);
}
1986
1987
1988 // Test NEON instructions. The inputs_*[] and expected[] arrays should be
1989 // arrays of rawbit representation of input values. This ensures that
1990 // exact bit comparisons can be performed.
1991 template <typename Td, typename Tn, typename Tm>
Test2OpNEON(const char * name,Test2OpNEONHelper_t helper,const Td inputs_d[],const Tn inputs_n[],unsigned inputs_n_length,const Tm inputs_m[],unsigned inputs_m_length,const Td expected[],unsigned expected_length,VectorFormat vd_form,VectorFormat vn_form,VectorFormat vm_form)1992 static void Test2OpNEON(const char* name,
1993 Test2OpNEONHelper_t helper,
1994 const Td inputs_d[],
1995 const Tn inputs_n[],
1996 unsigned inputs_n_length,
1997 const Tm inputs_m[],
1998 unsigned inputs_m_length,
1999 const Td expected[],
2000 unsigned expected_length,
2001 VectorFormat vd_form,
2002 VectorFormat vn_form,
2003 VectorFormat vm_form) {
2004 VIXL_ASSERT(inputs_n_length > 0 && inputs_m_length > 0);
2005
2006 const unsigned vd_lane_count = MaxLaneCountFromFormat(vd_form);
2007
2008 const unsigned results_length = inputs_n_length * inputs_m_length;
2009 Td* results = new Td[results_length * vd_lane_count];
2010 const unsigned lane_bit = sizeof(Td) * 8;
2011 const unsigned lane_len_in_hex = MaxHexCharCount<Td, Tm>();
2012
2013 bool skipped;
2014
2015 Test2OpNEON_Helper(helper,
2016 reinterpret_cast<uintptr_t>(inputs_d),
2017 reinterpret_cast<uintptr_t>(inputs_n),
2018 inputs_n_length,
2019 reinterpret_cast<uintptr_t>(inputs_m),
2020 inputs_m_length,
2021 reinterpret_cast<uintptr_t>(results),
2022 vd_form,
2023 vn_form,
2024 vm_form,
2025 &skipped);
2026
2027 if (Test::generate_test_trace()) {
2028 // Print the results.
2029 printf("const uint%u_t kExpected_NEON_%s[] = {\n", lane_bit, name);
2030 for (unsigned iteration = 0; iteration < results_length; iteration++) {
2031 printf(" ");
2032 // Output a separate result for each element of the result vector.
2033 for (unsigned lane = 0; lane < vd_lane_count; lane++) {
2034 unsigned index = lane + (iteration * vd_lane_count);
2035 printf(" 0x%0*" PRIx64 ",",
2036 lane_len_in_hex,
2037 static_cast<uint64_t>(results[index]));
2038 }
2039 printf("\n");
2040 }
2041
2042 printf("};\n");
2043 printf("const unsigned kExpectedCount_NEON_%s = %u;\n",
2044 name,
2045 results_length);
2046 } else if (!skipped) {
2047 // Check the results.
2048 VIXL_CHECK(expected_length == results_length);
2049 unsigned error_count = 0;
2050 unsigned d = 0;
2051 const char* padding = " ";
2052 VIXL_ASSERT(strlen(padding) >= (lane_len_in_hex + 1));
2053 for (unsigned n = 0; n < inputs_n_length; n++) {
2054 for (unsigned m = 0; m < inputs_m_length; m++, d++) {
2055 bool error_in_vector = false;
2056
2057 for (unsigned lane = 0; lane < vd_lane_count; lane++) {
2058 unsigned output_index = (n * inputs_m_length * vd_lane_count) +
2059 (m * vd_lane_count) + lane;
2060
2061 if (results[output_index] != expected[output_index]) {
2062 error_in_vector = true;
2063 break;
2064 }
2065 }
2066
2067 if (error_in_vector && (++error_count <= kErrorReportLimit)) {
2068 printf("%s\n", name);
2069 printf(" Vd%.*s| Vn%.*s| Vm%.*s| Vd%.*s| Expected\n",
2070 lane_len_in_hex + 1,
2071 padding,
2072 lane_len_in_hex + 1,
2073 padding,
2074 lane_len_in_hex + 1,
2075 padding,
2076 lane_len_in_hex + 1,
2077 padding);
2078
2079 for (unsigned lane = 0; lane < vd_lane_count; lane++) {
2080 unsigned output_index = (n * inputs_m_length * vd_lane_count) +
2081 (m * vd_lane_count) + lane;
2082 unsigned input_index_n =
2083 (inputs_n_length - vd_lane_count + n + 1 + lane) %
2084 inputs_n_length;
2085 unsigned input_index_m =
2086 (inputs_m_length - vd_lane_count + m + 1 + lane) %
2087 inputs_m_length;
2088
2089 printf("%c0x%0*" PRIx64 " | 0x%0*" PRIx64 " | 0x%0*" PRIx64
2090 " "
2091 "| 0x%0*" PRIx64 " | 0x%0*" PRIx64 "\n",
2092 results[output_index] != expected[output_index] ? '*' : ' ',
2093 lane_len_in_hex,
2094 static_cast<uint64_t>(inputs_d[lane]),
2095 lane_len_in_hex,
2096 static_cast<uint64_t>(inputs_n[input_index_n]),
2097 lane_len_in_hex,
2098 static_cast<uint64_t>(inputs_m[input_index_m]),
2099 lane_len_in_hex,
2100 static_cast<uint64_t>(results[output_index]),
2101 lane_len_in_hex,
2102 static_cast<uint64_t>(expected[output_index]));
2103 }
2104 }
2105 }
2106 }
2107 VIXL_ASSERT(d == expected_length);
2108 if (error_count > kErrorReportLimit) {
2109 printf("%u other errors follow.\n", error_count - kErrorReportLimit);
2110 }
2111 VIXL_CHECK(error_count == 0);
2112 }
2113 delete[] results;
2114 }
2115
2116
2117 // ==== Tests for instructions of the form <INST> Vd, Vn, Vm[<#index>]. ====
2118
// Emits and runs a test sequence for instructions of the form
// <INST> Vd, Vn, Vm[<#index>].
//
// For every (n, m) input combination, one instruction is emitted per entry of
// 'indices' (the index is baked into the generated code), so results are
// stored in (n, m, index) order. 'vm_subvector_count' scales the width of the
// Vm view handed to 'helper' (vm_bits * vm_subvector_count); it must be a
// non-zero power of two and the scaled view must still fit in a Q register.
static void TestByElementNEON_Helper(TestByElementNEONHelper_t helper,
                                     uintptr_t inputs_d,
                                     uintptr_t inputs_n,
                                     unsigned inputs_n_length,
                                     uintptr_t inputs_m,
                                     unsigned inputs_m_length,
                                     const int indices[],
                                     unsigned indices_length,
                                     uintptr_t results,
                                     VectorFormat vd_form,
                                     VectorFormat vn_form,
                                     VectorFormat vm_form,
                                     unsigned vm_subvector_count,
                                     bool* skipped) {
  VIXL_ASSERT(vd_form != kFormatUndefined);
  VIXL_ASSERT(vn_form != kFormatUndefined);
  VIXL_ASSERT(vm_form != kFormatUndefined);
  VIXL_ASSERT((vm_subvector_count != 0) && IsPowerOf2(vm_subvector_count));

  // Enable the superset of CPU features that any tested by-element NEON
  // instruction might require.
  CPUFeatures features;
  features.Combine(CPUFeatures::kNEON, CPUFeatures::kNEONHalf);
  features.Combine(CPUFeatures::kFP);
  features.Combine(CPUFeatures::kRDM);
  features.Combine(CPUFeatures::kDotProduct);
  features.Combine(CPUFeatures::kFHM);
  SETUP_WITH_FEATURES(features);

  START();

  // Roll up the loop to keep the code size down.
  Label loop_n, loop_m;

  Register out = x0;
  Register inputs_n_base = x1;
  Register inputs_m_base = x2;
  Register inputs_d_base = x3;
  Register inputs_n_last_16bytes = x4;
  Register inputs_m_last_16bytes = x5;
  Register index_n = x6;
  Register index_m = x7;

  // TODO: Refactor duplicate definitions below with a VRegister::As() routine.
  const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form);
  const unsigned vd_lane_count = LaneCountFromFormat(vd_form);

  const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form);
  const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
  const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
  const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form);
  const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form);

  const unsigned vm_bits = RegisterSizeInBitsFromFormat(vm_form);
  const unsigned vm_lane_count = LaneCountFromFormat(vm_form);
  const unsigned vm_lane_bytes = LaneSizeInBytesFromFormat(vm_form);
  const unsigned vm_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vm_form);
  const unsigned vm_lane_bits = LaneSizeInBitsFromFormat(vm_form);

  // The scaled Vm view must still fit in a Q register.
  VIXL_ASSERT((vm_bits * vm_subvector_count) <= kQRegSize);

  // Always load and store 128 bits regardless of the format.
  VRegister vd = v0.V16B();
  VRegister vn = v1.V16B();
  VRegister vm = v2.V16B();
  VRegister vntmp = v3.V16B();
  VRegister vmtmp = v4.V16B();
  VRegister vres = v5.V16B();

  // These will have the correct format for calling the 'helper'.
  // Note: the register codes (1, 2, 5) must match vn, vm and vres above.
  VRegister vn_helper = VRegister(1, vn_bits, vn_lane_count);
  VRegister vm_helper =
      VRegister(2, vm_bits * vm_subvector_count, vm_lane_count);
  VRegister vres_helper = VRegister(5, vd_bits, vd_lane_count);

  // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'.
  VRegister vntmp_single = VRegister(3, vn_lane_bits);
  VRegister vmtmp_single = VRegister(4, vm_lane_bits);

  __ Mov(out, results);

  __ Mov(inputs_d_base, inputs_d);

  __ Mov(inputs_n_base, inputs_n);
  __ Mov(inputs_n_last_16bytes, inputs_n + (inputs_n_length - 16));
  __ Mov(inputs_m_base, inputs_m);
  __ Mov(inputs_m_last_16bytes, inputs_m + (inputs_m_length - 16));

  // Seed the source registers; each loop iteration rotates one new lane in
  // (see Ext below), forming a sliding window over the input buffers.
  __ Ldr(vd, MemOperand(inputs_d_base));
  __ Ldr(vn, MemOperand(inputs_n_last_16bytes));
  __ Ldr(vm, MemOperand(inputs_m_last_16bytes));

  __ Mov(index_n, 0);
  __ Bind(&loop_n);

  // Load the next 'n' lane and shift it into vn.
  __ Ldr(vntmp_single,
         MemOperand(inputs_n_base, index_n, LSL, vn_lane_bytes_log2));
  __ Ext(vn, vn, vntmp, vn_lane_bytes);

  __ Mov(index_m, 0);
  __ Bind(&loop_m);

  // Load the next 'm' lane and shift it into vm.
  __ Ldr(vmtmp_single,
         MemOperand(inputs_m_base, index_m, LSL, vm_lane_bytes_log2));
  __ Ext(vm, vm, vmtmp, vm_lane_bytes);

  // Seed the destination, then emit one instruction (and store one full
  // 128-bit result) per tested element index.
  __ Mov(vres, vd);
  {
    for (unsigned i = 0; i < indices_length; i++) {
      {
        SingleEmissionCheckScope guard(&masm);
        (masm.*helper)(vres_helper, vn_helper, vm_helper, indices[i]);
      }
      __ Str(vres, MemOperand(out, vd.GetSizeInBytes(), PostIndex));
    }
  }

  __ Add(index_m, index_m, 1);
  __ Cmp(index_m, inputs_m_length);
  __ B(lo, &loop_m);

  __ Add(index_n, index_n, 1);
  __ Cmp(index_n, inputs_n_length);
  __ B(lo, &loop_n);

  END();
  TRY_RUN(skipped);
}
2245
2246
2247 // Test NEON instructions. The inputs_*[] and expected[] arrays should be
2248 // arrays of rawbit representation of input values. This ensures that
2249 // exact bit comparisons can be performed.
2250 template <typename Td, typename Tn, typename Tm>
TestByElementNEON(const char * name,TestByElementNEONHelper_t helper,const Td inputs_d[],const Tn inputs_n[],unsigned inputs_n_length,const Tm inputs_m[],unsigned inputs_m_length,const int indices[],unsigned indices_length,const Td expected[],unsigned expected_length,VectorFormat vd_form,VectorFormat vn_form,VectorFormat vm_form,unsigned vm_subvector_count=1)2251 static void TestByElementNEON(const char* name,
2252 TestByElementNEONHelper_t helper,
2253 const Td inputs_d[],
2254 const Tn inputs_n[],
2255 unsigned inputs_n_length,
2256 const Tm inputs_m[],
2257 unsigned inputs_m_length,
2258 const int indices[],
2259 unsigned indices_length,
2260 const Td expected[],
2261 unsigned expected_length,
2262 VectorFormat vd_form,
2263 VectorFormat vn_form,
2264 VectorFormat vm_form,
2265 unsigned vm_subvector_count = 1) {
2266 VIXL_ASSERT(inputs_n_length > 0);
2267 VIXL_ASSERT(inputs_m_length > 0);
2268 VIXL_ASSERT(indices_length > 0);
2269
2270 const unsigned vd_lane_count = MaxLaneCountFromFormat(vd_form);
2271
2272 const unsigned results_length =
2273 inputs_n_length * inputs_m_length * indices_length;
2274 Td* results = new Td[results_length * vd_lane_count];
2275 const unsigned lane_bit = sizeof(Td) * 8;
2276 const unsigned lane_len_in_hex = MaxHexCharCount<Td, Tm>();
2277
2278 bool skipped;
2279
2280 TestByElementNEON_Helper(helper,
2281 reinterpret_cast<uintptr_t>(inputs_d),
2282 reinterpret_cast<uintptr_t>(inputs_n),
2283 inputs_n_length,
2284 reinterpret_cast<uintptr_t>(inputs_m),
2285 inputs_m_length,
2286 indices,
2287 indices_length,
2288 reinterpret_cast<uintptr_t>(results),
2289 vd_form,
2290 vn_form,
2291 vm_form,
2292 vm_subvector_count,
2293 &skipped);
2294
2295 if (Test::generate_test_trace()) {
2296 // Print the results.
2297 printf("const uint%u_t kExpected_NEON_%s[] = {\n", lane_bit, name);
2298 for (unsigned iteration = 0; iteration < results_length; iteration++) {
2299 printf(" ");
2300 // Output a separate result for each element of the result vector.
2301 for (unsigned lane = 0; lane < vd_lane_count; lane++) {
2302 unsigned index = lane + (iteration * vd_lane_count);
2303 printf(" 0x%0*" PRIx64 ",",
2304 lane_len_in_hex,
2305 static_cast<uint64_t>(results[index]));
2306 }
2307 printf("\n");
2308 }
2309
2310 printf("};\n");
2311 printf("const unsigned kExpectedCount_NEON_%s = %u;\n",
2312 name,
2313 results_length);
2314 } else if (!skipped) {
2315 // Check the results.
2316 VIXL_CHECK(expected_length == results_length);
2317 unsigned error_count = 0;
2318 unsigned d = 0;
2319 const char* padding = " ";
2320 VIXL_ASSERT(strlen(padding) >= (lane_len_in_hex + 1));
2321 for (unsigned n = 0; n < inputs_n_length; n++) {
2322 for (unsigned m = 0; m < inputs_m_length; m++) {
2323 for (unsigned index = 0; index < indices_length; index++, d++) {
2324 bool error_in_vector = false;
2325
2326 for (unsigned lane = 0; lane < vd_lane_count; lane++) {
2327 unsigned output_index =
2328 (n * inputs_m_length * indices_length * vd_lane_count) +
2329 (m * indices_length * vd_lane_count) + (index * vd_lane_count) +
2330 lane;
2331
2332 if (results[output_index] != expected[output_index]) {
2333 error_in_vector = true;
2334 break;
2335 }
2336 }
2337
2338 if (error_in_vector && (++error_count <= kErrorReportLimit)) {
2339 printf("%s\n", name);
2340 printf(" Vd%.*s| Vn%.*s| Vm%.*s| Index | Vd%.*s| Expected\n",
2341 lane_len_in_hex + 1,
2342 padding,
2343 lane_len_in_hex + 1,
2344 padding,
2345 lane_len_in_hex + 1,
2346 padding,
2347 lane_len_in_hex + 1,
2348 padding);
2349
2350 for (unsigned lane = 0; lane < vd_lane_count; lane++) {
2351 unsigned output_index =
2352 (n * inputs_m_length * indices_length * vd_lane_count) +
2353 (m * indices_length * vd_lane_count) +
2354 (index * vd_lane_count) + lane;
2355 unsigned input_index_n =
2356 (inputs_n_length - vd_lane_count + n + 1 + lane) %
2357 inputs_n_length;
2358 unsigned input_index_m =
2359 (inputs_m_length - vd_lane_count + m + 1 + lane) %
2360 inputs_m_length;
2361
2362 printf("%c0x%0*" PRIx64 " | 0x%0*" PRIx64 " | 0x%0*" PRIx64
2363 " "
2364 "| [%3d] | 0x%0*" PRIx64 " | 0x%0*" PRIx64 "\n",
2365 results[output_index] != expected[output_index] ? '*'
2366 : ' ',
2367 lane_len_in_hex,
2368 static_cast<uint64_t>(inputs_d[lane]),
2369 lane_len_in_hex,
2370 static_cast<uint64_t>(inputs_n[input_index_n]),
2371 lane_len_in_hex,
2372 static_cast<uint64_t>(inputs_m[input_index_m]),
2373 indices[index],
2374 lane_len_in_hex,
2375 static_cast<uint64_t>(results[output_index]),
2376 lane_len_in_hex,
2377 static_cast<uint64_t>(expected[output_index]));
2378 }
2379 }
2380 }
2381 }
2382 }
2383 VIXL_ASSERT(d == expected_length);
2384 if (error_count > kErrorReportLimit) {
2385 printf("%u other errors follow.\n", error_count - kErrorReportLimit);
2386 }
2387 VIXL_CHECK(error_count == 0);
2388 }
2389 delete[] results;
2390 }
2391
2392
2393 // ==== Tests for instructions of the form <INST> VReg, VReg, #Immediate. ====
2394
2395
// Emits and runs a test sequence for instructions of the form
// <INST> VReg, VReg, #Immediate.
//
// The immediates in 'inputs_m' are encoded directly into the generated
// instructions: for each input lane rotated into vn, one instruction is
// emitted per immediate, so results are stored in (n, m) order.
template <typename Tm>
void Test2OpImmNEON_Helper(
    typename Test2OpImmediateNEONHelper_t<Tm>::mnemonic helper,
    uintptr_t inputs_n,
    unsigned inputs_n_length,
    const Tm inputs_m[],
    unsigned inputs_m_length,
    uintptr_t results,
    VectorFormat vd_form,
    VectorFormat vn_form,
    bool* skipped) {
  VIXL_ASSERT(vd_form != kFormatUndefined && vn_form != kFormatUndefined);

  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);
  START();

  // Roll up the loop to keep the code size down.
  Label loop_n;

  Register out = x0;
  Register inputs_n_base = x1;
  Register inputs_n_last_16bytes = x3;
  Register index_n = x5;

  // TODO: Refactor duplicate definitions below with a VRegister::As() routine.
  const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form);
  const unsigned vd_lane_count = LaneCountFromFormat(vd_form);

  const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form);
  const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
  const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
  const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form);
  const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form);


  // These will be either a D- or a Q-register form, with a single lane
  // (for use in scalar load and store operations).
  VRegister vd = VRegister(0, vd_bits);
  VRegister vn = v1.V16B();
  VRegister vntmp = v3.V16B();

  // These will have the correct format for use when calling 'helper'.
  // Note: the register codes (0, 1) must match vd and vn above.
  VRegister vd_helper = VRegister(0, vd_bits, vd_lane_count);
  VRegister vn_helper = VRegister(1, vn_bits, vn_lane_count);

  // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'.
  VRegister vntmp_single = VRegister(3, vn_lane_bits);

  __ Mov(out, results);

  __ Mov(inputs_n_base, inputs_n);
  __ Mov(inputs_n_last_16bytes,
         inputs_n + (vn_lane_bytes * inputs_n_length) - 16);

  // Seed vn with the last 16 bytes of the input buffer; each loop iteration
  // rotates one new lane in (see Ext below), forming a sliding window.
  __ Ldr(vn, MemOperand(inputs_n_last_16bytes));

  __ Mov(index_n, 0);
  __ Bind(&loop_n);

  __ Ldr(vntmp_single,
         MemOperand(inputs_n_base, index_n, LSL, vn_lane_bytes_log2));
  __ Ext(vn, vn, vntmp, vn_lane_bytes);

  // Set the destination to zero for tests such as '[r]shrn2'.
  // TODO: Setting the destination to values other than zero might be a better
  // test for shift and accumulate instructions (srsra/ssra/usra/ursra).
  __ Movi(vd.V16B(), 0);

  {
    // Emit one instruction (and one result store) per immediate.
    for (unsigned i = 0; i < inputs_m_length; i++) {
      {
        SingleEmissionCheckScope guard(&masm);
        (masm.*helper)(vd_helper, vn_helper, inputs_m[i]);
      }
      __ Str(vd, MemOperand(out, vd.GetSizeInBytes(), PostIndex));
    }
  }

  __ Add(index_n, index_n, 1);
  __ Cmp(index_n, inputs_n_length);
  __ B(lo, &loop_n);

  END();
  TRY_RUN(skipped);
}
2483
2484
2485 // Test NEON instructions. The inputs_*[] and expected[] arrays should be
2486 // arrays of rawbit representation of input values. This ensures that
2487 // exact bit comparisons can be performed.
2488 template <typename Td, typename Tn, typename Tm>
Test2OpImmNEON(const char * name,typename Test2OpImmediateNEONHelper_t<Tm>::mnemonic helper,const Tn inputs_n[],unsigned inputs_n_length,const Tm inputs_m[],unsigned inputs_m_length,const Td expected[],unsigned expected_length,VectorFormat vd_form,VectorFormat vn_form)2489 static void Test2OpImmNEON(
2490 const char* name,
2491 typename Test2OpImmediateNEONHelper_t<Tm>::mnemonic helper,
2492 const Tn inputs_n[],
2493 unsigned inputs_n_length,
2494 const Tm inputs_m[],
2495 unsigned inputs_m_length,
2496 const Td expected[],
2497 unsigned expected_length,
2498 VectorFormat vd_form,
2499 VectorFormat vn_form) {
2500 VIXL_ASSERT(inputs_n_length > 0 && inputs_m_length > 0);
2501
2502 const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
2503 const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
2504 const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
2505
2506 const unsigned results_length = inputs_n_length * inputs_m_length;
2507 Td* results = new Td[results_length * vd_lane_count];
2508 const unsigned lane_bit = sizeof(Td) * 8;
2509 const unsigned lane_len_in_hex = MaxHexCharCount<Td, Tn>();
2510
2511 bool skipped;
2512
2513 Test2OpImmNEON_Helper(helper,
2514 reinterpret_cast<uintptr_t>(inputs_n),
2515 inputs_n_length,
2516 inputs_m,
2517 inputs_m_length,
2518 reinterpret_cast<uintptr_t>(results),
2519 vd_form,
2520 vn_form,
2521 &skipped);
2522
2523 if (Test::generate_test_trace()) {
2524 // Print the results.
2525 printf("const uint%u_t kExpected_NEON_%s[] = {\n", lane_bit, name);
2526 for (unsigned iteration = 0; iteration < results_length; iteration++) {
2527 printf(" ");
2528 // Output a separate result for each element of the result vector.
2529 for (unsigned lane = 0; lane < vd_lane_count; lane++) {
2530 unsigned index = lane + (iteration * vd_lane_count);
2531 printf(" 0x%0*" PRIx64 ",",
2532 lane_len_in_hex,
2533 static_cast<uint64_t>(results[index]));
2534 }
2535 printf("\n");
2536 }
2537
2538 printf("};\n");
2539 printf("const unsigned kExpectedCount_NEON_%s = %u;\n",
2540 name,
2541 results_length);
2542 } else if (!skipped) {
2543 // Check the results.
2544 VIXL_CHECK(expected_length == results_length);
2545 unsigned error_count = 0;
2546 unsigned d = 0;
2547 const char* padding = " ";
2548 VIXL_ASSERT(strlen(padding) >= (lane_len_in_hex + 1));
2549 for (unsigned n = 0; n < inputs_n_length; n++) {
2550 for (unsigned m = 0; m < inputs_m_length; m++, d++) {
2551 bool error_in_vector = false;
2552
2553 for (unsigned lane = 0; lane < vd_lane_count; lane++) {
2554 unsigned output_index = (n * inputs_m_length * vd_lane_count) +
2555 (m * vd_lane_count) + lane;
2556
2557 if (results[output_index] != expected[output_index]) {
2558 error_in_vector = true;
2559 break;
2560 }
2561 }
2562
2563 if (error_in_vector && (++error_count <= kErrorReportLimit)) {
2564 printf("%s\n", name);
2565 printf(" Vn%.*s| Imm%.*s| Vd%.*s| Expected\n",
2566 lane_len_in_hex + 1,
2567 padding,
2568 lane_len_in_hex,
2569 padding,
2570 lane_len_in_hex + 1,
2571 padding);
2572
2573 const unsigned first_index_n =
2574 inputs_n_length - (16 / vn_lane_bytes) + n + 1;
2575
2576 for (unsigned lane = 0; lane < std::max(vd_lane_count, vn_lane_count);
2577 lane++) {
2578 unsigned output_index = (n * inputs_m_length * vd_lane_count) +
2579 (m * vd_lane_count) + lane;
2580 unsigned input_index_n = (first_index_n + lane) % inputs_n_length;
2581 unsigned input_index_m = m;
2582
2583 printf("%c0x%0*" PRIx64 " | 0x%0*" PRIx64
2584 " "
2585 "| 0x%0*" PRIx64 " | 0x%0*" PRIx64 "\n",
2586 results[output_index] != expected[output_index] ? '*' : ' ',
2587 lane_len_in_hex,
2588 static_cast<uint64_t>(inputs_n[input_index_n]),
2589 lane_len_in_hex,
2590 static_cast<uint64_t>(inputs_m[input_index_m]),
2591 lane_len_in_hex,
2592 static_cast<uint64_t>(results[output_index]),
2593 lane_len_in_hex,
2594 static_cast<uint64_t>(expected[output_index]));
2595 }
2596 }
2597 }
2598 }
2599 VIXL_ASSERT(d == expected_length);
2600 if (error_count > kErrorReportLimit) {
2601 printf("%u other errors follow.\n", error_count - kErrorReportLimit);
2602 }
2603 VIXL_CHECK(error_count == 0);
2604 }
2605 delete[] results;
2606 }
2607
2608
2609 // ==== Tests for instructions of the form <INST> VReg, #Imm, VReg, #Imm. ====
2610
2611
// Emits and runs a test sequence for instructions of the form
// <INST> VReg, #Imm, VReg, #Imm, where the instruction reads and updates the
// destination (vres is re-seeded from 'inputs_d' before every emission).
//
// Both immediate arrays are encoded directly into the generated code: for
// every input lane rotated into vn, one instruction is emitted per
// (imm1, imm2) pair, so results are stored in (n, imm1, imm2) order.
static void TestOpImmOpImmNEON_Helper(TestOpImmOpImmVdUpdateNEONHelper_t helper,
                                      uintptr_t inputs_d,
                                      const int inputs_imm1[],
                                      unsigned inputs_imm1_length,
                                      uintptr_t inputs_n,
                                      unsigned inputs_n_length,
                                      const int inputs_imm2[],
                                      unsigned inputs_imm2_length,
                                      uintptr_t results,
                                      VectorFormat vd_form,
                                      VectorFormat vn_form,
                                      bool* skipped) {
  VIXL_ASSERT(vd_form != kFormatUndefined);
  VIXL_ASSERT(vn_form != kFormatUndefined);

  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);
  START();

  // Roll up the loop to keep the code size down.
  Label loop_n;

  Register out = x0;
  Register inputs_d_base = x1;
  Register inputs_n_base = x2;
  Register inputs_n_last_vector = x4;
  Register index_n = x6;

  // TODO: Refactor duplicate definitions below with a VRegister::As() routine.
  const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form);
  const unsigned vd_lane_count = LaneCountFromFormat(vd_form);

  const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form);
  const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
  const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
  const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form);
  const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form);


  // These will be either a D- or a Q-register form, with a single lane
  // (for use in scalar load and store operations).
  VRegister vd = VRegister(0, vd_bits);
  VRegister vn = VRegister(1, vn_bits);
  VRegister vntmp = VRegister(4, vn_bits);
  VRegister vres = VRegister(5, vn_bits);

  // Correctly-formatted views for calling 'helper'; the register codes
  // (1, 5) must match vn and vres above.
  VRegister vn_helper = VRegister(1, vn_bits, vn_lane_count);
  VRegister vres_helper = VRegister(5, vd_bits, vd_lane_count);

  // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'.
  VRegister vntmp_single = VRegister(4, vn_lane_bits);

  // Same registers for use in the 'ext' instructions.
  VRegister vn_ext = (kDRegSize == vn_bits) ? vn.V8B() : vn.V16B();
  VRegister vntmp_ext = (kDRegSize == vn_bits) ? vntmp.V8B() : vntmp.V16B();

  __ Mov(out, results);

  // vd is loaded once and provides the initial destination value for every
  // emitted instruction (copied into vres inside the loop).
  __ Mov(inputs_d_base, inputs_d);

  __ Mov(inputs_n_base, inputs_n);
  __ Mov(inputs_n_last_vector,
         inputs_n + vn_lane_bytes * (inputs_n_length - vn_lane_count));

  __ Ldr(vd, MemOperand(inputs_d_base));

  // Seed vn with the last full input vector; each loop_n iteration rotates
  // one new lane in (see Ext below).
  __ Ldr(vn, MemOperand(inputs_n_last_vector));

  __ Mov(index_n, 0);
  __ Bind(&loop_n);

  __ Ldr(vntmp_single,
         MemOperand(inputs_n_base, index_n, LSL, vn_lane_bytes_log2));
  __ Ext(vn_ext, vn_ext, vntmp_ext, vn_lane_bytes);

  {
    // Emission budget: up to three instructions (mov, <INST>, str) per
    // (imm1, imm2) combination.
    EmissionCheckScope guard(&masm,
                             kInstructionSize * inputs_imm1_length *
                                 inputs_imm2_length * 3);
    for (unsigned i = 0; i < inputs_imm1_length; i++) {
      for (unsigned j = 0; j < inputs_imm2_length; j++) {
        __ Mov(vres, vd);
        (masm.*helper)(vres_helper, inputs_imm1[i], vn_helper, inputs_imm2[j]);
        __ Str(vres, MemOperand(out, vd.GetSizeInBytes(), PostIndex));
      }
    }
  }

  __ Add(index_n, index_n, 1);
  __ Cmp(index_n, inputs_n_length);
  __ B(lo, &loop_n);

  END();
  TRY_RUN(skipped);
}
2706
2707
2708 // Test NEON instructions. The inputs_*[] and expected[] arrays should be
2709 // arrays of rawbit representation of input values. This ensures that
2710 // exact bit comparisons can be performed.
2711 template <typename Td, typename Tn>
TestOpImmOpImmNEON(const char * name,TestOpImmOpImmVdUpdateNEONHelper_t helper,const Td inputs_d[],const int inputs_imm1[],unsigned inputs_imm1_length,const Tn inputs_n[],unsigned inputs_n_length,const int inputs_imm2[],unsigned inputs_imm2_length,const Td expected[],unsigned expected_length,VectorFormat vd_form,VectorFormat vn_form)2712 static void TestOpImmOpImmNEON(const char* name,
2713 TestOpImmOpImmVdUpdateNEONHelper_t helper,
2714 const Td inputs_d[],
2715 const int inputs_imm1[],
2716 unsigned inputs_imm1_length,
2717 const Tn inputs_n[],
2718 unsigned inputs_n_length,
2719 const int inputs_imm2[],
2720 unsigned inputs_imm2_length,
2721 const Td expected[],
2722 unsigned expected_length,
2723 VectorFormat vd_form,
2724 VectorFormat vn_form) {
2725 VIXL_ASSERT(inputs_n_length > 0);
2726 VIXL_ASSERT(inputs_imm1_length > 0);
2727 VIXL_ASSERT(inputs_imm2_length > 0);
2728
2729 const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
2730
2731 const unsigned results_length =
2732 inputs_n_length * inputs_imm1_length * inputs_imm2_length;
2733
2734 Td* results = new Td[results_length * vd_lane_count];
2735 const unsigned lane_bit = sizeof(Td) * 8;
2736 const unsigned lane_len_in_hex = MaxHexCharCount<Td, Tn>();
2737
2738 bool skipped;
2739
2740 TestOpImmOpImmNEON_Helper(helper,
2741 reinterpret_cast<uintptr_t>(inputs_d),
2742 inputs_imm1,
2743 inputs_imm1_length,
2744 reinterpret_cast<uintptr_t>(inputs_n),
2745 inputs_n_length,
2746 inputs_imm2,
2747 inputs_imm2_length,
2748 reinterpret_cast<uintptr_t>(results),
2749 vd_form,
2750 vn_form,
2751 &skipped);
2752
2753 if (Test::generate_test_trace()) {
2754 // Print the results.
2755 printf("const uint%u_t kExpected_NEON_%s[] = {\n", lane_bit, name);
2756 for (unsigned iteration = 0; iteration < results_length; iteration++) {
2757 printf(" ");
2758 // Output a separate result for each element of the result vector.
2759 for (unsigned lane = 0; lane < vd_lane_count; lane++) {
2760 unsigned index = lane + (iteration * vd_lane_count);
2761 printf(" 0x%0*" PRIx64 ",",
2762 lane_len_in_hex,
2763 static_cast<uint64_t>(results[index]));
2764 }
2765 printf("\n");
2766 }
2767
2768 printf("};\n");
2769 printf("const unsigned kExpectedCount_NEON_%s = %u;\n",
2770 name,
2771 results_length);
2772 } else if (!skipped) {
2773 // Check the results.
2774 VIXL_CHECK(expected_length == results_length);
2775 unsigned error_count = 0;
2776 unsigned counted_length = 0;
2777 const char* padding = " ";
2778 VIXL_ASSERT(strlen(padding) >= (lane_len_in_hex + 1));
2779 for (unsigned n = 0; n < inputs_n_length; n++) {
2780 for (unsigned imm1 = 0; imm1 < inputs_imm1_length; imm1++) {
2781 for (unsigned imm2 = 0; imm2 < inputs_imm2_length; imm2++) {
2782 bool error_in_vector = false;
2783
2784 counted_length++;
2785
2786 for (unsigned lane = 0; lane < vd_lane_count; lane++) {
2787 unsigned output_index =
2788 (n * inputs_imm1_length * inputs_imm2_length * vd_lane_count) +
2789 (imm1 * inputs_imm2_length * vd_lane_count) +
2790 (imm2 * vd_lane_count) + lane;
2791
2792 if (results[output_index] != expected[output_index]) {
2793 error_in_vector = true;
2794 break;
2795 }
2796 }
2797
2798 if (error_in_vector && (++error_count <= kErrorReportLimit)) {
2799 printf("%s\n", name);
2800 printf(" Vd%.*s| Imm%.*s| Vn%.*s| Imm%.*s| Vd%.*s| Expected\n",
2801 lane_len_in_hex + 1,
2802 padding,
2803 lane_len_in_hex,
2804 padding,
2805 lane_len_in_hex + 1,
2806 padding,
2807 lane_len_in_hex,
2808 padding,
2809 lane_len_in_hex + 1,
2810 padding);
2811
2812 for (unsigned lane = 0; lane < vd_lane_count; lane++) {
2813 unsigned output_index =
2814 (n * inputs_imm1_length * inputs_imm2_length *
2815 vd_lane_count) +
2816 (imm1 * inputs_imm2_length * vd_lane_count) +
2817 (imm2 * vd_lane_count) + lane;
2818 unsigned input_index_n =
2819 (inputs_n_length - vd_lane_count + n + 1 + lane) %
2820 inputs_n_length;
2821 unsigned input_index_imm1 = imm1;
2822 unsigned input_index_imm2 = imm2;
2823
2824 printf("%c0x%0*" PRIx64 " | 0x%0*" PRIx64 " | 0x%0*" PRIx64
2825 " "
2826 "| 0x%0*" PRIx64 " | 0x%0*" PRIx64 " | 0x%0*" PRIx64 "\n",
2827 results[output_index] != expected[output_index] ? '*'
2828 : ' ',
2829 lane_len_in_hex,
2830 static_cast<uint64_t>(inputs_d[lane]),
2831 lane_len_in_hex,
2832 static_cast<uint64_t>(inputs_imm1[input_index_imm1]),
2833 lane_len_in_hex,
2834 static_cast<uint64_t>(inputs_n[input_index_n]),
2835 lane_len_in_hex,
2836 static_cast<uint64_t>(inputs_imm2[input_index_imm2]),
2837 lane_len_in_hex,
2838 static_cast<uint64_t>(results[output_index]),
2839 lane_len_in_hex,
2840 static_cast<uint64_t>(expected[output_index]));
2841 }
2842 }
2843 }
2844 }
2845 }
2846 VIXL_ASSERT(counted_length == expected_length);
2847 if (error_count > kErrorReportLimit) {
2848 printf("%u other errors follow.\n", error_count - kErrorReportLimit);
2849 }
2850 VIXL_CHECK(error_count == 0);
2851 }
2852 delete[] results;
2853 }
2854
2855
2856 // ==== Floating-point tests. ====
2857
2858
2859 // Standard floating-point test expansion for both double- and single-precision
2860 // operations.
// Turn the argument into a string literal (single level; the argument is not
// macro-expanded before stringification).
#define STRINGIFY(s) #s

// Invoke the Test<type> driver (Test1Op, Test2Op, Test3Op, TestCmp, ...) for
// one <mnemonic>_<variant> combination. It runs &MacroAssembler::mnemonic over
// every element of `input` and checks the results against (or, in trace mode,
// regenerates) the kExpected_* / kExpectedCount_* tables from
// test-simulator-traces-aarch64.h.
#define CALL_TEST_FP_HELPER(mnemonic, variant, type, input) \
  Test##type(STRINGIFY(mnemonic) "_" STRINGIFY(variant),    \
             &MacroAssembler::mnemonic,                     \
             input,                                         \
             sizeof(input) / sizeof(input[0]),              \
             kExpected_##mnemonic##_##variant,              \
             kExpectedCount_##mnemonic##_##variant)

// Define double- (_d) and single-precision (_s) tests for one FP instruction.
#define DEFINE_TEST_FP(mnemonic, type, input)                       \
  TEST(mnemonic##_d) {                                              \
    CALL_TEST_FP_HELPER(mnemonic, d, type, kInputDouble##input);    \
  }                                                                 \
  TEST(mnemonic##_s) {                                              \
    CALL_TEST_FP_HELPER(mnemonic, s, type, kInputFloat##input);     \
  }

// As DEFINE_TEST_FP, but additionally defines a half-precision (_h) test.
#define DEFINE_TEST_FP_FP16(mnemonic, type, input)                  \
  TEST(mnemonic##_d) {                                              \
    CALL_TEST_FP_HELPER(mnemonic, d, type, kInputDouble##input);    \
  }                                                                 \
  TEST(mnemonic##_s) {                                              \
    CALL_TEST_FP_HELPER(mnemonic, s, type, kInputFloat##input);     \
  }                                                                 \
  TEST(mnemonic##_h) {                                              \
    CALL_TEST_FP_HELPER(mnemonic, h, type, kInputFloat16##input);   \
  }
2889
2890
// TODO: Test with a newer version of valgrind.
//
// Note: valgrind-3.10.0 does not properly interpret libm's fma() on x86_64.
// Therefore this test will be exiting through an ASSERT and thus leaking
// memory.
// Fused multiply-add family: three-operand instructions, d/s/h variants.
DEFINE_TEST_FP_FP16(fmadd, 3Op, Basic)
DEFINE_TEST_FP_FP16(fmsub, 3Op, Basic)
DEFINE_TEST_FP_FP16(fnmadd, 3Op, Basic)
DEFINE_TEST_FP_FP16(fnmsub, 3Op, Basic)

// Two-operand arithmetic instructions, d/s/h variants.
DEFINE_TEST_FP_FP16(fadd, 2Op, Basic)
DEFINE_TEST_FP_FP16(fdiv, 2Op, Basic)
DEFINE_TEST_FP_FP16(fmax, 2Op, Basic)
DEFINE_TEST_FP_FP16(fmaxnm, 2Op, Basic)
DEFINE_TEST_FP_FP16(fmin, 2Op, Basic)
DEFINE_TEST_FP_FP16(fminnm, 2Op, Basic)
DEFINE_TEST_FP_FP16(fmul, 2Op, Basic)
DEFINE_TEST_FP_FP16(fsub, 2Op, Basic)
DEFINE_TEST_FP_FP16(fnmul, 2Op, Basic)

// One-operand instructions, d/s/h variants. frint32*/frint64* have no FP16
// form, so they use the d/s-only expansion.
DEFINE_TEST_FP_FP16(fabs, 1Op, Basic)
DEFINE_TEST_FP_FP16(fmov, 1Op, Basic)
DEFINE_TEST_FP_FP16(fneg, 1Op, Basic)
DEFINE_TEST_FP_FP16(fsqrt, 1Op, Basic)
DEFINE_TEST_FP(frint32x, 1Op, Conversions)
DEFINE_TEST_FP(frint64x, 1Op, Conversions)
DEFINE_TEST_FP(frint32z, 1Op, Conversions)
DEFINE_TEST_FP(frint64z, 1Op, Conversions)
DEFINE_TEST_FP_FP16(frinta, 1Op, Conversions)
DEFINE_TEST_FP_FP16(frinti, 1Op, Conversions)
DEFINE_TEST_FP_FP16(frintm, 1Op, Conversions)
DEFINE_TEST_FP_FP16(frintn, 1Op, Conversions)
DEFINE_TEST_FP_FP16(frintp, 1Op, Conversions)
DEFINE_TEST_FP_FP16(frintx, 1Op, Conversions)
DEFINE_TEST_FP_FP16(frintz, 1Op, Conversions)

// Comparisons: register-register (Cmp) and compare-against-zero (CmpZero).
TEST(fcmp_d) { CALL_TEST_FP_HELPER(fcmp, d, Cmp, kInputDoubleBasic); }
TEST(fcmp_s) { CALL_TEST_FP_HELPER(fcmp, s, Cmp, kInputFloatBasic); }
TEST(fcmp_dz) { CALL_TEST_FP_HELPER(fcmp, dz, CmpZero, kInputDoubleBasic); }
TEST(fcmp_sz) { CALL_TEST_FP_HELPER(fcmp, sz, CmpZero, kInputFloatBasic); }

// Precision conversions: double->single (sd) and single->double (ds).
TEST(fcvt_sd) { CALL_TEST_FP_HELPER(fcvt, sd, 1Op, kInputDoubleConversions); }
TEST(fcvt_ds) { CALL_TEST_FP_HELPER(fcvt, ds, 1Op, kInputFloatConversions); }
2934
// Define FP-to-integer conversion tests for every destination/source
// combination: x (64-bit) and w (32-bit) integer destinations crossed with
// d/s/h floating-point sources.
#define DEFINE_TEST_FP_TO_INT(mnemonic, type, input)                  \
  TEST(mnemonic##_xd) {                                               \
    CALL_TEST_FP_HELPER(mnemonic, xd, type, kInputDouble##input);     \
  }                                                                   \
  TEST(mnemonic##_xs) {                                               \
    CALL_TEST_FP_HELPER(mnemonic, xs, type, kInputFloat##input);      \
  }                                                                   \
  TEST(mnemonic##_xh) {                                               \
    CALL_TEST_FP_HELPER(mnemonic, xh, type, kInputFloat16##input);    \
  }                                                                   \
  TEST(mnemonic##_wd) {                                               \
    CALL_TEST_FP_HELPER(mnemonic, wd, type, kInputDouble##input);     \
  }                                                                   \
  TEST(mnemonic##_ws) {                                               \
    CALL_TEST_FP_HELPER(mnemonic, ws, type, kInputFloat##input);      \
  }                                                                   \
  TEST(mnemonic##_wh) {                                               \
    CALL_TEST_FP_HELPER(mnemonic, wh, type, kInputFloat16##input);    \
  }

// Convert-to-integer with the various rounding modes (a/m/n: signed FPToS,
// unsigned FPToU) and round-towards-zero fixed-point forms (FPToFixed*).
DEFINE_TEST_FP_TO_INT(fcvtas, FPToS, Conversions)
DEFINE_TEST_FP_TO_INT(fcvtau, FPToU, Conversions)
DEFINE_TEST_FP_TO_INT(fcvtms, FPToS, Conversions)
DEFINE_TEST_FP_TO_INT(fcvtmu, FPToU, Conversions)
DEFINE_TEST_FP_TO_INT(fcvtns, FPToS, Conversions)
DEFINE_TEST_FP_TO_INT(fcvtnu, FPToU, Conversions)
DEFINE_TEST_FP_TO_INT(fcvtzs, FPToFixedS, Conversions)
DEFINE_TEST_FP_TO_INT(fcvtzu, FPToFixedU, Conversions)

// FJCVTZS only exists as double -> 32-bit (JavaScript conversion).
#define DEFINE_TEST_FP_TO_JS_INT(mnemonic, type, input)               \
  TEST(mnemonic##_wd) {                                               \
    CALL_TEST_FP_HELPER(mnemonic, wd, type, kInputDouble##input);     \
  }

DEFINE_TEST_FP_TO_JS_INT(fjcvtzs, FPToS, Conversions)
2970
2971 // TODO: Scvtf-fixed-point
2972 // TODO: Scvtf-integer
2973 // TODO: Ucvtf-fixed-point
2974 // TODO: Ucvtf-integer
2975
2976 // TODO: Fccmp
2977 // TODO: Fcsel
2978
2979
2980 // ==== NEON Tests. ====
2981
// One-operand NEON test: run `mnemonic` with Vd in vdform and Vn in vnform
// over the input_n table, checking against the generated trace arrays.
#define CALL_TEST_NEON_HELPER_1Op(mnemonic, vdform, vnform, input_n) \
  Test1OpNEON(STRINGIFY(mnemonic) "_" STRINGIFY(vdform),             \
              &MacroAssembler::mnemonic,                             \
              input_n,                                               \
              (sizeof(input_n) / sizeof(input_n[0])),                \
              kExpected_NEON_##mnemonic##_##vdform,                  \
              kExpectedCount_NEON_##mnemonic##_##vdform,             \
              kFormat##vdform,                                       \
              kFormat##vnform)

// Across-lanes NEON test (scalar Vd produced from a whole Vn vector); the
// trace name encodes both the destination and source forms.
#define CALL_TEST_NEON_HELPER_1OpAcross(mnemonic, vdform, vnform, input_n)  \
  Test1OpAcrossNEON(STRINGIFY(mnemonic) "_" STRINGIFY(vdform) "_" STRINGIFY( \
                        vnform),                                            \
                    &MacroAssembler::mnemonic,                              \
                    input_n,                                                \
                    (sizeof(input_n) / sizeof(input_n[0])),                 \
                    kExpected_NEON_##mnemonic##_##vdform##_##vnform,        \
                    kExpectedCount_NEON_##mnemonic##_##vdform##_##vnform,   \
                    kFormat##vdform,                                        \
                    kFormat##vnform)

// Two-operand NEON test: `mnemonic` combines Vn and Vm into Vd, with input_d
// supplying the initial destination (accumulator) contents.
#define CALL_TEST_NEON_HELPER_2Op(                                  \
    mnemonic, vdform, vnform, vmform, input_d, input_n, input_m)    \
  Test2OpNEON(STRINGIFY(mnemonic) "_" STRINGIFY(vdform),            \
              &MacroAssembler::mnemonic,                            \
              input_d,                                              \
              input_n,                                              \
              (sizeof(input_n) / sizeof(input_n[0])),               \
              input_m,                                              \
              (sizeof(input_m) / sizeof(input_m[0])),               \
              kExpected_NEON_##mnemonic##_##vdform,                 \
              kExpectedCount_NEON_##mnemonic##_##vdform,            \
              kFormat##vdform,                                      \
              kFormat##vnform,                                      \
              kFormat##vmform)

// Vector-and-immediate NEON test: input_m holds the immediate values. The
// "_2OPIMM" suffix keeps the trace names distinct from plain 2Op tests.
#define CALL_TEST_NEON_HELPER_2OpImm(                                 \
    mnemonic, vdform, vnform, input_n, input_m)                       \
  Test2OpImmNEON(STRINGIFY(mnemonic) "_" STRINGIFY(vdform) "_2OPIMM", \
                 &MacroAssembler::mnemonic,                           \
                 input_n,                                             \
                 (sizeof(input_n) / sizeof(input_n[0])),              \
                 input_m,                                             \
                 (sizeof(input_m) / sizeof(input_m[0])),              \
                 kExpected_NEON_##mnemonic##_##vdform##_2OPIMM,       \
                 kExpectedCount_NEON_##mnemonic##_##vdform##_2OPIMM,  \
                 kFormat##vdform,                                     \
                 kFormat##vnform)

// By-element NEON test: `indices` lists the Vm lane indices to exercise.
#define CALL_TEST_NEON_HELPER_ByElement(                                   \
    mnemonic, vdform, vnform, vmform, input_d, input_n, input_m, indices)  \
  TestByElementNEON(                                                       \
      STRINGIFY(mnemonic) "_" STRINGIFY(vdform) "_" STRINGIFY(             \
          vnform) "_" STRINGIFY(vmform),                                   \
      &MacroAssembler::mnemonic,                                           \
      input_d,                                                             \
      input_n,                                                             \
      (sizeof(input_n) / sizeof(input_n[0])),                              \
      input_m,                                                             \
      (sizeof(input_m) / sizeof(input_m[0])),                              \
      indices,                                                             \
      (sizeof(indices) / sizeof(indices[0])),                              \
      kExpected_NEON_##mnemonic##_##vdform##_##vnform##_##vmform,          \
      kExpectedCount_NEON_##mnemonic##_##vdform##_##vnform##_##vmform,     \
      kFormat##vdform,                                                     \
      kFormat##vnform,                                                     \
      kFormat##vmform)
3049
// By-element test variant for dot-product instructions: Vm is addressed in
// sub-vectors, so the extra vm_subvector_count argument tells the driver how
// many sub-vectors each Vm register holds.
#define CALL_TEST_NEON_HELPER_ByElement_Dot_Product(mnemonic,              \
                                                    vdform,                \
                                                    vnform,                \
                                                    vmform,                \
                                                    input_d,               \
                                                    input_n,               \
                                                    input_m,               \
                                                    indices,               \
                                                    vm_subvector_count)    \
  TestByElementNEON(                                                       \
      STRINGIFY(mnemonic) "_" STRINGIFY(vdform) "_" STRINGIFY(             \
          vnform) "_" STRINGIFY(vmform),                                   \
      &MacroAssembler::mnemonic,                                           \
      input_d,                                                             \
      input_n,                                                             \
      (sizeof(input_n) / sizeof(input_n[0])),                              \
      input_m,                                                             \
      (sizeof(input_m) / sizeof(input_m[0])),                              \
      indices,                                                             \
      (sizeof(indices) / sizeof(indices[0])),                              \
      kExpected_NEON_##mnemonic##_##vdform##_##vnform##_##vmform,          \
      kExpectedCount_NEON_##mnemonic##_##vdform##_##vnform##_##vmform,     \
      kFormat##vdform,                                                     \
      kFormat##vnform,                                                     \
      kFormat##vmform,                                                     \
      vm_subvector_count)

// Test for instructions taking (imm, reg, imm) operands (e.g. ins/shift
// combinations); `helper` is the function that emits the instruction, since
// there is no single MacroAssembler signature covering these forms.
#define CALL_TEST_NEON_HELPER_OpImmOpImm(helper,                          \
                                         mnemonic,                        \
                                         vdform,                          \
                                         vnform,                          \
                                         input_d,                         \
                                         input_imm1,                      \
                                         input_n,                         \
                                         input_imm2)                      \
  TestOpImmOpImmNEON(STRINGIFY(mnemonic) "_" STRINGIFY(vdform),           \
                     helper,                                              \
                     input_d,                                             \
                     input_imm1,                                          \
                     (sizeof(input_imm1) / sizeof(input_imm1[0])),        \
                     input_n,                                             \
                     (sizeof(input_n) / sizeof(input_n[0])),              \
                     input_imm2,                                          \
                     (sizeof(input_imm2) / sizeof(input_imm2[0])),        \
                     kExpected_NEON_##mnemonic##_##vdform,                \
                     kExpectedCount_NEON_##mnemonic##_##vdform,           \
                     kFormat##vdform,                                     \
                     kFormat##vnform)
3098
// "2SAME": one-operand instructions where Vd and Vn share the same format.
#define CALL_TEST_NEON_HELPER_2SAME(mnemonic, variant, input) \
  CALL_TEST_NEON_HELPER_1Op(mnemonic, variant, variant, input)

// Per-arrangement expansions; composed below to build the standard sets.
#define DEFINE_TEST_NEON_2SAME_8B_16B(mnemonic, input)                 \
  TEST(mnemonic##_8B) {                                                \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, 8B, kInput8bits##input);     \
  }                                                                    \
  TEST(mnemonic##_16B) {                                               \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, 16B, kInput8bits##input);    \
  }

#define DEFINE_TEST_NEON_2SAME_4H_8H(mnemonic, input)                  \
  TEST(mnemonic##_4H) {                                                \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, 4H, kInput16bits##input);    \
  }                                                                    \
  TEST(mnemonic##_8H) {                                                \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, 8H, kInput16bits##input);    \
  }

#define DEFINE_TEST_NEON_2SAME_2S_4S(mnemonic, input)                  \
  TEST(mnemonic##_2S) {                                                \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, 2S, kInput32bits##input);    \
  }                                                                    \
  TEST(mnemonic##_4S) {                                                \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, 4S, kInput32bits##input);    \
  }

// B and H arrangements only.
#define DEFINE_TEST_NEON_2SAME_BH(mnemonic, input)  \
  DEFINE_TEST_NEON_2SAME_8B_16B(mnemonic, input)    \
  DEFINE_TEST_NEON_2SAME_4H_8H(mnemonic, input)

// All integer arrangements except 2D.
#define DEFINE_TEST_NEON_2SAME_NO2D(mnemonic, input)  \
  DEFINE_TEST_NEON_2SAME_BH(mnemonic, input)          \
  DEFINE_TEST_NEON_2SAME_2S_4S(mnemonic, input)

// All integer arrangements including 2D.
#define DEFINE_TEST_NEON_2SAME(mnemonic, input)                        \
  DEFINE_TEST_NEON_2SAME_NO2D(mnemonic, input)                         \
  TEST(mnemonic##_2D) {                                                \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, 2D, kInput64bits##input);    \
  }
// S and D arrangements only (typical for FP-capable integer ops).
#define DEFINE_TEST_NEON_2SAME_SD(mnemonic, input)                     \
  DEFINE_TEST_NEON_2SAME_2S_4S(mnemonic, input)                        \
  TEST(mnemonic##_2D) {                                                \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, 2D, kInput64bits##input);    \
  }

// Floating-point arrangements: 2S/4S (single) and 2D (double).
#define DEFINE_TEST_NEON_2SAME_FP(mnemonic, input)                     \
  TEST(mnemonic##_2S) {                                                \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, 2S, kInputFloat##input);     \
  }                                                                    \
  TEST(mnemonic##_4S) {                                                \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, 4S, kInputFloat##input);     \
  }                                                                    \
  TEST(mnemonic##_2D) {                                                \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, 2D, kInputDouble##input);    \
  }

// FP arrangements plus the FP16 4H/8H forms.
#define DEFINE_TEST_NEON_2SAME_FP_FP16(mnemonic, input)                \
  DEFINE_TEST_NEON_2SAME_FP(mnemonic, input)                           \
  TEST(mnemonic##_4H) {                                                \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, 4H, kInputFloat16##input);   \
  }                                                                    \
  TEST(mnemonic##_8H) {                                                \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, 8H, kInputFloat16##input);   \
  }

// Scalar FP forms: H, S and D registers.
#define DEFINE_TEST_NEON_2SAME_FP_FP16_SCALAR(mnemonic, input)         \
  TEST(mnemonic##_H) {                                                 \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, H, kInputFloat16##input);    \
  }                                                                    \
  TEST(mnemonic##_S) {                                                 \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, S, kInputFloat##input);      \
  }                                                                    \
  TEST(mnemonic##_D) {                                                 \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, D, kInputDouble##input);     \
  }

// Scalar integer forms, one macro per register size so subsets can be built.
#define DEFINE_TEST_NEON_2SAME_SCALAR_B(mnemonic, input)               \
  TEST(mnemonic##_B) {                                                 \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, B, kInput8bits##input);      \
  }
#define DEFINE_TEST_NEON_2SAME_SCALAR_H(mnemonic, input)               \
  TEST(mnemonic##_H) {                                                 \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, H, kInput16bits##input);     \
  }
#define DEFINE_TEST_NEON_2SAME_SCALAR_S(mnemonic, input)               \
  TEST(mnemonic##_S) {                                                 \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, S, kInput32bits##input);     \
  }
#define DEFINE_TEST_NEON_2SAME_SCALAR_D(mnemonic, input)               \
  TEST(mnemonic##_D) {                                                 \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, D, kInput64bits##input);     \
  }

// All scalar integer sizes.
#define DEFINE_TEST_NEON_2SAME_SCALAR(mnemonic, input)  \
  DEFINE_TEST_NEON_2SAME_SCALAR_B(mnemonic, input)      \
  DEFINE_TEST_NEON_2SAME_SCALAR_H(mnemonic, input)      \
  DEFINE_TEST_NEON_2SAME_SCALAR_S(mnemonic, input)      \
  DEFINE_TEST_NEON_2SAME_SCALAR_D(mnemonic, input)

// Scalar S and D sizes only.
#define DEFINE_TEST_NEON_2SAME_SCALAR_SD(mnemonic, input)  \
  DEFINE_TEST_NEON_2SAME_SCALAR_S(mnemonic, input)         \
  DEFINE_TEST_NEON_2SAME_SCALAR_D(mnemonic, input)
3202
3203
// Across-lanes reductions: a scalar destination computed from a full vector.
#define CALL_TEST_NEON_HELPER_ACROSS(mnemonic, vd_form, vn_form, input_n) \
  CALL_TEST_NEON_HELPER_1OpAcross(mnemonic, vd_form, vn_form, input_n)

// Same-width reductions (e.g. addv, smaxv): destination lane matches the
// source lane size. Note there is no S_2S form in the architecture.
#define DEFINE_TEST_NEON_ACROSS(mnemonic, input)                          \
  TEST(mnemonic##_B_8B) {                                                 \
    CALL_TEST_NEON_HELPER_ACROSS(mnemonic, B, 8B, kInput8bits##input);    \
  }                                                                       \
  TEST(mnemonic##_B_16B) {                                                \
    CALL_TEST_NEON_HELPER_ACROSS(mnemonic, B, 16B, kInput8bits##input);   \
  }                                                                       \
  TEST(mnemonic##_H_4H) {                                                 \
    CALL_TEST_NEON_HELPER_ACROSS(mnemonic, H, 4H, kInput16bits##input);   \
  }                                                                       \
  TEST(mnemonic##_H_8H) {                                                 \
    CALL_TEST_NEON_HELPER_ACROSS(mnemonic, H, 8H, kInput16bits##input);   \
  }                                                                       \
  TEST(mnemonic##_S_4S) {                                                 \
    CALL_TEST_NEON_HELPER_ACROSS(mnemonic, S, 4S, kInput32bits##input);   \
  }

// Widening reductions (e.g. saddlv): destination lane is twice the source
// lane size.
#define DEFINE_TEST_NEON_ACROSS_LONG(mnemonic, input)                     \
  TEST(mnemonic##_H_8B) {                                                 \
    CALL_TEST_NEON_HELPER_ACROSS(mnemonic, H, 8B, kInput8bits##input);    \
  }                                                                       \
  TEST(mnemonic##_H_16B) {                                                \
    CALL_TEST_NEON_HELPER_ACROSS(mnemonic, H, 16B, kInput8bits##input);   \
  }                                                                       \
  TEST(mnemonic##_S_4H) {                                                 \
    CALL_TEST_NEON_HELPER_ACROSS(mnemonic, S, 4H, kInput16bits##input);   \
  }                                                                       \
  TEST(mnemonic##_S_8H) {                                                 \
    CALL_TEST_NEON_HELPER_ACROSS(mnemonic, S, 8H, kInput16bits##input);   \
  }                                                                       \
  TEST(mnemonic##_D_4S) {                                                 \
    CALL_TEST_NEON_HELPER_ACROSS(mnemonic, D, 4S, kInput32bits##input);   \
  }

// Floating-point reductions (e.g. fmaxv): FP16 4H/8H and single 4S sources.
#define DEFINE_TEST_NEON_ACROSS_FP(mnemonic, input)                       \
  TEST(mnemonic##_H_4H) {                                                 \
    CALL_TEST_NEON_HELPER_ACROSS(mnemonic, H, 4H, kInputFloat16##input);  \
  }                                                                       \
  TEST(mnemonic##_H_8H) {                                                 \
    CALL_TEST_NEON_HELPER_ACROSS(mnemonic, H, 8H, kInputFloat16##input);  \
  }                                                                       \
  TEST(mnemonic##_S_4S) {                                                 \
    CALL_TEST_NEON_HELPER_ACROSS(mnemonic, S, 4S, kInputFloat##input);    \
  }
3251
// "2DIFF": one-operand instructions where Vd and Vn have different formats
// (widening or narrowing).
#define CALL_TEST_NEON_HELPER_2DIFF(mnemonic, vdform, vnform, input_n) \
  CALL_TEST_NEON_HELPER_1Op(mnemonic, vdform, vnform, input_n)

// Widening forms: destination lanes are twice the width of the source lanes.
#define DEFINE_TEST_NEON_2DIFF_LONG(mnemonic, input)                     \
  TEST(mnemonic##_4H) {                                                  \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 4H, 8B, kInput8bits##input);   \
  }                                                                      \
  TEST(mnemonic##_8H) {                                                  \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 8H, 16B, kInput8bits##input);  \
  }                                                                      \
  TEST(mnemonic##_2S) {                                                  \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 2S, 4H, kInput16bits##input);  \
  }                                                                      \
  TEST(mnemonic##_4S) {                                                  \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 4S, 8H, kInput16bits##input);  \
  }                                                                      \
  TEST(mnemonic##_1D) {                                                  \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 1D, 2S, kInput32bits##input);  \
  }                                                                      \
  TEST(mnemonic##_2D) {                                                  \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 2D, 4S, kInput32bits##input);  \
  }

// Narrowing forms; the mnemonic##2 variants write the upper half of Vd.
#define DEFINE_TEST_NEON_2DIFF_NARROW(mnemonic, input)                      \
  TEST(mnemonic##_8B) {                                                     \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 8B, 8H, kInput16bits##input);     \
  }                                                                         \
  TEST(mnemonic##_4H) {                                                     \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 4H, 4S, kInput32bits##input);     \
  }                                                                         \
  TEST(mnemonic##_2S) {                                                     \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 2S, 2D, kInput64bits##input);     \
  }                                                                         \
  TEST(mnemonic##2_16B) {                                                   \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 16B, 8H, kInput16bits##input); \
  }                                                                         \
  TEST(mnemonic##2_8H) {                                                    \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 8H, 4S, kInput32bits##input);  \
  }                                                                         \
  TEST(mnemonic##2_4S) {                                                    \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 4S, 2D, kInput64bits##input);  \
  }

// FP widening conversions (e.g. fcvtl): half->single and single->double.
#define DEFINE_TEST_NEON_2DIFF_FP_LONG(mnemonic, input)                     \
  TEST(mnemonic##_4S) {                                                     \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 4S, 4H, kInputFloat16##input);    \
  }                                                                         \
  TEST(mnemonic##_2D) {                                                     \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 2D, 2S, kInputFloat##input);      \
  }                                                                         \
  TEST(mnemonic##2_4S) {                                                    \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 4S, 8H, kInputFloat16##input); \
  }                                                                         \
  TEST(mnemonic##2_2D) {                                                    \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 2D, 4S, kInputFloat##input);   \
  }

// FP narrowing conversions (e.g. fcvtn): single->half and double->single.
#define DEFINE_TEST_NEON_2DIFF_FP_NARROW(mnemonic, input)                   \
  TEST(mnemonic##_4H) {                                                     \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 4H, 4S, kInputFloat##input);      \
  }                                                                         \
  TEST(mnemonic##_2S) {                                                     \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 2S, 2D, kInputDouble##input);     \
  }                                                                         \
  TEST(mnemonic##2_8H) {                                                    \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 8H, 4S, kInputFloat##input);   \
  }                                                                         \
  TEST(mnemonic##2_4S) {                                                    \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 4S, 2D, kInputDouble##input);  \
  }

// FP narrowing limited to the double->single forms (e.g. fcvtxn).
#define DEFINE_TEST_NEON_2DIFF_FP_NARROW_2S(mnemonic, input)                \
  TEST(mnemonic##_2S) {                                                     \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 2S, 2D, kInputDouble##input);     \
  }                                                                         \
  TEST(mnemonic##2_4S) {                                                    \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 4S, 2D, kInputDouble##input);  \
  }

// Scalar narrowing forms (e.g. sqxtn): B<-H, H<-S, S<-D.
#define DEFINE_TEST_NEON_2DIFF_SCALAR_NARROW(mnemonic, input)          \
  TEST(mnemonic##_B) {                                                 \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, B, H, kInput16bits##input);  \
  }                                                                    \
  TEST(mnemonic##_H) {                                                 \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, H, S, kInput32bits##input);  \
  }                                                                    \
  TEST(mnemonic##_S) {                                                 \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, S, D, kInput64bits##input);  \
  }

// Scalar FP pairwise forms (e.g. faddp scalar): one scalar from a pair.
// NOTE(review): the name says "SD" but an H/2H (FP16) form is also defined.
#define DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(mnemonic, input)            \
  TEST(mnemonic##_S) {                                                  \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, S, 2S, kInputFloat##input);   \
  }                                                                     \
  TEST(mnemonic##_D) {                                                  \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, D, 2D, kInputDouble##input);  \
  }                                                                     \
  TEST(mnemonic##_H) {                                                  \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, H, 2H, kInputFloat16##input); \
  }
3352
// "3SAME": two-operand instructions where Vd, Vn and Vm all share one format.
// input_d seeds the destination so accumulating forms (mla etc.) are covered;
// the same input table is used for both Vn and Vm.
#define CALL_TEST_NEON_HELPER_3SAME(mnemonic, variant, input_d, input_nm) \
  {                                                                       \
    CALL_TEST_NEON_HELPER_2Op(mnemonic,                                   \
                              variant,                                    \
                              variant,                                    \
                              variant,                                    \
                              input_d,                                    \
                              input_nm,                                   \
                              input_nm);                                  \
  }

// 8-bit vector arrangements.
#define DEFINE_TEST_NEON_3SAME_8B_16B(mnemonic, input)         \
  TEST(mnemonic##_8B) {                                        \
    CALL_TEST_NEON_HELPER_3SAME(mnemonic,                      \
                                8B,                            \
                                kInput8bitsAccDestination,     \
                                kInput8bits##input);           \
  }                                                            \
  TEST(mnemonic##_16B) {                                       \
    CALL_TEST_NEON_HELPER_3SAME(mnemonic,                      \
                                16B,                           \
                                kInput8bitsAccDestination,     \
                                kInput8bits##input);           \
  }

// 16- and 32-bit vector arrangements.
#define DEFINE_TEST_NEON_3SAME_HS(mnemonic, input)             \
  TEST(mnemonic##_4H) {                                        \
    CALL_TEST_NEON_HELPER_3SAME(mnemonic,                      \
                                4H,                            \
                                kInput16bitsAccDestination,    \
                                kInput16bits##input);          \
  }                                                            \
  TEST(mnemonic##_8H) {                                        \
    CALL_TEST_NEON_HELPER_3SAME(mnemonic,                      \
                                8H,                            \
                                kInput16bitsAccDestination,    \
                                kInput16bits##input);          \
  }                                                            \
  TEST(mnemonic##_2S) {                                        \
    CALL_TEST_NEON_HELPER_3SAME(mnemonic,                      \
                                2S,                            \
                                kInput32bitsAccDestination,    \
                                kInput32bits##input);          \
  }                                                            \
  TEST(mnemonic##_4S) {                                        \
    CALL_TEST_NEON_HELPER_3SAME(mnemonic,                      \
                                4S,                            \
                                kInput32bitsAccDestination,    \
                                kInput32bits##input);          \
  }

// All integer arrangements except 2D.
#define DEFINE_TEST_NEON_3SAME_NO2D(mnemonic, input)  \
  DEFINE_TEST_NEON_3SAME_8B_16B(mnemonic, input)      \
  DEFINE_TEST_NEON_3SAME_HS(mnemonic, input)

// All integer arrangements including 2D.
#define DEFINE_TEST_NEON_3SAME(mnemonic, input)                \
  DEFINE_TEST_NEON_3SAME_NO2D(mnemonic, input)                 \
  TEST(mnemonic##_2D) {                                        \
    CALL_TEST_NEON_HELPER_3SAME(mnemonic,                      \
                                2D,                            \
                                kInput64bitsAccDestination,    \
                                kInput64bits##input);          \
  }

// Floating-point arrangements: FP16 4H/8H, single 2S/4S, double 2D.
#define DEFINE_TEST_NEON_3SAME_FP(mnemonic, input)             \
  TEST(mnemonic##_4H) {                                        \
    CALL_TEST_NEON_HELPER_3SAME(mnemonic,                      \
                                4H,                            \
                                kInputFloat16AccDestination,   \
                                kInputFloat16##input);         \
  }                                                            \
  TEST(mnemonic##_8H) {                                        \
    CALL_TEST_NEON_HELPER_3SAME(mnemonic,                      \
                                8H,                            \
                                kInputFloat16AccDestination,   \
                                kInputFloat16##input);         \
  }                                                            \
  TEST(mnemonic##_2S) {                                        \
    CALL_TEST_NEON_HELPER_3SAME(mnemonic,                      \
                                2S,                            \
                                kInputFloatAccDestination,     \
                                kInputFloat##input);           \
  }                                                            \
  TEST(mnemonic##_4S) {                                        \
    CALL_TEST_NEON_HELPER_3SAME(mnemonic,                      \
                                4S,                            \
                                kInputFloatAccDestination,     \
                                kInputFloat##input);           \
  }                                                            \
  TEST(mnemonic##_2D) {                                        \
    CALL_TEST_NEON_HELPER_3SAME(mnemonic,                      \
                                2D,                            \
                                kInputDoubleAccDestination,    \
                                kInputDouble##input);          \
  }

// Scalar D-register form only.
#define DEFINE_TEST_NEON_3SAME_SCALAR_D(mnemonic, input)       \
  TEST(mnemonic##_D) {                                         \
    CALL_TEST_NEON_HELPER_3SAME(mnemonic,                      \
                                D,                             \
                                kInput64bitsAccDestination,    \
                                kInput64bits##input);          \
  }

// Scalar H and S register forms.
#define DEFINE_TEST_NEON_3SAME_SCALAR_HS(mnemonic, input)      \
  TEST(mnemonic##_H) {                                         \
    CALL_TEST_NEON_HELPER_3SAME(mnemonic,                      \
                                H,                             \
                                kInput16bitsAccDestination,    \
                                kInput16bits##input);          \
  }                                                            \
  TEST(mnemonic##_S) {                                         \
    CALL_TEST_NEON_HELPER_3SAME(mnemonic,                      \
                                S,                             \
                                kInput32bitsAccDestination,    \
                                kInput32bits##input);          \
  }

// All scalar integer register sizes.
#define DEFINE_TEST_NEON_3SAME_SCALAR(mnemonic, input)         \
  TEST(mnemonic##_B) {                                         \
    CALL_TEST_NEON_HELPER_3SAME(mnemonic,                      \
                                B,                             \
                                kInput8bitsAccDestination,     \
                                kInput8bits##input);           \
  }                                                            \
  TEST(mnemonic##_H) {                                         \
    CALL_TEST_NEON_HELPER_3SAME(mnemonic,                      \
                                H,                             \
                                kInput16bitsAccDestination,    \
                                kInput16bits##input);          \
  }                                                            \
  TEST(mnemonic##_S) {                                         \
    CALL_TEST_NEON_HELPER_3SAME(mnemonic,                      \
                                S,                             \
                                kInput32bitsAccDestination,    \
                                kInput32bits##input);          \
  }                                                            \
  TEST(mnemonic##_D) {                                         \
    CALL_TEST_NEON_HELPER_3SAME(mnemonic,                      \
                                D,                             \
                                kInput64bitsAccDestination,    \
                                kInput64bits##input);          \
  }

// Scalar FP register forms: H, S and D.
#define DEFINE_TEST_NEON_3SAME_FP_SCALAR(mnemonic, input)      \
  TEST(mnemonic##_H) {                                         \
    CALL_TEST_NEON_HELPER_3SAME(mnemonic,                      \
                                H,                             \
                                kInputFloat16AccDestination,   \
                                kInputFloat16##input);         \
  }                                                            \
  TEST(mnemonic##_S) {                                         \
    CALL_TEST_NEON_HELPER_3SAME(mnemonic,                      \
                                S,                             \
                                kInputFloatAccDestination,     \
                                kInputFloat##input);           \
  }                                                            \
  TEST(mnemonic##_D) {                                         \
    CALL_TEST_NEON_HELPER_3SAME(mnemonic,                      \
                                D,                             \
                                kInputDoubleAccDestination,    \
                                kInputDouble##input);          \
  }
3516
// FMLAL/FMLSL-style tests: single-precision accumulator (2S/4S) with FP16
// sources. Uses CALL_TEST_NEON_HELPER_3DIFF, defined just below (macro
// expansion is deferred until the invocation site, so the ordering is fine).
// NOTE(review): the input_d parameter is unused - the accumulator input is
// hard-coded to kInputFloatAccDestination; confirm this is intended.
#define DEFINE_TEST_NEON_FHM(mnemonic, input_d, input_n, input_m)  \
  TEST(mnemonic##_2S) {                                            \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic,                          \
                                2S,                                \
                                2H,                                \
                                2H,                                \
                                kInputFloatAccDestination,         \
                                kInputFloat16##input_n,            \
                                kInputFloat16##input_m);           \
  }                                                                \
  TEST(mnemonic##_4S) {                                            \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic,                          \
                                4S,                                \
                                4H,                                \
                                4H,                                \
                                kInputFloatAccDestination,         \
                                kInputFloat16##input_n,            \
                                kInputFloat16##input_m);           \
  }

// "3DIFF": two-operand instructions where the destination format differs
// from the source formats (widening/narrowing two-operand forms).
#define CALL_TEST_NEON_HELPER_3DIFF(                               \
    mnemonic, vdform, vnform, vmform, input_d, input_n, input_m)   \
  {                                                                \
    CALL_TEST_NEON_HELPER_2Op(mnemonic,                            \
                              vdform,                              \
                              vnform,                              \
                              vmform,                              \
                              input_d,                             \
                              input_n,                             \
                              input_m);                            \
  }
3548
// Widening two-operand forms: destination lanes are twice the source width.
// The mnemonic##2 variants read the upper halves of the source registers.
#define DEFINE_TEST_NEON_3DIFF_LONG_8H(mnemonic, input)        \
  TEST(mnemonic##_8H) {                                        \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic,                      \
                                8H,                            \
                                8B,                            \
                                8B,                            \
                                kInput16bitsAccDestination,    \
                                kInput8bits##input,            \
                                kInput8bits##input);           \
  }                                                            \
  TEST(mnemonic##2_8H) {                                       \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2,                   \
                                8H,                            \
                                16B,                           \
                                16B,                           \
                                kInput16bitsAccDestination,    \
                                kInput8bits##input,            \
                                kInput8bits##input);           \
  }

#define DEFINE_TEST_NEON_3DIFF_LONG_4S(mnemonic, input)        \
  TEST(mnemonic##_4S) {                                        \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic,                      \
                                4S,                            \
                                4H,                            \
                                4H,                            \
                                kInput32bitsAccDestination,    \
                                kInput16bits##input,           \
                                kInput16bits##input);          \
  }                                                            \
  TEST(mnemonic##2_4S) {                                       \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2,                   \
                                4S,                            \
                                8H,                            \
                                8H,                            \
                                kInput32bitsAccDestination,    \
                                kInput16bits##input,           \
                                kInput16bits##input);          \
  }

#define DEFINE_TEST_NEON_3DIFF_LONG_2D(mnemonic, input)        \
  TEST(mnemonic##_2D) {                                        \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic,                      \
                                2D,                            \
                                2S,                            \
                                2S,                            \
                                kInput64bitsAccDestination,    \
                                kInput32bits##input,           \
                                kInput32bits##input);          \
  }                                                            \
  TEST(mnemonic##2_2D) {                                       \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2,                   \
                                2D,                            \
                                4S,                            \
                                4S,                            \
                                kInput64bitsAccDestination,    \
                                kInput32bits##input,           \
                                kInput32bits##input);          \
  }

// 4S and 2D widening forms only (e.g. sqdmull: no byte form).
#define DEFINE_TEST_NEON_3DIFF_LONG_SD(mnemonic, input)  \
  DEFINE_TEST_NEON_3DIFF_LONG_4S(mnemonic, input)        \
  DEFINE_TEST_NEON_3DIFF_LONG_2D(mnemonic, input)

// All widening forms: 8H, 4S and 2D destinations.
#define DEFINE_TEST_NEON_3DIFF_LONG(mnemonic, input)  \
  DEFINE_TEST_NEON_3DIFF_LONG_8H(mnemonic, input)     \
  DEFINE_TEST_NEON_3DIFF_LONG_4S(mnemonic, input)     \
  DEFINE_TEST_NEON_3DIFF_LONG_2D(mnemonic, input)

// Scalar widening forms: S <- H x H and D <- S x S.
#define DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_S(mnemonic, input)  \
  TEST(mnemonic##_S) {                                         \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic,                      \
                                S,                             \
                                H,                             \
                                H,                             \
                                kInput32bitsAccDestination,    \
                                kInput16bits##input,           \
                                kInput16bits##input);          \
  }

#define DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_D(mnemonic, input)  \
  TEST(mnemonic##_D) {                                         \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic,                      \
                                D,                             \
                                S,                             \
                                S,                             \
                                kInput64bitsAccDestination,    \
                                kInput32bits##input,           \
                                kInput32bits##input);          \
  }

#define DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_SD(mnemonic, input)  \
  DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_S(mnemonic, input)         \
  DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_D(mnemonic, input)
3643
// Wide forms (e.g. saddw): Vd and Vn share the wide format while Vm is
// narrow; the mnemonic##2 variants read the upper half of Vm.
#define DEFINE_TEST_NEON_3DIFF_WIDE(mnemonic, input)           \
  TEST(mnemonic##_8H) {                                        \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic,                      \
                                8H,                            \
                                8H,                            \
                                8B,                            \
                                kInput16bitsAccDestination,    \
                                kInput16bits##input,           \
                                kInput8bits##input);           \
  }                                                            \
  TEST(mnemonic##_4S) {                                        \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic,                      \
                                4S,                            \
                                4S,                            \
                                4H,                            \
                                kInput32bitsAccDestination,    \
                                kInput32bits##input,           \
                                kInput16bits##input);          \
  }                                                            \
  TEST(mnemonic##_2D) {                                        \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic,                      \
                                2D,                            \
                                2D,                            \
                                2S,                            \
                                kInput64bitsAccDestination,    \
                                kInput64bits##input,           \
                                kInput32bits##input);          \
  }                                                            \
  TEST(mnemonic##2_8H) {                                       \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2,                   \
                                8H,                            \
                                8H,                            \
                                16B,                           \
                                kInput16bitsAccDestination,    \
                                kInput16bits##input,           \
                                kInput8bits##input);           \
  }                                                            \
  TEST(mnemonic##2_4S) {                                       \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2,                   \
                                4S,                            \
                                4S,                            \
                                8H,                            \
                                kInput32bitsAccDestination,    \
                                kInput32bits##input,           \
                                kInput16bits##input);          \
  }                                                            \
  TEST(mnemonic##2_2D) {                                       \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2,                   \
                                2D,                            \
                                2D,                            \
                                4S,                            \
                                kInput64bitsAccDestination,    \
                                kInput64bits##input,           \
                                kInput32bits##input);          \
  }

// Narrowing two-operand forms (e.g. addhn): wide sources, narrow
// destination; the mnemonic##2 variants write the upper half of Vd.
#define DEFINE_TEST_NEON_3DIFF_NARROW(mnemonic, input)         \
  TEST(mnemonic##_8B) {                                        \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic,                      \
                                8B,                            \
                                8H,                            \
                                8H,                            \
                                kInput8bitsAccDestination,     \
                                kInput16bits##input,           \
                                kInput16bits##input);          \
  }                                                            \
  TEST(mnemonic##_4H) {                                        \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic,                      \
                                4H,                            \
                                4S,                            \
                                4S,                            \
                                kInput16bitsAccDestination,    \
                                kInput32bits##input,           \
                                kInput32bits##input);          \
  }                                                            \
  TEST(mnemonic##_2S) {                                        \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic,                      \
                                2S,                            \
                                2D,                            \
                                2D,                            \
                                kInput32bitsAccDestination,    \
                                kInput64bits##input,           \
                                kInput64bits##input);          \
  }                                                            \
  TEST(mnemonic##2_16B) {                                      \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2,                   \
                                16B,                           \
                                8H,                            \
                                8H,                            \
                                kInput8bitsAccDestination,     \
                                kInput16bits##input,           \
                                kInput16bits##input);          \
  }                                                            \
  TEST(mnemonic##2_8H) {                                       \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2,                   \
                                8H,                            \
                                4S,                            \
                                4S,                            \
                                kInput16bitsAccDestination,    \
                                kInput32bits##input,           \
                                kInput32bits##input);          \
  }                                                            \
  TEST(mnemonic##2_4S) {                                       \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2,                   \
                                4S,                            \
                                2D,                            \
                                2D,                            \
                                kInput32bitsAccDestination,    \
                                kInput64bits##input,           \
                                kInput64bits##input);          \
  }

// Four-times widening forms (e.g. dot products): 32-bit accumulators fed
// from 8-bit sources.
#define DEFINE_TEST_NEON_3DIFF_DOUBLE_WIDE(mnemonic, input)    \
  TEST(mnemonic##_2S) {                                        \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic,                      \
                                2S,                            \
                                8B,                            \
                                8B,                            \
                                kInput32bitsAccDestination,    \
                                kInput8bits##input,            \
                                kInput8bits##input);           \
  }                                                            \
  TEST(mnemonic##_4S) {                                        \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic,                      \
                                4S,                            \
                                16B,                           \
                                16B,                           \
                                kInput32bitsAccDestination,    \
                                kInput8bits##input,            \
                                kInput8bits##input);           \
  }
3775
3776
// Vector-and-immediate instructions (shifts by immediate, etc.).
#define CALL_TEST_NEON_HELPER_2OPIMM(                  \
    mnemonic, vdform, vnform, input_n, input_imm)      \
  {                                                    \
    CALL_TEST_NEON_HELPER_2OpImm(mnemonic,             \
                                 vdform,               \
                                 vnform,               \
                                 input_n,              \
                                 input_imm);           \
  }

// Define immediate-operand tests for every vector arrangement; the immediate
// table is selected per lane size (kInput<size>bitsImm<input_imm>).
#define DEFINE_TEST_NEON_2OPIMM(mnemonic, input, input_imm)    \
  TEST(mnemonic##_8B_2OPIMM) {                                 \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                     \
                                 8B,                           \
                                 8B,                           \
                                 kInput8bits##input,           \
                                 kInput8bitsImm##input_imm);   \
  }                                                            \
  TEST(mnemonic##_16B_2OPIMM) {                                \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                     \
                                 16B,                          \
                                 16B,                          \
                                 kInput8bits##input,           \
                                 kInput8bitsImm##input_imm);   \
  }                                                            \
  TEST(mnemonic##_4H_2OPIMM) {                                 \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                     \
                                 4H,                           \
                                 4H,                           \
                                 kInput16bits##input,          \
                                 kInput16bitsImm##input_imm);  \
  }                                                            \
  TEST(mnemonic##_8H_2OPIMM) {                                 \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                     \
                                 8H,                           \
                                 8H,                           \
                                 kInput16bits##input,          \
                                 kInput16bitsImm##input_imm);  \
  }                                                            \
  TEST(mnemonic##_2S_2OPIMM) {                                 \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                     \
                                 2S,                           \
                                 2S,                           \
                                 kInput32bits##input,          \
                                 kInput32bitsImm##input_imm);  \
  }                                                            \
  TEST(mnemonic##_4S_2OPIMM) {                                 \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                     \
                                 4S,                           \
                                 4S,                           \
                                 kInput32bits##input,          \
                                 kInput32bitsImm##input_imm);  \
  }                                                            \
  TEST(mnemonic##_2D_2OPIMM) {                                 \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                     \
                                 2D,                           \
                                 2D,                           \
                                 kInput64bits##input,          \
                                 kInput64bitsImm##input_imm);  \
  }
3837
// As DEFINE_TEST_NEON_2OPIMM, but for copy-style instructions (dup): the
// source form is a single scalar lane (B/H/S/D) rather than a full vector.
#define DEFINE_TEST_NEON_2OPIMM_COPY(mnemonic, input, input_imm) \
  TEST(mnemonic##_8B_2OPIMM) {                                   \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                       \
                                 8B,                             \
                                 B,                              \
                                 kInput8bits##input,             \
                                 kInput8bitsImm##input_imm);     \
  }                                                              \
  TEST(mnemonic##_16B_2OPIMM) {                                  \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                       \
                                 16B,                            \
                                 B,                              \
                                 kInput8bits##input,             \
                                 kInput8bitsImm##input_imm);     \
  }                                                              \
  TEST(mnemonic##_4H_2OPIMM) {                                   \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                       \
                                 4H,                             \
                                 H,                              \
                                 kInput16bits##input,            \
                                 kInput16bitsImm##input_imm);    \
  }                                                              \
  TEST(mnemonic##_8H_2OPIMM) {                                   \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                       \
                                 8H,                             \
                                 H,                              \
                                 kInput16bits##input,            \
                                 kInput16bitsImm##input_imm);    \
  }                                                              \
  TEST(mnemonic##_2S_2OPIMM) {                                   \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                       \
                                 2S,                             \
                                 S,                              \
                                 kInput32bits##input,            \
                                 kInput32bitsImm##input_imm);    \
  }                                                              \
  TEST(mnemonic##_4S_2OPIMM) {                                   \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                       \
                                 4S,                             \
                                 S,                              \
                                 kInput32bits##input,            \
                                 kInput32bitsImm##input_imm);    \
  }                                                              \
  TEST(mnemonic##_2D_2OPIMM) {                                   \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                       \
                                 2D,                             \
                                 D,                              \
                                 kInput64bits##input,            \
                                 kInput64bitsImm##input_imm);    \
  }
3888
// Defines narrowing shift-by-immediate tests (e.g. shrn/sqshrn): destination
// elements are half the width of the source. The lower-half forms use the
// plain mnemonic; the upper-half forms paste a `2` suffix (shrn2, etc.).
#define DEFINE_TEST_NEON_2OPIMM_NARROW(mnemonic, input, input_imm) \
  TEST(mnemonic##_8B_2OPIMM) {                                     \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                         \
                                 8B,                               \
                                 8H,                               \
                                 kInput16bits##input,              \
                                 kInput8bitsImm##input_imm);       \
  }                                                                \
  TEST(mnemonic##_4H_2OPIMM) {                                     \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                         \
                                 4H,                               \
                                 4S,                               \
                                 kInput32bits##input,              \
                                 kInput16bitsImm##input_imm);      \
  }                                                                \
  TEST(mnemonic##_2S_2OPIMM) {                                     \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                         \
                                 2S,                               \
                                 2D,                               \
                                 kInput64bits##input,              \
                                 kInput32bitsImm##input_imm);      \
  }                                                                \
  TEST(mnemonic##2_16B_2OPIMM) {                                   \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic##2,                      \
                                 16B,                              \
                                 8H,                               \
                                 kInput16bits##input,              \
                                 kInput8bitsImm##input_imm);       \
  }                                                                \
  TEST(mnemonic##2_8H_2OPIMM) {                                    \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic##2,                      \
                                 8H,                               \
                                 4S,                               \
                                 kInput32bits##input,              \
                                 kInput16bitsImm##input_imm);      \
  }                                                                \
  TEST(mnemonic##2_4S_2OPIMM) {                                    \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic##2,                      \
                                 4S,                               \
                                 2D,                               \
                                 kInput64bits##input,              \
                                 kInput32bitsImm##input_imm);      \
  }
3932
// Scalar variant of the narrowing shift-by-immediate tests: B<-H, H<-S, S<-D.
#define DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(mnemonic, input, input_imm) \
  TEST(mnemonic##_B_2OPIMM) {                                             \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                                \
                                 B,                                       \
                                 H,                                       \
                                 kInput16bits##input,                     \
                                 kInput8bitsImm##input_imm);              \
  }                                                                       \
  TEST(mnemonic##_H_2OPIMM) {                                             \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                                \
                                 H,                                       \
                                 S,                                       \
                                 kInput32bits##input,                     \
                                 kInput16bitsImm##input_imm);             \
  }                                                                       \
  TEST(mnemonic##_S_2OPIMM) {                                             \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                                \
                                 S,                                       \
                                 D,                                       \
                                 kInput64bits##input,                     \
                                 kInput32bitsImm##input_imm);             \
  }
3955
// Defines FP compare-against-immediate (zero) tests across FP16 (4H/8H),
// single (2S/4S) and double (2D) arrangements.
// NOTE(review): the 2S form pastes the literal `Basic` (kInputFloatBasic)
// rather than kInputFloat##input, unlike the 4S form. Confirm this asymmetry
// is intentional before changing it — the pre-generated expected-output
// traces were produced from these exact input tables, so a "fix" here would
// break every test stamped out by this macro.
#define DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(mnemonic, input, input_imm) \
  TEST(mnemonic##_4H_2OPIMM) {                                        \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                            \
                                 4H,                                  \
                                 4H,                                  \
                                 kInputFloat16##input,                \
                                 kInputDoubleImm##input_imm);         \
  }                                                                   \
  TEST(mnemonic##_8H_2OPIMM) {                                        \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                            \
                                 8H,                                  \
                                 8H,                                  \
                                 kInputFloat16##input,                \
                                 kInputDoubleImm##input_imm);         \
  }                                                                   \
  TEST(mnemonic##_2S_2OPIMM) {                                        \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                            \
                                 2S,                                  \
                                 2S,                                  \
                                 kInputFloat##Basic,                  \
                                 kInputDoubleImm##input_imm);         \
  }                                                                   \
  TEST(mnemonic##_4S_2OPIMM) {                                        \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                            \
                                 4S,                                  \
                                 4S,                                  \
                                 kInputFloat##input,                  \
                                 kInputDoubleImm##input_imm);         \
  }                                                                   \
  TEST(mnemonic##_2D_2OPIMM) {                                        \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                            \
                                 2D,                                  \
                                 2D,                                  \
                                 kInputDouble##input,                 \
                                 kInputDoubleImm##input_imm);         \
  }
3992
// Defines FP 2-operand-plus-immediate tests (e.g. fcvtzs with fbits) across
// FP16 (4H/8H), single (2S/4S) and double (2D) arrangements.
// NOTE(review): as in DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO, the 2S form pastes
// the literal `Basic` while 4S uses `##input` — confirm before "fixing";
// changing the input table would invalidate the generated trace data.
#define DEFINE_TEST_NEON_2OPIMM_FP(mnemonic, input, input_imm) \
  TEST(mnemonic##_4H_2OPIMM) {                                 \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                     \
                                 4H,                           \
                                 4H,                           \
                                 kInputFloat16##input,         \
                                 kInput16bitsImm##input_imm);  \
  }                                                            \
  TEST(mnemonic##_8H_2OPIMM) {                                 \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                     \
                                 8H,                           \
                                 8H,                           \
                                 kInputFloat16##input,         \
                                 kInput16bitsImm##input_imm);  \
  }                                                            \
  TEST(mnemonic##_2S_2OPIMM) {                                 \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                     \
                                 2S,                           \
                                 2S,                           \
                                 kInputFloat##Basic,           \
                                 kInput32bitsImm##input_imm);  \
  }                                                            \
  TEST(mnemonic##_4S_2OPIMM) {                                 \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                     \
                                 4S,                           \
                                 4S,                           \
                                 kInputFloat##input,           \
                                 kInput32bitsImm##input_imm);  \
  }                                                            \
  TEST(mnemonic##_2D_2OPIMM) {                                 \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                     \
                                 2D,                           \
                                 2D,                           \
                                 kInputDouble##input,          \
                                 kInput64bitsImm##input_imm);  \
  }
4029
// Scalar FP 2-operand-plus-immediate tests (H, S, D forms).
// NOTE(review): the H and S forms paste the literal `Basic` while the D form
// uses `##input` — presumably a long-standing quirk baked into the generated
// traces; confirm before normalizing.
#define DEFINE_TEST_NEON_2OPIMM_FP_SCALAR(mnemonic, input, input_imm) \
  TEST(mnemonic##_H_2OPIMM) {                                         \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                            \
                                 H,                                   \
                                 H,                                   \
                                 kInputFloat16##Basic,                \
                                 kInput16bitsImm##input_imm);         \
  }                                                                   \
  TEST(mnemonic##_S_2OPIMM) {                                         \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                            \
                                 S,                                   \
                                 S,                                   \
                                 kInputFloat##Basic,                  \
                                 kInput32bitsImm##input_imm);         \
  }                                                                   \
  TEST(mnemonic##_D_2OPIMM) {                                         \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                            \
                                 D,                                   \
                                 D,                                   \
                                 kInputDouble##input,                 \
                                 kInput64bitsImm##input_imm);         \
  }
4052
// Integer 2-operand-plus-immediate tests restricted to the H/S/D element
// sizes (4H/8H/2S/4S/2D) — used for fixed-point conversions, which have no
// byte form.
#define DEFINE_TEST_NEON_2OPIMM_HSD(mnemonic, input, input_imm) \
  TEST(mnemonic##_4H_2OPIMM) {                                  \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                      \
                                 4H,                            \
                                 4H,                            \
                                 kInput16bits##input,           \
                                 kInput16bitsImm##input_imm);   \
  }                                                             \
  TEST(mnemonic##_8H_2OPIMM) {                                  \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                      \
                                 8H,                            \
                                 8H,                            \
                                 kInput16bits##input,           \
                                 kInput16bitsImm##input_imm);   \
  }                                                             \
  TEST(mnemonic##_2S_2OPIMM) {                                  \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                      \
                                 2S,                            \
                                 2S,                            \
                                 kInput32bits##input,           \
                                 kInput32bitsImm##input_imm);   \
  }                                                             \
  TEST(mnemonic##_4S_2OPIMM) {                                  \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                      \
                                 4S,                            \
                                 4S,                            \
                                 kInput32bits##input,           \
                                 kInput32bitsImm##input_imm);   \
  }                                                             \
  TEST(mnemonic##_2D_2OPIMM) {                                  \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                      \
                                 2D,                            \
                                 2D,                            \
                                 kInput64bits##input,           \
                                 kInput64bitsImm##input_imm);   \
  }
4089
// Scalar 2-operand-plus-immediate test for the D form only (instructions that
// exist solely at 64 bits in the scalar domain, e.g. sshr/shl).
#define DEFINE_TEST_NEON_2OPIMM_SCALAR_D(mnemonic, input, input_imm) \
  TEST(mnemonic##_D_2OPIMM) {                                        \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                           \
                                 D,                                  \
                                 D,                                  \
                                 kInput64bits##input,                \
                                 kInput64bitsImm##input_imm);        \
  }
4098
// Scalar H/S/D 2-operand-plus-immediate tests; delegates the D form to
// DEFINE_TEST_NEON_2OPIMM_SCALAR_D.
#define DEFINE_TEST_NEON_2OPIMM_SCALAR_HSD(mnemonic, input, input_imm) \
  TEST(mnemonic##_H_2OPIMM) {                                          \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                             \
                                 H,                                    \
                                 H,                                    \
                                 kInput16bits##input,                  \
                                 kInput16bitsImm##input_imm);          \
  }                                                                    \
  TEST(mnemonic##_S_2OPIMM) {                                          \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                             \
                                 S,                                    \
                                 S,                                    \
                                 kInput32bits##input,                  \
                                 kInput32bitsImm##input_imm);          \
  }                                                                    \
  DEFINE_TEST_NEON_2OPIMM_SCALAR_D(mnemonic, input, input_imm)
4115
// Scalar FP compare-against-immediate test for the D (double) form only;
// composed into DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_HSD below.
#define DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_D(mnemonic, input, input_imm) \
  TEST(mnemonic##_D_2OPIMM) {                                           \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                              \
                                 D,                                     \
                                 D,                                     \
                                 kInputDouble##input,                   \
                                 kInputDoubleImm##input_imm);           \
  }
4124
// Scalar FP compare-against-immediate tests for H, S and D forms; delegates
// the D form to DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_D.
#define DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_HSD(mnemonic, input, input_imm) \
  TEST(mnemonic##_H_2OPIMM) {                                             \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                                \
                                 H,                                       \
                                 H,                                       \
                                 kInputFloat16##input,                    \
                                 kInputDoubleImm##input_imm);             \
  }                                                                       \
  TEST(mnemonic##_S_2OPIMM) {                                             \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                                \
                                 S,                                       \
                                 S,                                       \
                                 kInputFloat##input,                      \
                                 kInputDoubleImm##input_imm);             \
  }                                                                       \
  DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_D(mnemonic, input, input_imm)
4141
// Full scalar set (B/H/S/D): adds the B form on top of
// DEFINE_TEST_NEON_2OPIMM_SCALAR_HSD.
#define DEFINE_TEST_NEON_2OPIMM_SCALAR(mnemonic, input, input_imm) \
  TEST(mnemonic##_B_2OPIMM) {                                      \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                         \
                                 B,                                \
                                 B,                                \
                                 kInput8bits##input,               \
                                 kInput8bitsImm##input_imm);       \
  }                                                                \
  DEFINE_TEST_NEON_2OPIMM_SCALAR_HSD(mnemonic, input, input_imm)
4151
// Defines lengthening shift-by-immediate tests (e.g. sshll/ushll/shll):
// destination elements are twice the source width. The plain mnemonic reads
// the lower half; the `##2` forms read the upper half of the source vector.
#define DEFINE_TEST_NEON_2OPIMM_LONG(mnemonic, input, input_imm) \
  TEST(mnemonic##_8H_2OPIMM) {                                   \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                       \
                                 8H,                             \
                                 8B,                             \
                                 kInput8bits##input,             \
                                 kInput8bitsImm##input_imm);     \
  }                                                              \
  TEST(mnemonic##_4S_2OPIMM) {                                   \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                       \
                                 4S,                             \
                                 4H,                             \
                                 kInput16bits##input,            \
                                 kInput16bitsImm##input_imm);    \
  }                                                              \
  TEST(mnemonic##_2D_2OPIMM) {                                   \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                       \
                                 2D,                             \
                                 2S,                             \
                                 kInput32bits##input,            \
                                 kInput32bitsImm##input_imm);    \
  }                                                              \
  TEST(mnemonic##2_8H_2OPIMM) {                                  \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic##2,                    \
                                 8H,                             \
                                 16B,                            \
                                 kInput8bits##input,             \
                                 kInput8bitsImm##input_imm);     \
  }                                                              \
  TEST(mnemonic##2_4S_2OPIMM) {                                  \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic##2,                    \
                                 4S,                             \
                                 8H,                             \
                                 kInput16bits##input,            \
                                 kInput16bitsImm##input_imm);    \
  }                                                              \
  TEST(mnemonic##2_2D_2OPIMM) {                                  \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic##2,                    \
                                 2D,                             \
                                 4S,                             \
                                 kInput32bits##input,            \
                                 kInput32bitsImm##input_imm);    \
  }
4195
// Statement-safe wrapper around the by-element dot-product test helper.
// `indices` enumerates the lane indices to test; `vm_subvector_count` is the
// number of sub-elements grouped per Vm lane (4 for byte dot products).
#define CALL_TEST_NEON_HELPER_BYELEMENT_DOT_PRODUCT(mnemonic,       \
                                                    vdform,         \
                                                    vnform,         \
                                                    vmform,         \
                                                    input_d,        \
                                                    input_n,        \
                                                    input_m,        \
                                                    indices,        \
                                                    vm_subvector_count) \
  {                                                                 \
    CALL_TEST_NEON_HELPER_ByElement_Dot_Product(mnemonic,           \
                                                vdform,             \
                                                vnform,             \
                                                vmform,             \
                                                input_d,            \
                                                input_n,            \
                                                input_m,            \
                                                indices,            \
                                                vm_subvector_count); \
  }
4216
// Defines by-element dot-product tests (sdot/udot): 32-bit accumulators fed
// from groups of four 8-bit elements selected by an S-sized lane index.
#define DEFINE_TEST_NEON_BYELEMENT_DOT_PRODUCT(            \
    mnemonic, input_d, input_n, input_m)                   \
  TEST(mnemonic##_2S_8B_B) {                               \
    CALL_TEST_NEON_HELPER_BYELEMENT_DOT_PRODUCT(mnemonic,  \
                                                2S,        \
                                                8B,        \
                                                B,         \
                                                kInput32bits##input_d, \
                                                kInput8bits##input_n,  \
                                                kInput8bits##input_m,  \
                                                kInputSIndices,        \
                                                4);        \
  }                                                        \
  TEST(mnemonic##_4S_16B_B) {                              \
    CALL_TEST_NEON_HELPER_BYELEMENT_DOT_PRODUCT(mnemonic,  \
                                                4S,        \
                                                16B,       \
                                                B,         \
                                                kInput32bits##input_d, \
                                                kInput8bits##input_n,  \
                                                kInput8bits##input_m,  \
                                                kInputSIndices,        \
                                                4);        \
  }
4241
// Statement-safe wrapper around the generic by-element test helper; iterates
// the instruction over every lane index listed in `indices`.
#define CALL_TEST_NEON_HELPER_BYELEMENT(                                     \
    mnemonic, vdform, vnform, vmform, input_d, input_n, input_m, indices)    \
  {                                                                          \
    CALL_TEST_NEON_HELPER_ByElement(mnemonic,                                \
                                    vdform,                                  \
                                    vnform,                                  \
                                    vmform,                                  \
                                    input_d,                                 \
                                    input_n,                                 \
                                    input_m,                                 \
                                    indices);                                \
  }
4254
// Defines integer by-element tests across the H and S arrangements (byte
// lanes are not addressable by-element in A64, hence no 8B/16B forms).
#define DEFINE_TEST_NEON_BYELEMENT(mnemonic, input_d, input_n, input_m) \
  TEST(mnemonic##_4H_4H_H) {                                            \
    CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic,                           \
                                    4H,                                 \
                                    4H,                                 \
                                    H,                                  \
                                    kInput16bits##input_d,              \
                                    kInput16bits##input_n,              \
                                    kInput16bits##input_m,              \
                                    kInputHIndices);                    \
  }                                                                     \
  TEST(mnemonic##_8H_8H_H) {                                            \
    CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic,                           \
                                    8H,                                 \
                                    8H,                                 \
                                    H,                                  \
                                    kInput16bits##input_d,              \
                                    kInput16bits##input_n,              \
                                    kInput16bits##input_m,              \
                                    kInputHIndices);                    \
  }                                                                     \
  TEST(mnemonic##_2S_2S_S) {                                            \
    CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic,                           \
                                    2S,                                 \
                                    2S,                                 \
                                    S,                                  \
                                    kInput32bits##input_d,              \
                                    kInput32bits##input_n,              \
                                    kInput32bits##input_m,              \
                                    kInputSIndices);                    \
  }                                                                     \
  TEST(mnemonic##_4S_4S_S) {                                            \
    CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic,                           \
                                    4S,                                 \
                                    4S,                                 \
                                    S,                                  \
                                    kInput32bits##input_d,              \
                                    kInput32bits##input_n,              \
                                    kInput32bits##input_m,              \
                                    kInputSIndices);                    \
  }
4296
// Scalar integer by-element tests (H and S forms only).
#define DEFINE_TEST_NEON_BYELEMENT_SCALAR(mnemonic, input_d, input_n, input_m) \
  TEST(mnemonic##_H_H_H) {                                                     \
    CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic,                                  \
                                    H,                                         \
                                    H,                                         \
                                    H,                                         \
                                    kInput16bits##input_d,                     \
                                    kInput16bits##input_n,                     \
                                    kInput16bits##input_m,                     \
                                    kInputHIndices);                           \
  }                                                                            \
  TEST(mnemonic##_S_S_S) {                                                     \
    CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic,                                  \
                                    S,                                         \
                                    S,                                         \
                                    S,                                         \
                                    kInput32bits##input_d,                     \
                                    kInput32bits##input_n,                     \
                                    kInput32bits##input_m,                     \
                                    kInputSIndices);                           \
  }
4318
// Defines FP by-element tests across FP16 (4H/8H), single (2S/4S) and double
// (2D) arrangements, each indexed with the matching lane-index table.
#define DEFINE_TEST_NEON_FP_BYELEMENT(mnemonic, input_d, input_n, input_m) \
  TEST(mnemonic##_4H_4H_H) {                                               \
    CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic,                              \
                                    4H,                                    \
                                    4H,                                    \
                                    H,                                     \
                                    kInputFloat16##input_d,                \
                                    kInputFloat16##input_n,                \
                                    kInputFloat16##input_m,                \
                                    kInputHIndices);                       \
  }                                                                        \
  TEST(mnemonic##_8H_8H_H) {                                               \
    CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic,                              \
                                    8H,                                    \
                                    8H,                                    \
                                    H,                                     \
                                    kInputFloat16##input_d,                \
                                    kInputFloat16##input_n,                \
                                    kInputFloat16##input_m,                \
                                    kInputHIndices);                       \
  }                                                                        \
  TEST(mnemonic##_2S_2S_S) {                                               \
    CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic,                              \
                                    2S,                                    \
                                    2S,                                    \
                                    S,                                     \
                                    kInputFloat##input_d,                  \
                                    kInputFloat##input_n,                  \
                                    kInputFloat##input_m,                  \
                                    kInputSIndices);                       \
  }                                                                        \
  TEST(mnemonic##_4S_4S_S) {                                               \
    CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic,                              \
                                    4S,                                    \
                                    4S,                                    \
                                    S,                                     \
                                    kInputFloat##input_d,                  \
                                    kInputFloat##input_n,                  \
                                    kInputFloat##input_m,                  \
                                    kInputSIndices);                       \
  }                                                                        \
  TEST(mnemonic##_2D_2D_D) {                                               \
    CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic,                              \
                                    2D,                                    \
                                    2D,                                    \
                                    D,                                     \
                                    kInputDouble##input_d,                 \
                                    kInputDouble##input_n,                 \
                                    kInputDouble##input_m,                 \
                                    kInputDIndices);                       \
  }
4370
// Defines by-element tests for the FHM instructions (fmlal/fmlsl): a 32-bit
// float accumulator fed from FP16 operands.
// NOTE(review): `input_d` is accepted but unused — the destination is always
// kInputFloatAccDestination. Presumably kept for signature symmetry with the
// other BYELEMENT macros; confirm before removing the parameter.
#define DEFINE_TEST_NEON_FHM_BYELEMENT(mnemonic, input_d, input_n, input_m) \
  TEST(mnemonic##_2S_2H_H) {                                                \
    CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic,                               \
                                    2S,                                     \
                                    2H,                                     \
                                    H,                                      \
                                    kInputFloatAccDestination,              \
                                    kInputFloat16##input_n,                 \
                                    kInputFloat16##input_m,                 \
                                    kInputHIndices);                        \
  }                                                                         \
  TEST(mnemonic##_4S_4H_H) {                                                \
    CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic,                               \
                                    4S,                                     \
                                    4H,                                     \
                                    H,                                      \
                                    kInputFloatAccDestination,              \
                                    kInputFloat16##input_n,                 \
                                    kInputFloat16##input_m,                 \
                                    kInputHIndices);                        \
  }
4392
// Scalar FP by-element tests for H, S and D forms.
#define DEFINE_TEST_NEON_FP_BYELEMENT_SCALAR(mnemonic, inp_d, inp_n, inp_m) \
  TEST(mnemonic##_H_H_H) {                                                  \
    CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic,                               \
                                    H,                                      \
                                    H,                                      \
                                    H,                                      \
                                    kInputFloat16##inp_d,                   \
                                    kInputFloat16##inp_n,                   \
                                    kInputFloat16##inp_m,                   \
                                    kInputHIndices);                        \
  }                                                                         \
  TEST(mnemonic##_S_S_S) {                                                  \
    CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic,                               \
                                    S,                                      \
                                    S,                                      \
                                    S,                                      \
                                    kInputFloat##inp_d,                     \
                                    kInputFloat##inp_n,                     \
                                    kInputFloat##inp_m,                     \
                                    kInputSIndices);                        \
  }                                                                         \
  TEST(mnemonic##_D_D_D) {                                                  \
    CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic,                               \
                                    D,                                      \
                                    D,                                      \
                                    D,                                      \
                                    kInputDouble##inp_d,                    \
                                    kInputDouble##inp_n,                    \
                                    kInputDouble##inp_m,                    \
                                    kInputDIndices);                        \
  }
4424
4425
// Defines lengthening by-element tests (e.g. smull/sqdmull by element):
// destination elements are twice the source width; the `##2` forms read the
// upper half of the Vn source.
#define DEFINE_TEST_NEON_BYELEMENT_DIFF(mnemonic, input_d, input_n, input_m) \
  TEST(mnemonic##_4S_4H_H) {                                                 \
    CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic,                                \
                                    4S,                                      \
                                    4H,                                      \
                                    H,                                       \
                                    kInput32bits##input_d,                   \
                                    kInput16bits##input_n,                   \
                                    kInput16bits##input_m,                   \
                                    kInputHIndices);                         \
  }                                                                          \
  TEST(mnemonic##2_4S_8H_H) {                                                \
    CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic##2,                             \
                                    4S,                                      \
                                    8H,                                      \
                                    H,                                       \
                                    kInput32bits##input_d,                   \
                                    kInput16bits##input_n,                   \
                                    kInput16bits##input_m,                   \
                                    kInputHIndices);                         \
  }                                                                          \
  TEST(mnemonic##_2D_2S_S) {                                                 \
    CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic,                                \
                                    2D,                                      \
                                    2S,                                      \
                                    S,                                       \
                                    kInput64bits##input_d,                   \
                                    kInput32bits##input_n,                   \
                                    kInput32bits##input_m,                   \
                                    kInputSIndices);                         \
  }                                                                          \
  TEST(mnemonic##2_2D_4S_S) {                                                \
    CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic##2,                             \
                                    2D,                                      \
                                    4S,                                      \
                                    S,                                       \
                                    kInput64bits##input_d,                   \
                                    kInput32bits##input_n,                   \
                                    kInput32bits##input_m,                   \
                                    kInputSIndices);                         \
  }
4467
// Scalar lengthening by-element tests: S<-HxH and D<-SxS.
#define DEFINE_TEST_NEON_BYELEMENT_DIFF_SCALAR( \
    mnemonic, input_d, input_n, input_m)        \
  TEST(mnemonic##_S_H_H) {                      \
    CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic,   \
                                    S,          \
                                    H,          \
                                    H,          \
                                    kInput32bits##input_d, \
                                    kInput16bits##input_n, \
                                    kInput16bits##input_m, \
                                    kInputHIndices);       \
  }                                             \
  TEST(mnemonic##_D_S_S) {                      \
    CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic,   \
                                    D,          \
                                    S,          \
                                    S,          \
                                    kInput64bits##input_d, \
                                    kInput32bits##input_n, \
                                    kInput32bits##input_m, \
                                    kInputSIndices);       \
  }
4490
4491
// Statement-safe wrapper for instructions taking two register operands and
// two immediates (e.g. ins); passes the MacroAssembler member-function
// pointer alongside the mnemonic, with identical Vd/Vn arrangements.
#define CALL_TEST_NEON_HELPER_2OP2IMM(                             \
    mnemonic, variant, input_d, input_imm1, input_n, input_imm2)   \
  {                                                                \
    CALL_TEST_NEON_HELPER_OpImmOpImm(&MacroAssembler::mnemonic,    \
                                     mnemonic,                     \
                                     variant,                      \
                                     variant,                      \
                                     input_d,                      \
                                     input_imm1,                   \
                                     input_n,                      \
                                     input_imm2);                  \
  }
4504
// Defines two-operand/two-immediate tests, one per element size, always using
// the full 128-bit arrangement (16B/8H/4S/2D).
#define DEFINE_TEST_NEON_2OP2IMM(                       \
    mnemonic, input_d, input_imm1, input_n, input_imm2) \
  TEST(mnemonic##_B) {                                  \
    CALL_TEST_NEON_HELPER_2OP2IMM(mnemonic,             \
                                  16B,                  \
                                  kInput8bits##input_d, \
                                  kInput8bitsImm##input_imm1, \
                                  kInput8bits##input_n,       \
                                  kInput8bitsImm##input_imm2);\
  }                                                     \
  TEST(mnemonic##_H) {                                  \
    CALL_TEST_NEON_HELPER_2OP2IMM(mnemonic,             \
                                  8H,                   \
                                  kInput16bits##input_d,      \
                                  kInput16bitsImm##input_imm1,\
                                  kInput16bits##input_n,      \
                                  kInput16bitsImm##input_imm2);\
  }                                                     \
  TEST(mnemonic##_S) {                                  \
    CALL_TEST_NEON_HELPER_2OP2IMM(mnemonic,             \
                                  4S,                   \
                                  kInput32bits##input_d,      \
                                  kInput32bitsImm##input_imm1,\
                                  kInput32bits##input_n,      \
                                  kInput32bitsImm##input_imm2);\
  }                                                     \
  TEST(mnemonic##_D) {                                  \
    CALL_TEST_NEON_HELPER_2OP2IMM(mnemonic,             \
                                  2D,                   \
                                  kInput64bits##input_d,      \
                                  kInput64bitsImm##input_imm1,\
                                  kInput64bits##input_n,      \
                                  kInput64bitsImm##input_imm2);\
  }
4539
4540
4541 // Advanced SIMD copy.
// Each invocation below stamps out one TEST per arrangement for the named
// instruction, driven by the canned input tables selected by its arguments.
DEFINE_TEST_NEON_2OP2IMM(
    ins, Basic, LaneCountFromZero, Basic, LaneCountFromZero)
DEFINE_TEST_NEON_2OPIMM_COPY(dup, Basic, LaneCountFromZero)


// Advanced SIMD scalar copy.
DEFINE_TEST_NEON_2OPIMM_SCALAR(dup, Basic, LaneCountFromZero)
4549
4550
// Advanced SIMD three same.
// Ordering mirrors the encoding-table order of the "three same" group; each
// line instantiates the full set of arrangement TESTs for one instruction.
DEFINE_TEST_NEON_3SAME_NO2D(shadd, Basic)
DEFINE_TEST_NEON_3SAME(sqadd, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(srhadd, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(shsub, Basic)
DEFINE_TEST_NEON_3SAME(sqsub, Basic)
DEFINE_TEST_NEON_3SAME(cmgt, Basic)
DEFINE_TEST_NEON_3SAME(cmge, Basic)
DEFINE_TEST_NEON_3SAME(sshl, Basic)
DEFINE_TEST_NEON_3SAME(sqshl, Basic)
DEFINE_TEST_NEON_3SAME(srshl, Basic)
DEFINE_TEST_NEON_3SAME(sqrshl, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(smax, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(smin, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(sabd, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(saba, Basic)
DEFINE_TEST_NEON_3SAME(add, Basic)
DEFINE_TEST_NEON_3SAME(cmtst, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(mla, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(mul, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(smaxp, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(sminp, Basic)
DEFINE_TEST_NEON_3SAME_HS(sqdmulh, Basic)
DEFINE_TEST_NEON_3SAME(addp, Basic)
DEFINE_TEST_NEON_3SAME_FP(fmaxnm, Basic)
DEFINE_TEST_NEON_3SAME_FP(fmla, Basic)
DEFINE_TEST_NEON_3SAME_FP(fadd, Basic)
DEFINE_TEST_NEON_3SAME_FP(fmulx, Basic)
DEFINE_TEST_NEON_3SAME_FP(fcmeq, Basic)
DEFINE_TEST_NEON_3SAME_FP(fmax, Basic)
DEFINE_TEST_NEON_3SAME_FP(frecps, Basic)
DEFINE_TEST_NEON_3SAME_8B_16B(and_, Basic)
DEFINE_TEST_NEON_3SAME_8B_16B(bic, Basic)
DEFINE_TEST_NEON_3SAME_FP(fminnm, Basic)
DEFINE_TEST_NEON_3SAME_FP(fmls, Basic)
DEFINE_TEST_NEON_3SAME_FP(fsub, Basic)
DEFINE_TEST_NEON_3SAME_FP(fmin, Basic)
DEFINE_TEST_NEON_3SAME_FP(frsqrts, Basic)
DEFINE_TEST_NEON_3SAME_8B_16B(orr, Basic)
DEFINE_TEST_NEON_3SAME_8B_16B(orn, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(uhadd, Basic)
DEFINE_TEST_NEON_3SAME(uqadd, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(urhadd, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(uhsub, Basic)
DEFINE_TEST_NEON_3SAME(uqsub, Basic)
DEFINE_TEST_NEON_3SAME(cmhi, Basic)
DEFINE_TEST_NEON_3SAME(cmhs, Basic)
DEFINE_TEST_NEON_3SAME(ushl, Basic)
DEFINE_TEST_NEON_3SAME(uqshl, Basic)
DEFINE_TEST_NEON_3SAME(urshl, Basic)
DEFINE_TEST_NEON_3SAME(uqrshl, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(umax, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(umin, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(uabd, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(uaba, Basic)
DEFINE_TEST_NEON_3SAME(sub, Basic)
DEFINE_TEST_NEON_3SAME(cmeq, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(mls, Basic)
DEFINE_TEST_NEON_3SAME_8B_16B(pmul, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(uminp, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(umaxp, Basic)
DEFINE_TEST_NEON_3SAME_HS(sqrdmulh, Basic)
DEFINE_TEST_NEON_3SAME_HS(sqrdmlah, Basic)
DEFINE_TEST_NEON_3SAME_HS(sqrdmlsh, Basic)
DEFINE_TEST_NEON_3DIFF_DOUBLE_WIDE(udot, Basic)
DEFINE_TEST_NEON_3DIFF_DOUBLE_WIDE(sdot, Basic)
DEFINE_TEST_NEON_3SAME_FP(fmaxnmp, Basic)
DEFINE_TEST_NEON_3SAME_FP(faddp, Basic)
DEFINE_TEST_NEON_3SAME_FP(fmul, Basic)
DEFINE_TEST_NEON_3SAME_FP(fcmge, Basic)
DEFINE_TEST_NEON_3SAME_FP(facge, Basic)
DEFINE_TEST_NEON_3SAME_FP(fmaxp, Basic)
DEFINE_TEST_NEON_3SAME_FP(fdiv, Basic)
DEFINE_TEST_NEON_3SAME_8B_16B(eor, Basic)
DEFINE_TEST_NEON_3SAME_8B_16B(bsl, Basic)
DEFINE_TEST_NEON_3SAME_FP(fminnmp, Basic)
DEFINE_TEST_NEON_3SAME_FP(fabd, Basic)
DEFINE_TEST_NEON_3SAME_FP(fcmgt, Basic)
DEFINE_TEST_NEON_3SAME_FP(facgt, Basic)
DEFINE_TEST_NEON_3SAME_FP(fminp, Basic)
DEFINE_TEST_NEON_3SAME_8B_16B(bit, Basic)
DEFINE_TEST_NEON_3SAME_8B_16B(bif, Basic)
4633
4634
// Advanced SIMD scalar three same.
// _D variants instantiate only the D form; _HS only H/S; plain SCALAR covers
// B/H/S/D — matching which scalar forms each instruction encodes.
DEFINE_TEST_NEON_3SAME_SCALAR(sqadd, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR(sqsub, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR_D(cmgt, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR_D(cmge, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR_D(sshl, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR(sqshl, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR_D(srshl, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR(sqrshl, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR_D(add, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR_D(cmtst, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR_HS(sqdmulh, Basic)
DEFINE_TEST_NEON_3SAME_FP_SCALAR(fmulx, Basic)
DEFINE_TEST_NEON_3SAME_FP_SCALAR(fcmeq, Basic)
DEFINE_TEST_NEON_3SAME_FP_SCALAR(frecps, Basic)
DEFINE_TEST_NEON_3SAME_FP_SCALAR(frsqrts, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR_D(uqadd, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR_D(uqsub, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR_D(cmhi, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR_D(cmhs, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR_D(ushl, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR(uqshl, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR_D(urshl, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR(uqrshl, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR_D(sub, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR_D(cmeq, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR_HS(sqrdmulh, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR_HS(sqrdmlah, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR_HS(sqrdmlsh, Basic)
DEFINE_TEST_NEON_3SAME_FP_SCALAR(fcmge, Basic)
DEFINE_TEST_NEON_3SAME_FP_SCALAR(facge, Basic)
DEFINE_TEST_NEON_3SAME_FP_SCALAR(fabd, Basic)
DEFINE_TEST_NEON_3SAME_FP_SCALAR(fcmgt, Basic)
DEFINE_TEST_NEON_3SAME_FP_SCALAR(facgt, Basic)
4669
4670
// Advanced SIMD FHM instructions (FMLAL, FMLSL).
// These are oddballs: they are encoded under the 3SAME group but behave
// quite differently (FP16 sources widening into a float accumulator).
DEFINE_TEST_NEON_FHM(fmlal, Basic, Basic, Basic)
DEFINE_TEST_NEON_FHM(fmlal2, Basic, Basic, Basic)
DEFINE_TEST_NEON_FHM(fmlsl, Basic, Basic, Basic)
DEFINE_TEST_NEON_FHM(fmlsl2, Basic, Basic, Basic)
4678
4679
// Advanced SIMD three different.
// LONG widen the result, WIDE widen one operand, NARROW narrow the result.
DEFINE_TEST_NEON_3DIFF_LONG(saddl, Basic)
DEFINE_TEST_NEON_3DIFF_WIDE(saddw, Basic)
DEFINE_TEST_NEON_3DIFF_LONG(ssubl, Basic)
DEFINE_TEST_NEON_3DIFF_WIDE(ssubw, Basic)
DEFINE_TEST_NEON_3DIFF_NARROW(addhn, Basic)
DEFINE_TEST_NEON_3DIFF_LONG(sabal, Basic)
DEFINE_TEST_NEON_3DIFF_NARROW(subhn, Basic)
DEFINE_TEST_NEON_3DIFF_LONG(sabdl, Basic)
DEFINE_TEST_NEON_3DIFF_LONG(smlal, Basic)
DEFINE_TEST_NEON_3DIFF_LONG_SD(sqdmlal, Basic)
DEFINE_TEST_NEON_3DIFF_LONG(smlsl, Basic)
DEFINE_TEST_NEON_3DIFF_LONG_SD(sqdmlsl, Basic)
DEFINE_TEST_NEON_3DIFF_LONG(smull, Basic)
DEFINE_TEST_NEON_3DIFF_LONG_SD(sqdmull, Basic)
DEFINE_TEST_NEON_3DIFF_LONG_8H(pmull, Basic)
DEFINE_TEST_NEON_3DIFF_LONG(uaddl, Basic)
DEFINE_TEST_NEON_3DIFF_WIDE(uaddw, Basic)
DEFINE_TEST_NEON_3DIFF_LONG(usubl, Basic)
DEFINE_TEST_NEON_3DIFF_WIDE(usubw, Basic)
DEFINE_TEST_NEON_3DIFF_NARROW(raddhn, Basic)
DEFINE_TEST_NEON_3DIFF_LONG(uabal, Basic)
DEFINE_TEST_NEON_3DIFF_NARROW(rsubhn, Basic)
DEFINE_TEST_NEON_3DIFF_LONG(uabdl, Basic)
DEFINE_TEST_NEON_3DIFF_LONG(umlal, Basic)
DEFINE_TEST_NEON_3DIFF_LONG(umlsl, Basic)
DEFINE_TEST_NEON_3DIFF_LONG(umull, Basic)
4707
4708
// Advanced SIMD scalar three different.
// Only the saturating-doubling multiplies exist in the scalar domain.
DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_SD(sqdmlal, Basic)
DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_SD(sqdmlsl, Basic)
DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_SD(sqdmull, Basic)
4713
4714
// Advanced SIMD scalar pairwise.
// addp is instantiated by hand — presumably because only the scalar D<-2D
// form exists, so no DEFINE_ macro fits; confirm if a macro is ever added.
TEST(addp_SCALAR) {
  CALL_TEST_NEON_HELPER_2DIFF(addp, D, 2D, kInput64bitsBasic);
}
DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(fmaxnmp, Basic)
DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(faddp, Basic)
DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(fmaxp, Basic)
DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(fminnmp, Basic)
DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(fminp, Basic)
4724
4725
// Advanced SIMD shift by immediate.
// TypeWidth covers shifts 1..width; TypeWidthFromZero covers 0..width-1;
// TypeWidthFromZeroToWidth covers 0..width (fbits for the conversions).
DEFINE_TEST_NEON_2OPIMM(sshr, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM(ssra, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM(srshr, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM(srsra, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM(shl, Basic, TypeWidthFromZero)
DEFINE_TEST_NEON_2OPIMM(sqshl, Basic, TypeWidthFromZero)
DEFINE_TEST_NEON_2OPIMM_NARROW(shrn, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_NARROW(rshrn, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_NARROW(sqshrn, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_NARROW(sqrshrn, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_LONG(sshll, Basic, TypeWidthFromZero)
DEFINE_TEST_NEON_2OPIMM_HSD(scvtf,
                            FixedPointConversions,
                            TypeWidthFromZeroToWidth)
DEFINE_TEST_NEON_2OPIMM_FP(fcvtzs, Conversions, TypeWidthFromZeroToWidth)
DEFINE_TEST_NEON_2OPIMM(ushr, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM(usra, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM(urshr, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM(ursra, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM(sri, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM(sli, Basic, TypeWidthFromZero)
DEFINE_TEST_NEON_2OPIMM(sqshlu, Basic, TypeWidthFromZero)
DEFINE_TEST_NEON_2OPIMM(uqshl, Basic, TypeWidthFromZero)
DEFINE_TEST_NEON_2OPIMM_NARROW(sqshrun, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_NARROW(sqrshrun, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_NARROW(uqshrn, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_NARROW(uqrshrn, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_LONG(ushll, Basic, TypeWidthFromZero)
DEFINE_TEST_NEON_2OPIMM_HSD(ucvtf,
                            FixedPointConversions,
                            TypeWidthFromZeroToWidth)
DEFINE_TEST_NEON_2OPIMM_FP(fcvtzu, Conversions, TypeWidthFromZeroToWidth)
4759
4760
// Advanced SIMD scalar shift by immediate.
// Scalar counterparts of the vector shift-by-immediate tests above; the
// _SCALAR_D variants exist only at 64 bits, matching the A64 encodings.
DEFINE_TEST_NEON_2OPIMM_SCALAR_D(sshr, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_SCALAR_D(ssra, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_SCALAR_D(srshr, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_SCALAR_D(srsra, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_SCALAR_D(shl, Basic, TypeWidthFromZero)
DEFINE_TEST_NEON_2OPIMM_SCALAR(sqshl, Basic, TypeWidthFromZero)
DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(sqshrn, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(sqrshrn, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_SCALAR_HSD(scvtf,
                                   FixedPointConversions,
                                   TypeWidthFromZeroToWidth)
DEFINE_TEST_NEON_2OPIMM_FP_SCALAR(fcvtzs, Conversions, TypeWidthFromZeroToWidth)
DEFINE_TEST_NEON_2OPIMM_SCALAR_D(ushr, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_SCALAR_D(usra, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_SCALAR_D(urshr, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_SCALAR_D(ursra, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_SCALAR_D(sri, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_SCALAR_D(sli, Basic, TypeWidthFromZero)
DEFINE_TEST_NEON_2OPIMM_SCALAR(sqshlu, Basic, TypeWidthFromZero)
DEFINE_TEST_NEON_2OPIMM_SCALAR(uqshl, Basic, TypeWidthFromZero)
DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(sqshrun, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(sqrshrun, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(uqshrn, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(uqrshrn, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_SCALAR_HSD(ucvtf,
                                   FixedPointConversions,
                                   TypeWidthFromZeroToWidth)
DEFINE_TEST_NEON_2OPIMM_FP_SCALAR(fcvtzu, Conversions, TypeWidthFromZeroToWidth)
4790
4791
4792 // Advanced SIMD two-register miscellaneous.
DEFINE_TEST_NEON_2SAME_NO2D(rev64, Basic)
DEFINE_TEST_NEON_2SAME_8B_16B(rev16, Basic)
DEFINE_TEST_NEON_2DIFF_LONG(saddlp, Basic)
DEFINE_TEST_NEON_2SAME(suqadd, Basic)
DEFINE_TEST_NEON_2SAME_NO2D(cls, Basic)
DEFINE_TEST_NEON_2SAME_8B_16B(cnt, Basic)
DEFINE_TEST_NEON_2DIFF_LONG(sadalp, Basic)
DEFINE_TEST_NEON_2SAME(sqabs, Basic)
// Integer compare against zero ("immediate" forms with a fixed #0 operand).
DEFINE_TEST_NEON_2OPIMM(cmgt, Basic, Zero)
DEFINE_TEST_NEON_2OPIMM(cmeq, Basic, Zero)
DEFINE_TEST_NEON_2OPIMM(cmlt, Basic, Zero)
DEFINE_TEST_NEON_2SAME(abs, Basic)
DEFINE_TEST_NEON_2DIFF_NARROW(xtn, Basic)
DEFINE_TEST_NEON_2DIFF_NARROW(sqxtn, Basic)
DEFINE_TEST_NEON_2DIFF_FP_NARROW(fcvtn, Conversions)
DEFINE_TEST_NEON_2DIFF_FP_LONG(fcvtl, Conversions)
DEFINE_TEST_NEON_2SAME_FP_FP16(frintn, Conversions)
DEFINE_TEST_NEON_2SAME_FP_FP16(frintm, Conversions)
DEFINE_TEST_NEON_2SAME_FP_FP16(fcvtns, Conversions)
DEFINE_TEST_NEON_2SAME_FP_FP16(fcvtms, Conversions)
DEFINE_TEST_NEON_2SAME_FP_FP16(fcvtas, Conversions)
// SCVTF (vector, integer) covered by SCVTF(vector, fixed point) with fbits 0.
// FP compare against zero.
DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(fcmgt, Basic, Zero)
DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(fcmeq, Basic, Zero)
DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(fcmlt, Basic, Zero)
DEFINE_TEST_NEON_2SAME_FP_FP16(fabs, Basic)
DEFINE_TEST_NEON_2SAME_FP_FP16(frintp, Conversions)
DEFINE_TEST_NEON_2SAME_FP_FP16(frintz, Conversions)
DEFINE_TEST_NEON_2SAME_FP_FP16(fcvtps, Conversions)
// FCVTZS(vector, integer) covered by FCVTZS(vector, fixed point) with fbits 0.
DEFINE_TEST_NEON_2SAME_2S_4S(urecpe, Basic)
DEFINE_TEST_NEON_2SAME_FP_FP16(frecpe, Basic)
DEFINE_TEST_NEON_2SAME_BH(rev32, Basic)
DEFINE_TEST_NEON_2DIFF_LONG(uaddlp, Basic)
DEFINE_TEST_NEON_2SAME(usqadd, Basic)
DEFINE_TEST_NEON_2SAME_NO2D(clz, Basic)
DEFINE_TEST_NEON_2DIFF_LONG(uadalp, Basic)
DEFINE_TEST_NEON_2SAME(sqneg, Basic)
DEFINE_TEST_NEON_2OPIMM(cmge, Basic, Zero)
DEFINE_TEST_NEON_2OPIMM(cmle, Basic, Zero)
DEFINE_TEST_NEON_2SAME(neg, Basic)
DEFINE_TEST_NEON_2DIFF_NARROW(sqxtun, Basic)
DEFINE_TEST_NEON_2OPIMM_LONG(shll, Basic, SHLL)
DEFINE_TEST_NEON_2DIFF_NARROW(uqxtn, Basic)
DEFINE_TEST_NEON_2DIFF_FP_NARROW_2S(fcvtxn, Conversions)
// frint32*/frint64* use the FP-only helper (no FP16 variants).
DEFINE_TEST_NEON_2SAME_FP(frint32x, Conversions)
DEFINE_TEST_NEON_2SAME_FP(frint64x, Conversions)
DEFINE_TEST_NEON_2SAME_FP(frint32z, Conversions)
DEFINE_TEST_NEON_2SAME_FP(frint64z, Conversions)
DEFINE_TEST_NEON_2SAME_FP_FP16(frinta, Conversions)
DEFINE_TEST_NEON_2SAME_FP_FP16(frintx, Conversions)
DEFINE_TEST_NEON_2SAME_FP_FP16(fcvtnu, Conversions)
DEFINE_TEST_NEON_2SAME_FP_FP16(fcvtmu, Conversions)
DEFINE_TEST_NEON_2SAME_FP_FP16(fcvtau, Conversions)
// UCVTF (vector, integer) covered by UCVTF(vector, fixed point) with fbits 0.
// "not_" rather than "not": plain "not" is a C++ alternative token.
DEFINE_TEST_NEON_2SAME_8B_16B(not_, Basic)
DEFINE_TEST_NEON_2SAME_8B_16B(rbit, Basic)
DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(fcmge, Basic, Zero)
DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(fcmle, Basic, Zero)
DEFINE_TEST_NEON_2SAME_FP_FP16(fneg, Basic)
DEFINE_TEST_NEON_2SAME_FP_FP16(frinti, Conversions)
DEFINE_TEST_NEON_2SAME_FP_FP16(fcvtpu, Conversions)
// FCVTZU(vector, integer) covered by FCVTZU(vector, fixed point) with fbits 0.
DEFINE_TEST_NEON_2SAME_2S_4S(ursqrte, Basic)
DEFINE_TEST_NEON_2SAME_FP_FP16(frsqrte, Basic)
DEFINE_TEST_NEON_2SAME_FP_FP16(fsqrt, Basic)
4859
4860
// Advanced SIMD scalar two-register miscellaneous.
DEFINE_TEST_NEON_2SAME_SCALAR(suqadd, Basic)
DEFINE_TEST_NEON_2SAME_SCALAR(sqabs, Basic)
// Scalar integer compare against zero: tested in the D-sized form only.
DEFINE_TEST_NEON_2OPIMM_SCALAR_D(cmgt, Basic, Zero)
DEFINE_TEST_NEON_2OPIMM_SCALAR_D(cmeq, Basic, Zero)
DEFINE_TEST_NEON_2OPIMM_SCALAR_D(cmlt, Basic, Zero)
DEFINE_TEST_NEON_2SAME_SCALAR_D(abs, Basic)
DEFINE_TEST_NEON_2DIFF_SCALAR_NARROW(sqxtn, Basic)
DEFINE_TEST_NEON_2SAME_FP_FP16_SCALAR(fcvtns, Conversions)
DEFINE_TEST_NEON_2SAME_FP_FP16_SCALAR(fcvtms, Conversions)
DEFINE_TEST_NEON_2SAME_FP_FP16_SCALAR(fcvtas, Conversions)
// SCVTF (vector, integer) covered by SCVTF(vector, fixed point) with fbits 0.
DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_HSD(fcmgt, Basic, Zero)
DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_HSD(fcmeq, Basic, Zero)
DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_HSD(fcmlt, Basic, Zero)
DEFINE_TEST_NEON_2SAME_FP_FP16_SCALAR(fcvtps, Conversions)
// FCVTZS(vector, integer) covered by FCVTZS(vector, fixed point) with fbits 0.
DEFINE_TEST_NEON_2SAME_FP_FP16_SCALAR(frecpe, Basic)
DEFINE_TEST_NEON_2SAME_FP_FP16_SCALAR(frecpx, Basic)
DEFINE_TEST_NEON_2SAME_SCALAR(usqadd, Basic)
DEFINE_TEST_NEON_2SAME_SCALAR(sqneg, Basic)
DEFINE_TEST_NEON_2OPIMM_SCALAR_D(cmge, Basic, Zero)
DEFINE_TEST_NEON_2OPIMM_SCALAR_D(cmle, Basic, Zero)
DEFINE_TEST_NEON_2SAME_SCALAR_D(neg, Basic)
DEFINE_TEST_NEON_2DIFF_SCALAR_NARROW(sqxtun, Basic)
DEFINE_TEST_NEON_2DIFF_SCALAR_NARROW(uqxtn, Basic)
// FCVTXN (scalar): written out by hand rather than macro-generated. Only the
// S (result) <- D (input) form is exercised, fed with the double-precision
// conversion input set. NOTE(review): presumably this is because no other
// scalar form applies -- confirm against the helper macro definitions.
TEST(fcvtxn_SCALAR) {
  CALL_TEST_NEON_HELPER_2DIFF(fcvtxn, S, D, kInputDoubleConversions);
}
// Scalar FP-to-unsigned conversions and remaining scalar two-register
// miscellaneous tests. (The fcvtnu line previously carried a duplicated
// cross-reference artifact fused before the macro invocation; a single clean
// invocation is kept.)
DEFINE_TEST_NEON_2SAME_FP_FP16_SCALAR(fcvtnu, Conversions)
DEFINE_TEST_NEON_2SAME_FP_FP16_SCALAR(fcvtmu, Conversions)
DEFINE_TEST_NEON_2SAME_FP_FP16_SCALAR(fcvtau, Conversions)
// UCVTF (vector, integer) covered by UCVTF(vector, fixed point) with fbits 0.
DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_HSD(fcmge, Basic, Zero)
DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_HSD(fcmle, Basic, Zero)
DEFINE_TEST_NEON_2SAME_FP_FP16_SCALAR(fcvtpu, Conversions)
// FCVTZU(vector, integer) covered by FCVTZU(vector, fixed point) with fbits 0.
DEFINE_TEST_NEON_2SAME_FP_FP16_SCALAR(frsqrte, Basic)
4899
4900
// Advanced SIMD across lanes (horizontal reductions to a single lane).
DEFINE_TEST_NEON_ACROSS_LONG(saddlv, Basic)
DEFINE_TEST_NEON_ACROSS(smaxv, Basic)
DEFINE_TEST_NEON_ACROSS(sminv, Basic)
DEFINE_TEST_NEON_ACROSS(addv, Basic)
DEFINE_TEST_NEON_ACROSS_LONG(uaddlv, Basic)
DEFINE_TEST_NEON_ACROSS(umaxv, Basic)
DEFINE_TEST_NEON_ACROSS(uminv, Basic)
// FP max/min reductions use the dedicated FP helper.
DEFINE_TEST_NEON_ACROSS_FP(fmaxnmv, Basic)
DEFINE_TEST_NEON_ACROSS_FP(fmaxv, Basic)
DEFINE_TEST_NEON_ACROSS_FP(fminnmv, Basic)
DEFINE_TEST_NEON_ACROSS_FP(fminv, Basic)
4913
4914
// Advanced SIMD permute (UZP/TRN/ZIP); these reuse the generic 3SAME helper.
DEFINE_TEST_NEON_3SAME(uzp1, Basic)
DEFINE_TEST_NEON_3SAME(trn1, Basic)
DEFINE_TEST_NEON_3SAME(zip1, Basic)
DEFINE_TEST_NEON_3SAME(uzp2, Basic)
DEFINE_TEST_NEON_3SAME(trn2, Basic)
DEFINE_TEST_NEON_3SAME(zip2, Basic)
4922
4923
// Advanced SIMD vector x indexed element. The three trailing macro arguments
// name the input sets for the two vector operands and the indexed element.
DEFINE_TEST_NEON_BYELEMENT_DIFF(smlal, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT_DIFF(sqdmlal, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT_DIFF(smlsl, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT_DIFF(sqdmlsl, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT(mul, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT_DIFF(smull, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT_DIFF(sqdmull, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT(sqdmulh, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT(sqrdmulh, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT(sqrdmlah, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT(sqrdmlsh, Basic, Basic, Basic)
// Dot products use a dedicated helper macro.
DEFINE_TEST_NEON_BYELEMENT_DOT_PRODUCT(udot, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT_DOT_PRODUCT(sdot, Basic, Basic, Basic)
DEFINE_TEST_NEON_FP_BYELEMENT(fmla, Basic, Basic, Basic)
DEFINE_TEST_NEON_FP_BYELEMENT(fmls, Basic, Basic, Basic)
DEFINE_TEST_NEON_FP_BYELEMENT(fmul, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT(mla, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT_DIFF(umlal, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT(mls, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT_DIFF(umlsl, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT_DIFF(umull, Basic, Basic, Basic)
DEFINE_TEST_NEON_FP_BYELEMENT(fmulx, Basic, Basic, Basic)
4947
4948
// Advanced SIMD scalar x indexed element.
DEFINE_TEST_NEON_BYELEMENT_DIFF_SCALAR(sqdmlal, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT_DIFF_SCALAR(sqdmlsl, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT_DIFF_SCALAR(sqdmull, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT_SCALAR(sqdmulh, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT_SCALAR(sqrdmulh, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT_SCALAR(sqrdmlah, Basic, Basic, Basic)
DEFINE_TEST_NEON_BYELEMENT_SCALAR(sqrdmlsh, Basic, Basic, Basic)
DEFINE_TEST_NEON_FP_BYELEMENT_SCALAR(fmla, Basic, Basic, Basic)
DEFINE_TEST_NEON_FP_BYELEMENT_SCALAR(fmls, Basic, Basic, Basic)
DEFINE_TEST_NEON_FP_BYELEMENT_SCALAR(fmul, Basic, Basic, Basic)
DEFINE_TEST_NEON_FP_BYELEMENT_SCALAR(fmulx, Basic, Basic, Basic)


// FMLAL/FMLAL2/FMLSL/FMLSL2 by-element forms (FHM helper macro).
DEFINE_TEST_NEON_FHM_BYELEMENT(fmlal, Basic, Basic, Basic)
DEFINE_TEST_NEON_FHM_BYELEMENT(fmlal2, Basic, Basic, Basic)
DEFINE_TEST_NEON_FHM_BYELEMENT(fmlsl, Basic, Basic, Basic)
DEFINE_TEST_NEON_FHM_BYELEMENT(fmlsl2, Basic, Basic, Basic)
4967
4968
4969 #undef __
4970 #define __ masm->
4971
4972 #if defined(VIXL_INCLUDE_SIMULATOR_AARCH64) && \
4973 defined(VIXL_HAS_ABI_SUPPORT) && __cplusplus >= 201103L && \
4974 (defined(__clang__) || GCC_VERSION_OR_NEWER(4, 9, 1))
4975
4976 // Generate a function that stores zero to a hard-coded address.
4977 Instruction* GenerateStoreZero(MacroAssembler* masm, int32_t* target) {
4978 masm->Reset();
4979
4980 UseScratchRegisterScope temps(masm);
4981 Register temp = temps.AcquireX();
4982 __ Mov(temp, reinterpret_cast<intptr_t>(target));
4983 __ Str(wzr, MemOperand(temp));
4984 __ Ret();
4985
4986 masm->FinalizeCode();
4987 return masm->GetBuffer()->GetStartAddress<Instruction*>();
4988 }
4989
4990
4991 // Generate a function that stores the `int32_t` argument to a hard-coded
4992 // address.
4993 // In this example and the other below, we use the `abi` object to retrieve
4994 // argument and return locations even though we could easily hard code them.
4995 // This mirrors how more generic code (e.g. templated) user would use these
4996 // mechanisms.
GenerateStoreInput(MacroAssembler * masm,int32_t * target)4997 Instruction* GenerateStoreInput(MacroAssembler* masm, int32_t* target) {
4998 masm->Reset();
4999
5000 ABI abi;
5001 Register input =
5002 Register(abi.GetNextParameterGenericOperand<int32_t>().GetCPURegister());
5003
5004 UseScratchRegisterScope temps(masm);
5005 Register temp = temps.AcquireX();
5006 __ Mov(temp, reinterpret_cast<intptr_t>(target));
5007 __ Str(input, MemOperand(temp));
5008 __ Ret();
5009
5010 masm->FinalizeCode();
5011 return masm->GetBuffer()->GetStartAddress<Instruction*>();
5012 }
5013
5014
5015 // A minimal implementation of a `pow` function.
GeneratePow(MacroAssembler * masm,unsigned pow)5016 Instruction* GeneratePow(MacroAssembler* masm, unsigned pow) {
5017 masm->Reset();
5018
5019 ABI abi;
5020 Register input =
5021 Register(abi.GetNextParameterGenericOperand<int64_t>().GetCPURegister());
5022 Register result =
5023 Register(abi.GetReturnGenericOperand<int64_t>().GetCPURegister());
5024 UseScratchRegisterScope temps(masm);
5025 Register temp = temps.AcquireX();
5026
5027 __ Mov(temp, 1);
5028 for (unsigned i = 0; i < pow; i++) {
5029 __ Mul(temp, temp, input);
5030 }
5031 __ Mov(result, temp);
5032 __ Ret();
5033
5034 masm->FinalizeCode();
5035 return masm->GetBuffer()->GetStartAddress<Instruction*>();
5036 }
5037
5038
GenerateSum(MacroAssembler * masm)5039 Instruction* GenerateSum(MacroAssembler* masm) {
5040 masm->Reset();
5041
5042 ABI abi;
5043 VRegister input_1 =
5044 VRegister(abi.GetNextParameterGenericOperand<float>().GetCPURegister());
5045 Register input_2 =
5046 Register(abi.GetNextParameterGenericOperand<int64_t>().GetCPURegister());
5047 VRegister input_3 =
5048 VRegister(abi.GetNextParameterGenericOperand<double>().GetCPURegister());
5049 VRegister result =
5050 VRegister(abi.GetReturnGenericOperand<double>().GetCPURegister());
5051
5052 UseScratchRegisterScope temps(masm);
5053 VRegister temp = temps.AcquireD();
5054
5055 __ Fcvt(input_1.D(), input_1);
5056 __ Scvtf(temp, input_2);
5057 __ Fadd(temp, temp, input_1.D());
5058 __ Fadd(result, temp, input_3);
5059 __ Ret();
5060
5061 masm->FinalizeCode();
5062 return masm->GetBuffer()->GetStartAddress<Instruction*>();
5063 }
5064
5065
// End-to-end checks for Simulator::RunFrom across several call signatures.
TEST(RunFrom) {
  SETUP_WITH_FEATURES(CPUFeatures::kFP);

  // No arguments, `void` return: observe the effect through memory.
  int32_t value = 0xbad;
  simulator.RunFrom(GenerateStoreZero(&masm, &value));
  VIXL_CHECK(value == 0);

  // One argument, `void` return.
  int32_t argument = 0xf00d;
  simulator.RunFrom<void, int32_t>(GenerateStoreInput(&masm, &value), argument);
  VIXL_CHECK(value == 0xf00d);

  // One argument with an `int64_t` result.
  int64_t pow_result =
      simulator.RunFrom<int64_t, int64_t>(GeneratePow(&masm, 0), 0xbad);
  VIXL_CHECK(pow_result == 1);
  pow_result = simulator.RunFrom<int64_t, int64_t>(GeneratePow(&masm, 1), 123);
  VIXL_CHECK(pow_result == 123);
  pow_result = simulator.RunFrom<int64_t, int64_t>(GeneratePow(&masm, 10), 2);
  VIXL_CHECK(pow_result == 1024);

  // Multiple arguments passed in both general-purpose and FP registers.
  double sum_result =
      simulator.RunFrom<double, float, int64_t, double>(GenerateSum(&masm),
                                                        1.0,
                                                        2,
                                                        3.0);
  VIXL_CHECK(sum_result == 6.0);
}
5097 #endif
5098
5099
5100 } // namespace aarch64
5101 } // namespace vixl
5102