1 // Copyright 2015, VIXL authors
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are met:
6 //
7 // * Redistributions of source code must retain the above copyright notice,
8 // this list of conditions and the following disclaimer.
9 // * Redistributions in binary form must reproduce the above copyright notice,
10 // this list of conditions and the following disclaimer in the documentation
11 // and/or other materials provided with the distribution.
12 // * Neither the name of ARM Limited nor the names of its contributors may be
13 // used to endorse or promote products derived from this software without
14 // specific prior written permission.
15 //
16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
17 // ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 // WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
20 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26
#include <cfloat>
#include <cstdio>
#include <sstream>
#include <vector>
30
31 #include "test-runner.h"
32 #include "test-utils.h"
33
34 #include "aarch64/cpu-features-auditor-aarch64.h"
35 #include "aarch64/macro-assembler-aarch64.h"
36 #include "aarch64/simulator-aarch64.h"
37 #include "aarch64/test-simulator-inputs-aarch64.h"
38 #include "aarch64/test-simulator-traces-aarch64.h"
39 #include "aarch64/test-utils-aarch64.h"
40
41 namespace vixl {
42 namespace aarch64 {
43
44 // ==== Simulator Tests ====
45 //
46 // These simulator tests check instruction behaviour against a trace taken from
47 // real AArch64 hardware. The same test code is used to generate the trace; the
48 // results are printed to stdout when the test is run with
49 // --generate_test_trace.
50 //
51 // The input lists and expected results are stored in test/traces. The expected
52 // results can be regenerated using tools/generate_simulator_traces.py. Adding a
53 // test for a new instruction is described at the top of
54 // test-simulator-traces-aarch64.h.
55
// `masm` is declared by SETUP_WITH_FEATURES(); `__` is the conventional VIXL
// shorthand for emitting instructions through it.
#define __ masm.
// Prefix simulator test names so the runner can select them as a group.
#define TEST(name) TEST_(AARCH64_SIM_##name)

// Set up with an empty feature set; tests that need specific CPU features
// call SETUP_WITH_FEATURES() directly.
#define SETUP() SETUP_WITH_FEATURES(CPUFeatures())
60
61 #ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
62
// Simulator configuration: create a MacroAssembler (with the requested CPU
// features enabled) and a Simulator that the generated code will run on.
#define SETUP_WITH_FEATURES(...)                 \
  MacroAssembler masm;                           \
  masm.SetCPUFeatures(CPUFeatures(__VA_ARGS__)); \
  Decoder decoder;                               \
  Simulator simulator(&decoder);                 \
  simulator.SetColouredTrace(Test::coloured_trace());

// Reset the assembler and simulator, emit the standard prologue, and enable
// any tracing requested on the command line.
#define START()                                                          \
  masm.Reset();                                                          \
  simulator.ResetState();                                                \
  __ PushCalleeSavedRegisters();                                         \
  /* The infrastructure code hasn't been covered at the moment, e.g. */  \
  /* prologue/epilogue. Suppress tagging mis-match exception before */   \
  /* this point. */                                                      \
  if (masm.GetCPUFeatures()->Has(CPUFeatures::kMTE)) {                   \
    __ Hlt(DebugHltOpcode::kMTEActive);                                  \
  }                                                                      \
  if (Test::trace_reg()) {                                               \
    __ Trace(LOG_STATE, TRACE_ENABLE);                                   \
  }                                                                      \
  if (Test::trace_write()) {                                             \
    __ Trace(LOG_WRITE, TRACE_ENABLE);                                   \
  }                                                                      \
  if (Test::trace_sim()) {                                               \
    __ Trace(LOG_DISASM, TRACE_ENABLE);                                  \
  }

// Emit the standard epilogue (disable tracing, restore registers, return) and
// finalize the generated code.
#define END()                                                \
  if (masm.GetCPUFeatures()->Has(CPUFeatures::kMTE)) {       \
    __ Hlt(DebugHltOpcode::kMTEInactive);                    \
  }                                                          \
  __ Trace(LOG_ALL, TRACE_DISABLE);                          \
  __ PopCalleeSavedRegisters();                              \
  __ Ret();                                                  \
  masm.FinalizeCode()

// Run the generated code on the simulator. The simulator supports every
// feature, so `*skipped` is always cleared.
#define TRY_RUN(skipped)                                                  \
  DISASSEMBLE();                                                          \
  simulator.RunFrom(masm.GetBuffer()->GetStartAddress<Instruction*>());   \
  /* The simulator can run every test. */                                 \
  *skipped = false
104
105 #ifdef VIXL_ENABLE_IMPLICIT_CHECKS
106 // The signal handler needs access to the simulator.
107 Simulator* gImplicitCheckSim;
108
109 #ifdef __x86_64__
110 #include <signal.h>
111 #include <ucontext.h>
HandleSegFault(int sig,siginfo_t * info,void * context)112 void HandleSegFault(int sig, siginfo_t* info, void* context) {
113 USE(sig);
114 USE(info);
115 Simulator* sim = gImplicitCheckSim;
116
117 // Did the signal come from the simulator?
118 ucontext_t* uc = reinterpret_cast<ucontext_t*>(context);
119 uintptr_t fault_pc = uc->uc_mcontext.gregs[REG_RIP];
120 VIXL_CHECK(sim->IsSimulatedMemoryAccess(fault_pc));
121
122 // Increment the counter (x1) each time we handle a signal.
123 int64_t counter = reinterpret_cast<int64_t>(sim->ReadXRegister(1));
124 sim->WriteXRegister(1, ++counter);
125
126 // Return to the VIXL memory access continuation point, which is also the
127 // next instruction, after this handler.
128 uc->uc_mcontext.gregs[REG_RIP] = sim->GetSignalReturnAddress();
129 // Return that the memory access failed.
130 uc->uc_mcontext.gregs[REG_RAX] =
131 static_cast<greg_t>(MemoryAccessResult::Failure);
132 }
133 #endif // __x86_64__
134
// Start an implicit check test with a counter and start label so the number of
// faults can be counted. Note: each instruction after the start will be
// expected to fault.
#define START_IMPLICIT_CHECK()                                                 \
  gImplicitCheckSim = &simulator;                                              \
  /* Set up a signal handler to count the number of faulting instructions. */ \
  /* The struct must be zero-initialised and SA_SIGINFO must be set: */       \
  /* without SA_SIGINFO the kernel uses the overlapping sa_handler member */  \
  /* instead of sa_sigaction, and leaving sa_flags/sa_mask uninitialised */   \
  /* is undefined behaviour. */                                               \
  struct sigaction sa = {};                                                    \
  sa.sa_sigaction = HandleSegFault;                                            \
  sa.sa_flags = SA_SIGINFO;                                                    \
  sigemptyset(&sa.sa_mask);                                                    \
  sigaction(SIGSEGV, &sa, NULL);                                               \
  START();                                                                     \
  /* Reset the counter. */                                                     \
  __ Mov(x1, 0);                                                               \
  /* Use a consistent bad address. */                                          \
  __ Mov(x15, xzr);                                                            \
  __ Mov(ip0, xzr);                                                            \
  /* Load an amount of data to load. */                                        \
  __ Mov(ip1, 4096);                                                           \
  [[maybe_unused]] MemOperand bad_memory = MemOperand(ip0);                    \
  if (masm.GetCPUFeatures()->Has(CPUFeatures::kSVE)) {                         \
    /* Turn on all lanes to ensure all loads/stores are tested. */             \
    __ Ptrue(p0.VnB());                                                        \
    __ Ptrue(p1.VnB());                                                        \
    __ Ptrue(p2.VnB());                                                        \
    __ Ptrue(p3.VnB());                                                        \
    __ Ptrue(p4.VnB());                                                        \
    __ Ptrue(p5.VnB());                                                        \
    __ Ptrue(p6.VnB());                                                        \
    __ Ptrue(p7.VnB());                                                        \
    __ Ptrue(p8.VnB());                                                        \
    __ Ptrue(p9.VnB());                                                        \
    __ Ptrue(p10.VnB());                                                       \
    __ Ptrue(p11.VnB());                                                       \
    __ Ptrue(p12.VnB());                                                       \
    __ Ptrue(p13.VnB());                                                       \
    __ Ptrue(p14.VnB());                                                       \
    __ Ptrue(p15.VnB());                                                       \
  }                                                                            \
  Label l_start, l_end;                                                        \
  __ Bind(&l_start);
174
// Finish an implicit check test: bind the end label and return the fault
// counter (x1, maintained by HandleSegFault) in x0.
#define END_IMPLICIT_CHECK()  \
  __ Bind(&l_end);            \
  /* Return the counter. */   \
  __ Mov(x0, x1);             \
  END();

// Run the generated code and check that every instruction between l_start and
// l_end raised exactly one (handled) segfault.
#define TRY_RUN_IMPLICIT_CHECK()                                              \
  bool skipped;                                                               \
  TRY_RUN(&skipped);                                                          \
  /* Implicit checks should only be used with the simulator. */               \
  VIXL_ASSERT(!skipped);                                                      \
  /* Check that each load/store instruction generated a segfault that was */  \
  /* raised and dealt with. */                                                \
  size_t result = simulator.ReadXRegister(0);                                 \
  size_t num_of_faulting_instr = masm.GetSizeOfCodeGeneratedSince(&l_start) - \
                                 masm.GetSizeOfCodeGeneratedSince(&l_end);    \
  VIXL_CHECK((result * kInstructionSize) == num_of_faulting_instr);
192
193 #endif // VIXL_ENABLE_IMPLICIT_CHECKS
194
195 #else // VIXL_INCLUDE_SIMULATOR_AARCH64
196
// Native (non-simulator) configuration: the generated code runs directly on
// the host AArch64 CPU.
#define SETUP_WITH_FEATURES(...)                 \
  MacroAssembler masm;                           \
  masm.SetCPUFeatures(CPUFeatures(__VA_ARGS__)); \
  CPU::SetUp()

#define START()  \
  masm.Reset();  \
  __ PushCalleeSavedRegisters()

#define END()                   \
  __ PopCalleeSavedRegisters(); \
  __ Ret();                     \
  masm.FinalizeCode()

// Run the generated code natively, but only if the host supports every
// feature the code requires; otherwise print a "SKIPPED" message (matched by
// the test tooling) and set `*skipped`.
#define TRY_RUN(skipped)                                                      \
  DISASSEMBLE();                                                              \
  /* If the test uses features that the current CPU doesn't support, don't */ \
  /* attempt to run it natively. */                                           \
  {                                                                           \
    Decoder decoder;                                                          \
    /* TODO: Once available, use runtime feature detection. The use of */     \
    /* AArch64LegacyBaseline is a stopgap. */                                 \
    const CPUFeatures& this_machine = CPUFeatures::AArch64LegacyBaseline();   \
    CPUFeaturesAuditor auditor(&decoder, this_machine);                       \
    CodeBuffer* buffer = masm.GetBuffer();                                    \
    decoder.Decode(buffer->GetStartAddress<Instruction*>(),                   \
                   buffer->GetEndAddress<Instruction*>());                    \
    const CPUFeatures& requirements = auditor.GetSeenFeatures();              \
    if (this_machine.Has(requirements)) {                                     \
      masm.GetBuffer()->SetExecutable();                                      \
      ExecuteMemory(buffer->GetStartAddress<byte*>(),                         \
                    masm.GetSizeOfCodeGenerated());                           \
      masm.GetBuffer()->SetWritable();                                        \
      *skipped = false;                                                       \
    } else {                                                                  \
      std::stringstream os;                                                   \
      /* Note: This message needs to match REGEXP_MISSING_FEATURES from */    \
      /* tools/threaded_test.py. */                                           \
      os << "SKIPPED: Missing features: { ";                                  \
      os << requirements.Without(this_machine) << " }\n";                     \
      printf("%s", os.str().c_str());                                         \
      *skipped = true;                                                        \
    }                                                                         \
  }
241
242
243 #endif // VIXL_INCLUDE_SIMULATOR_AARCH64
244
245
// If --disassemble was requested, print a disassembly of everything currently
// in the assembler's buffer.
#define DISASSEMBLE()                                               \
  if (Test::disassemble()) {                                        \
    PrintDisassembler disasm(stdout);                               \
    CodeBuffer* buffer = masm.GetBuffer();                          \
    Instruction* start = buffer->GetStartAddress<Instruction*>();   \
    Instruction* end = buffer->GetEndAddress<Instruction*>();       \
    disasm.DisassembleBuffer(start, end);                           \
  }
254
// The maximum number of errors to report in detail for each test. Further
// mismatches are still counted (and still fail the test) but are not printed.
static const unsigned kErrorReportLimit = 8;
257
258
259 // Overloaded versions of RawbitsToDouble and RawbitsToFloat for use in the
260 // templated test functions.
rawbits_to_fp(uint32_t bits)261 static float rawbits_to_fp(uint32_t bits) { return RawbitsToFloat(bits); }
262
rawbits_to_fp(uint64_t bits)263 static double rawbits_to_fp(uint64_t bits) { return RawbitsToDouble(bits); }
264
// The rawbits_to_fp functions are only used for printing decimal values so we
// just approximate FP16 as double.
static double rawbits_to_fp(uint16_t bits) {
  // kIgnoreDefaultNaN: propagate the exact NaN payload rather than
  // substituting the default NaN, so printed values match the raw bits.
  return FPToDouble(RawbitsToFloat16(bits), kIgnoreDefaultNaN);
}
270
271
// MacroAssembler member function pointers to pass to the test dispatchers.
// Each typedef matches the signature of one family of MacroAssembler
// instruction emitters (e.g. Fadd, Fcmp, Fcvtzs, ...).
typedef void (MacroAssembler::*Test1OpFPHelper_t)(const VRegister& fd,
                                                  const VRegister& fn);
typedef void (MacroAssembler::*Test2OpFPHelper_t)(const VRegister& fd,
                                                  const VRegister& fn,
                                                  const VRegister& fm);
typedef void (MacroAssembler::*Test3OpFPHelper_t)(const VRegister& fd,
                                                  const VRegister& fn,
                                                  const VRegister& fm,
                                                  const VRegister& fa);
typedef void (MacroAssembler::*TestFPCmpHelper_t)(const VRegister& fn,
                                                  const VRegister& fm);
typedef void (MacroAssembler::*TestFPCmpZeroHelper_t)(const VRegister& fn,
                                                      double value);
typedef void (MacroAssembler::*TestFPToIntHelper_t)(const Register& rd,
                                                    const VRegister& fn);
typedef void (MacroAssembler::*TestFPToFixedHelper_t)(const Register& rd,
                                                      const VRegister& fn,
                                                      int fbits);
typedef void (MacroAssembler::*TestFixedToFPHelper_t)(const VRegister& fd,
                                                      const Register& rn,
                                                      int fbits);
// TODO: 'Test2OpNEONHelper_t' and 'Test2OpFPHelper_t' can be
// consolidated into one routine.
typedef void (MacroAssembler::*Test1OpNEONHelper_t)(const VRegister& vd,
                                                    const VRegister& vn);
typedef void (MacroAssembler::*Test2OpNEONHelper_t)(const VRegister& vd,
                                                    const VRegister& vn,
                                                    const VRegister& vm);
typedef void (MacroAssembler::*TestByElementNEONHelper_t)(const VRegister& vd,
                                                          const VRegister& vn,
                                                          const VRegister& vm,
                                                          int vm_index);
typedef void (MacroAssembler::*TestOpImmOpImmVdUpdateNEONHelper_t)(
    const VRegister& vd, int imm1, const VRegister& vn, int imm2);

// This helps using the same typename for both the function pointer
// and the array of immediates passed to helper routines.
template <typename T>
class Test2OpImmediateNEONHelper_t {
 public:
  typedef void (MacroAssembler::*mnemonic)(const VRegister& vd,
                                           const VRegister& vn,
                                           T imm);
};
317
318
// Maximum number of hex characters required to represent values of either
// templated type.
template <typename Ta, typename Tb>
static unsigned MaxHexCharCount() {
  const size_t larger_size = std::max(sizeof(Ta), sizeof(Tb));
  // Each byte needs two hex digits.
  return static_cast<unsigned>(larger_size * 2);
}
326
327
// Standard test dispatchers.


// Emit and run a loop that applies a 1-operand FP instruction (selected by
// `helper`) to every element of `inputs`, storing each result sequentially at
// `results`. `d_size`/`n_size` select the destination/source register widths
// (D, S or H). `*skipped` is set by TRY_RUN() when the test cannot be run.
static void Test1Op_Helper(Test1OpFPHelper_t helper,
                           uintptr_t inputs,
                           unsigned inputs_length,
                           uintptr_t results,
                           unsigned d_size,
                           unsigned n_size,
                           bool* skipped) {
  VIXL_ASSERT((d_size == kDRegSize) || (d_size == kSRegSize) ||
              (d_size == kHRegSize));
  VIXL_ASSERT((n_size == kDRegSize) || (n_size == kSRegSize) ||
              (n_size == kHRegSize));

  CPUFeatures features;
  features.Combine(CPUFeatures::kFP, CPUFeatures::kFPHalf);
  // For frint{32,64}{x,y} variants.
  features.Combine(CPUFeatures::kFrintToFixedSizedInt);
  SETUP_WITH_FEATURES(features);
  START();

  // Roll up the loop to keep the code size down.
  Label loop_n;

  Register out = x0;
  Register inputs_base = x1;
  Register length = w2;
  Register index_n = w3;

  // Pick the source register and the shift used to scale the element index
  // into a byte offset, based on the requested operand size.
  int n_index_shift;
  VRegister fd;
  VRegister fn;
  if (n_size == kDRegSize) {
    n_index_shift = kDRegSizeInBytesLog2;
    fn = d1;
  } else if (n_size == kSRegSize) {
    n_index_shift = kSRegSizeInBytesLog2;
    fn = s1;
  } else {
    n_index_shift = kHRegSizeInBytesLog2;
    fn = h1;
  }

  // The destination width may differ from the source width (e.g. for
  // conversions), so it is selected independently.
  if (d_size == kDRegSize) {
    fd = d0;
  } else if (d_size == kSRegSize) {
    fd = s0;
  } else {
    fd = h0;
  }


  __ Mov(out, results);
  __ Mov(inputs_base, inputs);
  __ Mov(length, inputs_length);

  __ Mov(index_n, 0);
  __ Bind(&loop_n);
  __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, n_index_shift));

  {
    // The helper must emit exactly one instruction.
    SingleEmissionCheckScope guard(&masm);
    (masm.*helper)(fd, fn);
  }
  // Store the result and advance the output pointer.
  __ Str(fd, MemOperand(out, fd.GetSizeInBytes(), PostIndex));

  __ Add(index_n, index_n, 1);
  __ Cmp(index_n, inputs_length);
  __ B(lo, &loop_n);

  END();
  TRY_RUN(skipped);
}
402
403
404 // Test FP instructions. The inputs[] and expected[] arrays should be arrays of
405 // rawbits representations of doubles or floats. This ensures that exact bit
406 // comparisons can be performed.
407 template <typename Tn, typename Td>
Test1Op(const char * name,Test1OpFPHelper_t helper,const Tn inputs[],unsigned inputs_length,const Td expected[],unsigned expected_length)408 static void Test1Op(const char* name,
409 Test1OpFPHelper_t helper,
410 const Tn inputs[],
411 unsigned inputs_length,
412 const Td expected[],
413 unsigned expected_length) {
414 VIXL_ASSERT(inputs_length > 0);
415
416 const unsigned results_length = inputs_length;
417 Td* results = new Td[results_length];
418
419 const unsigned d_bits = sizeof(Td) * 8;
420 const unsigned n_bits = sizeof(Tn) * 8;
421 bool skipped;
422
423 Test1Op_Helper(helper,
424 reinterpret_cast<uintptr_t>(inputs),
425 inputs_length,
426 reinterpret_cast<uintptr_t>(results),
427 d_bits,
428 n_bits,
429 &skipped);
430
431 if (Test::generate_test_trace()) {
432 // Print the results.
433 printf("const uint%u_t kExpected_%s[] = {\n", d_bits, name);
434 for (unsigned d = 0; d < results_length; d++) {
435 printf(" 0x%0*" PRIx64 ",\n",
436 d_bits / 4,
437 static_cast<uint64_t>(results[d]));
438 }
439 printf("};\n");
440 printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
441 } else if (!skipped) {
442 // Check the results.
443 VIXL_CHECK(expected_length == results_length);
444 unsigned error_count = 0;
445 unsigned d = 0;
446 for (unsigned n = 0; n < inputs_length; n++, d++) {
447 if (results[d] != expected[d]) {
448 if (++error_count > kErrorReportLimit) continue;
449
450 printf("%s 0x%0*" PRIx64 " (%s %g):\n",
451 name,
452 n_bits / 4,
453 static_cast<uint64_t>(inputs[n]),
454 name,
455 rawbits_to_fp(inputs[n]));
456 printf(" Expected: 0x%0*" PRIx64 " (%g)\n",
457 d_bits / 4,
458 static_cast<uint64_t>(expected[d]),
459 rawbits_to_fp(expected[d]));
460 printf(" Found: 0x%0*" PRIx64 " (%g)\n",
461 d_bits / 4,
462 static_cast<uint64_t>(results[d]),
463 rawbits_to_fp(results[d]));
464 printf("\n");
465 }
466 }
467 VIXL_ASSERT(d == expected_length);
468 if (error_count > kErrorReportLimit) {
469 printf("%u other errors follow.\n", error_count - kErrorReportLimit);
470 }
471 VIXL_CHECK(error_count == 0);
472 }
473 delete[] results;
474 }
475
476
// Emit and run a nested loop that applies a 2-operand FP instruction
// (selected by `helper`) to every (n, m) pair of inputs, storing results
// sequentially at `results` (m varies fastest). All registers share the same
// width, selected by `reg_size` (D, S or H).
static void Test2Op_Helper(Test2OpFPHelper_t helper,
                           uintptr_t inputs,
                           unsigned inputs_length,
                           uintptr_t results,
                           unsigned reg_size,
                           bool* skipped) {
  VIXL_ASSERT((reg_size == kDRegSize) || (reg_size == kSRegSize) ||
              (reg_size == kHRegSize));

  SETUP_WITH_FEATURES(CPUFeatures::kFP, CPUFeatures::kFPHalf);
  START();

  // Roll up the loop to keep the code size down.
  Label loop_n, loop_m;

  Register out = x0;
  Register inputs_base = x1;
  Register length = w2;
  Register index_n = w3;
  Register index_m = w4;

  bool double_op = reg_size == kDRegSize;
  bool float_op = reg_size == kSRegSize;
  // Shift used to scale an element index into a byte offset.
  int index_shift;
  if (double_op) {
    index_shift = kDRegSizeInBytesLog2;
  } else if (float_op) {
    index_shift = kSRegSizeInBytesLog2;
  } else {
    index_shift = kHRegSizeInBytesLog2;
  }

  VRegister fd;
  VRegister fn;
  VRegister fm;

  if (double_op) {
    fd = d0;
    fn = d1;
    fm = d2;
  } else if (float_op) {
    fd = s0;
    fn = s1;
    fm = s2;
  } else {
    fd = h0;
    fn = h1;
    fm = h2;
  }

  __ Mov(out, results);
  __ Mov(inputs_base, inputs);
  __ Mov(length, inputs_length);

  __ Mov(index_n, 0);
  __ Bind(&loop_n);
  __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, index_shift));

  __ Mov(index_m, 0);
  __ Bind(&loop_m);
  __ Ldr(fm, MemOperand(inputs_base, index_m, UXTW, index_shift));

  {
    // The helper must emit exactly one instruction.
    SingleEmissionCheckScope guard(&masm);
    (masm.*helper)(fd, fn, fm);
  }
  // Store the result and advance the output pointer.
  __ Str(fd, MemOperand(out, fd.GetSizeInBytes(), PostIndex));

  __ Add(index_m, index_m, 1);
  __ Cmp(index_m, inputs_length);
  __ B(lo, &loop_m);

  __ Add(index_n, index_n, 1);
  __ Cmp(index_n, inputs_length);
  __ B(lo, &loop_n);

  END();
  TRY_RUN(skipped);
}
556
557
558 // Test FP instructions. The inputs[] and expected[] arrays should be arrays of
559 // rawbits representations of doubles or floats. This ensures that exact bit
560 // comparisons can be performed.
561 template <typename T>
Test2Op(const char * name,Test2OpFPHelper_t helper,const T inputs[],unsigned inputs_length,const T expected[],unsigned expected_length)562 static void Test2Op(const char* name,
563 Test2OpFPHelper_t helper,
564 const T inputs[],
565 unsigned inputs_length,
566 const T expected[],
567 unsigned expected_length) {
568 VIXL_ASSERT(inputs_length > 0);
569
570 const unsigned results_length = inputs_length * inputs_length;
571 T* results = new T[results_length];
572
573 const unsigned bits = sizeof(T) * 8;
574 bool skipped;
575
576 Test2Op_Helper(helper,
577 reinterpret_cast<uintptr_t>(inputs),
578 inputs_length,
579 reinterpret_cast<uintptr_t>(results),
580 bits,
581 &skipped);
582
583 if (Test::generate_test_trace()) {
584 // Print the results.
585 printf("const uint%u_t kExpected_%s[] = {\n", bits, name);
586 for (unsigned d = 0; d < results_length; d++) {
587 printf(" 0x%0*" PRIx64 ",\n",
588 bits / 4,
589 static_cast<uint64_t>(results[d]));
590 }
591 printf("};\n");
592 printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
593 } else if (!skipped) {
594 // Check the results.
595 VIXL_CHECK(expected_length == results_length);
596 unsigned error_count = 0;
597 unsigned d = 0;
598 for (unsigned n = 0; n < inputs_length; n++) {
599 for (unsigned m = 0; m < inputs_length; m++, d++) {
600 if (results[d] != expected[d]) {
601 if (++error_count > kErrorReportLimit) continue;
602
603 printf("%s 0x%0*" PRIx64 ", 0x%0*" PRIx64 " (%s %g %g):\n",
604 name,
605 bits / 4,
606 static_cast<uint64_t>(inputs[n]),
607 bits / 4,
608 static_cast<uint64_t>(inputs[m]),
609 name,
610 rawbits_to_fp(inputs[n]),
611 rawbits_to_fp(inputs[m]));
612 printf(" Expected: 0x%0*" PRIx64 " (%g)\n",
613 bits / 4,
614 static_cast<uint64_t>(expected[d]),
615 rawbits_to_fp(expected[d]));
616 printf(" Found: 0x%0*" PRIx64 " (%g)\n",
617 bits / 4,
618 static_cast<uint64_t>(results[d]),
619 rawbits_to_fp(results[d]));
620 printf("\n");
621 }
622 }
623 }
624 VIXL_ASSERT(d == expected_length);
625 if (error_count > kErrorReportLimit) {
626 printf("%u other errors follow.\n", error_count - kErrorReportLimit);
627 }
628 VIXL_CHECK(error_count == 0);
629 }
630 delete[] results;
631 }
632
633
// Emit and run a triply nested loop that applies a 3-operand FP instruction
// (selected by `helper`) to every (n, m, a) input triple, storing results
// sequentially at `results` (a varies fastest). All registers share the same
// width, selected by `reg_size` (D, S or H).
static void Test3Op_Helper(Test3OpFPHelper_t helper,
                           uintptr_t inputs,
                           unsigned inputs_length,
                           uintptr_t results,
                           unsigned reg_size,
                           bool* skipped) {
  VIXL_ASSERT((reg_size == kDRegSize) || (reg_size == kSRegSize) ||
              (reg_size == kHRegSize));

  SETUP_WITH_FEATURES(CPUFeatures::kFP, CPUFeatures::kFPHalf);
  START();

  // Roll up the loop to keep the code size down.
  Label loop_n, loop_m, loop_a;

  Register out = x0;
  Register inputs_base = x1;
  Register length = w2;
  Register index_n = w3;
  Register index_m = w4;
  Register index_a = w5;

  bool double_op = reg_size == kDRegSize;
  bool single_op = reg_size == kSRegSize;
  // Shift used to scale an element index into a byte offset.
  int index_shift;
  // Registers v0-v3, sized by reg_size.
  VRegister fd(0, reg_size);
  VRegister fn(1, reg_size);
  VRegister fm(2, reg_size);
  VRegister fa(3, reg_size);
  if (double_op) {
    index_shift = kDRegSizeInBytesLog2;
  } else if (single_op) {
    index_shift = kSRegSizeInBytesLog2;
  } else {
    index_shift = kHRegSizeInBytesLog2;
  }

  __ Mov(out, results);
  __ Mov(inputs_base, inputs);
  __ Mov(length, inputs_length);

  __ Mov(index_n, 0);
  __ Bind(&loop_n);
  __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, index_shift));

  __ Mov(index_m, 0);
  __ Bind(&loop_m);
  __ Ldr(fm, MemOperand(inputs_base, index_m, UXTW, index_shift));

  __ Mov(index_a, 0);
  __ Bind(&loop_a);
  __ Ldr(fa, MemOperand(inputs_base, index_a, UXTW, index_shift));

  {
    // The helper must emit exactly one instruction.
    SingleEmissionCheckScope guard(&masm);
    (masm.*helper)(fd, fn, fm, fa);
  }
  // Store the result and advance the output pointer.
  __ Str(fd, MemOperand(out, fd.GetSizeInBytes(), PostIndex));

  __ Add(index_a, index_a, 1);
  __ Cmp(index_a, inputs_length);
  __ B(lo, &loop_a);

  __ Add(index_m, index_m, 1);
  __ Cmp(index_m, inputs_length);
  __ B(lo, &loop_m);

  __ Add(index_n, index_n, 1);
  __ Cmp(index_n, inputs_length);
  __ B(lo, &loop_n);

  END();
  TRY_RUN(skipped);
}
708
709
710 // Test FP instructions. The inputs[] and expected[] arrays should be arrays of
711 // rawbits representations of doubles or floats. This ensures that exact bit
712 // comparisons can be performed.
713 template <typename T>
Test3Op(const char * name,Test3OpFPHelper_t helper,const T inputs[],unsigned inputs_length,const T expected[],unsigned expected_length)714 static void Test3Op(const char* name,
715 Test3OpFPHelper_t helper,
716 const T inputs[],
717 unsigned inputs_length,
718 const T expected[],
719 unsigned expected_length) {
720 VIXL_ASSERT(inputs_length > 0);
721
722 const unsigned results_length = inputs_length * inputs_length * inputs_length;
723 T* results = new T[results_length];
724
725 const unsigned bits = sizeof(T) * 8;
726 bool skipped;
727
728 Test3Op_Helper(helper,
729 reinterpret_cast<uintptr_t>(inputs),
730 inputs_length,
731 reinterpret_cast<uintptr_t>(results),
732 bits,
733 &skipped);
734
735 if (Test::generate_test_trace()) {
736 // Print the results.
737 printf("const uint%u_t kExpected_%s[] = {\n", bits, name);
738 for (unsigned d = 0; d < results_length; d++) {
739 printf(" 0x%0*" PRIx64 ",\n",
740 bits / 4,
741 static_cast<uint64_t>(results[d]));
742 }
743 printf("};\n");
744 printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
745 } else if (!skipped) {
746 // Check the results.
747 VIXL_CHECK(expected_length == results_length);
748 unsigned error_count = 0;
749 unsigned d = 0;
750 for (unsigned n = 0; n < inputs_length; n++) {
751 for (unsigned m = 0; m < inputs_length; m++) {
752 for (unsigned a = 0; a < inputs_length; a++, d++) {
753 if (results[d] != expected[d]) {
754 if (++error_count > kErrorReportLimit) continue;
755
756 printf("%s 0x%0*" PRIx64 ", 0x%0*" PRIx64 ", 0x%0*" PRIx64
757 " (%s %g %g %g):\n",
758 name,
759 bits / 4,
760 static_cast<uint64_t>(inputs[n]),
761 bits / 4,
762 static_cast<uint64_t>(inputs[m]),
763 bits / 4,
764 static_cast<uint64_t>(inputs[a]),
765 name,
766 rawbits_to_fp(inputs[n]),
767 rawbits_to_fp(inputs[m]),
768 rawbits_to_fp(inputs[a]));
769 printf(" Expected: 0x%0*" PRIx64 " (%g)\n",
770 bits / 4,
771 static_cast<uint64_t>(expected[d]),
772 rawbits_to_fp(expected[d]));
773 printf(" Found: 0x%0*" PRIx64 " (%g)\n",
774 bits / 4,
775 static_cast<uint64_t>(results[d]),
776 rawbits_to_fp(results[d]));
777 printf("\n");
778 }
779 }
780 }
781 }
782 VIXL_ASSERT(d == expected_length);
783 if (error_count > kErrorReportLimit) {
784 printf("%u other errors follow.\n", error_count - kErrorReportLimit);
785 }
786 VIXL_CHECK(error_count == 0);
787 }
788 delete[] results;
789 }
790
791
// Emit and run a nested loop that applies an FP comparison (selected by
// `helper`) to every (n, m) pair of inputs, storing the resulting NZCV flags
// (one byte each, m varying fastest) at `results`.
static void TestCmp_Helper(TestFPCmpHelper_t helper,
                           uintptr_t inputs,
                           unsigned inputs_length,
                           uintptr_t results,
                           unsigned reg_size,
                           bool* skipped) {
  VIXL_ASSERT((reg_size == kDRegSize) || (reg_size == kSRegSize));

  SETUP_WITH_FEATURES(CPUFeatures::kFP);
  START();

  // Roll up the loop to keep the code size down.
  Label loop_n, loop_m;

  Register out = x0;
  Register inputs_base = x1;
  Register length = w2;
  Register index_n = w3;
  Register index_m = w4;
  Register flags = x5;

  bool double_op = reg_size == kDRegSize;
  // Shift used to scale an element index into a byte offset.
  const int index_shift =
      double_op ? kDRegSizeInBytesLog2 : kSRegSizeInBytesLog2;

  VRegister fn = double_op ? d1 : s1;
  VRegister fm = double_op ? d2 : s2;

  __ Mov(out, results);
  __ Mov(inputs_base, inputs);
  __ Mov(length, inputs_length);

  __ Mov(index_n, 0);
  __ Bind(&loop_n);
  __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, index_shift));

  __ Mov(index_m, 0);
  __ Bind(&loop_m);
  __ Ldr(fm, MemOperand(inputs_base, index_m, UXTW, index_shift));

  {
    // The helper must emit exactly one instruction.
    SingleEmissionCheckScope guard(&masm);
    (masm.*helper)(fn, fm);
  }
  // Extract the NZCV flags (bits 31:28 of the status register) and store
  // them as a single byte.
  __ Mrs(flags, NZCV);
  __ Ubfx(flags, flags, 28, 4);
  __ Strb(flags, MemOperand(out, 1, PostIndex));

  __ Add(index_m, index_m, 1);
  __ Cmp(index_m, inputs_length);
  __ B(lo, &loop_m);

  __ Add(index_n, index_n, 1);
  __ Cmp(index_n, inputs_length);
  __ B(lo, &loop_n);

  END();
  TRY_RUN(skipped);
}
851
852
853 // Test FP instructions. The inputs[] and expected[] arrays should be arrays of
854 // rawbits representations of doubles or floats. This ensures that exact bit
855 // comparisons can be performed.
856 template <typename T>
TestCmp(const char * name,TestFPCmpHelper_t helper,const T inputs[],unsigned inputs_length,const uint8_t expected[],unsigned expected_length)857 static void TestCmp(const char* name,
858 TestFPCmpHelper_t helper,
859 const T inputs[],
860 unsigned inputs_length,
861 const uint8_t expected[],
862 unsigned expected_length) {
863 VIXL_ASSERT(inputs_length > 0);
864
865 const unsigned results_length = inputs_length * inputs_length;
866 uint8_t* results = new uint8_t[results_length];
867
868 const unsigned bits = sizeof(T) * 8;
869 bool skipped;
870
871 TestCmp_Helper(helper,
872 reinterpret_cast<uintptr_t>(inputs),
873 inputs_length,
874 reinterpret_cast<uintptr_t>(results),
875 bits,
876 &skipped);
877
878 if (Test::generate_test_trace()) {
879 // Print the results.
880 printf("const uint8_t kExpected_%s[] = {\n", name);
881 for (unsigned d = 0; d < results_length; d++) {
882 // Each NZCV result only requires 4 bits.
883 VIXL_ASSERT((results[d] & 0xf) == results[d]);
884 printf(" 0x%" PRIx8 ",\n", results[d]);
885 }
886 printf("};\n");
887 printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
888 } else if (!skipped) {
889 // Check the results.
890 VIXL_CHECK(expected_length == results_length);
891 unsigned error_count = 0;
892 unsigned d = 0;
893 for (unsigned n = 0; n < inputs_length; n++) {
894 for (unsigned m = 0; m < inputs_length; m++, d++) {
895 if (results[d] != expected[d]) {
896 if (++error_count > kErrorReportLimit) continue;
897
898 printf("%s 0x%0*" PRIx64 ", 0x%0*" PRIx64 " (%s %g %g):\n",
899 name,
900 bits / 4,
901 static_cast<uint64_t>(inputs[n]),
902 bits / 4,
903 static_cast<uint64_t>(inputs[m]),
904 name,
905 rawbits_to_fp(inputs[n]),
906 rawbits_to_fp(inputs[m]));
907 printf(" Expected: %c%c%c%c (0x%" PRIx8 ")\n",
908 (expected[d] & 0x8) ? 'N' : 'n',
909 (expected[d] & 0x4) ? 'Z' : 'z',
910 (expected[d] & 0x2) ? 'C' : 'c',
911 (expected[d] & 0x1) ? 'V' : 'v',
912 expected[d]);
913 printf(" Found: %c%c%c%c (0x%" PRIx8 ")\n",
914 (results[d] & 0x8) ? 'N' : 'n',
915 (results[d] & 0x4) ? 'Z' : 'z',
916 (results[d] & 0x2) ? 'C' : 'c',
917 (results[d] & 0x1) ? 'V' : 'v',
918 results[d]);
919 printf("\n");
920 }
921 }
922 }
923 VIXL_ASSERT(d == expected_length);
924 if (error_count > kErrorReportLimit) {
925 printf("%u other errors follow.\n", error_count - kErrorReportLimit);
926 }
927 VIXL_CHECK(error_count == 0);
928 }
929 delete[] results;
930 }
931
932
// Emit and run a loop that compares every input against zero using the given
// compare-with-zero instruction, storing the resulting NZCV flags (one byte
// per input) at `results`.
static void TestCmpZero_Helper(TestFPCmpZeroHelper_t helper,
                               uintptr_t inputs,
                               unsigned inputs_length,
                               uintptr_t results,
                               unsigned reg_size,
                               bool* skipped) {
  VIXL_ASSERT((reg_size == kDRegSize) || (reg_size == kSRegSize));

  SETUP_WITH_FEATURES(CPUFeatures::kFP);
  START();

  // Roll up the loop to keep the code size down.
  Label loop_n, loop_m;

  Register out = x0;
  Register inputs_base = x1;
  Register length = w2;
  Register index_n = w3;
  Register flags = x4;

  bool double_op = reg_size == kDRegSize;
  // Shift used to scale an element index into a byte offset.
  const int index_shift =
      double_op ? kDRegSizeInBytesLog2 : kSRegSizeInBytesLog2;

  VRegister fn = double_op ? d1 : s1;

  __ Mov(out, results);
  __ Mov(inputs_base, inputs);
  __ Mov(length, inputs_length);

  __ Mov(index_n, 0);
  __ Bind(&loop_n);
  __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, index_shift));

  {
    // The helper must emit exactly one instruction, comparing against the
    // immediate zero operand.
    SingleEmissionCheckScope guard(&masm);
    (masm.*helper)(fn, 0.0);
  }
  // Extract the NZCV flags (bits 31:28 of the status register) and store
  // them as a single byte.
  __ Mrs(flags, NZCV);
  __ Ubfx(flags, flags, 28, 4);
  __ Strb(flags, MemOperand(out, 1, PostIndex));

  __ Add(index_n, index_n, 1);
  __ Cmp(index_n, inputs_length);
  __ B(lo, &loop_n);

  END();
  TRY_RUN(skipped);
}
982
983
984 // Test FP instructions. The inputs[] and expected[] arrays should be arrays of
985 // rawbits representations of doubles or floats. This ensures that exact bit
986 // comparisons can be performed.
template <typename T>
static void TestCmpZero(const char* name,
                        TestFPCmpZeroHelper_t helper,
                        const T inputs[],
                        unsigned inputs_length,
                        const uint8_t expected[],
                        unsigned expected_length) {
  // Run 'helper' over every input (via TestCmpZero_Helper), then either dump
  // the NZCV results as a reference trace or compare them with 'expected'.
  VIXL_ASSERT(inputs_length > 0);

  // One NZCV result (stored as a byte) is produced per input.
  const unsigned results_length = inputs_length;
  uint8_t* results = new uint8_t[results_length];

  const unsigned bits = sizeof(T) * 8;
  bool skipped;

  TestCmpZero_Helper(helper,
                     reinterpret_cast<uintptr_t>(inputs),
                     inputs_length,
                     reinterpret_cast<uintptr_t>(results),
                     bits,
                     &skipped);

  if (Test::generate_test_trace()) {
    // Print the results.
    printf("const uint8_t kExpected_%s[] = {\n", name);
    for (unsigned d = 0; d < results_length; d++) {
      // Each NZCV result only requires 4 bits.
      VIXL_ASSERT((results[d] & 0xf) == results[d]);
      printf("  0x%" PRIx8 ",\n", results[d]);
    }
    printf("};\n");
    printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
  } else if (!skipped) {
    // Check the results.
    VIXL_CHECK(expected_length == results_length);
    unsigned error_count = 0;
    unsigned d = 0;
    for (unsigned n = 0; n < inputs_length; n++, d++) {
      if (results[d] != expected[d]) {
        if (++error_count > kErrorReportLimit) continue;

        // The second operand of the comparison is always literal zero, so a
        // zero-filled field of the same width is printed for it.
        printf("%s 0x%0*" PRIx64 ", 0x%0*u (%s %g #0.0):\n",
               name,
               bits / 4,
               static_cast<uint64_t>(inputs[n]),
               bits / 4,
               0,
               name,
               rawbits_to_fp(inputs[n]));
        // Display each flag as its letter: upper-case when set.
        printf("  Expected: %c%c%c%c (0x%" PRIx8 ")\n",
               (expected[d] & 0x8) ? 'N' : 'n',
               (expected[d] & 0x4) ? 'Z' : 'z',
               (expected[d] & 0x2) ? 'C' : 'c',
               (expected[d] & 0x1) ? 'V' : 'v',
               expected[d]);
        printf("  Found:    %c%c%c%c (0x%" PRIx8 ")\n",
               (results[d] & 0x8) ? 'N' : 'n',
               (results[d] & 0x4) ? 'Z' : 'z',
               (results[d] & 0x2) ? 'C' : 'c',
               (results[d] & 0x1) ? 'V' : 'v',
               results[d]);
        printf("\n");
      }
    }
    VIXL_ASSERT(d == expected_length);
    if (error_count > kErrorReportLimit) {
      printf("%u other errors follow.\n", error_count - kErrorReportLimit);
    }
    VIXL_CHECK(error_count == 0);
  }
  delete[] results;
}
1059
1060
// Emit and run a loop that applies 'helper' (a fixed-point FP-to-integer
// conversion MacroAssembler method) to every input value, once for each
// 'fbits' in [0, d_size], storing each integer result to 'results'.
//
// 'inputs' points to raw H-, S- or D-register bit patterns ('n_size' selects
// the width), and 'd_size' selects the W or X destination register.
// '*skipped' is set by TRY_RUN if the generated code could not be executed.
static void TestFPToFixed_Helper(TestFPToFixedHelper_t helper,
                                 uintptr_t inputs,
                                 unsigned inputs_length,
                                 uintptr_t results,
                                 unsigned d_size,
                                 unsigned n_size,
                                 bool* skipped) {
  VIXL_ASSERT((d_size == kXRegSize) || (d_size == kWRegSize));
  VIXL_ASSERT((n_size == kDRegSize) || (n_size == kSRegSize) ||
              (n_size == kHRegSize));

  SETUP_WITH_FEATURES(CPUFeatures::kFP, CPUFeatures::kFPHalf);
  START();

  // Roll up the loop to keep the code size down.
  Label loop_n;

  Register out = x0;
  Register inputs_base = x1;
  Register length = w2;
  Register index_n = w3;

  // Scale the load index by the input lane size.
  int n_index_shift;
  if (n_size == kDRegSize) {
    n_index_shift = kDRegSizeInBytesLog2;
  } else if (n_size == kSRegSize) {
    n_index_shift = kSRegSizeInBytesLog2;
  } else {
    n_index_shift = kHRegSizeInBytesLog2;
  }

  Register rd = (d_size == kXRegSize) ? Register(x10) : Register(w10);
  VRegister fn;
  if (n_size == kDRegSize) {
    fn = d1;
  } else if (n_size == kSRegSize) {
    fn = s1;
  } else {
    fn = h1;
  }

  __ Mov(out, results);
  __ Mov(inputs_base, inputs);
  __ Mov(length, inputs_length);

  __ Mov(index_n, 0);
  __ Bind(&loop_n);
  __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, n_index_shift));

  // The fbits loop is unrolled: one conversion and one store are emitted
  // per fbits value, for each input.
  for (unsigned fbits = 0; fbits <= d_size; ++fbits) {
    {
      SingleEmissionCheckScope guard(&masm);
      (masm.*helper)(rd, fn, fbits);
    }
    __ Str(rd, MemOperand(out, rd.GetSizeInBytes(), PostIndex));
  }

  __ Add(index_n, index_n, 1);
  __ Cmp(index_n, inputs_length);
  __ B(lo, &loop_n);

  END();
  TRY_RUN(skipped);
}
1125
1126
// Emit and run a loop that applies 'helper' (an FP-to-integer conversion
// MacroAssembler method) to every input value, storing each integer result
// to 'results'.
//
// 'inputs' points to raw H-, S- or D-register bit patterns ('n_size' selects
// the width), and 'd_size' selects the W or X destination register.
// '*skipped' is set by TRY_RUN if the generated code could not be executed.
static void TestFPToInt_Helper(TestFPToIntHelper_t helper,
                               uintptr_t inputs,
                               unsigned inputs_length,
                               uintptr_t results,
                               unsigned d_size,
                               unsigned n_size,
                               bool* skipped) {
  VIXL_ASSERT((d_size == kXRegSize) || (d_size == kWRegSize));
  VIXL_ASSERT((n_size == kDRegSize) || (n_size == kSRegSize) ||
              (n_size == kHRegSize));

  // kJSCVT is required by the fjcvtzs variants of these tests.
  SETUP_WITH_FEATURES(CPUFeatures::kFP,
                      CPUFeatures::kFPHalf,
                      CPUFeatures::kJSCVT);
  START();

  // Roll up the loop to keep the code size down.
  Label loop_n;

  Register out = x0;
  Register inputs_base = x1;
  Register length = w2;
  Register index_n = w3;

  // Scale the load index by the input lane size.
  int n_index_shift;
  if (n_size == kDRegSize) {
    n_index_shift = kDRegSizeInBytesLog2;
  } else if (n_size == kSRegSize) {
    n_index_shift = kSRegSizeInBytesLog2;
  } else {
    n_index_shift = kHRegSizeInBytesLog2;
  }

  Register rd = (d_size == kXRegSize) ? Register(x10) : Register(w10);
  VRegister fn;
  if (n_size == kDRegSize) {
    fn = d1;
  } else if (n_size == kSRegSize) {
    fn = s1;
  } else {
    fn = h1;
  }

  __ Mov(out, results);
  __ Mov(inputs_base, inputs);
  __ Mov(length, inputs_length);

  __ Mov(index_n, 0);
  __ Bind(&loop_n);
  __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, n_index_shift));

  {
    SingleEmissionCheckScope guard(&masm);
    (masm.*helper)(rd, fn);
  }
  __ Str(rd, MemOperand(out, rd.GetSizeInBytes(), PostIndex));

  __ Add(index_n, index_n, 1);
  __ Cmp(index_n, inputs_length);
  __ B(lo, &loop_n);

  END();
  TRY_RUN(skipped);
}
1191
1192
1193 // Test FP instructions.
1194 // - The inputs[] array should be an array of rawbits representations of
1195 // doubles or floats. This ensures that exact bit comparisons can be
1196 // performed.
1197 // - The expected[] array should be an array of signed integers.
template <typename Tn, typename Td>
static void TestFPToS(const char* name,
                      TestFPToIntHelper_t helper,
                      const Tn inputs[],
                      unsigned inputs_length,
                      const Td expected[],
                      unsigned expected_length) {
  // Run 'helper' over every input (via TestFPToInt_Helper), then either dump
  // the signed-integer results as a reference trace or compare them with
  // 'expected'.
  VIXL_ASSERT(inputs_length > 0);

  // One result is produced per input value.
  const unsigned results_length = inputs_length;
  Td* results = new Td[results_length];

  const unsigned d_bits = sizeof(Td) * 8;
  const unsigned n_bits = sizeof(Tn) * 8;
  bool skipped;

  TestFPToInt_Helper(helper,
                     reinterpret_cast<uintptr_t>(inputs),
                     inputs_length,
                     reinterpret_cast<uintptr_t>(results),
                     d_bits,
                     n_bits,
                     &skipped);

  if (Test::generate_test_trace()) {
    // Print the results.
    printf("const int%u_t kExpected_%s[] = {\n", d_bits, name);
    // There is no simple C++ literal for INT*_MIN that doesn't produce
    // warnings, so we use an appropriate constant in that case instead.
    // Deriving int_d_min in this way (rather than just checking INT64_MIN and
    // the like) avoids warnings about comparing values with differing ranges.
    const int64_t int_d_max = (UINT64_C(1) << (d_bits - 1)) - 1;
    const int64_t int_d_min = -(int_d_max)-1;
    for (unsigned d = 0; d < results_length; d++) {
      if (results[d] == int_d_min) {
        printf("  -INT%u_C(%" PRId64 ") - 1,\n", d_bits, int_d_max);
      } else {
        // Some constants (such as those between INT32_MAX and UINT32_MAX)
        // trigger compiler warnings. To avoid these warnings, use an
        // appropriate macro to make the type explicit.
        int64_t result_int64 = static_cast<int64_t>(results[d]);
        if (result_int64 >= 0) {
          printf("  INT%u_C(%" PRId64 "),\n", d_bits, result_int64);
        } else {
          printf("  -INT%u_C(%" PRId64 "),\n", d_bits, -result_int64);
        }
      }
    }
    printf("};\n");
    printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
  } else if (!skipped) {
    // Check the results.
    VIXL_CHECK(expected_length == results_length);
    unsigned error_count = 0;
    unsigned d = 0;
    for (unsigned n = 0; n < inputs_length; n++, d++) {
      if (results[d] != expected[d]) {
        if (++error_count > kErrorReportLimit) continue;

        printf("%s 0x%0*" PRIx64 " (%s %g):\n",
               name,
               n_bits / 4,
               static_cast<uint64_t>(inputs[n]),
               name,
               rawbits_to_fp(inputs[n]));
        printf("  Expected: 0x%0*" PRIx64 " (%" PRId64 ")\n",
               d_bits / 4,
               static_cast<uint64_t>(expected[d]),
               static_cast<int64_t>(expected[d]));
        printf("  Found:    0x%0*" PRIx64 " (%" PRId64 ")\n",
               d_bits / 4,
               static_cast<uint64_t>(results[d]),
               static_cast<int64_t>(results[d]));
        printf("\n");
      }
    }
    VIXL_ASSERT(d == expected_length);
    if (error_count > kErrorReportLimit) {
      printf("%u other errors follow.\n", error_count - kErrorReportLimit);
    }
    VIXL_CHECK(error_count == 0);
  }
  delete[] results;
}
1282
1283
1284 // Test FP instructions.
1285 // - The inputs[] array should be an array of rawbits representations of
1286 // doubles or floats. This ensures that exact bit comparisons can be
1287 // performed.
1288 // - The expected[] array should be an array of unsigned integers.
1289 template <typename Tn, typename Td>
TestFPToU(const char * name,TestFPToIntHelper_t helper,const Tn inputs[],unsigned inputs_length,const Td expected[],unsigned expected_length)1290 static void TestFPToU(const char* name,
1291 TestFPToIntHelper_t helper,
1292 const Tn inputs[],
1293 unsigned inputs_length,
1294 const Td expected[],
1295 unsigned expected_length) {
1296 VIXL_ASSERT(inputs_length > 0);
1297
1298 const unsigned results_length = inputs_length;
1299 Td* results = new Td[results_length];
1300
1301 const unsigned d_bits = sizeof(Td) * 8;
1302 const unsigned n_bits = sizeof(Tn) * 8;
1303 bool skipped;
1304
1305 TestFPToInt_Helper(helper,
1306 reinterpret_cast<uintptr_t>(inputs),
1307 inputs_length,
1308 reinterpret_cast<uintptr_t>(results),
1309 d_bits,
1310 n_bits,
1311 &skipped);
1312
1313 if (Test::generate_test_trace()) {
1314 // Print the results.
1315 printf("const uint%u_t kExpected_%s[] = {\n", d_bits, name);
1316 for (unsigned d = 0; d < results_length; d++) {
1317 printf(" %" PRIu64 "u,\n", static_cast<uint64_t>(results[d]));
1318 }
1319 printf("};\n");
1320 printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
1321 } else if (!skipped) {
1322 // Check the results.
1323 VIXL_CHECK(expected_length == results_length);
1324 unsigned error_count = 0;
1325 unsigned d = 0;
1326 for (unsigned n = 0; n < inputs_length; n++, d++) {
1327 if (results[d] != expected[d]) {
1328 if (++error_count > kErrorReportLimit) continue;
1329
1330 printf("%s 0x%0*" PRIx64 " (%s %g):\n",
1331 name,
1332 n_bits / 4,
1333 static_cast<uint64_t>(inputs[n]),
1334 name,
1335 rawbits_to_fp(inputs[n]));
1336 printf(" Expected: 0x%0*" PRIx64 " (%" PRIu64 ")\n",
1337 d_bits / 4,
1338 static_cast<uint64_t>(expected[d]),
1339 static_cast<uint64_t>(expected[d]));
1340 printf(" Found: 0x%0*" PRIx64 " (%" PRIu64 ")\n",
1341 d_bits / 4,
1342 static_cast<uint64_t>(results[d]),
1343 static_cast<uint64_t>(results[d]));
1344 printf("\n");
1345 }
1346 }
1347 VIXL_ASSERT(d == expected_length);
1348 if (error_count > kErrorReportLimit) {
1349 printf("%u other errors follow.\n", error_count - kErrorReportLimit);
1350 }
1351 VIXL_CHECK(error_count == 0);
1352 }
1353 delete[] results;
1354 }
1355
1356
1357 // Test FP instructions.
1358 // - The inputs[] array should be an array of rawbits representations of
1359 // doubles or floats. This ensures that exact bit comparisons can be
1360 // performed.
1361 // - The expected[] array should be an array of signed integers.
1362 template <typename Tn, typename Td>
TestFPToFixedS(const char * name,TestFPToFixedHelper_t helper,const Tn inputs[],unsigned inputs_length,const Td expected[],unsigned expected_length)1363 static void TestFPToFixedS(const char* name,
1364 TestFPToFixedHelper_t helper,
1365 const Tn inputs[],
1366 unsigned inputs_length,
1367 const Td expected[],
1368 unsigned expected_length) {
1369 VIXL_ASSERT(inputs_length > 0);
1370
1371 const unsigned d_bits = sizeof(Td) * 8;
1372 const unsigned n_bits = sizeof(Tn) * 8;
1373
1374 const unsigned results_length = inputs_length * (d_bits + 1);
1375 Td* results = new Td[results_length];
1376
1377 bool skipped;
1378
1379 TestFPToFixed_Helper(helper,
1380 reinterpret_cast<uintptr_t>(inputs),
1381 inputs_length,
1382 reinterpret_cast<uintptr_t>(results),
1383 d_bits,
1384 n_bits,
1385 &skipped);
1386
1387 if (Test::generate_test_trace()) {
1388 // Print the results.
1389 printf("const int%u_t kExpected_%s[] = {\n", d_bits, name);
1390 // There is no simple C++ literal for INT*_MIN that doesn't produce
1391 // warnings, so we use an appropriate constant in that case instead.
1392 // Deriving int_d_min in this way (rather than just checking INT64_MIN and
1393 // the like) avoids warnings about comparing values with differing ranges.
1394 const int64_t int_d_max = (UINT64_C(1) << (d_bits - 1)) - 1;
1395 const int64_t int_d_min = -(int_d_max)-1;
1396 for (unsigned d = 0; d < results_length; d++) {
1397 if (results[d] == int_d_min) {
1398 printf(" -INT%u_C(%" PRId64 ") - 1,\n", d_bits, int_d_max);
1399 } else {
1400 // Some constants (such as those between INT32_MAX and UINT32_MAX)
1401 // trigger compiler warnings. To avoid these warnings, use an
1402 // appropriate macro to make the type explicit.
1403 int64_t result_int64 = static_cast<int64_t>(results[d]);
1404 if (result_int64 >= 0) {
1405 printf(" INT%u_C(%" PRId64 "),\n", d_bits, result_int64);
1406 } else {
1407 printf(" -INT%u_C(%" PRId64 "),\n", d_bits, -result_int64);
1408 }
1409 }
1410 }
1411 printf("};\n");
1412 printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
1413 } else if (!skipped) {
1414 // Check the results.
1415 VIXL_CHECK(expected_length == results_length);
1416 unsigned error_count = 0;
1417 unsigned d = 0;
1418 for (unsigned n = 0; n < inputs_length; n++) {
1419 for (unsigned fbits = 0; fbits <= d_bits; ++fbits, d++) {
1420 if (results[d] != expected[d]) {
1421 if (++error_count > kErrorReportLimit) continue;
1422
1423 printf("%s 0x%0*" PRIx64 " #%d (%s %g #%d):\n",
1424 name,
1425 n_bits / 4,
1426 static_cast<uint64_t>(inputs[n]),
1427 fbits,
1428 name,
1429 rawbits_to_fp(inputs[n]),
1430 fbits);
1431 printf(" Expected: 0x%0*" PRIx64 " (%" PRId64 ")\n",
1432 d_bits / 4,
1433 static_cast<uint64_t>(expected[d]),
1434 static_cast<int64_t>(expected[d]));
1435 printf(" Found: 0x%0*" PRIx64 " (%" PRId64 ")\n",
1436 d_bits / 4,
1437 static_cast<uint64_t>(results[d]),
1438 static_cast<int64_t>(results[d]));
1439 printf("\n");
1440 }
1441 }
1442 }
1443 VIXL_ASSERT(d == expected_length);
1444 if (error_count > kErrorReportLimit) {
1445 printf("%u other errors follow.\n", error_count - kErrorReportLimit);
1446 }
1447 VIXL_CHECK(error_count == 0);
1448 }
1449 delete[] results;
1450 }
1451
1452
1453 // Test FP instructions.
1454 // - The inputs[] array should be an array of rawbits representations of
1455 // doubles or floats. This ensures that exact bit comparisons can be
1456 // performed.
1457 // - The expected[] array should be an array of unsigned integers.
1458 template <typename Tn, typename Td>
TestFPToFixedU(const char * name,TestFPToFixedHelper_t helper,const Tn inputs[],unsigned inputs_length,const Td expected[],unsigned expected_length)1459 static void TestFPToFixedU(const char* name,
1460 TestFPToFixedHelper_t helper,
1461 const Tn inputs[],
1462 unsigned inputs_length,
1463 const Td expected[],
1464 unsigned expected_length) {
1465 VIXL_ASSERT(inputs_length > 0);
1466
1467 const unsigned d_bits = sizeof(Td) * 8;
1468 const unsigned n_bits = sizeof(Tn) * 8;
1469
1470 const unsigned results_length = inputs_length * (d_bits + 1);
1471 Td* results = new Td[results_length];
1472
1473 bool skipped;
1474
1475 TestFPToFixed_Helper(helper,
1476 reinterpret_cast<uintptr_t>(inputs),
1477 inputs_length,
1478 reinterpret_cast<uintptr_t>(results),
1479 d_bits,
1480 n_bits,
1481 &skipped);
1482
1483 if (Test::generate_test_trace()) {
1484 // Print the results.
1485 printf("const uint%u_t kExpected_%s[] = {\n", d_bits, name);
1486 for (unsigned d = 0; d < results_length; d++) {
1487 printf(" %" PRIu64 "u,\n", static_cast<uint64_t>(results[d]));
1488 }
1489 printf("};\n");
1490 printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
1491 } else if (!skipped) {
1492 // Check the results.
1493 VIXL_CHECK(expected_length == results_length);
1494 unsigned error_count = 0;
1495 unsigned d = 0;
1496 for (unsigned n = 0; n < inputs_length; n++) {
1497 for (unsigned fbits = 0; fbits <= d_bits; ++fbits, d++) {
1498 if (results[d] != expected[d]) {
1499 if (++error_count > kErrorReportLimit) continue;
1500
1501 printf("%s 0x%0*" PRIx64 " #%d (%s %g #%d):\n",
1502 name,
1503 n_bits / 4,
1504 static_cast<uint64_t>(inputs[n]),
1505 fbits,
1506 name,
1507 rawbits_to_fp(inputs[n]),
1508 fbits);
1509 printf(" Expected: 0x%0*" PRIx64 " (%" PRIu64 ")\n",
1510 d_bits / 4,
1511 static_cast<uint64_t>(expected[d]),
1512 static_cast<uint64_t>(expected[d]));
1513 printf(" Found: 0x%0*" PRIx64 " (%" PRIu64 ")\n",
1514 d_bits / 4,
1515 static_cast<uint64_t>(results[d]),
1516 static_cast<uint64_t>(results[d]));
1517 printf("\n");
1518 }
1519 }
1520 }
1521 VIXL_ASSERT(d == expected_length);
1522 if (error_count > kErrorReportLimit) {
1523 printf("%u other errors follow.\n", error_count - kErrorReportLimit);
1524 }
1525 VIXL_CHECK(error_count == 0);
1526 }
1527 delete[] results;
1528 }
1529
1530
1531 // ==== Tests for instructions of the form <INST> VReg, VReg. ====
1532
1533
// Emit and run a loop that applies 'helper' (a one-operand NEON
// MacroAssembler method) to each window of input lanes, storing the whole
// destination register to 'results' after each iteration.
//
// Inputs are loaded one lane at a time and shifted into Vn with 'ext', so
// each iteration operates on a sliding window of the input array.
static void Test1OpNEON_Helper(Test1OpNEONHelper_t helper,
                               uintptr_t inputs_n,
                               unsigned inputs_n_length,
                               uintptr_t results,
                               VectorFormat vd_form,
                               VectorFormat vn_form,
                               bool* skipped) {
  VIXL_ASSERT(vd_form != kFormatUndefined);
  VIXL_ASSERT(vn_form != kFormatUndefined);

  CPUFeatures features;
  features.Combine(CPUFeatures::kNEON,
                   CPUFeatures::kFP,
                   CPUFeatures::kRDM,
                   CPUFeatures::kNEONHalf);
  // For frint{32,64}{x,y} variants.
  features.Combine(CPUFeatures::kFrintToFixedSizedInt);
  SETUP_WITH_FEATURES(features);
  START();

  // Roll up the loop to keep the code size down.
  Label loop_n;

  Register out = x0;
  Register inputs_n_base = x1;
  Register inputs_n_last_16bytes = x3;
  Register index_n = x5;

  // TODO: Refactor duplicate definitions below with a VRegister::As() routine.
  const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form);
  const unsigned vd_lane_count = LaneCountFromFormat(vd_form);

  const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form);
  const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
  const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
  const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form);
  const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form);


  // These will be either a D- or a Q-register form, with a single lane
  // (for use in scalar load and store operations).
  VRegister vd = VRegister(0, vd_bits);
  VRegister vn = v1.V16B();
  VRegister vntmp = v3.V16B();

  // These will have the correct format for use when calling 'helper'.
  VRegister vd_helper = VRegister(0, vd_bits, vd_lane_count);
  VRegister vn_helper = VRegister(1, vn_bits, vn_lane_count);

  // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'.
  VRegister vntmp_single = VRegister(3, vn_lane_bits);

  __ Mov(out, results);

  __ Mov(inputs_n_base, inputs_n);
  __ Mov(inputs_n_last_16bytes,
         inputs_n + (vn_lane_bytes * inputs_n_length) - 16);

  // Prime Vn with the last 16 bytes of the input array.
  __ Ldr(vn, MemOperand(inputs_n_last_16bytes));

  __ Mov(index_n, 0);
  __ Bind(&loop_n);

  // Load the next input lane and shift it into Vn.
  __ Ldr(vntmp_single,
         MemOperand(inputs_n_base, index_n, LSL, vn_lane_bytes_log2));
  __ Ext(vn, vn, vntmp, vn_lane_bytes);

  // Set the destination to zero.
  // TODO: Setting the destination to values other than zero
  //       might be a better test for instructions such as sqxtn2
  //       which may leave parts of V registers unchanged.
  __ Movi(vd.V16B(), 0);

  {
    SingleEmissionCheckScope guard(&masm);
    (masm.*helper)(vd_helper, vn_helper);
  }
  __ Str(vd, MemOperand(out, vd.GetSizeInBytes(), PostIndex));

  __ Add(index_n, index_n, 1);
  __ Cmp(index_n, inputs_n_length);
  __ B(lo, &loop_n);

  END();
  TRY_RUN(skipped);
}
1620
1621
1622 // Test NEON instructions. The inputs_*[] and expected[] arrays should be
1623 // arrays of rawbit representation of input values. This ensures that
1624 // exact bit comparisons can be performed.
template <typename Td, typename Tn>
static void Test1OpNEON(const char* name,
                        Test1OpNEONHelper_t helper,
                        const Tn inputs_n[],
                        unsigned inputs_n_length,
                        const Td expected[],
                        unsigned expected_length,
                        VectorFormat vd_form,
                        VectorFormat vn_form) {
  // Run 'helper' over the inputs (via Test1OpNEON_Helper), then either dump
  // the result vectors as a reference trace or compare them with 'expected'.
  VIXL_ASSERT(inputs_n_length > 0);

  const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
  const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
  const unsigned vn_lane_count = LaneCountFromFormat(vn_form);

  // One whole output vector (vd_lane_count lanes) is produced per iteration.
  const unsigned results_length = inputs_n_length;
  Td* results = new Td[results_length * vd_lane_count];
  const unsigned lane_bit = sizeof(Td) * 8;
  const unsigned lane_len_in_hex = MaxHexCharCount<Td, Tn>();

  bool skipped;

  Test1OpNEON_Helper(helper,
                     reinterpret_cast<uintptr_t>(inputs_n),
                     inputs_n_length,
                     reinterpret_cast<uintptr_t>(results),
                     vd_form,
                     vn_form,
                     &skipped);

  if (Test::generate_test_trace()) {
    // Print the results.
    printf("const uint%u_t kExpected_NEON_%s[] = {\n", lane_bit, name);
    for (unsigned iteration = 0; iteration < results_length; iteration++) {
      printf("  ");
      // Output a separate result for each element of the result vector.
      for (unsigned lane = 0; lane < vd_lane_count; lane++) {
        unsigned index = lane + (iteration * vd_lane_count);
        printf(" 0x%0*" PRIx64 ",",
               lane_len_in_hex,
               static_cast<uint64_t>(results[index]));
      }
      printf("\n");
    }

    printf("};\n");
    printf("const unsigned kExpectedCount_NEON_%s = %u;\n",
           name,
           results_length);
  } else if (!skipped) {
    // Check the results.
    VIXL_CHECK(expected_length == results_length);
    unsigned error_count = 0;
    unsigned d = 0;
    const char* padding = "                    ";
    VIXL_ASSERT(strlen(padding) >= (lane_len_in_hex + 1));
    for (unsigned n = 0; n < inputs_n_length; n++, d++) {
      bool error_in_vector = false;

      // First scan this iteration's vector for any mismatched lane.
      for (unsigned lane = 0; lane < vd_lane_count; lane++) {
        unsigned output_index = (n * vd_lane_count) + lane;

        if (results[output_index] != expected[output_index]) {
          error_in_vector = true;
          break;
        }
      }

      if (error_in_vector && (++error_count <= kErrorReportLimit)) {
        printf("%s\n", name);
        printf("  Vn%.*s| Vd%.*s| Expected\n",
               lane_len_in_hex + 1,
               padding,
               lane_len_in_hex + 1,
               padding);

        // Index of the input lane that was in the least-significant position
        // for this iteration; the helper primes Vn with the last 16 bytes of
        // the input array and then shifts one lane in per iteration.
        const unsigned first_index_n =
            inputs_n_length - (16 / vn_lane_bytes) + n + 1;

        for (unsigned lane = 0; lane < std::max(vd_lane_count, vn_lane_count);
             lane++) {
          unsigned output_index = (n * vd_lane_count) + lane;
          unsigned input_index_n = (first_index_n + lane) % inputs_n_length;

          printf("%c0x%0*" PRIx64 " | 0x%0*" PRIx64
                 " "
                 "| 0x%0*" PRIx64 "\n",
                 results[output_index] != expected[output_index] ? '*' : ' ',
                 lane_len_in_hex,
                 static_cast<uint64_t>(inputs_n[input_index_n]),
                 lane_len_in_hex,
                 static_cast<uint64_t>(results[output_index]),
                 lane_len_in_hex,
                 static_cast<uint64_t>(expected[output_index]));
        }
      }
    }
    VIXL_ASSERT(d == expected_length);
    if (error_count > kErrorReportLimit) {
      printf("%u other errors follow.\n", error_count - kErrorReportLimit);
    }
    VIXL_CHECK(error_count == 0);
  }
  delete[] results;
}
1730
1731
1732 // ==== Tests for instructions of the form <mnemonic> <V><d>, <Vn>.<T> ====
1733 // where <V> is one of B, H, S or D registers.
1734 // e.g. saddlv H1, v0.8B
1735
1736 // TODO: Change tests to store all lanes of the resulting V register.
1737 // Some tests store all 128 bits of the resulting V register to
1738 // check the simulator's behaviour on the rest of the register.
1739 // This is better than storing the affected lanes only.
1740 // Change any tests such as the 'Across' template to do the same.
1741
// Emit and run a loop that applies 'helper' (an across-lanes NEON
// MacroAssembler method, e.g. saddlv) to each window of input lanes,
// storing the whole Q register holding the result after each iteration so
// that the lanes beyond the scalar destination can be checked too.
static void Test1OpAcrossNEON_Helper(Test1OpNEONHelper_t helper,
                                     uintptr_t inputs_n,
                                     unsigned inputs_n_length,
                                     uintptr_t results,
                                     VectorFormat vd_form,
                                     VectorFormat vn_form,
                                     bool* skipped) {
  VIXL_ASSERT(vd_form != kFormatUndefined);
  VIXL_ASSERT(vn_form != kFormatUndefined);

  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);
  START();

  // Roll up the loop to keep the code size down.
  Label loop_n;

  Register out = x0;
  Register inputs_n_base = x1;
  Register inputs_n_last_vector = x3;
  Register index_n = x5;

  // TODO: Refactor duplicate definitions below with a VRegister::As() routine.
  const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form);
  const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form);
  const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
  const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
  const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form);
  const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form);

  // Test destructive operations by (arbitrarily) using the same register for
  // B and S lane sizes.
  bool destructive = (vd_bits == kBRegSize) || (vd_bits == kSRegSize);

  // Create two aliases for v0; the first is the destination for the tested
  // instruction, the second, the whole Q register to check the results.
  VRegister vd = VRegister(0, vd_bits);
  VRegister vdstr = VRegister(0, kQRegSize);

  VRegister vn = VRegister(1, vn_bits);
  VRegister vntmp = VRegister(3, vn_bits);

  // These will have the correct format for use when calling 'helper'.
  VRegister vd_helper = VRegister(0, vn_bits, vn_lane_count);
  VRegister vn_helper = VRegister(1, vn_bits, vn_lane_count);

  // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'.
  VRegister vntmp_single = VRegister(3, vn_lane_bits);

  // Same registers for use in the 'ext' instructions.
  VRegister vn_ext = (kDRegSize == vn_bits) ? vn.V8B() : vn.V16B();
  VRegister vntmp_ext = (kDRegSize == vn_bits) ? vntmp.V8B() : vntmp.V16B();

  __ Mov(out, results);

  __ Mov(inputs_n_base, inputs_n);
  __ Mov(inputs_n_last_vector,
         inputs_n + vn_lane_bytes * (inputs_n_length - vn_lane_count));

  // Prime Vn with the last full vector of inputs.
  __ Ldr(vn, MemOperand(inputs_n_last_vector));

  __ Mov(index_n, 0);
  __ Bind(&loop_n);

  // Load the next input lane and shift it into Vn.
  __ Ldr(vntmp_single,
         MemOperand(inputs_n_base, index_n, LSL, vn_lane_bytes_log2));
  __ Ext(vn_ext, vn_ext, vntmp_ext, vn_lane_bytes);

  if (destructive) {
    // Operate on a copy of the inputs so that Vn is preserved.
    __ Mov(vd_helper, vn_helper);
    SingleEmissionCheckScope guard(&masm);
    (masm.*helper)(vd, vd_helper);
  } else {
    SingleEmissionCheckScope guard(&masm);
    (masm.*helper)(vd, vn_helper);
  }

  // Store the whole Q register so the checker can also verify the lanes
  // beyond the scalar result.
  __ Str(vdstr, MemOperand(out, kQRegSizeInBytes, PostIndex));

  __ Add(index_n, index_n, 1);
  __ Cmp(index_n, inputs_n_length);
  __ B(lo, &loop_n);

  END();
  TRY_RUN(skipped);
}
1829
1830 // Test NEON instructions. The inputs_*[] and expected[] arrays should be
1831 // arrays of rawbit representation of input values. This ensures that
1832 // exact bit comparisons can be performed.
template <typename Td, typename Tn>
static void Test1OpAcrossNEON(const char* name,
                              Test1OpNEONHelper_t helper,
                              const Tn inputs_n[],
                              unsigned inputs_n_length,
                              const Td expected[],
                              unsigned expected_length,
                              VectorFormat vd_form,
                              VectorFormat vn_form) {
  // Run 'helper' over the inputs (via Test1OpAcrossNEON_Helper), then either
  // dump the results as a reference trace or compare them with 'expected'.
  VIXL_ASSERT(inputs_n_length > 0);

  const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
  const unsigned vd_lanes_per_q = MaxLaneCountFromFormat(vd_form);

  // The helper stores a whole Q register per iteration, so the results
  // buffer holds 'vd_lanes_per_q' lanes per input, even though only
  // 'vd_lane_count' lanes carry the result.
  const unsigned results_length = inputs_n_length;
  Td* results = new Td[results_length * vd_lanes_per_q];
  const unsigned lane_bit = sizeof(Td) * 8;
  const unsigned lane_len_in_hex = MaxHexCharCount<Td, Tn>();

  bool skipped;

  Test1OpAcrossNEON_Helper(helper,
                           reinterpret_cast<uintptr_t>(inputs_n),
                           inputs_n_length,
                           reinterpret_cast<uintptr_t>(results),
                           vd_form,
                           vn_form,
                           &skipped);

  if (Test::generate_test_trace()) {
    // Print the results.
    printf("const uint%u_t kExpected_NEON_%s[] = {\n", lane_bit, name);
    for (unsigned iteration = 0; iteration < results_length; iteration++) {
      printf("  ");
      // Output a separate result for each element of the result vector.
      for (unsigned lane = 0; lane < vd_lane_count; lane++) {
        unsigned index = lane + (iteration * vd_lanes_per_q);
        printf(" 0x%0*" PRIx64 ",",
               lane_len_in_hex,
               static_cast<uint64_t>(results[index]));
      }
      printf("\n");
    }

    printf("};\n");
    printf("const unsigned kExpectedCount_NEON_%s = %u;\n",
           name,
           results_length);
  } else if (!skipped) {
    // Check the results.
    VIXL_CHECK(expected_length == results_length);
    unsigned error_count = 0;
    unsigned d = 0;
    const char* padding = "                    ";
    VIXL_ASSERT(strlen(padding) >= (lane_len_in_hex + 1));
    for (unsigned n = 0; n < inputs_n_length; n++, d++) {
      bool error_in_vector = false;

      // Check the active lanes against the expected values.
      for (unsigned lane = 0; lane < vd_lane_count; lane++) {
        unsigned expected_index = (n * vd_lane_count) + lane;
        unsigned results_index = (n * vd_lanes_per_q) + lane;

        if (results[results_index] != expected[expected_index]) {
          error_in_vector = true;
          break;
        }
      }

      // For across operations, the remaining lanes should be zero.
      for (unsigned lane = vd_lane_count; lane < vd_lanes_per_q; lane++) {
        unsigned results_index = (n * vd_lanes_per_q) + lane;
        if (results[results_index] != 0) {
          error_in_vector = true;
          break;
        }
      }

      if (error_in_vector && (++error_count <= kErrorReportLimit)) {
        const unsigned vn_lane_count = LaneCountFromFormat(vn_form);

        printf("%s\n", name);
        printf("  Vn%.*s| Vd%.*s| Expected\n",
               lane_len_in_hex + 1,
               padding,
               lane_len_in_hex + 1,
               padding);

        // TODO: In case of an error, all tests print out as many elements as
        //       there are lanes in the output or input vectors. This way
        //       the viewer can read all the values that were needed for the
        //       operation but the output contains also unnecessary values.
        //       These prints can be improved according to the arguments
        //       passed to test functions.
        //       This output for the 'Across' category has the required
        //       modifications.
        for (unsigned lane = 0; lane < vn_lane_count; lane++) {
          unsigned results_index =
              (n * vd_lanes_per_q) + ((vn_lane_count - 1) - lane);
          unsigned input_index_n =
              (inputs_n_length - vn_lane_count + n + 1 + lane) %
              inputs_n_length;

          Td expect = 0;
          if ((vn_lane_count - 1) == lane) {
            // This is the last lane to be printed, ie. the least-significant
            // lane, so use the expected value; any other lane should be zero.
            unsigned expected_index = n * vd_lane_count;
            expect = expected[expected_index];
          }
          printf("%c0x%0*" PRIx64 " | 0x%0*" PRIx64 " | 0x%0*" PRIx64 "\n",
                 results[results_index] != expect ? '*' : ' ',
                 lane_len_in_hex,
                 static_cast<uint64_t>(inputs_n[input_index_n]),
                 lane_len_in_hex,
                 static_cast<uint64_t>(results[results_index]),
                 lane_len_in_hex,
                 static_cast<uint64_t>(expect));
        }
      }
    }
    VIXL_ASSERT(d == expected_length);
    if (error_count > kErrorReportLimit) {
      printf("%u other errors follow.\n", error_count - kErrorReportLimit);
    }
    VIXL_CHECK(error_count == 0);
  }
  delete[] results;
}
1961
1962
1963 // ==== Tests for instructions of the form <INST> VReg, VReg, VReg. ====
1964
1965 // TODO: Iterate over inputs_d once the traces file is split.
1966
// Emit and run a test program for instructions of the form <INST> Vd, Vn, Vm.
//
// The generated code iterates over every (n, m) input combination: the outer
// loop shifts one new lane from 'inputs_n' into vn, the inner loop shifts one
// new lane from 'inputs_m' into vm, then the destination is seeded from
// 'inputs_d', 'helper' emits exactly one instruction, and the full 128-bit
// result is stored to 'results' (one Q register per iteration, m-fastest
// order). TRY_RUN reports through '*skipped' when the generated code could
// not be executed (for example, on hardware lacking a required feature).
static void Test2OpNEON_Helper(Test2OpNEONHelper_t helper,
                               uintptr_t inputs_d,
                               uintptr_t inputs_n,
                               unsigned inputs_n_length,
                               uintptr_t inputs_m,
                               unsigned inputs_m_length,
                               uintptr_t results,
                               VectorFormat vd_form,
                               VectorFormat vn_form,
                               VectorFormat vm_form,
                               bool* skipped) {
  VIXL_ASSERT(vd_form != kFormatUndefined);
  VIXL_ASSERT(vn_form != kFormatUndefined);
  VIXL_ASSERT(vm_form != kFormatUndefined);

  // Enable all features that instructions exercised through this helper may
  // need; instructions requiring none of the extras are unaffected.
  CPUFeatures features;
  features.Combine(CPUFeatures::kNEON, CPUFeatures::kNEONHalf);
  features.Combine(CPUFeatures::kFP);
  features.Combine(CPUFeatures::kRDM);
  features.Combine(CPUFeatures::kDotProduct);
  features.Combine(CPUFeatures::kFHM);
  SETUP_WITH_FEATURES(features);
  START();

  // Roll up the loop to keep the code size down.
  Label loop_n, loop_m;

  Register out = x0;
  Register inputs_n_base = x1;
  Register inputs_m_base = x2;
  Register inputs_d_base = x3;
  Register inputs_n_last_16bytes = x4;
  Register inputs_m_last_16bytes = x5;
  Register index_n = x6;
  Register index_m = x7;

  // TODO: Refactor duplicate definitions below with a VRegister::As() routine.
  const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form);
  const unsigned vd_lane_count = LaneCountFromFormat(vd_form);

  const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form);
  const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
  const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
  const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form);
  const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form);

  const unsigned vm_bits = RegisterSizeInBitsFromFormat(vm_form);
  const unsigned vm_lane_count = LaneCountFromFormat(vm_form);
  const unsigned vm_lane_bytes = LaneSizeInBytesFromFormat(vm_form);
  const unsigned vm_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vm_form);
  const unsigned vm_lane_bits = LaneSizeInBitsFromFormat(vm_form);


  // Always load and store 128 bits regardless of the format.
  VRegister vd = v0.V16B();
  VRegister vn = v1.V16B();
  VRegister vm = v2.V16B();
  VRegister vntmp = v3.V16B();
  VRegister vmtmp = v4.V16B();
  VRegister vres = v5.V16B();

  // These will have the correct format for calling the 'helper'.
  VRegister vn_helper = VRegister(1, vn_bits, vn_lane_count);
  VRegister vm_helper = VRegister(2, vm_bits, vm_lane_count);
  VRegister vres_helper = VRegister(5, vd_bits, vd_lane_count);

  // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'.
  VRegister vntmp_single = VRegister(3, vn_lane_bits);
  VRegister vmtmp_single = VRegister(4, vm_lane_bits);

  __ Mov(out, results);

  __ Mov(inputs_d_base, inputs_d);

  // Prime vn/vm with trailing bytes of the input arrays, so that lanes not
  // yet replaced by the Ext rotation below hold wrapped-around input data.
  // NOTE(review): this offset uses 'inputs_n_length' as a byte count, while
  // the loop bound below uses it as a lane index (cf. Test2OpImmNEON_Helper,
  // which scales by the lane size here) — confirm the intended units.
  __ Mov(inputs_n_base, inputs_n);
  __ Mov(inputs_n_last_16bytes, inputs_n + (inputs_n_length - 16));
  __ Mov(inputs_m_base, inputs_m);
  __ Mov(inputs_m_last_16bytes, inputs_m + (inputs_m_length - 16));

  __ Ldr(vd, MemOperand(inputs_d_base));
  __ Ldr(vn, MemOperand(inputs_n_last_16bytes));
  __ Ldr(vm, MemOperand(inputs_m_last_16bytes));

  __ Mov(index_n, 0);
  __ Bind(&loop_n);

  // Shift one new input lane into the most-significant end of vn.
  __ Ldr(vntmp_single,
         MemOperand(inputs_n_base, index_n, LSL, vn_lane_bytes_log2));
  __ Ext(vn, vn, vntmp, vn_lane_bytes);

  __ Mov(index_m, 0);
  __ Bind(&loop_m);

  // Likewise shift one new input lane into vm.
  __ Ldr(vmtmp_single,
         MemOperand(inputs_m_base, index_m, LSL, vm_lane_bytes_log2));
  __ Ext(vm, vm, vmtmp, vm_lane_bytes);

  // Seed the destination from 'inputs_d', then emit exactly one instruction
  // under test (SingleEmissionCheckScope enforces the single emission).
  __ Mov(vres, vd);
  {
    SingleEmissionCheckScope guard(&masm);
    (masm.*helper)(vres_helper, vn_helper, vm_helper);
  }
  // Store the whole Q register, including any lanes the instruction did not
  // write; the checker inspects the full 128 bits.
  __ Str(vres, MemOperand(out, vd.GetSizeInBytes(), PostIndex));

  __ Add(index_m, index_m, 1);
  __ Cmp(index_m, inputs_m_length);
  __ B(lo, &loop_m);

  __ Add(index_n, index_n, 1);
  __ Cmp(index_n, inputs_n_length);
  __ B(lo, &loop_n);

  END();
  TRY_RUN(skipped);
}
2082
2083
2084 // Test NEON instructions. The inputs_*[] and expected[] arrays should be
2085 // arrays of rawbit representation of input values. This ensures that
2086 // exact bit comparisons can be performed.
2087 template <typename Td, typename Tn, typename Tm>
Test2OpNEON(const char * name,Test2OpNEONHelper_t helper,const Td inputs_d[],const Tn inputs_n[],unsigned inputs_n_length,const Tm inputs_m[],unsigned inputs_m_length,const Td expected[],unsigned expected_length,VectorFormat vd_form,VectorFormat vn_form,VectorFormat vm_form)2088 static void Test2OpNEON(const char* name,
2089 Test2OpNEONHelper_t helper,
2090 const Td inputs_d[],
2091 const Tn inputs_n[],
2092 unsigned inputs_n_length,
2093 const Tm inputs_m[],
2094 unsigned inputs_m_length,
2095 const Td expected[],
2096 unsigned expected_length,
2097 VectorFormat vd_form,
2098 VectorFormat vn_form,
2099 VectorFormat vm_form) {
2100 VIXL_ASSERT(inputs_n_length > 0 && inputs_m_length > 0);
2101
2102 const unsigned vd_lane_count = MaxLaneCountFromFormat(vd_form);
2103
2104 const unsigned results_length = inputs_n_length * inputs_m_length;
2105 Td* results = new Td[results_length * vd_lane_count];
2106 const unsigned lane_bit = sizeof(Td) * 8;
2107 const unsigned lane_len_in_hex = MaxHexCharCount<Td, Tm>();
2108
2109 bool skipped;
2110
2111 Test2OpNEON_Helper(helper,
2112 reinterpret_cast<uintptr_t>(inputs_d),
2113 reinterpret_cast<uintptr_t>(inputs_n),
2114 inputs_n_length,
2115 reinterpret_cast<uintptr_t>(inputs_m),
2116 inputs_m_length,
2117 reinterpret_cast<uintptr_t>(results),
2118 vd_form,
2119 vn_form,
2120 vm_form,
2121 &skipped);
2122
2123 if (Test::generate_test_trace()) {
2124 // Print the results.
2125 printf("const uint%u_t kExpected_NEON_%s[] = {\n", lane_bit, name);
2126 for (unsigned iteration = 0; iteration < results_length; iteration++) {
2127 printf(" ");
2128 // Output a separate result for each element of the result vector.
2129 for (unsigned lane = 0; lane < vd_lane_count; lane++) {
2130 unsigned index = lane + (iteration * vd_lane_count);
2131 printf(" 0x%0*" PRIx64 ",",
2132 lane_len_in_hex,
2133 static_cast<uint64_t>(results[index]));
2134 }
2135 printf("\n");
2136 }
2137
2138 printf("};\n");
2139 printf("const unsigned kExpectedCount_NEON_%s = %u;\n",
2140 name,
2141 results_length);
2142 } else if (!skipped) {
2143 // Check the results.
2144 VIXL_CHECK(expected_length == results_length);
2145 unsigned error_count = 0;
2146 unsigned d = 0;
2147 const char* padding = " ";
2148 VIXL_ASSERT(strlen(padding) >= (lane_len_in_hex + 1));
2149 for (unsigned n = 0; n < inputs_n_length; n++) {
2150 for (unsigned m = 0; m < inputs_m_length; m++, d++) {
2151 bool error_in_vector = false;
2152
2153 for (unsigned lane = 0; lane < vd_lane_count; lane++) {
2154 unsigned output_index = (n * inputs_m_length * vd_lane_count) +
2155 (m * vd_lane_count) + lane;
2156
2157 if (results[output_index] != expected[output_index]) {
2158 error_in_vector = true;
2159 break;
2160 }
2161 }
2162
2163 if (error_in_vector && (++error_count <= kErrorReportLimit)) {
2164 printf("%s\n", name);
2165 printf(" Vd%.*s| Vn%.*s| Vm%.*s| Vd%.*s| Expected\n",
2166 lane_len_in_hex + 1,
2167 padding,
2168 lane_len_in_hex + 1,
2169 padding,
2170 lane_len_in_hex + 1,
2171 padding,
2172 lane_len_in_hex + 1,
2173 padding);
2174
2175 for (unsigned lane = 0; lane < vd_lane_count; lane++) {
2176 unsigned output_index = (n * inputs_m_length * vd_lane_count) +
2177 (m * vd_lane_count) + lane;
2178 unsigned input_index_n =
2179 (inputs_n_length - vd_lane_count + n + 1 + lane) %
2180 inputs_n_length;
2181 unsigned input_index_m =
2182 (inputs_m_length - vd_lane_count + m + 1 + lane) %
2183 inputs_m_length;
2184
2185 printf("%c0x%0*" PRIx64 " | 0x%0*" PRIx64 " | 0x%0*" PRIx64
2186 " "
2187 "| 0x%0*" PRIx64 " | 0x%0*" PRIx64 "\n",
2188 results[output_index] != expected[output_index] ? '*' : ' ',
2189 lane_len_in_hex,
2190 static_cast<uint64_t>(inputs_d[lane]),
2191 lane_len_in_hex,
2192 static_cast<uint64_t>(inputs_n[input_index_n]),
2193 lane_len_in_hex,
2194 static_cast<uint64_t>(inputs_m[input_index_m]),
2195 lane_len_in_hex,
2196 static_cast<uint64_t>(results[output_index]),
2197 lane_len_in_hex,
2198 static_cast<uint64_t>(expected[output_index]));
2199 }
2200 }
2201 }
2202 }
2203 VIXL_ASSERT(d == expected_length);
2204 if (error_count > kErrorReportLimit) {
2205 printf("%u other errors follow.\n", error_count - kErrorReportLimit);
2206 }
2207 VIXL_CHECK(error_count == 0);
2208 }
2209 delete[] results;
2210 }
2211
2212
2213 // ==== Tests for instructions of the form <INST> Vd, Vn, Vm[<#index>]. ====
2214
// Emit and run a test program for instructions of the form
// <INST> Vd, Vn, Vm[<#index>].
//
// As in Test2OpNEON_Helper, the generated code rotates input lanes through vn
// (outer loop) and vm (inner loop). For each (n, m) combination it emits one
// instruction per entry of 'indices' and stores the full 128-bit result after
// each emission. 'vm_subvector_count' widens the register format passed to
// 'helper' (vm_bits * vm_subvector_count), allowing vm to be viewed as
// several concatenated copies of a narrower vector; the combined width must
// fit in a Q register (asserted below). TRY_RUN reports via '*skipped'.
static void TestByElementNEON_Helper(TestByElementNEONHelper_t helper,
                                     uintptr_t inputs_d,
                                     uintptr_t inputs_n,
                                     unsigned inputs_n_length,
                                     uintptr_t inputs_m,
                                     unsigned inputs_m_length,
                                     const int indices[],
                                     unsigned indices_length,
                                     uintptr_t results,
                                     VectorFormat vd_form,
                                     VectorFormat vn_form,
                                     VectorFormat vm_form,
                                     unsigned vm_subvector_count,
                                     bool* skipped) {
  VIXL_ASSERT(vd_form != kFormatUndefined);
  VIXL_ASSERT(vn_form != kFormatUndefined);
  VIXL_ASSERT(vm_form != kFormatUndefined);
  VIXL_ASSERT((vm_subvector_count != 0) && IsPowerOf2(vm_subvector_count));

  // Enable all features that instructions exercised through this helper may
  // need; instructions requiring none of the extras are unaffected.
  CPUFeatures features;
  features.Combine(CPUFeatures::kNEON, CPUFeatures::kNEONHalf);
  features.Combine(CPUFeatures::kFP);
  features.Combine(CPUFeatures::kRDM);
  features.Combine(CPUFeatures::kDotProduct);
  features.Combine(CPUFeatures::kFHM);
  SETUP_WITH_FEATURES(features);

  START();

  // Roll up the loop to keep the code size down.
  Label loop_n, loop_m;

  Register out = x0;
  Register inputs_n_base = x1;
  Register inputs_m_base = x2;
  Register inputs_d_base = x3;
  Register inputs_n_last_16bytes = x4;
  Register inputs_m_last_16bytes = x5;
  Register index_n = x6;
  Register index_m = x7;

  // TODO: Refactor duplicate definitions below with a VRegister::As() routine.
  const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form);
  const unsigned vd_lane_count = LaneCountFromFormat(vd_form);

  const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form);
  const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
  const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
  const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form);
  const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form);

  const unsigned vm_bits = RegisterSizeInBitsFromFormat(vm_form);
  const unsigned vm_lane_count = LaneCountFromFormat(vm_form);
  const unsigned vm_lane_bytes = LaneSizeInBytesFromFormat(vm_form);
  const unsigned vm_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vm_form);
  const unsigned vm_lane_bits = LaneSizeInBitsFromFormat(vm_form);

  VIXL_ASSERT((vm_bits * vm_subvector_count) <= kQRegSize);

  // Always load and store 128 bits regardless of the format.
  VRegister vd = v0.V16B();
  VRegister vn = v1.V16B();
  VRegister vm = v2.V16B();
  VRegister vntmp = v3.V16B();
  VRegister vmtmp = v4.V16B();
  VRegister vres = v5.V16B();

  // These will have the correct format for calling the 'helper'.
  VRegister vn_helper = VRegister(1, vn_bits, vn_lane_count);
  VRegister vm_helper =
      VRegister(2, vm_bits * vm_subvector_count, vm_lane_count);
  VRegister vres_helper = VRegister(5, vd_bits, vd_lane_count);

  // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'.
  VRegister vntmp_single = VRegister(3, vn_lane_bits);
  VRegister vmtmp_single = VRegister(4, vm_lane_bits);

  __ Mov(out, results);

  __ Mov(inputs_d_base, inputs_d);

  // Prime vn/vm with trailing bytes of the input arrays, so that lanes not
  // yet replaced by the Ext rotation below hold wrapped-around input data.
  // NOTE(review): as in Test2OpNEON_Helper, this offset uses the length as a
  // byte count while the loop bound uses it as a lane index — confirm units.
  __ Mov(inputs_n_base, inputs_n);
  __ Mov(inputs_n_last_16bytes, inputs_n + (inputs_n_length - 16));
  __ Mov(inputs_m_base, inputs_m);
  __ Mov(inputs_m_last_16bytes, inputs_m + (inputs_m_length - 16));

  __ Ldr(vd, MemOperand(inputs_d_base));
  __ Ldr(vn, MemOperand(inputs_n_last_16bytes));
  __ Ldr(vm, MemOperand(inputs_m_last_16bytes));

  __ Mov(index_n, 0);
  __ Bind(&loop_n);

  // Shift one new input lane into the most-significant end of vn.
  __ Ldr(vntmp_single,
         MemOperand(inputs_n_base, index_n, LSL, vn_lane_bytes_log2));
  __ Ext(vn, vn, vntmp, vn_lane_bytes);

  __ Mov(index_m, 0);
  __ Bind(&loop_m);

  // Likewise shift one new input lane into vm.
  __ Ldr(vmtmp_single,
         MemOperand(inputs_m_base, index_m, LSL, vm_lane_bytes_log2));
  __ Ext(vm, vm, vmtmp, vm_lane_bytes);

  // Seed the destination once, then emit one instruction (and one result
  // store) per tested index. Note that vres accumulates across indices; each
  // emission starts from the previous index's result.
  __ Mov(vres, vd);
  {
    for (unsigned i = 0; i < indices_length; i++) {
      {
        SingleEmissionCheckScope guard(&masm);
        (masm.*helper)(vres_helper, vn_helper, vm_helper, indices[i]);
      }
      __ Str(vres, MemOperand(out, vd.GetSizeInBytes(), PostIndex));
    }
  }

  __ Add(index_m, index_m, 1);
  __ Cmp(index_m, inputs_m_length);
  __ B(lo, &loop_m);

  __ Add(index_n, index_n, 1);
  __ Cmp(index_n, inputs_n_length);
  __ B(lo, &loop_n);

  END();
  TRY_RUN(skipped);
}
2341
2342
2343 // Test NEON instructions. The inputs_*[] and expected[] arrays should be
2344 // arrays of rawbit representation of input values. This ensures that
2345 // exact bit comparisons can be performed.
2346 template <typename Td, typename Tn, typename Tm>
TestByElementNEON(const char * name,TestByElementNEONHelper_t helper,const Td inputs_d[],const Tn inputs_n[],unsigned inputs_n_length,const Tm inputs_m[],unsigned inputs_m_length,const int indices[],unsigned indices_length,const Td expected[],unsigned expected_length,VectorFormat vd_form,VectorFormat vn_form,VectorFormat vm_form,unsigned vm_subvector_count=1)2347 static void TestByElementNEON(const char* name,
2348 TestByElementNEONHelper_t helper,
2349 const Td inputs_d[],
2350 const Tn inputs_n[],
2351 unsigned inputs_n_length,
2352 const Tm inputs_m[],
2353 unsigned inputs_m_length,
2354 const int indices[],
2355 unsigned indices_length,
2356 const Td expected[],
2357 unsigned expected_length,
2358 VectorFormat vd_form,
2359 VectorFormat vn_form,
2360 VectorFormat vm_form,
2361 unsigned vm_subvector_count = 1) {
2362 VIXL_ASSERT(inputs_n_length > 0);
2363 VIXL_ASSERT(inputs_m_length > 0);
2364 VIXL_ASSERT(indices_length > 0);
2365
2366 const unsigned vd_lane_count = MaxLaneCountFromFormat(vd_form);
2367
2368 const unsigned results_length =
2369 inputs_n_length * inputs_m_length * indices_length;
2370 Td* results = new Td[results_length * vd_lane_count];
2371 const unsigned lane_bit = sizeof(Td) * 8;
2372 const unsigned lane_len_in_hex = MaxHexCharCount<Td, Tm>();
2373
2374 bool skipped;
2375
2376 TestByElementNEON_Helper(helper,
2377 reinterpret_cast<uintptr_t>(inputs_d),
2378 reinterpret_cast<uintptr_t>(inputs_n),
2379 inputs_n_length,
2380 reinterpret_cast<uintptr_t>(inputs_m),
2381 inputs_m_length,
2382 indices,
2383 indices_length,
2384 reinterpret_cast<uintptr_t>(results),
2385 vd_form,
2386 vn_form,
2387 vm_form,
2388 vm_subvector_count,
2389 &skipped);
2390
2391 if (Test::generate_test_trace()) {
2392 // Print the results.
2393 printf("const uint%u_t kExpected_NEON_%s[] = {\n", lane_bit, name);
2394 for (unsigned iteration = 0; iteration < results_length; iteration++) {
2395 printf(" ");
2396 // Output a separate result for each element of the result vector.
2397 for (unsigned lane = 0; lane < vd_lane_count; lane++) {
2398 unsigned index = lane + (iteration * vd_lane_count);
2399 printf(" 0x%0*" PRIx64 ",",
2400 lane_len_in_hex,
2401 static_cast<uint64_t>(results[index]));
2402 }
2403 printf("\n");
2404 }
2405
2406 printf("};\n");
2407 printf("const unsigned kExpectedCount_NEON_%s = %u;\n",
2408 name,
2409 results_length);
2410 } else if (!skipped) {
2411 // Check the results.
2412 VIXL_CHECK(expected_length == results_length);
2413 unsigned error_count = 0;
2414 unsigned d = 0;
2415 const char* padding = " ";
2416 VIXL_ASSERT(strlen(padding) >= (lane_len_in_hex + 1));
2417 for (unsigned n = 0; n < inputs_n_length; n++) {
2418 for (unsigned m = 0; m < inputs_m_length; m++) {
2419 for (unsigned index = 0; index < indices_length; index++, d++) {
2420 bool error_in_vector = false;
2421
2422 for (unsigned lane = 0; lane < vd_lane_count; lane++) {
2423 unsigned output_index =
2424 (n * inputs_m_length * indices_length * vd_lane_count) +
2425 (m * indices_length * vd_lane_count) + (index * vd_lane_count) +
2426 lane;
2427
2428 if (results[output_index] != expected[output_index]) {
2429 error_in_vector = true;
2430 break;
2431 }
2432 }
2433
2434 if (error_in_vector && (++error_count <= kErrorReportLimit)) {
2435 printf("%s\n", name);
2436 printf(" Vd%.*s| Vn%.*s| Vm%.*s| Index | Vd%.*s| Expected\n",
2437 lane_len_in_hex + 1,
2438 padding,
2439 lane_len_in_hex + 1,
2440 padding,
2441 lane_len_in_hex + 1,
2442 padding,
2443 lane_len_in_hex + 1,
2444 padding);
2445
2446 for (unsigned lane = 0; lane < vd_lane_count; lane++) {
2447 unsigned output_index =
2448 (n * inputs_m_length * indices_length * vd_lane_count) +
2449 (m * indices_length * vd_lane_count) +
2450 (index * vd_lane_count) + lane;
2451 unsigned input_index_n =
2452 (inputs_n_length - vd_lane_count + n + 1 + lane) %
2453 inputs_n_length;
2454 unsigned input_index_m =
2455 (inputs_m_length - vd_lane_count + m + 1 + lane) %
2456 inputs_m_length;
2457
2458 printf("%c0x%0*" PRIx64 " | 0x%0*" PRIx64 " | 0x%0*" PRIx64
2459 " "
2460 "| [%3d] | 0x%0*" PRIx64 " | 0x%0*" PRIx64 "\n",
2461 results[output_index] != expected[output_index] ? '*'
2462 : ' ',
2463 lane_len_in_hex,
2464 static_cast<uint64_t>(inputs_d[lane]),
2465 lane_len_in_hex,
2466 static_cast<uint64_t>(inputs_n[input_index_n]),
2467 lane_len_in_hex,
2468 static_cast<uint64_t>(inputs_m[input_index_m]),
2469 indices[index],
2470 lane_len_in_hex,
2471 static_cast<uint64_t>(results[output_index]),
2472 lane_len_in_hex,
2473 static_cast<uint64_t>(expected[output_index]));
2474 }
2475 }
2476 }
2477 }
2478 }
2479 VIXL_ASSERT(d == expected_length);
2480 if (error_count > kErrorReportLimit) {
2481 printf("%u other errors follow.\n", error_count - kErrorReportLimit);
2482 }
2483 VIXL_CHECK(error_count == 0);
2484 }
2485 delete[] results;
2486 }
2487
2488
2489 // ==== Tests for instructions of the form <INST> VReg, VReg, #Immediate. ====
2490
2491
2492 template <typename Tm>
Test2OpImmNEON_Helper(typename Test2OpImmediateNEONHelper_t<Tm>::mnemonic helper,uintptr_t inputs_n,unsigned inputs_n_length,const Tm inputs_m[],unsigned inputs_m_length,uintptr_t results,VectorFormat vd_form,VectorFormat vn_form,bool * skipped)2493 void Test2OpImmNEON_Helper(
2494 typename Test2OpImmediateNEONHelper_t<Tm>::mnemonic helper,
2495 uintptr_t inputs_n,
2496 unsigned inputs_n_length,
2497 const Tm inputs_m[],
2498 unsigned inputs_m_length,
2499 uintptr_t results,
2500 VectorFormat vd_form,
2501 VectorFormat vn_form,
2502 bool* skipped) {
2503 VIXL_ASSERT(vd_form != kFormatUndefined && vn_form != kFormatUndefined);
2504
2505 SETUP_WITH_FEATURES(CPUFeatures::kNEON,
2506 CPUFeatures::kFP,
2507 CPUFeatures::kNEONHalf);
2508 START();
2509
2510 // Roll up the loop to keep the code size down.
2511 Label loop_n;
2512
2513 Register out = x0;
2514 Register inputs_n_base = x1;
2515 Register inputs_n_last_16bytes = x3;
2516 Register index_n = x5;
2517
2518 // TODO: Refactor duplicate definitions below with a VRegister::As() routine.
2519 const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form);
2520 const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
2521
2522 const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form);
2523 const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
2524 const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
2525 const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form);
2526 const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form);
2527
2528
2529 // These will be either a D- or a Q-register form, with a single lane
2530 // (for use in scalar load and store operations).
2531 VRegister vd = VRegister(0, vd_bits);
2532 VRegister vn = v1.V16B();
2533 VRegister vntmp = v3.V16B();
2534
2535 // These will have the correct format for use when calling 'helper'.
2536 VRegister vd_helper = VRegister(0, vd_bits, vd_lane_count);
2537 VRegister vn_helper = VRegister(1, vn_bits, vn_lane_count);
2538
2539 // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'.
2540 VRegister vntmp_single = VRegister(3, vn_lane_bits);
2541
2542 __ Mov(out, results);
2543
2544 __ Mov(inputs_n_base, inputs_n);
2545 __ Mov(inputs_n_last_16bytes,
2546 inputs_n + (vn_lane_bytes * inputs_n_length) - 16);
2547
2548 __ Ldr(vn, MemOperand(inputs_n_last_16bytes));
2549
2550 __ Mov(index_n, 0);
2551 __ Bind(&loop_n);
2552
2553 __ Ldr(vntmp_single,
2554 MemOperand(inputs_n_base, index_n, LSL, vn_lane_bytes_log2));
2555 __ Ext(vn, vn, vntmp, vn_lane_bytes);
2556
2557 // Set the destination to zero for tests such as '[r]shrn2'.
2558 // TODO: Setting the destination to values other than zero might be a better
2559 // test for shift and accumulate instructions (srsra/ssra/usra/ursra).
2560 __ Movi(vd.V16B(), 0);
2561
2562 {
2563 for (unsigned i = 0; i < inputs_m_length; i++) {
2564 {
2565 SingleEmissionCheckScope guard(&masm);
2566 (masm.*helper)(vd_helper, vn_helper, inputs_m[i]);
2567 }
2568 __ Str(vd, MemOperand(out, vd.GetSizeInBytes(), PostIndex));
2569 }
2570 }
2571
2572 __ Add(index_n, index_n, 1);
2573 __ Cmp(index_n, inputs_n_length);
2574 __ B(lo, &loop_n);
2575
2576 END();
2577 TRY_RUN(skipped);
2578 }
2579
2580
2581 // Test NEON instructions. The inputs_*[] and expected[] arrays should be
2582 // arrays of rawbit representation of input values. This ensures that
2583 // exact bit comparisons can be performed.
2584 template <typename Td, typename Tn, typename Tm>
Test2OpImmNEON(const char * name,typename Test2OpImmediateNEONHelper_t<Tm>::mnemonic helper,const Tn inputs_n[],unsigned inputs_n_length,const Tm inputs_m[],unsigned inputs_m_length,const Td expected[],unsigned expected_length,VectorFormat vd_form,VectorFormat vn_form)2585 static void Test2OpImmNEON(
2586 const char* name,
2587 typename Test2OpImmediateNEONHelper_t<Tm>::mnemonic helper,
2588 const Tn inputs_n[],
2589 unsigned inputs_n_length,
2590 const Tm inputs_m[],
2591 unsigned inputs_m_length,
2592 const Td expected[],
2593 unsigned expected_length,
2594 VectorFormat vd_form,
2595 VectorFormat vn_form) {
2596 VIXL_ASSERT(inputs_n_length > 0 && inputs_m_length > 0);
2597
2598 const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
2599 const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
2600 const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
2601
2602 const unsigned results_length = inputs_n_length * inputs_m_length;
2603 Td* results = new Td[results_length * vd_lane_count];
2604 const unsigned lane_bit = sizeof(Td) * 8;
2605 const unsigned lane_len_in_hex = MaxHexCharCount<Td, Tn>();
2606
2607 bool skipped;
2608
2609 Test2OpImmNEON_Helper(helper,
2610 reinterpret_cast<uintptr_t>(inputs_n),
2611 inputs_n_length,
2612 inputs_m,
2613 inputs_m_length,
2614 reinterpret_cast<uintptr_t>(results),
2615 vd_form,
2616 vn_form,
2617 &skipped);
2618
2619 if (Test::generate_test_trace()) {
2620 // Print the results.
2621 printf("const uint%u_t kExpected_NEON_%s[] = {\n", lane_bit, name);
2622 for (unsigned iteration = 0; iteration < results_length; iteration++) {
2623 printf(" ");
2624 // Output a separate result for each element of the result vector.
2625 for (unsigned lane = 0; lane < vd_lane_count; lane++) {
2626 unsigned index = lane + (iteration * vd_lane_count);
2627 printf(" 0x%0*" PRIx64 ",",
2628 lane_len_in_hex,
2629 static_cast<uint64_t>(results[index]));
2630 }
2631 printf("\n");
2632 }
2633
2634 printf("};\n");
2635 printf("const unsigned kExpectedCount_NEON_%s = %u;\n",
2636 name,
2637 results_length);
2638 } else if (!skipped) {
2639 // Check the results.
2640 VIXL_CHECK(expected_length == results_length);
2641 unsigned error_count = 0;
2642 unsigned d = 0;
2643 const char* padding = " ";
2644 VIXL_ASSERT(strlen(padding) >= (lane_len_in_hex + 1));
2645 for (unsigned n = 0; n < inputs_n_length; n++) {
2646 for (unsigned m = 0; m < inputs_m_length; m++, d++) {
2647 bool error_in_vector = false;
2648
2649 for (unsigned lane = 0; lane < vd_lane_count; lane++) {
2650 unsigned output_index = (n * inputs_m_length * vd_lane_count) +
2651 (m * vd_lane_count) + lane;
2652
2653 if (results[output_index] != expected[output_index]) {
2654 error_in_vector = true;
2655 break;
2656 }
2657 }
2658
2659 if (error_in_vector && (++error_count <= kErrorReportLimit)) {
2660 printf("%s\n", name);
2661 printf(" Vn%.*s| Imm%.*s| Vd%.*s| Expected\n",
2662 lane_len_in_hex + 1,
2663 padding,
2664 lane_len_in_hex,
2665 padding,
2666 lane_len_in_hex + 1,
2667 padding);
2668
2669 const unsigned first_index_n =
2670 inputs_n_length - (16 / vn_lane_bytes) + n + 1;
2671
2672 for (unsigned lane = 0; lane < std::max(vd_lane_count, vn_lane_count);
2673 lane++) {
2674 unsigned output_index = (n * inputs_m_length * vd_lane_count) +
2675 (m * vd_lane_count) + lane;
2676 unsigned input_index_n = (first_index_n + lane) % inputs_n_length;
2677 unsigned input_index_m = m;
2678
2679 printf("%c0x%0*" PRIx64 " | 0x%0*" PRIx64
2680 " "
2681 "| 0x%0*" PRIx64 " | 0x%0*" PRIx64 "\n",
2682 results[output_index] != expected[output_index] ? '*' : ' ',
2683 lane_len_in_hex,
2684 static_cast<uint64_t>(inputs_n[input_index_n]),
2685 lane_len_in_hex,
2686 static_cast<uint64_t>(inputs_m[input_index_m]),
2687 lane_len_in_hex,
2688 static_cast<uint64_t>(results[output_index]),
2689 lane_len_in_hex,
2690 static_cast<uint64_t>(expected[output_index]));
2691 }
2692 }
2693 }
2694 }
2695 VIXL_ASSERT(d == expected_length);
2696 if (error_count > kErrorReportLimit) {
2697 printf("%u other errors follow.\n", error_count - kErrorReportLimit);
2698 }
2699 VIXL_CHECK(error_count == 0);
2700 }
2701 delete[] results;
2702 }
2703
2704
2705 // ==== Tests for instructions of the form <INST> VReg, #Imm, VReg, #Imm. ====
2706
2707
// Emit and run a test program for instructions of the form
// <INST> VReg, #Imm, VReg, #Imm.
//
// The outer loop rotates one new lane from 'inputs_n' into vn per iteration.
// For each vn value, one instruction is emitted per (imm1, imm2) pair, each
// time re-seeding the destination from 'inputs_d' and storing the result
// (imm2-fastest order). TRY_RUN reports via '*skipped'.
static void TestOpImmOpImmNEON_Helper(TestOpImmOpImmVdUpdateNEONHelper_t helper,
                                      uintptr_t inputs_d,
                                      const int inputs_imm1[],
                                      unsigned inputs_imm1_length,
                                      uintptr_t inputs_n,
                                      unsigned inputs_n_length,
                                      const int inputs_imm2[],
                                      unsigned inputs_imm2_length,
                                      uintptr_t results,
                                      VectorFormat vd_form,
                                      VectorFormat vn_form,
                                      bool* skipped) {
  VIXL_ASSERT(vd_form != kFormatUndefined);
  VIXL_ASSERT(vn_form != kFormatUndefined);

  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);
  START();

  // Roll up the loop to keep the code size down.
  Label loop_n;

  Register out = x0;
  Register inputs_d_base = x1;
  Register inputs_n_base = x2;
  Register inputs_n_last_vector = x4;
  Register index_n = x6;

  // TODO: Refactor duplicate definitions below with a VRegister::As() routine.
  const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form);
  const unsigned vd_lane_count = LaneCountFromFormat(vd_form);

  const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form);
  const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
  const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
  const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form);
  const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form);


  // These will be either a D- or a Q-register form, with a single lane
  // (for use in scalar load and store operations).
  VRegister vd = VRegister(0, vd_bits);
  VRegister vn = VRegister(1, vn_bits);
  VRegister vntmp = VRegister(4, vn_bits);
  VRegister vres = VRegister(5, vn_bits);

  VRegister vn_helper = VRegister(1, vn_bits, vn_lane_count);
  VRegister vres_helper = VRegister(5, vd_bits, vd_lane_count);

  // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'.
  VRegister vntmp_single = VRegister(4, vn_lane_bits);

  // Same registers for use in the 'ext' instructions.
  VRegister vn_ext = (kDRegSize == vn_bits) ? vn.V8B() : vn.V16B();
  VRegister vntmp_ext = (kDRegSize == vn_bits) ? vntmp.V8B() : vntmp.V16B();

  __ Mov(out, results);

  __ Mov(inputs_d_base, inputs_d);

  // Prime vn with the last whole input vector so that lanes not yet replaced
  // by the Ext rotation below hold wrapped-around input data.
  __ Mov(inputs_n_base, inputs_n);
  __ Mov(inputs_n_last_vector,
         inputs_n + vn_lane_bytes * (inputs_n_length - vn_lane_count));

  __ Ldr(vd, MemOperand(inputs_d_base));

  __ Ldr(vn, MemOperand(inputs_n_last_vector));

  __ Mov(index_n, 0);
  __ Bind(&loop_n);

  // Shift one new input lane into the most-significant end of vn.
  __ Ldr(vntmp_single,
         MemOperand(inputs_n_base, index_n, LSL, vn_lane_bytes_log2));
  __ Ext(vn_ext, vn_ext, vntmp_ext, vn_lane_bytes);

  {
    // Three instructions per (imm1, imm2) pair: the Mov seeding vres, the
    // instruction under test, and the Str of the result.
    EmissionCheckScope guard(&masm,
                             kInstructionSize * inputs_imm1_length *
                                 inputs_imm2_length * 3);
    for (unsigned i = 0; i < inputs_imm1_length; i++) {
      for (unsigned j = 0; j < inputs_imm2_length; j++) {
        __ Mov(vres, vd);
        (masm.*helper)(vres_helper, inputs_imm1[i], vn_helper, inputs_imm2[j]);
        __ Str(vres, MemOperand(out, vd.GetSizeInBytes(), PostIndex));
      }
    }
  }

  __ Add(index_n, index_n, 1);
  __ Cmp(index_n, inputs_n_length);
  __ B(lo, &loop_n);

  END();
  TRY_RUN(skipped);
}
2802
2803
// Test NEON instructions. The inputs_*[] and expected[] arrays should be
// arrays of rawbit representation of input values. This ensures that
// exact bit comparisons can be performed.
//
// Drives instructions of the form <op>(vd, #imm1, vn, #imm2): every
// combination of inputs_n x inputs_imm1 x inputs_imm2 is executed by
// TestOpImmOpImmNEON_Helper, then the results are either printed as a
// reference trace (trace-generation mode) or compared bit-exactly against
// expected[].
template <typename Td, typename Tn>
static void TestOpImmOpImmNEON(const char* name,
                               TestOpImmOpImmVdUpdateNEONHelper_t helper,
                               const Td inputs_d[],
                               const int inputs_imm1[],
                               unsigned inputs_imm1_length,
                               const Tn inputs_n[],
                               unsigned inputs_n_length,
                               const int inputs_imm2[],
                               unsigned inputs_imm2_length,
                               const Td expected[],
                               unsigned expected_length,
                               VectorFormat vd_form,
                               VectorFormat vn_form) {
  VIXL_ASSERT(inputs_n_length > 0);
  VIXL_ASSERT(inputs_imm1_length > 0);
  VIXL_ASSERT(inputs_imm2_length > 0);

  const unsigned vd_lane_count = LaneCountFromFormat(vd_form);

  // One result vector is produced per (n, imm1, imm2) combination.
  const unsigned results_length =
      inputs_n_length * inputs_imm1_length * inputs_imm2_length;

  // Raw-bits buffer for the generated results; each result vector occupies
  // vd_lane_count consecutive elements of type Td.
  Td* results = new Td[results_length * vd_lane_count];
  const unsigned lane_bit = sizeof(Td) * 8;
  // Number of hex digits used to print one lane value.
  const unsigned lane_len_in_hex = MaxHexCharCount<Td, Tn>();

  // Set by the helper (via TRY_RUN) when execution was skipped; in that case
  // `results` holds no meaningful data and is not checked.
  bool skipped;

  TestOpImmOpImmNEON_Helper(helper,
                            reinterpret_cast<uintptr_t>(inputs_d),
                            inputs_imm1,
                            inputs_imm1_length,
                            reinterpret_cast<uintptr_t>(inputs_n),
                            inputs_n_length,
                            inputs_imm2,
                            inputs_imm2_length,
                            reinterpret_cast<uintptr_t>(results),
                            vd_form,
                            vn_form,
                            &skipped);

  if (Test::generate_test_trace()) {
    // Print the results as C arrays suitable for pasting into the expected
    // trace headers (kExpected_NEON_* / kExpectedCount_NEON_*).
    printf("const uint%u_t kExpected_NEON_%s[] = {\n", lane_bit, name);
    for (unsigned iteration = 0; iteration < results_length; iteration++) {
      printf(" ");
      // Output a separate result for each element of the result vector.
      for (unsigned lane = 0; lane < vd_lane_count; lane++) {
        unsigned index = lane + (iteration * vd_lane_count);
        printf(" 0x%0*" PRIx64 ",",
               lane_len_in_hex,
               static_cast<uint64_t>(results[index]));
      }
      printf("\n");
    }

    printf("};\n");
    printf("const unsigned kExpectedCount_NEON_%s = %u;\n",
           name,
           results_length);
  } else if (!skipped) {
    // Check the results.
    VIXL_CHECK(expected_length == results_length);
    unsigned error_count = 0;
    unsigned counted_length = 0;
    const char* padding = "                    ";
    VIXL_ASSERT(strlen(padding) >= (lane_len_in_hex + 1));
    for (unsigned n = 0; n < inputs_n_length; n++) {
      for (unsigned imm1 = 0; imm1 < inputs_imm1_length; imm1++) {
        for (unsigned imm2 = 0; imm2 < inputs_imm2_length; imm2++) {
          bool error_in_vector = false;

          counted_length++;

          // First pass: detect whether any lane of this result vector
          // differs from the expected trace.
          for (unsigned lane = 0; lane < vd_lane_count; lane++) {
            // Results are stored flat, ordered as [n][imm1][imm2][lane].
            unsigned output_index =
                (n * inputs_imm1_length * inputs_imm2_length * vd_lane_count) +
                (imm1 * inputs_imm2_length * vd_lane_count) +
                (imm2 * vd_lane_count) + lane;

            if (results[output_index] != expected[output_index]) {
              error_in_vector = true;
              break;
            }
          }

          // Report at most kErrorReportLimit failing vectors in detail; any
          // further failures are only counted (summarised after the loop).
          if (error_in_vector && (++error_count <= kErrorReportLimit)) {
            printf("%s\n", name);
            printf(" Vd%.*s| Imm%.*s| Vn%.*s| Imm%.*s| Vd%.*s| Expected\n",
                   lane_len_in_hex + 1,
                   padding,
                   lane_len_in_hex,
                   padding,
                   lane_len_in_hex + 1,
                   padding,
                   lane_len_in_hex,
                   padding,
                   lane_len_in_hex + 1,
                   padding);

            for (unsigned lane = 0; lane < vd_lane_count; lane++) {
              unsigned output_index =
                  (n * inputs_imm1_length * inputs_imm2_length *
                   vd_lane_count) +
                  (imm1 * inputs_imm2_length * vd_lane_count) +
                  (imm2 * vd_lane_count) + lane;
              // Map back to the inputs_n element that the helper had rotated
              // into this vn lane for iteration n (the helper starts from the
              // last full input vector and Ext-shifts one new element in per
              // iteration).
              // NOTE(review): this uses vd_lane_count; it presumably assumes
              // vd and vn forms have equal lane counts here — confirm for any
              // non-same-form use.
              unsigned input_index_n =
                  (inputs_n_length - vd_lane_count + n + 1 + lane) %
                  inputs_n_length;
              unsigned input_index_imm1 = imm1;
              unsigned input_index_imm2 = imm2;

              // The first 'Vd' column shows the initial destination lanes
              // (inputs_d), which the helper reloads before every operation;
              // the second shows the produced result.
              printf("%c0x%0*" PRIx64 " | 0x%0*" PRIx64 " | 0x%0*" PRIx64
                     " "
                     "| 0x%0*" PRIx64 " | 0x%0*" PRIx64 " | 0x%0*" PRIx64 "\n",
                     results[output_index] != expected[output_index] ? '*'
                                                                     : ' ',
                     lane_len_in_hex,
                     static_cast<uint64_t>(inputs_d[lane]),
                     lane_len_in_hex,
                     static_cast<uint64_t>(inputs_imm1[input_index_imm1]),
                     lane_len_in_hex,
                     static_cast<uint64_t>(inputs_n[input_index_n]),
                     lane_len_in_hex,
                     static_cast<uint64_t>(inputs_imm2[input_index_imm2]),
                     lane_len_in_hex,
                     static_cast<uint64_t>(results[output_index]),
                     lane_len_in_hex,
                     static_cast<uint64_t>(expected[output_index]));
            }
          }
        }
      }
    }
    VIXL_CHECK(counted_length == expected_length);
    if (error_count > kErrorReportLimit) {
      printf("%u other errors follow.\n", error_count - kErrorReportLimit);
    }
    VIXL_CHECK(error_count == 0);
  }
  delete[] results;
}
2950
2951
// ==== Floating-point tests. ====


// Standard floating-point test expansion for both double- and single-precision
// operations.
#define STRINGIFY(s) #s

// Invoke the Test<type> harness (e.g. Test1Op, Test2Op, Test3Op, TestCmp) for
// one <mnemonic>_<variant> combination, pairing the given input table with the
// generated kExpected_* / kExpectedCount_* trace arrays.
#define CALL_TEST_FP_HELPER(mnemonic, variant, type, input) \
  Test##type(STRINGIFY(mnemonic) "_" STRINGIFY(variant),    \
             &MacroAssembler::mnemonic,                     \
             input,                                         \
             sizeof(input) / sizeof(input[0]),              \
             kExpected_##mnemonic##_##variant,              \
             kExpectedCount_##mnemonic##_##variant)

// Define double-precision (d) and single-precision (s) tests for one FP
// instruction.
#define DEFINE_TEST_FP(mnemonic, type, input)                    \
  TEST(mnemonic##_d) {                                           \
    CALL_TEST_FP_HELPER(mnemonic, d, type, kInputDouble##input); \
  }                                                              \
  TEST(mnemonic##_s) {                                           \
    CALL_TEST_FP_HELPER(mnemonic, s, type, kInputFloat##input);  \
  }

// As DEFINE_TEST_FP, but additionally defines a half-precision (h) test.
#define DEFINE_TEST_FP_FP16(mnemonic, type, input)                \
  TEST(mnemonic##_d) {                                            \
    CALL_TEST_FP_HELPER(mnemonic, d, type, kInputDouble##input);  \
  }                                                               \
  TEST(mnemonic##_s) {                                            \
    CALL_TEST_FP_HELPER(mnemonic, s, type, kInputFloat##input);   \
  }                                                               \
  TEST(mnemonic##_h) {                                            \
    CALL_TEST_FP_HELPER(mnemonic, h, type, kInputFloat16##input); \
  }
2985
2986
// TODO: Test with a newer version of valgrind.
//
// Note: valgrind-3.10.0 does not properly interpret libm's fma() on x86_64.
// Therefore this test will be exiting though an ASSERT and thus leaking
// memory.
// Fused multiply-add family (three-operand).
DEFINE_TEST_FP_FP16(fmadd, 3Op, Basic)
DEFINE_TEST_FP_FP16(fmsub, 3Op, Basic)
DEFINE_TEST_FP_FP16(fnmadd, 3Op, Basic)
DEFINE_TEST_FP_FP16(fnmsub, 3Op, Basic)

// Two-operand arithmetic.
DEFINE_TEST_FP_FP16(fadd, 2Op, Basic)
DEFINE_TEST_FP_FP16(fdiv, 2Op, Basic)
DEFINE_TEST_FP_FP16(fmax, 2Op, Basic)
DEFINE_TEST_FP_FP16(fmaxnm, 2Op, Basic)
DEFINE_TEST_FP_FP16(fmin, 2Op, Basic)
DEFINE_TEST_FP_FP16(fminnm, 2Op, Basic)
DEFINE_TEST_FP_FP16(fmul, 2Op, Basic)
DEFINE_TEST_FP_FP16(fsub, 2Op, Basic)
DEFINE_TEST_FP_FP16(fnmul, 2Op, Basic)

// One-operand arithmetic and rounding.
DEFINE_TEST_FP_FP16(fabs, 1Op, Basic)
DEFINE_TEST_FP_FP16(fmov, 1Op, Basic)
DEFINE_TEST_FP_FP16(fneg, 1Op, Basic)
DEFINE_TEST_FP_FP16(fsqrt, 1Op, Basic)
DEFINE_TEST_FP(frint32x, 1Op, Conversions)
DEFINE_TEST_FP(frint64x, 1Op, Conversions)
DEFINE_TEST_FP(frint32z, 1Op, Conversions)
DEFINE_TEST_FP(frint64z, 1Op, Conversions)
DEFINE_TEST_FP_FP16(frinta, 1Op, Conversions)
DEFINE_TEST_FP_FP16(frinti, 1Op, Conversions)
DEFINE_TEST_FP_FP16(frintm, 1Op, Conversions)
DEFINE_TEST_FP_FP16(frintn, 1Op, Conversions)
DEFINE_TEST_FP_FP16(frintp, 1Op, Conversions)
DEFINE_TEST_FP_FP16(frintx, 1Op, Conversions)
DEFINE_TEST_FP_FP16(frintz, 1Op, Conversions)

// Compare against a register (Cmp) and against zero (CmpZero).
TEST(fcmp_d) { CALL_TEST_FP_HELPER(fcmp, d, Cmp, kInputDoubleBasic); }
TEST(fcmp_s) { CALL_TEST_FP_HELPER(fcmp, s, Cmp, kInputFloatBasic); }
TEST(fcmp_dz) { CALL_TEST_FP_HELPER(fcmp, dz, CmpZero, kInputDoubleBasic); }
TEST(fcmp_sz) { CALL_TEST_FP_HELPER(fcmp, sz, CmpZero, kInputFloatBasic); }

// Single <-> double precision conversions.
TEST(fcvt_sd) { CALL_TEST_FP_HELPER(fcvt, sd, 1Op, kInputDoubleConversions); }
TEST(fcvt_ds) { CALL_TEST_FP_HELPER(fcvt, ds, 1Op, kInputFloatConversions); }
3030
// Define FP-to-integer conversion tests for 64-bit (x) and 32-bit (w)
// destinations, from double (d), single (s) and half (h) precision sources.
#define DEFINE_TEST_FP_TO_INT(mnemonic, type, input)               \
  TEST(mnemonic##_xd) {                                            \
    CALL_TEST_FP_HELPER(mnemonic, xd, type, kInputDouble##input);  \
  }                                                                \
  TEST(mnemonic##_xs) {                                            \
    CALL_TEST_FP_HELPER(mnemonic, xs, type, kInputFloat##input);   \
  }                                                                \
  TEST(mnemonic##_xh) {                                            \
    CALL_TEST_FP_HELPER(mnemonic, xh, type, kInputFloat16##input); \
  }                                                                \
  TEST(mnemonic##_wd) {                                            \
    CALL_TEST_FP_HELPER(mnemonic, wd, type, kInputDouble##input);  \
  }                                                                \
  TEST(mnemonic##_ws) {                                            \
    CALL_TEST_FP_HELPER(mnemonic, ws, type, kInputFloat##input);   \
  }                                                                \
  TEST(mnemonic##_wh) {                                            \
    CALL_TEST_FP_HELPER(mnemonic, wh, type, kInputFloat16##input); \
  }

DEFINE_TEST_FP_TO_INT(fcvtas, FPToS, Conversions)
DEFINE_TEST_FP_TO_INT(fcvtau, FPToU, Conversions)
DEFINE_TEST_FP_TO_INT(fcvtms, FPToS, Conversions)
DEFINE_TEST_FP_TO_INT(fcvtmu, FPToU, Conversions)
DEFINE_TEST_FP_TO_INT(fcvtns, FPToS, Conversions)
DEFINE_TEST_FP_TO_INT(fcvtnu, FPToU, Conversions)
DEFINE_TEST_FP_TO_INT(fcvtzs, FPToFixedS, Conversions)
DEFINE_TEST_FP_TO_INT(fcvtzu, FPToFixedU, Conversions)

// Defines only the W<-D form (the only form of fjcvtzs).
#define DEFINE_TEST_FP_TO_JS_INT(mnemonic, type, input)           \
  TEST(mnemonic##_wd) {                                           \
    CALL_TEST_FP_HELPER(mnemonic, wd, type, kInputDouble##input); \
  }

DEFINE_TEST_FP_TO_JS_INT(fjcvtzs, FPToS, Conversions)
3066
3067 // TODO: Scvtf-fixed-point
3068 // TODO: Scvtf-integer
3069 // TODO: Ucvtf-fixed-point
3070 // TODO: Ucvtf-integer
3071
3072 // TODO: Fccmp
3073 // TODO: Fcsel
3074
3075
3076 // ==== NEON Tests. ====
3077
// One-operand NEON test: <mnemonic> vd.<vdform>, vn.<vnform>.
#define CALL_TEST_NEON_HELPER_1Op(mnemonic, vdform, vnform, input_n) \
  Test1OpNEON(STRINGIFY(mnemonic) "_" STRINGIFY(vdform),             \
              &MacroAssembler::mnemonic,                             \
              input_n,                                               \
              (sizeof(input_n) / sizeof(input_n[0])),                \
              kExpected_NEON_##mnemonic##_##vdform,                  \
              kExpectedCount_NEON_##mnemonic##_##vdform,             \
              kFormat##vdform,                                       \
              kFormat##vnform)

// Across-lanes NEON test (scalar destination, vector source); the trace name
// encodes both forms because the destination size alone is ambiguous.
#define CALL_TEST_NEON_HELPER_1OpAcross(mnemonic, vdform, vnform, input_n)   \
  Test1OpAcrossNEON(STRINGIFY(mnemonic) "_" STRINGIFY(vdform) "_" STRINGIFY( \
                        vnform),                                             \
                    &MacroAssembler::mnemonic,                               \
                    input_n,                                                 \
                    (sizeof(input_n) / sizeof(input_n[0])),                  \
                    kExpected_NEON_##mnemonic##_##vdform##_##vnform,         \
                    kExpectedCount_NEON_##mnemonic##_##vdform##_##vnform,    \
                    kFormat##vdform,                                         \
                    kFormat##vnform)

// Two-operand NEON test with an initialised (accumulator) destination:
// <mnemonic> vd.<vdform>, vn.<vnform>, vm.<vmform>.
#define CALL_TEST_NEON_HELPER_2Op(mnemonic,              \
                                  vdform,                \
                                  vnform,                \
                                  vmform,                \
                                  input_d,               \
                                  input_n,               \
                                  input_m)               \
  Test2OpNEON(STRINGIFY(mnemonic) "_" STRINGIFY(vdform), \
              &MacroAssembler::mnemonic,                 \
              input_d,                                   \
              input_n,                                   \
              (sizeof(input_n) / sizeof(input_n[0])),    \
              input_m,                                   \
              (sizeof(input_m) / sizeof(input_m[0])),    \
              kExpected_NEON_##mnemonic##_##vdform,      \
              kExpectedCount_NEON_##mnemonic##_##vdform, \
              kFormat##vdform,                           \
              kFormat##vnform,                           \
              kFormat##vmform)

// Vector-and-immediate NEON test: <mnemonic> vd.<vdform>, vn.<vnform>, #imm.
#define CALL_TEST_NEON_HELPER_2OpImm(mnemonic,                      \
                                     vdform,                        \
                                     vnform,                        \
                                     input_n,                       \
                                     input_m)                       \
  Test2OpImmNEON(STRINGIFY(mnemonic) "_" STRINGIFY(vdform) "_2OPIMM", \
                 &MacroAssembler::mnemonic,                         \
                 input_n,                                           \
                 (sizeof(input_n) / sizeof(input_n[0])),            \
                 input_m,                                           \
                 (sizeof(input_m) / sizeof(input_m[0])),            \
                 kExpected_NEON_##mnemonic##_##vdform##_2OPIMM,     \
                 kExpectedCount_NEON_##mnemonic##_##vdform##_2OPIMM, \
                 kFormat##vdform,                                   \
                 kFormat##vnform)
3134
// By-element NEON test: <mnemonic> vd.<vdform>, vn.<vnform>, vm.<vmform>[i],
// iterating over the given lane indices.
#define CALL_TEST_NEON_HELPER_ByElement(mnemonic,                        \
                                        vdform,                          \
                                        vnform,                          \
                                        vmform,                          \
                                        input_d,                         \
                                        input_n,                         \
                                        input_m,                         \
                                        indices)                         \
  TestByElementNEON(                                                     \
      STRINGIFY(mnemonic) "_" STRINGIFY(vdform) "_" STRINGIFY(           \
          vnform) "_" STRINGIFY(vmform),                                 \
      &MacroAssembler::mnemonic,                                         \
      input_d,                                                           \
      input_n,                                                           \
      (sizeof(input_n) / sizeof(input_n[0])),                            \
      input_m,                                                           \
      (sizeof(input_m) / sizeof(input_m[0])),                            \
      indices,                                                           \
      (sizeof(indices) / sizeof(indices[0])),                            \
      kExpected_NEON_##mnemonic##_##vdform##_##vnform##_##vmform,        \
      kExpectedCount_NEON_##mnemonic##_##vdform##_##vnform##_##vmform,   \
      kFormat##vdform,                                                   \
      kFormat##vnform,                                                   \
      kFormat##vmform)

// As ByElement, but for dot-product-style instructions where the indexed Vm
// element is a sub-vector; vm_subvector_count gives the number of
// sub-vectors per Vm register.
#define CALL_TEST_NEON_HELPER_ByElement_Dot_Product(mnemonic,            \
                                                    vdform,              \
                                                    vnform,              \
                                                    vmform,              \
                                                    input_d,             \
                                                    input_n,             \
                                                    input_m,             \
                                                    indices,             \
                                                    vm_subvector_count)  \
  TestByElementNEON(                                                     \
      STRINGIFY(mnemonic) "_" STRINGIFY(vdform) "_" STRINGIFY(           \
          vnform) "_" STRINGIFY(vmform),                                 \
      &MacroAssembler::mnemonic,                                         \
      input_d,                                                           \
      input_n,                                                           \
      (sizeof(input_n) / sizeof(input_n[0])),                            \
      input_m,                                                           \
      (sizeof(input_m) / sizeof(input_m[0])),                            \
      indices,                                                           \
      (sizeof(indices) / sizeof(indices[0])),                            \
      kExpected_NEON_##mnemonic##_##vdform##_##vnform##_##vmform,        \
      kExpectedCount_NEON_##mnemonic##_##vdform##_##vnform##_##vmform,   \
      kFormat##vdform,                                                   \
      kFormat##vnform,                                                   \
      kFormat##vmform,                                                   \
      vm_subvector_count)

// Imm-op-imm NEON test: <op>(vd, #imm1, vn, #imm2), driven through an
// explicit helper callback (TestOpImmOpImmVdUpdateNEONHelper_t).
#define CALL_TEST_NEON_HELPER_OpImmOpImm(helper,                     \
                                         mnemonic,                   \
                                         vdform,                     \
                                         vnform,                     \
                                         input_d,                    \
                                         input_imm1,                 \
                                         input_n,                    \
                                         input_imm2)                 \
  TestOpImmOpImmNEON(STRINGIFY(mnemonic) "_" STRINGIFY(vdform),      \
                     helper,                                         \
                     input_d,                                        \
                     input_imm1,                                     \
                     (sizeof(input_imm1) / sizeof(input_imm1[0])),   \
                     input_n,                                        \
                     (sizeof(input_n) / sizeof(input_n[0])),         \
                     input_imm2,                                     \
                     (sizeof(input_imm2) / sizeof(input_imm2[0])),   \
                     kExpected_NEON_##mnemonic##_##vdform,           \
                     kExpectedCount_NEON_##mnemonic##_##vdform,      \
                     kFormat##vdform,                                \
                     kFormat##vnform)
3208
// "2SAME" tests: one-operand instructions where source and destination use
// the same vector format.
#define CALL_TEST_NEON_HELPER_2SAME(mnemonic, variant, input) \
  CALL_TEST_NEON_HELPER_1Op(mnemonic, variant, variant, input)

#define DEFINE_TEST_NEON_2SAME_8B_16B(mnemonic, input)              \
  TEST(mnemonic##_8B) {                                             \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, 8B, kInput8bits##input);  \
  }                                                                 \
  TEST(mnemonic##_16B) {                                            \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, 16B, kInput8bits##input); \
  }

#define DEFINE_TEST_NEON_2SAME_4H_8H(mnemonic, input)               \
  TEST(mnemonic##_4H) {                                             \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, 4H, kInput16bits##input); \
  }                                                                 \
  TEST(mnemonic##_8H) {                                             \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, 8H, kInput16bits##input); \
  }

#define DEFINE_TEST_NEON_2SAME_2S_4S(mnemonic, input)               \
  TEST(mnemonic##_2S) {                                             \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, 2S, kInput32bits##input); \
  }                                                                 \
  TEST(mnemonic##_4S) {                                             \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, 4S, kInput32bits##input); \
  }

// B and H lane-size forms only.
#define DEFINE_TEST_NEON_2SAME_BH(mnemonic, input) \
  DEFINE_TEST_NEON_2SAME_8B_16B(mnemonic, input)   \
  DEFINE_TEST_NEON_2SAME_4H_8H(mnemonic, input)

// All vector forms except 2D.
#define DEFINE_TEST_NEON_2SAME_NO2D(mnemonic, input) \
  DEFINE_TEST_NEON_2SAME_BH(mnemonic, input)         \
  DEFINE_TEST_NEON_2SAME_2S_4S(mnemonic, input)

// All integer vector forms, including 2D.
#define DEFINE_TEST_NEON_2SAME(mnemonic, input)                     \
  DEFINE_TEST_NEON_2SAME_NO2D(mnemonic, input)                      \
  TEST(mnemonic##_2D) {                                             \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, 2D, kInput64bits##input); \
  }
// S and D lane-size vector forms only.
#define DEFINE_TEST_NEON_2SAME_SD(mnemonic, input)                  \
  DEFINE_TEST_NEON_2SAME_2S_4S(mnemonic, input)                     \
  TEST(mnemonic##_2D) {                                             \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, 2D, kInput64bits##input); \
  }

// Floating-point vector forms (2S/4S single, 2D double).
#define DEFINE_TEST_NEON_2SAME_FP(mnemonic, input)                 \
  TEST(mnemonic##_2S) {                                            \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, 2S, kInputFloat##input); \
  }                                                                \
  TEST(mnemonic##_4S) {                                            \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, 4S, kInputFloat##input); \
  }                                                                \
  TEST(mnemonic##_2D) {                                            \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, 2D, kInputDouble##input); \
  }

// FP vector forms plus the half-precision 4H/8H forms.
#define DEFINE_TEST_NEON_2SAME_FP_FP16(mnemonic, input)              \
  DEFINE_TEST_NEON_2SAME_FP(mnemonic, input)                         \
  TEST(mnemonic##_4H) {                                              \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, 4H, kInputFloat16##input); \
  }                                                                  \
  TEST(mnemonic##_8H) {                                              \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, 8H, kInputFloat16##input); \
  }

// Scalar FP forms (H, S, D registers).
#define DEFINE_TEST_NEON_2SAME_FP_FP16_SCALAR(mnemonic, input)      \
  TEST(mnemonic##_H) {                                              \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, H, kInputFloat16##input); \
  }                                                                 \
  TEST(mnemonic##_S) {                                              \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, S, kInputFloat##input);   \
  }                                                                 \
  TEST(mnemonic##_D) {                                              \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, D, kInputDouble##input);  \
  }

// Scalar integer forms, one macro per lane size.
#define DEFINE_TEST_NEON_2SAME_SCALAR_B(mnemonic, input)           \
  TEST(mnemonic##_B) {                                             \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, B, kInput8bits##input);  \
  }
#define DEFINE_TEST_NEON_2SAME_SCALAR_H(mnemonic, input)           \
  TEST(mnemonic##_H) {                                             \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, H, kInput16bits##input); \
  }
#define DEFINE_TEST_NEON_2SAME_SCALAR_S(mnemonic, input)           \
  TEST(mnemonic##_S) {                                             \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, S, kInput32bits##input); \
  }
#define DEFINE_TEST_NEON_2SAME_SCALAR_D(mnemonic, input)           \
  TEST(mnemonic##_D) {                                             \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, D, kInput64bits##input); \
  }

// All scalar integer lane sizes.
#define DEFINE_TEST_NEON_2SAME_SCALAR(mnemonic, input) \
  DEFINE_TEST_NEON_2SAME_SCALAR_B(mnemonic, input)     \
  DEFINE_TEST_NEON_2SAME_SCALAR_H(mnemonic, input)     \
  DEFINE_TEST_NEON_2SAME_SCALAR_S(mnemonic, input)     \
  DEFINE_TEST_NEON_2SAME_SCALAR_D(mnemonic, input)

// S and D scalar lane sizes only.
#define DEFINE_TEST_NEON_2SAME_SCALAR_SD(mnemonic, input) \
  DEFINE_TEST_NEON_2SAME_SCALAR_S(mnemonic, input)        \
  DEFINE_TEST_NEON_2SAME_SCALAR_D(mnemonic, input)
3312
3313
// Across-lanes tests (scalar result from a vector source).
#define CALL_TEST_NEON_HELPER_ACROSS(mnemonic, vd_form, vn_form, input_n) \
  CALL_TEST_NEON_HELPER_1OpAcross(mnemonic, vd_form, vn_form, input_n)

// Destination lane size equals source lane size (e.g. min/max across lanes).
#define DEFINE_TEST_NEON_ACROSS(mnemonic, input)                         \
  TEST(mnemonic##_B_8B) {                                                \
    CALL_TEST_NEON_HELPER_ACROSS(mnemonic, B, 8B, kInput8bits##input);   \
  }                                                                      \
  TEST(mnemonic##_B_16B) {                                               \
    CALL_TEST_NEON_HELPER_ACROSS(mnemonic, B, 16B, kInput8bits##input);  \
  }                                                                      \
  TEST(mnemonic##_H_4H) {                                                \
    CALL_TEST_NEON_HELPER_ACROSS(mnemonic, H, 4H, kInput16bits##input);  \
  }                                                                      \
  TEST(mnemonic##_H_8H) {                                                \
    CALL_TEST_NEON_HELPER_ACROSS(mnemonic, H, 8H, kInput16bits##input);  \
  }                                                                      \
  TEST(mnemonic##_S_4S) {                                                \
    CALL_TEST_NEON_HELPER_ACROSS(mnemonic, S, 4S, kInput32bits##input);  \
  }

// Widening across-lanes forms (e.g. addv-style long accumulation): the
// destination lane is twice the source lane size.
#define DEFINE_TEST_NEON_ACROSS_LONG(mnemonic, input)                    \
  TEST(mnemonic##_H_8B) {                                                \
    CALL_TEST_NEON_HELPER_ACROSS(mnemonic, H, 8B, kInput8bits##input);   \
  }                                                                      \
  TEST(mnemonic##_H_16B) {                                               \
    CALL_TEST_NEON_HELPER_ACROSS(mnemonic, H, 16B, kInput8bits##input);  \
  }                                                                      \
  TEST(mnemonic##_S_4H) {                                                \
    CALL_TEST_NEON_HELPER_ACROSS(mnemonic, S, 4H, kInput16bits##input);  \
  }                                                                      \
  TEST(mnemonic##_S_8H) {                                                \
    CALL_TEST_NEON_HELPER_ACROSS(mnemonic, S, 8H, kInput16bits##input);  \
  }                                                                      \
  TEST(mnemonic##_D_4S) {                                                \
    CALL_TEST_NEON_HELPER_ACROSS(mnemonic, D, 4S, kInput32bits##input);  \
  }

// Floating-point across-lanes forms (half and single precision).
#define DEFINE_TEST_NEON_ACROSS_FP(mnemonic, input)                      \
  TEST(mnemonic##_H_4H) {                                                \
    CALL_TEST_NEON_HELPER_ACROSS(mnemonic, H, 4H, kInputFloat16##input); \
  }                                                                      \
  TEST(mnemonic##_H_8H) {                                                \
    CALL_TEST_NEON_HELPER_ACROSS(mnemonic, H, 8H, kInputFloat16##input); \
  }                                                                      \
  TEST(mnemonic##_S_4S) {                                                \
    CALL_TEST_NEON_HELPER_ACROSS(mnemonic, S, 4S, kInputFloat##input);   \
  }
3361
// "2DIFF" tests: one-operand instructions where destination and source use
// different vector formats (widening or narrowing).
#define CALL_TEST_NEON_HELPER_2DIFF(mnemonic, vdform, vnform, input_n) \
  CALL_TEST_NEON_HELPER_1Op(mnemonic, vdform, vnform, input_n)

// Widening forms: destination lanes are twice as wide as source lanes.
#define DEFINE_TEST_NEON_2DIFF_LONG(mnemonic, input)                    \
  TEST(mnemonic##_4H) {                                                 \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 4H, 8B, kInput8bits##input);  \
  }                                                                     \
  TEST(mnemonic##_8H) {                                                 \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 8H, 16B, kInput8bits##input); \
  }                                                                     \
  TEST(mnemonic##_2S) {                                                 \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 2S, 4H, kInput16bits##input); \
  }                                                                     \
  TEST(mnemonic##_4S) {                                                 \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 4S, 8H, kInput16bits##input); \
  }                                                                     \
  TEST(mnemonic##_1D) {                                                 \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 1D, 2S, kInput32bits##input); \
  }                                                                     \
  TEST(mnemonic##_2D) {                                                 \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 2D, 4S, kInput32bits##input); \
  }

// Narrowing forms, plus the "<mnemonic>2" variants that write the upper half
// of the destination register.
#define DEFINE_TEST_NEON_2DIFF_NARROW(mnemonic, input)                      \
  TEST(mnemonic##_8B) {                                                     \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 8B, 8H, kInput16bits##input);     \
  }                                                                         \
  TEST(mnemonic##_4H) {                                                     \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 4H, 4S, kInput32bits##input);     \
  }                                                                         \
  TEST(mnemonic##_2S) {                                                     \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 2S, 2D, kInput64bits##input);     \
  }                                                                         \
  TEST(mnemonic##2_16B) {                                                   \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 16B, 8H, kInput16bits##input); \
  }                                                                         \
  TEST(mnemonic##2_8H) {                                                    \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 8H, 4S, kInput32bits##input);  \
  }                                                                         \
  TEST(mnemonic##2_4S) {                                                    \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 4S, 2D, kInput64bits##input);  \
  }

// Floating-point widening forms (e.g. fcvtl/fcvtl2).
#define DEFINE_TEST_NEON_2DIFF_FP_LONG(mnemonic, input)                    \
  TEST(mnemonic##_4S) {                                                    \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 4S, 4H, kInputFloat16##input);   \
  }                                                                        \
  TEST(mnemonic##_2D) {                                                    \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 2D, 2S, kInputFloat##input);     \
  }                                                                        \
  TEST(mnemonic##2_4S) {                                                   \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 4S, 8H, kInputFloat16##input); \
  }                                                                        \
  TEST(mnemonic##2_2D) {                                                   \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 2D, 4S, kInputFloat##input);  \
  }

// Floating-point narrowing forms (e.g. fcvtn/fcvtn2).
#define DEFINE_TEST_NEON_2DIFF_FP_NARROW(mnemonic, input)                 \
  TEST(mnemonic##_4H) {                                                   \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 4H, 4S, kInputFloat##input);    \
  }                                                                       \
  TEST(mnemonic##_2S) {                                                   \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 2S, 2D, kInputDouble##input);   \
  }                                                                       \
  TEST(mnemonic##2_8H) {                                                  \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 8H, 4S, kInputFloat##input); \
  }                                                                       \
  TEST(mnemonic##2_4S) {                                                  \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 4S, 2D, kInputDouble##input); \
  }

// Narrowing from double only (e.g. fcvtxn, which has no S->H form).
#define DEFINE_TEST_NEON_2DIFF_FP_NARROW_2S(mnemonic, input)              \
  TEST(mnemonic##_2S) {                                                   \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 2S, 2D, kInputDouble##input);   \
  }                                                                       \
  TEST(mnemonic##2_4S) {                                                  \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 4S, 2D, kInputDouble##input); \
  }

// Scalar narrowing forms.
#define DEFINE_TEST_NEON_2DIFF_SCALAR_NARROW(mnemonic, input)        \
  TEST(mnemonic##_B) {                                               \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, B, H, kInput16bits##input); \
  }                                                                  \
  TEST(mnemonic##_H) {                                               \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, H, S, kInput32bits##input); \
  }                                                                  \
  TEST(mnemonic##_S) {                                               \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, S, D, kInput64bits##input); \
  }

// Scalar destination from a vector pair source (e.g. faddp scalar forms).
#define DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(mnemonic, input)           \
  TEST(mnemonic##_S) {                                                 \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, S, 2S, kInputFloat##input);  \
  }                                                                    \
  TEST(mnemonic##_D) {                                                 \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, D, 2D, kInputDouble##input); \
  }                                                                    \
  TEST(mnemonic##_H) {                                                 \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, H, 2H, kInputFloat16##input); \
  }
3462
// "3SAME" tests: two-operand instructions where destination and both sources
// use the same vector format. The same input table is used for Vn and Vm;
// the *AccDestination tables initialise Vd (for accumulating instructions).
#define CALL_TEST_NEON_HELPER_3SAME(mnemonic, variant, input_d, input_nm) \
  {                                                                       \
    CALL_TEST_NEON_HELPER_2Op(mnemonic,                                   \
                              variant,                                    \
                              variant,                                    \
                              variant,                                    \
                              input_d,                                    \
                              input_nm,                                   \
                              input_nm);                                  \
  }

#define DEFINE_TEST_NEON_3SAME_8B_16B(mnemonic, input)    \
  TEST(mnemonic##_8B) {                                   \
    CALL_TEST_NEON_HELPER_3SAME(mnemonic,                 \
                                8B,                       \
                                kInput8bitsAccDestination, \
                                kInput8bits##input);      \
  }                                                       \
  TEST(mnemonic##_16B) {                                  \
    CALL_TEST_NEON_HELPER_3SAME(mnemonic,                 \
                                16B,                      \
                                kInput8bitsAccDestination, \
                                kInput8bits##input);      \
  }

// H and S lane-size vector forms.
#define DEFINE_TEST_NEON_3SAME_HS(mnemonic, input)         \
  TEST(mnemonic##_4H) {                                    \
    CALL_TEST_NEON_HELPER_3SAME(mnemonic,                  \
                                4H,                        \
                                kInput16bitsAccDestination, \
                                kInput16bits##input);      \
  }                                                        \
  TEST(mnemonic##_8H) {                                    \
    CALL_TEST_NEON_HELPER_3SAME(mnemonic,                  \
                                8H,                        \
                                kInput16bitsAccDestination, \
                                kInput16bits##input);      \
  }                                                        \
  TEST(mnemonic##_2S) {                                    \
    CALL_TEST_NEON_HELPER_3SAME(mnemonic,                  \
                                2S,                        \
                                kInput32bitsAccDestination, \
                                kInput32bits##input);      \
  }                                                        \
  TEST(mnemonic##_4S) {                                    \
    CALL_TEST_NEON_HELPER_3SAME(mnemonic,                  \
                                4S,                        \
                                kInput32bitsAccDestination, \
                                kInput32bits##input);      \
  }

// All vector forms except 2D.
#define DEFINE_TEST_NEON_3SAME_NO2D(mnemonic, input) \
  DEFINE_TEST_NEON_3SAME_8B_16B(mnemonic, input)     \
  DEFINE_TEST_NEON_3SAME_HS(mnemonic, input)

// All integer vector forms, including 2D.
#define DEFINE_TEST_NEON_3SAME(mnemonic, input)            \
  DEFINE_TEST_NEON_3SAME_NO2D(mnemonic, input)             \
  TEST(mnemonic##_2D) {                                    \
    CALL_TEST_NEON_HELPER_3SAME(mnemonic,                  \
                                2D,                        \
                                kInput64bitsAccDestination, \
                                kInput64bits##input);      \
  }

// Floating-point vector forms (FP16 4H/8H, single 2S/4S, double 2D).
#define DEFINE_TEST_NEON_3SAME_FP(mnemonic, input)          \
  TEST(mnemonic##_4H) {                                     \
    CALL_TEST_NEON_HELPER_3SAME(mnemonic,                   \
                                4H,                         \
                                kInputFloat16AccDestination, \
                                kInputFloat16##input);      \
  }                                                         \
  TEST(mnemonic##_8H) {                                     \
    CALL_TEST_NEON_HELPER_3SAME(mnemonic,                   \
                                8H,                         \
                                kInputFloat16AccDestination, \
                                kInputFloat16##input);      \
  }                                                         \
  TEST(mnemonic##_2S) {                                     \
    CALL_TEST_NEON_HELPER_3SAME(mnemonic,                   \
                                2S,                         \
                                kInputFloatAccDestination,  \
                                kInputFloat##input);        \
  }                                                         \
  TEST(mnemonic##_4S) {                                     \
    CALL_TEST_NEON_HELPER_3SAME(mnemonic,                   \
                                4S,                         \
                                kInputFloatAccDestination,  \
                                kInputFloat##input);        \
  }                                                         \
  TEST(mnemonic##_2D) {                                     \
    CALL_TEST_NEON_HELPER_3SAME(mnemonic,                   \
                                2D,                         \
                                kInputDoubleAccDestination, \
                                kInputDouble##input);       \
  }

// Scalar D form only.
#define DEFINE_TEST_NEON_3SAME_SCALAR_D(mnemonic, input)   \
  TEST(mnemonic##_D) {                                     \
    CALL_TEST_NEON_HELPER_3SAME(mnemonic,                  \
                                D,                         \
                                kInput64bitsAccDestination, \
                                kInput64bits##input);      \
  }

// Scalar H and S forms.
#define DEFINE_TEST_NEON_3SAME_SCALAR_HS(mnemonic, input)  \
  TEST(mnemonic##_H) {                                     \
    CALL_TEST_NEON_HELPER_3SAME(mnemonic,                  \
                                H,                         \
                                kInput16bitsAccDestination, \
                                kInput16bits##input);      \
  }                                                        \
  TEST(mnemonic##_S) {                                     \
    CALL_TEST_NEON_HELPER_3SAME(mnemonic,                  \
                                S,                         \
                                kInput32bitsAccDestination, \
                                kInput32bits##input);      \
  }

// All scalar integer forms (B, H, S, D).
#define DEFINE_TEST_NEON_3SAME_SCALAR(mnemonic, input)     \
  TEST(mnemonic##_B) {                                     \
    CALL_TEST_NEON_HELPER_3SAME(mnemonic,                  \
                                B,                         \
                                kInput8bitsAccDestination, \
                                kInput8bits##input);       \
  }                                                        \
  TEST(mnemonic##_H) {                                     \
    CALL_TEST_NEON_HELPER_3SAME(mnemonic,                  \
                                H,                         \
                                kInput16bitsAccDestination, \
                                kInput16bits##input);      \
  }                                                        \
  TEST(mnemonic##_S) {                                     \
    CALL_TEST_NEON_HELPER_3SAME(mnemonic,                  \
                                S,                         \
                                kInput32bitsAccDestination, \
                                kInput32bits##input);      \
  }                                                        \
  TEST(mnemonic##_D) {                                     \
    CALL_TEST_NEON_HELPER_3SAME(mnemonic,                  \
                                D,                         \
                                kInput64bitsAccDestination, \
                                kInput64bits##input);      \
  }

// Scalar FP forms (H, S, D).
#define DEFINE_TEST_NEON_3SAME_FP_SCALAR(mnemonic, input)   \
  TEST(mnemonic##_H) {                                      \
    CALL_TEST_NEON_HELPER_3SAME(mnemonic,                   \
                                H,                          \
                                kInputFloat16AccDestination, \
                                kInputFloat16##input);      \
  }                                                         \
  TEST(mnemonic##_S) {                                      \
    CALL_TEST_NEON_HELPER_3SAME(mnemonic,                   \
                                S,                          \
                                kInputFloatAccDestination,  \
                                kInputFloat##input);        \
  }                                                         \
  TEST(mnemonic##_D) {                                      \
    CALL_TEST_NEON_HELPER_3SAME(mnemonic,                   \
                                D,                          \
                                kInputDoubleAccDestination, \
                                kInputDouble##input);       \
  }
3626
// FP16 multiplication (fmlal/fmlsl-style) tests: single-precision destination
// accumulating products of half-precision sources.
// NOTE(review): the input_d parameter is unused (the accumulator is fixed to
// kInputFloatAccDestination), and input_n/input_m are forwarded as written —
// confirm whether input_d was intended to be plumbed through.
#define DEFINE_TEST_NEON_FHM(mnemonic, input_d, input_n, input_m) \
  TEST(mnemonic##_2S) {                                           \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic,                         \
                                2S,                               \
                                2H,                               \
                                2H,                               \
                                kInputFloatAccDestination,        \
                                kInputFloat16##input_n,           \
                                kInputFloat16##input_m);          \
  }                                                               \
  TEST(mnemonic##_4S) {                                           \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic,                         \
                                4S,                               \
                                4H,                               \
                                4H,                               \
                                kInputFloatAccDestination,        \
                                kInputFloat16##input_n,           \
                                kInputFloat16##input_m);          \
  }

// "3DIFF" tests: two-operand instructions where the destination format
// differs from the source formats (widening or narrowing).
#define CALL_TEST_NEON_HELPER_3DIFF(mnemonic, \
                                    vdform,   \
                                    vnform,   \
                                    vmform,   \
                                    input_d,  \
                                    input_n,  \
                                    input_m)  \
  {                                           \
    CALL_TEST_NEON_HELPER_2Op(mnemonic,       \
                              vdform,         \
                              vnform,         \
                              vmform,         \
                              input_d,        \
                              input_n,        \
                              input_m);       \
  }
3663
3664 #define DEFINE_TEST_NEON_3DIFF_LONG_8H(mnemonic, input) \
3665 TEST(mnemonic##_8H) { \
3666 CALL_TEST_NEON_HELPER_3DIFF(mnemonic, \
3667 8H, \
3668 8B, \
3669 8B, \
3670 kInput16bitsAccDestination, \
3671 kInput8bits##input, \
3672 kInput8bits##input); \
3673 } \
3674 TEST(mnemonic##2_8H) { \
3675 CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, \
3676 8H, \
3677 16B, \
3678 16B, \
3679 kInput16bitsAccDestination, \
3680 kInput8bits##input, \
3681 kInput8bits##input); \
3682 }
3683
3684 #define DEFINE_TEST_NEON_3DIFF_LONG_4S(mnemonic, input) \
3685 TEST(mnemonic##_4S) { \
3686 CALL_TEST_NEON_HELPER_3DIFF(mnemonic, \
3687 4S, \
3688 4H, \
3689 4H, \
3690 kInput32bitsAccDestination, \
3691 kInput16bits##input, \
3692 kInput16bits##input); \
3693 } \
3694 TEST(mnemonic##2_4S) { \
3695 CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, \
3696 4S, \
3697 8H, \
3698 8H, \
3699 kInput32bitsAccDestination, \
3700 kInput16bits##input, \
3701 kInput16bits##input); \
3702 }
3703
3704 #define DEFINE_TEST_NEON_3DIFF_LONG_2D(mnemonic, input) \
3705 TEST(mnemonic##_2D) { \
3706 CALL_TEST_NEON_HELPER_3DIFF(mnemonic, \
3707 2D, \
3708 2S, \
3709 2S, \
3710 kInput64bitsAccDestination, \
3711 kInput32bits##input, \
3712 kInput32bits##input); \
3713 } \
3714 TEST(mnemonic##2_2D) { \
3715 CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, \
3716 2D, \
3717 4S, \
3718 4S, \
3719 kInput64bitsAccDestination, \
3720 kInput32bits##input, \
3721 kInput32bits##input); \
3722 }
3723
3724 #define DEFINE_TEST_NEON_3DIFF_LONG_SD(mnemonic, input) \
3725 DEFINE_TEST_NEON_3DIFF_LONG_4S(mnemonic, input) \
3726 DEFINE_TEST_NEON_3DIFF_LONG_2D(mnemonic, input)
3727
3728 #define DEFINE_TEST_NEON_3DIFF_LONG(mnemonic, input) \
3729 DEFINE_TEST_NEON_3DIFF_LONG_8H(mnemonic, input) \
3730 DEFINE_TEST_NEON_3DIFF_LONG_4S(mnemonic, input) \
3731 DEFINE_TEST_NEON_3DIFF_LONG_2D(mnemonic, input)
3732
3733 #define DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_S(mnemonic, input) \
3734 TEST(mnemonic##_S) { \
3735 CALL_TEST_NEON_HELPER_3DIFF(mnemonic, \
3736 S, \
3737 H, \
3738 H, \
3739 kInput32bitsAccDestination, \
3740 kInput16bits##input, \
3741 kInput16bits##input); \
3742 }
3743
3744 #define DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_D(mnemonic, input) \
3745 TEST(mnemonic##_D) { \
3746 CALL_TEST_NEON_HELPER_3DIFF(mnemonic, \
3747 D, \
3748 S, \
3749 S, \
3750 kInput64bitsAccDestination, \
3751 kInput32bits##input, \
3752 kInput32bits##input); \
3753 }
3754
3755 #define DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_SD(mnemonic, input) \
3756 DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_S(mnemonic, input) \
3757 DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_D(mnemonic, input)
3758
// Wide three-different tests: the destination and first source share the
// wide lane size, the second source is narrow (e.g. 8H <- 8H, 8B). The
// "<mnemonic>2" variants read the high half of the narrow source.
#define DEFINE_TEST_NEON_3DIFF_WIDE(mnemonic, input) \
  TEST(mnemonic##_8H) { \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic, \
                                8H, \
                                8H, \
                                8B, \
                                kInput16bitsAccDestination, \
                                kInput16bits##input, \
                                kInput8bits##input); \
  } \
  TEST(mnemonic##_4S) { \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic, \
                                4S, \
                                4S, \
                                4H, \
                                kInput32bitsAccDestination, \
                                kInput32bits##input, \
                                kInput16bits##input); \
  } \
  TEST(mnemonic##_2D) { \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic, \
                                2D, \
                                2D, \
                                2S, \
                                kInput64bitsAccDestination, \
                                kInput64bits##input, \
                                kInput32bits##input); \
  } \
  TEST(mnemonic##2_8H) { \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, \
                                8H, \
                                8H, \
                                16B, \
                                kInput16bitsAccDestination, \
                                kInput16bits##input, \
                                kInput8bits##input); \
  } \
  TEST(mnemonic##2_4S) { \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, \
                                4S, \
                                4S, \
                                8H, \
                                kInput32bitsAccDestination, \
                                kInput32bits##input, \
                                kInput16bits##input); \
  } \
  TEST(mnemonic##2_2D) { \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, \
                                2D, \
                                2D, \
                                4S, \
                                kInput64bitsAccDestination, \
                                kInput64bits##input, \
                                kInput32bits##input); \
  }
3814
// Narrowing three-different tests: narrow destination from two wide sources
// (e.g. 8B <- 8H, 8H). The "<mnemonic>2" variants write the high half of
// the destination (16B/8H/4S) instead.
#define DEFINE_TEST_NEON_3DIFF_NARROW(mnemonic, input) \
  TEST(mnemonic##_8B) { \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic, \
                                8B, \
                                8H, \
                                8H, \
                                kInput8bitsAccDestination, \
                                kInput16bits##input, \
                                kInput16bits##input); \
  } \
  TEST(mnemonic##_4H) { \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic, \
                                4H, \
                                4S, \
                                4S, \
                                kInput16bitsAccDestination, \
                                kInput32bits##input, \
                                kInput32bits##input); \
  } \
  TEST(mnemonic##_2S) { \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic, \
                                2S, \
                                2D, \
                                2D, \
                                kInput32bitsAccDestination, \
                                kInput64bits##input, \
                                kInput64bits##input); \
  } \
  TEST(mnemonic##2_16B) { \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, \
                                16B, \
                                8H, \
                                8H, \
                                kInput8bitsAccDestination, \
                                kInput16bits##input, \
                                kInput16bits##input); \
  } \
  TEST(mnemonic##2_8H) { \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, \
                                8H, \
                                4S, \
                                4S, \
                                kInput16bitsAccDestination, \
                                kInput32bits##input, \
                                kInput32bits##input); \
  } \
  TEST(mnemonic##2_4S) { \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, \
                                4S, \
                                2D, \
                                2D, \
                                kInput32bitsAccDestination, \
                                kInput64bits##input, \
                                kInput64bits##input); \
  }
3870
// Quadrupling-width three-different tests (S destination from B sources),
// used below for the dot-product instructions (sdot/udot): 2S <- 8B x 8B
// and 4S <- 16B x 16B.
#define DEFINE_TEST_NEON_3DIFF_DOUBLE_WIDE(mnemonic, input) \
  TEST(mnemonic##_2S) { \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic, \
                                2S, \
                                8B, \
                                8B, \
                                kInput32bitsAccDestination, \
                                kInput8bits##input, \
                                kInput8bits##input); \
  } \
  TEST(mnemonic##_4S) { \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic, \
                                4S, \
                                16B, \
                                16B, \
                                kInput32bitsAccDestination, \
                                kInput8bits##input, \
                                kInput8bits##input); \
  }
3890
3891
// Thin brace-wrapped forwarder to the 2OpImm test helper; keeps the
// DEFINE_* macros below uniform with the other instruction groups.
#define CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
                                     vdform, \
                                     vnform, \
                                     input_n, \
                                     input_imm) \
  { \
    CALL_TEST_NEON_HELPER_2OpImm(mnemonic, \
                                 vdform, \
                                 vnform, \
                                 input_n, \
                                 input_imm); \
  }

// Two-operand-plus-immediate tests for every same-size vector arrangement
// (8B through 2D). The immediate list is selected by lane width.
#define DEFINE_TEST_NEON_2OPIMM(mnemonic, input, input_imm) \
  TEST(mnemonic##_8B_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
                                 8B, \
                                 8B, \
                                 kInput8bits##input, \
                                 kInput8bitsImm##input_imm); \
  } \
  TEST(mnemonic##_16B_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
                                 16B, \
                                 16B, \
                                 kInput8bits##input, \
                                 kInput8bitsImm##input_imm); \
  } \
  TEST(mnemonic##_4H_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
                                 4H, \
                                 4H, \
                                 kInput16bits##input, \
                                 kInput16bitsImm##input_imm); \
  } \
  TEST(mnemonic##_8H_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
                                 8H, \
                                 8H, \
                                 kInput16bits##input, \
                                 kInput16bitsImm##input_imm); \
  } \
  TEST(mnemonic##_2S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
                                 2S, \
                                 2S, \
                                 kInput32bits##input, \
                                 kInput32bitsImm##input_imm); \
  } \
  TEST(mnemonic##_4S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
                                 4S, \
                                 4S, \
                                 kInput32bits##input, \
                                 kInput32bitsImm##input_imm); \
  } \
  TEST(mnemonic##_2D_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
                                 2D, \
                                 2D, \
                                 kInput64bits##input, \
                                 kInput64bitsImm##input_imm); \
  }
3955
// Same as DEFINE_TEST_NEON_2OPIMM but the source operand is a scalar lane
// (B/H/S/D), as used by the copy-group instructions (e.g. dup by element).
#define DEFINE_TEST_NEON_2OPIMM_COPY(mnemonic, input, input_imm) \
  TEST(mnemonic##_8B_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
                                 8B, \
                                 B, \
                                 kInput8bits##input, \
                                 kInput8bitsImm##input_imm); \
  } \
  TEST(mnemonic##_16B_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
                                 16B, \
                                 B, \
                                 kInput8bits##input, \
                                 kInput8bitsImm##input_imm); \
  } \
  TEST(mnemonic##_4H_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
                                 4H, \
                                 H, \
                                 kInput16bits##input, \
                                 kInput16bitsImm##input_imm); \
  } \
  TEST(mnemonic##_8H_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
                                 8H, \
                                 H, \
                                 kInput16bits##input, \
                                 kInput16bitsImm##input_imm); \
  } \
  TEST(mnemonic##_2S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
                                 2S, \
                                 S, \
                                 kInput32bits##input, \
                                 kInput32bitsImm##input_imm); \
  } \
  TEST(mnemonic##_4S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
                                 4S, \
                                 S, \
                                 kInput32bits##input, \
                                 kInput32bitsImm##input_imm); \
  } \
  TEST(mnemonic##_2D_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
                                 2D, \
                                 D, \
                                 kInput64bits##input, \
                                 kInput64bitsImm##input_imm); \
  }
4006
// Narrowing shift-by-immediate tests (e.g. shrn): narrow destination from a
// wide source; the immediate list matches the (narrow) destination width.
// The "<mnemonic>2" variants write the high half of the destination.
#define DEFINE_TEST_NEON_2OPIMM_NARROW(mnemonic, input, input_imm) \
  TEST(mnemonic##_8B_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
                                 8B, \
                                 8H, \
                                 kInput16bits##input, \
                                 kInput8bitsImm##input_imm); \
  } \
  TEST(mnemonic##_4H_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
                                 4H, \
                                 4S, \
                                 kInput32bits##input, \
                                 kInput16bitsImm##input_imm); \
  } \
  TEST(mnemonic##_2S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
                                 2S, \
                                 2D, \
                                 kInput64bits##input, \
                                 kInput32bitsImm##input_imm); \
  } \
  TEST(mnemonic##2_16B_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic##2, \
                                 16B, \
                                 8H, \
                                 kInput16bits##input, \
                                 kInput8bitsImm##input_imm); \
  } \
  TEST(mnemonic##2_8H_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic##2, \
                                 8H, \
                                 4S, \
                                 kInput32bits##input, \
                                 kInput16bitsImm##input_imm); \
  } \
  TEST(mnemonic##2_4S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic##2, \
                                 4S, \
                                 2D, \
                                 kInput64bits##input, \
                                 kInput32bitsImm##input_imm); \
  }
4050
// Scalar narrowing shift-by-immediate tests: B <- H, H <- S and S <- D.
#define DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(mnemonic, input, input_imm) \
  TEST(mnemonic##_B_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
                                 B, \
                                 H, \
                                 kInput16bits##input, \
                                 kInput8bitsImm##input_imm); \
  } \
  TEST(mnemonic##_H_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
                                 H, \
                                 S, \
                                 kInput32bits##input, \
                                 kInput16bitsImm##input_imm); \
  } \
  TEST(mnemonic##_S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
                                 S, \
                                 D, \
                                 kInput64bits##input, \
                                 kInput32bitsImm##input_imm); \
  }
4073
// FP compare-against-immediate(-zero) tests across FP arrangements.
// NOTE(review): the 2S case uses kInputFloat##Basic, which always expands
// to kInputFloatBasic and ignores `input`, whereas the 4S case uses
// kInputFloat##input. This looks like a typo, but the recorded reference
// traces were generated with these inputs — confirm before "fixing", as a
// change would invalidate the traces.
#define DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(mnemonic, input, input_imm) \
  TEST(mnemonic##_4H_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
                                 4H, \
                                 4H, \
                                 kInputFloat16##input, \
                                 kInputDoubleImm##input_imm); \
  } \
  TEST(mnemonic##_8H_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
                                 8H, \
                                 8H, \
                                 kInputFloat16##input, \
                                 kInputDoubleImm##input_imm); \
  } \
  TEST(mnemonic##_2S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
                                 2S, \
                                 2S, \
                                 kInputFloat##Basic, \
                                 kInputDoubleImm##input_imm); \
  } \
  TEST(mnemonic##_4S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
                                 4S, \
                                 4S, \
                                 kInputFloat##input, \
                                 kInputDoubleImm##input_imm); \
  } \
  TEST(mnemonic##_2D_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
                                 2D, \
                                 2D, \
                                 kInputDouble##input, \
                                 kInputDoubleImm##input_imm); \
  }
4110
// FP two-operand-plus-immediate tests (e.g. fixed-point fcvtzs/fcvtzu).
// NOTE(review): as in DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO, the 2S case uses
// kInputFloat##Basic (ignores `input`) while 4S uses kInputFloat##input —
// probable typo, but reference traces depend on it; confirm before changing.
#define DEFINE_TEST_NEON_2OPIMM_FP(mnemonic, input, input_imm) \
  TEST(mnemonic##_4H_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
                                 4H, \
                                 4H, \
                                 kInputFloat16##input, \
                                 kInput16bitsImm##input_imm); \
  } \
  TEST(mnemonic##_8H_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
                                 8H, \
                                 8H, \
                                 kInputFloat16##input, \
                                 kInput16bitsImm##input_imm); \
  } \
  TEST(mnemonic##_2S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
                                 2S, \
                                 2S, \
                                 kInputFloat##Basic, \
                                 kInput32bitsImm##input_imm); \
  } \
  TEST(mnemonic##_4S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
                                 4S, \
                                 4S, \
                                 kInputFloat##input, \
                                 kInput32bitsImm##input_imm); \
  } \
  TEST(mnemonic##_2D_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
                                 2D, \
                                 2D, \
                                 kInputDouble##input, \
                                 kInput64bitsImm##input_imm); \
  }
4147
// Scalar FP two-operand-plus-immediate tests (H, S and D).
// NOTE(review): the H and S cases hard-code ...##Basic and so ignore the
// `input` argument; only the D case uses kInputDouble##input. Possibly
// intentional (trace size), possibly a typo — confirm against the trace
// generator before changing, since traces were produced with these inputs.
#define DEFINE_TEST_NEON_2OPIMM_FP_SCALAR(mnemonic, input, input_imm) \
  TEST(mnemonic##_H_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
                                 H, \
                                 H, \
                                 kInputFloat16##Basic, \
                                 kInput16bitsImm##input_imm); \
  } \
  TEST(mnemonic##_S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
                                 S, \
                                 S, \
                                 kInputFloat##Basic, \
                                 kInput32bitsImm##input_imm); \
  } \
  TEST(mnemonic##_D_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
                                 D, \
                                 D, \
                                 kInputDouble##input, \
                                 kInput64bitsImm##input_imm); \
  }
4170
// Integer two-operand-plus-immediate tests restricted to H, S and D lane
// widths (no byte forms), e.g. fixed-point scvtf/ucvtf.
#define DEFINE_TEST_NEON_2OPIMM_HSD(mnemonic, input, input_imm) \
  TEST(mnemonic##_4H_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
                                 4H, \
                                 4H, \
                                 kInput16bits##input, \
                                 kInput16bitsImm##input_imm); \
  } \
  TEST(mnemonic##_8H_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
                                 8H, \
                                 8H, \
                                 kInput16bits##input, \
                                 kInput16bitsImm##input_imm); \
  } \
  TEST(mnemonic##_2S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
                                 2S, \
                                 2S, \
                                 kInput32bits##input, \
                                 kInput32bitsImm##input_imm); \
  } \
  TEST(mnemonic##_4S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
                                 4S, \
                                 4S, \
                                 kInput32bits##input, \
                                 kInput32bitsImm##input_imm); \
  } \
  TEST(mnemonic##_2D_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
                                 2D, \
                                 2D, \
                                 kInput64bits##input, \
                                 kInput64bitsImm##input_imm); \
  }
4207
// Scalar integer two-operand-plus-immediate test, D form only (many scalar
// shifts are D-only in the architecture).
#define DEFINE_TEST_NEON_2OPIMM_SCALAR_D(mnemonic, input, input_imm) \
  TEST(mnemonic##_D_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
                                 D, \
                                 D, \
                                 kInput64bits##input, \
                                 kInput64bitsImm##input_imm); \
  }

// Scalar integer two-operand-plus-immediate tests for H, S and D forms.
#define DEFINE_TEST_NEON_2OPIMM_SCALAR_HSD(mnemonic, input, input_imm) \
  TEST(mnemonic##_H_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
                                 H, \
                                 H, \
                                 kInput16bits##input, \
                                 kInput16bitsImm##input_imm); \
  } \
  TEST(mnemonic##_S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
                                 S, \
                                 S, \
                                 kInput32bits##input, \
                                 kInput32bitsImm##input_imm); \
  } \
  DEFINE_TEST_NEON_2OPIMM_SCALAR_D(mnemonic, input, input_imm)
4233
// Scalar FP two-operand-plus-immediate test, D form only.
#define DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_D(mnemonic, input, input_imm) \
  TEST(mnemonic##_D_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
                                 D, \
                                 D, \
                                 kInputDouble##input, \
                                 kInputDoubleImm##input_imm); \
  }

// Scalar FP two-operand-plus-immediate tests for H, S and D forms; the
// immediate list is a double-precision list for every form.
#define DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_HSD(mnemonic, input, input_imm) \
  TEST(mnemonic##_H_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
                                 H, \
                                 H, \
                                 kInputFloat16##input, \
                                 kInputDoubleImm##input_imm); \
  } \
  TEST(mnemonic##_S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
                                 S, \
                                 S, \
                                 kInputFloat##input, \
                                 kInputDoubleImm##input_imm); \
  } \
  DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_D(mnemonic, input, input_imm)
4259
// Scalar integer two-operand-plus-immediate tests across all scalar forms
// (B plus the H/S/D forms from DEFINE_TEST_NEON_2OPIMM_SCALAR_HSD).
#define DEFINE_TEST_NEON_2OPIMM_SCALAR(mnemonic, input, input_imm) \
  TEST(mnemonic##_B_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
                                 B, \
                                 B, \
                                 kInput8bits##input, \
                                 kInput8bitsImm##input_imm); \
  } \
  DEFINE_TEST_NEON_2OPIMM_SCALAR_HSD(mnemonic, input, input_imm)
4269
// Long shift-by-immediate tests (e.g. sshll/ushll): wide destination from a
// narrow source; "<mnemonic>2" variants read the high half of the source.
// Immediates are sized by the (narrow) source lane width.
#define DEFINE_TEST_NEON_2OPIMM_LONG(mnemonic, input, input_imm) \
  TEST(mnemonic##_8H_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
                                 8H, \
                                 8B, \
                                 kInput8bits##input, \
                                 kInput8bitsImm##input_imm); \
  } \
  TEST(mnemonic##_4S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
                                 4S, \
                                 4H, \
                                 kInput16bits##input, \
                                 kInput16bitsImm##input_imm); \
  } \
  TEST(mnemonic##_2D_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, \
                                 2D, \
                                 2S, \
                                 kInput32bits##input, \
                                 kInput32bitsImm##input_imm); \
  } \
  TEST(mnemonic##2_8H_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic##2, \
                                 8H, \
                                 16B, \
                                 kInput8bits##input, \
                                 kInput8bitsImm##input_imm); \
  } \
  TEST(mnemonic##2_4S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic##2, \
                                 4S, \
                                 8H, \
                                 kInput16bits##input, \
                                 kInput16bitsImm##input_imm); \
  } \
  TEST(mnemonic##2_2D_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic##2, \
                                 2D, \
                                 4S, \
                                 kInput32bits##input, \
                                 kInput32bitsImm##input_imm); \
  }
4313
// Forwarder to the by-element dot-product helper. `vm_subvector_count` is
// passed straight through (4 below — presumably the four B lanes that form
// one S-sized dot-product group; confirm against the helper's definition).
#define CALL_TEST_NEON_HELPER_BYELEMENT_DOT_PRODUCT(mnemonic, \
                                                    vdform, \
                                                    vnform, \
                                                    vmform, \
                                                    input_d, \
                                                    input_n, \
                                                    input_m, \
                                                    indices, \
                                                    vm_subvector_count) \
  { \
    CALL_TEST_NEON_HELPER_ByElement_Dot_Product(mnemonic, \
                                                vdform, \
                                                vnform, \
                                                vmform, \
                                                input_d, \
                                                input_n, \
                                                input_m, \
                                                indices, \
                                                vm_subvector_count); \
  }

// By-element dot-product tests (sdot/udot by element): 2S <- 8B x B[i] and
// 4S <- 16B x B[i], with S-sized index lists.
#define DEFINE_TEST_NEON_BYELEMENT_DOT_PRODUCT(mnemonic, \
                                               input_d, \
                                               input_n, \
                                               input_m) \
  TEST(mnemonic##_2S_8B_B) { \
    CALL_TEST_NEON_HELPER_BYELEMENT_DOT_PRODUCT(mnemonic, \
                                                2S, \
                                                8B, \
                                                B, \
                                                kInput32bits##input_d, \
                                                kInput8bits##input_n, \
                                                kInput8bits##input_m, \
                                                kInputSIndices, \
                                                4); \
  } \
  TEST(mnemonic##_4S_16B_B) { \
    CALL_TEST_NEON_HELPER_BYELEMENT_DOT_PRODUCT(mnemonic, \
                                                4S, \
                                                16B, \
                                                B, \
                                                kInput32bits##input_d, \
                                                kInput8bits##input_n, \
                                                kInput8bits##input_m, \
                                                kInputSIndices, \
                                                4); \
  }
4361
// Thin brace-wrapped forwarder to the generic by-element test helper.
#define CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \
                                        vdform, \
                                        vnform, \
                                        vmform, \
                                        input_d, \
                                        input_n, \
                                        input_m, \
                                        indices) \
  { \
    CALL_TEST_NEON_HELPER_ByElement(mnemonic, \
                                    vdform, \
                                    vnform, \
                                    vmform, \
                                    input_d, \
                                    input_n, \
                                    input_m, \
                                    indices); \
  }

// Integer by-element tests for same-size arrangements: H lanes (4H/8H) and
// S lanes (2S/4S). B and D by-element forms do not exist for these
// instructions, hence their absence here.
#define DEFINE_TEST_NEON_BYELEMENT(mnemonic, input_d, input_n, input_m) \
  TEST(mnemonic##_4H_4H_H) { \
    CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \
                                    4H, \
                                    4H, \
                                    H, \
                                    kInput16bits##input_d, \
                                    kInput16bits##input_n, \
                                    kInput16bits##input_m, \
                                    kInputHIndices); \
  } \
  TEST(mnemonic##_8H_8H_H) { \
    CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \
                                    8H, \
                                    8H, \
                                    H, \
                                    kInput16bits##input_d, \
                                    kInput16bits##input_n, \
                                    kInput16bits##input_m, \
                                    kInputHIndices); \
  } \
  TEST(mnemonic##_2S_2S_S) { \
    CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \
                                    2S, \
                                    2S, \
                                    S, \
                                    kInput32bits##input_d, \
                                    kInput32bits##input_n, \
                                    kInput32bits##input_m, \
                                    kInputSIndices); \
  } \
  TEST(mnemonic##_4S_4S_S) { \
    CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \
                                    4S, \
                                    4S, \
                                    S, \
                                    kInput32bits##input_d, \
                                    kInput32bits##input_n, \
                                    kInput32bits##input_m, \
                                    kInputSIndices); \
  }
4422
// Scalar integer by-element tests (H and S forms).
#define DEFINE_TEST_NEON_BYELEMENT_SCALAR(mnemonic, input_d, input_n, input_m) \
  TEST(mnemonic##_H_H_H) { \
    CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \
                                    H, \
                                    H, \
                                    H, \
                                    kInput16bits##input_d, \
                                    kInput16bits##input_n, \
                                    kInput16bits##input_m, \
                                    kInputHIndices); \
  } \
  TEST(mnemonic##_S_S_S) { \
    CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \
                                    S, \
                                    S, \
                                    S, \
                                    kInput32bits##input_d, \
                                    kInput32bits##input_n, \
                                    kInput32bits##input_m, \
                                    kInputSIndices); \
  }
4444
// FP by-element tests: half-precision (4H/8H), single (2S/4S) and double
// (2D) arrangements, each with matching index lists.
#define DEFINE_TEST_NEON_FP_BYELEMENT(mnemonic, input_d, input_n, input_m) \
  TEST(mnemonic##_4H_4H_H) { \
    CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \
                                    4H, \
                                    4H, \
                                    H, \
                                    kInputFloat16##input_d, \
                                    kInputFloat16##input_n, \
                                    kInputFloat16##input_m, \
                                    kInputHIndices); \
  } \
  TEST(mnemonic##_8H_8H_H) { \
    CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \
                                    8H, \
                                    8H, \
                                    H, \
                                    kInputFloat16##input_d, \
                                    kInputFloat16##input_n, \
                                    kInputFloat16##input_m, \
                                    kInputHIndices); \
  } \
  TEST(mnemonic##_2S_2S_S) { \
    CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \
                                    2S, \
                                    2S, \
                                    S, \
                                    kInputFloat##input_d, \
                                    kInputFloat##input_n, \
                                    kInputFloat##input_m, \
                                    kInputSIndices); \
  } \
  TEST(mnemonic##_4S_4S_S) { \
    CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \
                                    4S, \
                                    4S, \
                                    S, \
                                    kInputFloat##input_d, \
                                    kInputFloat##input_n, \
                                    kInputFloat##input_m, \
                                    kInputSIndices); \
  } \
  TEST(mnemonic##_2D_2D_D) { \
    CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \
                                    2D, \
                                    2D, \
                                    D, \
                                    kInputDouble##input_d, \
                                    kInputDouble##input_n, \
                                    kInputDouble##input_m, \
                                    kInputDIndices); \
  }
4496
// FHM by-element tests (fmlal/fmlsl by element): single-precision
// destination accumulating products of half-precision elements.
// NOTE(review): the `input_d` parameter is never used — the destination is
// always kInputFloatAccDestination. Kept for signature symmetry with the
// other BYELEMENT macros, presumably; confirm before removing.
#define DEFINE_TEST_NEON_FHM_BYELEMENT(mnemonic, input_d, input_n, input_m) \
  TEST(mnemonic##_2S_2H_H) { \
    CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \
                                    2S, \
                                    2H, \
                                    H, \
                                    kInputFloatAccDestination, \
                                    kInputFloat16##input_n, \
                                    kInputFloat16##input_m, \
                                    kInputHIndices); \
  } \
  TEST(mnemonic##_4S_4H_H) { \
    CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \
                                    4S, \
                                    4H, \
                                    H, \
                                    kInputFloatAccDestination, \
                                    kInputFloat16##input_n, \
                                    kInputFloat16##input_m, \
                                    kInputHIndices); \
  }
4518
// Scalar FP by-element tests (H, S and D forms). Parameters are abbreviated
// (inp_*) to keep the 80-column formatting of the expansion lines.
#define DEFINE_TEST_NEON_FP_BYELEMENT_SCALAR(mnemonic, inp_d, inp_n, inp_m) \
  TEST(mnemonic##_H_H_H) { \
    CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \
                                    H, \
                                    H, \
                                    H, \
                                    kInputFloat16##inp_d, \
                                    kInputFloat16##inp_n, \
                                    kInputFloat16##inp_m, \
                                    kInputHIndices); \
  } \
  TEST(mnemonic##_S_S_S) { \
    CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \
                                    S, \
                                    S, \
                                    S, \
                                    kInputFloat##inp_d, \
                                    kInputFloat##inp_n, \
                                    kInputFloat##inp_m, \
                                    kInputSIndices); \
  } \
  TEST(mnemonic##_D_D_D) { \
    CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \
                                    D, \
                                    D, \
                                    D, \
                                    kInputDouble##inp_d, \
                                    kInputDouble##inp_n, \
                                    kInputDouble##inp_m, \
                                    kInputDIndices); \
  }
4550
4551
// Long by-element tests (e.g. smlal/umull by element): wide destination
// from narrow source lanes; "<mnemonic>2" variants read the high half of
// the first source.
#define DEFINE_TEST_NEON_BYELEMENT_DIFF(mnemonic, input_d, input_n, input_m) \
  TEST(mnemonic##_4S_4H_H) { \
    CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \
                                    4S, \
                                    4H, \
                                    H, \
                                    kInput32bits##input_d, \
                                    kInput16bits##input_n, \
                                    kInput16bits##input_m, \
                                    kInputHIndices); \
  } \
  TEST(mnemonic##2_4S_8H_H) { \
    CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic##2, \
                                    4S, \
                                    8H, \
                                    H, \
                                    kInput32bits##input_d, \
                                    kInput16bits##input_n, \
                                    kInput16bits##input_m, \
                                    kInputHIndices); \
  } \
  TEST(mnemonic##_2D_2S_S) { \
    CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \
                                    2D, \
                                    2S, \
                                    S, \
                                    kInput64bits##input_d, \
                                    kInput32bits##input_n, \
                                    kInput32bits##input_m, \
                                    kInputSIndices); \
  } \
  TEST(mnemonic##2_2D_4S_S) { \
    CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic##2, \
                                    2D, \
                                    4S, \
                                    S, \
                                    kInput64bits##input_d, \
                                    kInput32bits##input_n, \
                                    kInput32bits##input_m, \
                                    kInputSIndices); \
  }
4593
// Scalar long by-element tests: S <- H x H[i] and D <- S x S[i].
#define DEFINE_TEST_NEON_BYELEMENT_DIFF_SCALAR(mnemonic, \
                                               input_d, \
                                               input_n, \
                                               input_m) \
  TEST(mnemonic##_S_H_H) { \
    CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \
                                    S, \
                                    H, \
                                    H, \
                                    kInput32bits##input_d, \
                                    kInput16bits##input_n, \
                                    kInput16bits##input_m, \
                                    kInputHIndices); \
  } \
  TEST(mnemonic##_D_S_S) { \
    CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \
                                    D, \
                                    S, \
                                    S, \
                                    kInput64bits##input_d, \
                                    kInput32bits##input_n, \
                                    kInput32bits##input_m, \
                                    kInputSIndices); \
  }
4618
4619
// Forwarder for two-register, two-immediate instructions (ins): passes the
// MacroAssembler member-function pointer plus the mnemonic, with the same
// arrangement used for both destination and source.
#define CALL_TEST_NEON_HELPER_2OP2IMM(mnemonic, \
                                      variant, \
                                      input_d, \
                                      input_imm1, \
                                      input_n, \
                                      input_imm2) \
  { \
    CALL_TEST_NEON_HELPER_OpImmOpImm(&MacroAssembler::mnemonic, \
                                     mnemonic, \
                                     variant, \
                                     variant, \
                                     input_d, \
                                     input_imm1, \
                                     input_n, \
                                     input_imm2); \
  }

// Two-operand, two-immediate tests for each lane size, using the full
// 128-bit arrangements (16B/8H/4S/2D).
#define DEFINE_TEST_NEON_2OP2IMM(mnemonic, \
                                 input_d, \
                                 input_imm1, \
                                 input_n, \
                                 input_imm2) \
  TEST(mnemonic##_B) { \
    CALL_TEST_NEON_HELPER_2OP2IMM(mnemonic, \
                                  16B, \
                                  kInput8bits##input_d, \
                                  kInput8bitsImm##input_imm1, \
                                  kInput8bits##input_n, \
                                  kInput8bitsImm##input_imm2); \
  } \
  TEST(mnemonic##_H) { \
    CALL_TEST_NEON_HELPER_2OP2IMM(mnemonic, \
                                  8H, \
                                  kInput16bits##input_d, \
                                  kInput16bitsImm##input_imm1, \
                                  kInput16bits##input_n, \
                                  kInput16bitsImm##input_imm2); \
  } \
  TEST(mnemonic##_S) { \
    CALL_TEST_NEON_HELPER_2OP2IMM(mnemonic, \
                                  4S, \
                                  kInput32bits##input_d, \
                                  kInput32bitsImm##input_imm1, \
                                  kInput32bits##input_n, \
                                  kInput32bitsImm##input_imm2); \
  } \
  TEST(mnemonic##_D) { \
    CALL_TEST_NEON_HELPER_2OP2IMM(mnemonic, \
                                  2D, \
                                  kInput64bits##input_d, \
                                  kInput64bitsImm##input_imm1, \
                                  kInput64bits##input_n, \
                                  kInput64bitsImm##input_imm2); \
  }
4674
4675
// Advanced SIMD copy.
// Instantiates simulator trace tests for the copy group. "Basic" selects
// the standard input lists declared in test-simulator-inputs-aarch64.h.
DEFINE_TEST_NEON_2OP2IMM(
    ins, Basic, LaneCountFromZero, Basic, LaneCountFromZero)
DEFINE_TEST_NEON_2OPIMM_COPY(dup, Basic, LaneCountFromZero)


// Advanced SIMD scalar copy.
DEFINE_TEST_NEON_2OPIMM_SCALAR(dup, Basic, LaneCountFromZero)
4684
4685
// Advanced SIMD three same.
// Each line stamps out one TEST per supported arrangement. Test names (and
// hence the recorded trace tables) are derived from these mnemonics — do
// not rename or reorder without regenerating the traces.
DEFINE_TEST_NEON_3SAME_NO2D(shadd, Basic)
DEFINE_TEST_NEON_3SAME(sqadd, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(srhadd, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(shsub, Basic)
DEFINE_TEST_NEON_3SAME(sqsub, Basic)
DEFINE_TEST_NEON_3SAME(cmgt, Basic)
DEFINE_TEST_NEON_3SAME(cmge, Basic)
DEFINE_TEST_NEON_3SAME(sshl, Basic)
DEFINE_TEST_NEON_3SAME(sqshl, Basic)
DEFINE_TEST_NEON_3SAME(srshl, Basic)
DEFINE_TEST_NEON_3SAME(sqrshl, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(smax, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(smin, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(sabd, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(saba, Basic)
DEFINE_TEST_NEON_3SAME(add, Basic)
DEFINE_TEST_NEON_3SAME(cmtst, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(mla, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(mul, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(smaxp, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(sminp, Basic)
DEFINE_TEST_NEON_3SAME_HS(sqdmulh, Basic)
DEFINE_TEST_NEON_3SAME(addp, Basic)
DEFINE_TEST_NEON_3SAME_FP(fmaxnm, Basic)
DEFINE_TEST_NEON_3SAME_FP(fmla, Basic)
DEFINE_TEST_NEON_3SAME_FP(fadd, Basic)
DEFINE_TEST_NEON_3SAME_FP(fmulx, Basic)
DEFINE_TEST_NEON_3SAME_FP(fcmeq, Basic)
DEFINE_TEST_NEON_3SAME_FP(fmax, Basic)
DEFINE_TEST_NEON_3SAME_FP(frecps, Basic)
// "and_" (trailing underscore) because "and" is a C++ alternative token.
DEFINE_TEST_NEON_3SAME_8B_16B(and_, Basic)
DEFINE_TEST_NEON_3SAME_8B_16B(bic, Basic)
DEFINE_TEST_NEON_3SAME_FP(fminnm, Basic)
DEFINE_TEST_NEON_3SAME_FP(fmls, Basic)
DEFINE_TEST_NEON_3SAME_FP(fsub, Basic)
DEFINE_TEST_NEON_3SAME_FP(fmin, Basic)
DEFINE_TEST_NEON_3SAME_FP(frsqrts, Basic)
DEFINE_TEST_NEON_3SAME_8B_16B(orr, Basic)
DEFINE_TEST_NEON_3SAME_8B_16B(orn, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(uhadd, Basic)
DEFINE_TEST_NEON_3SAME(uqadd, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(urhadd, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(uhsub, Basic)
DEFINE_TEST_NEON_3SAME(uqsub, Basic)
DEFINE_TEST_NEON_3SAME(cmhi, Basic)
DEFINE_TEST_NEON_3SAME(cmhs, Basic)
DEFINE_TEST_NEON_3SAME(ushl, Basic)
DEFINE_TEST_NEON_3SAME(uqshl, Basic)
DEFINE_TEST_NEON_3SAME(urshl, Basic)
DEFINE_TEST_NEON_3SAME(uqrshl, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(umax, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(umin, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(uabd, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(uaba, Basic)
DEFINE_TEST_NEON_3SAME(sub, Basic)
DEFINE_TEST_NEON_3SAME(cmeq, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(mls, Basic)
DEFINE_TEST_NEON_3SAME_8B_16B(pmul, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(uminp, Basic)
DEFINE_TEST_NEON_3SAME_NO2D(umaxp, Basic)
DEFINE_TEST_NEON_3SAME_HS(sqrdmulh, Basic)
DEFINE_TEST_NEON_3SAME_HS(sqrdmlah, Basic)
DEFINE_TEST_NEON_3SAME_HS(sqrdmlsh, Basic)
// Dot products are encoded in this group but take differently-sized
// operands, hence the 3DIFF macro.
DEFINE_TEST_NEON_3DIFF_DOUBLE_WIDE(udot, Basic)
DEFINE_TEST_NEON_3DIFF_DOUBLE_WIDE(sdot, Basic)
DEFINE_TEST_NEON_3SAME_FP(fmaxnmp, Basic)
DEFINE_TEST_NEON_3SAME_FP(faddp, Basic)
DEFINE_TEST_NEON_3SAME_FP(fmul, Basic)
DEFINE_TEST_NEON_3SAME_FP(fcmge, Basic)
DEFINE_TEST_NEON_3SAME_FP(facge, Basic)
DEFINE_TEST_NEON_3SAME_FP(fmaxp, Basic)
DEFINE_TEST_NEON_3SAME_FP(fdiv, Basic)
DEFINE_TEST_NEON_3SAME_8B_16B(eor, Basic)
DEFINE_TEST_NEON_3SAME_8B_16B(bsl, Basic)
DEFINE_TEST_NEON_3SAME_FP(fminnmp, Basic)
DEFINE_TEST_NEON_3SAME_FP(fabd, Basic)
DEFINE_TEST_NEON_3SAME_FP(fcmgt, Basic)
DEFINE_TEST_NEON_3SAME_FP(facgt, Basic)
DEFINE_TEST_NEON_3SAME_FP(fminp, Basic)
DEFINE_TEST_NEON_3SAME_8B_16B(bit, Basic)
DEFINE_TEST_NEON_3SAME_8B_16B(bif, Basic)
4768
4769
// Advanced SIMD scalar three same.
// _SCALAR_D variants exist only in D form; _SCALAR_HS only in H/S forms —
// this mirrors the architectural encoding restrictions.
DEFINE_TEST_NEON_3SAME_SCALAR(sqadd, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR(sqsub, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR_D(cmgt, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR_D(cmge, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR_D(sshl, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR(sqshl, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR_D(srshl, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR(sqrshl, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR_D(add, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR_D(cmtst, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR_HS(sqdmulh, Basic)
DEFINE_TEST_NEON_3SAME_FP_SCALAR(fmulx, Basic)
DEFINE_TEST_NEON_3SAME_FP_SCALAR(fcmeq, Basic)
DEFINE_TEST_NEON_3SAME_FP_SCALAR(frecps, Basic)
DEFINE_TEST_NEON_3SAME_FP_SCALAR(frsqrts, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR_D(uqadd, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR_D(uqsub, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR_D(cmhi, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR_D(cmhs, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR_D(ushl, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR(uqshl, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR_D(urshl, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR(uqrshl, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR_D(sub, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR_D(cmeq, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR_HS(sqrdmulh, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR_HS(sqrdmlah, Basic)
DEFINE_TEST_NEON_3SAME_SCALAR_HS(sqrdmlsh, Basic)
DEFINE_TEST_NEON_3SAME_FP_SCALAR(fcmge, Basic)
DEFINE_TEST_NEON_3SAME_FP_SCALAR(facge, Basic)
DEFINE_TEST_NEON_3SAME_FP_SCALAR(fabd, Basic)
DEFINE_TEST_NEON_3SAME_FP_SCALAR(fcmgt, Basic)
DEFINE_TEST_NEON_3SAME_FP_SCALAR(facgt, Basic)


// Advanced SIMD FHM instructions (FMLAL, FMLSL).
// These are oddballs: they are encoded under the 3SAME group but behave
// quite differently.
DEFINE_TEST_NEON_FHM(fmlal, Basic, Basic, Basic)
DEFINE_TEST_NEON_FHM(fmlal2, Basic, Basic, Basic)
DEFINE_TEST_NEON_FHM(fmlsl, Basic, Basic, Basic)
DEFINE_TEST_NEON_FHM(fmlsl2, Basic, Basic, Basic)
4813
4814
// Advanced SIMD three different.
// LONG widens, WIDE has one wide and one narrow source, NARROW narrows;
// _SD and _8H restrict to the arrangements each instruction encodes.
DEFINE_TEST_NEON_3DIFF_LONG(saddl, Basic)
DEFINE_TEST_NEON_3DIFF_WIDE(saddw, Basic)
DEFINE_TEST_NEON_3DIFF_LONG(ssubl, Basic)
DEFINE_TEST_NEON_3DIFF_WIDE(ssubw, Basic)
DEFINE_TEST_NEON_3DIFF_NARROW(addhn, Basic)
DEFINE_TEST_NEON_3DIFF_LONG(sabal, Basic)
DEFINE_TEST_NEON_3DIFF_NARROW(subhn, Basic)
DEFINE_TEST_NEON_3DIFF_LONG(sabdl, Basic)
DEFINE_TEST_NEON_3DIFF_LONG(smlal, Basic)
DEFINE_TEST_NEON_3DIFF_LONG_SD(sqdmlal, Basic)
DEFINE_TEST_NEON_3DIFF_LONG(smlsl, Basic)
DEFINE_TEST_NEON_3DIFF_LONG_SD(sqdmlsl, Basic)
DEFINE_TEST_NEON_3DIFF_LONG(smull, Basic)
DEFINE_TEST_NEON_3DIFF_LONG_SD(sqdmull, Basic)
DEFINE_TEST_NEON_3DIFF_LONG_8H(pmull, Basic)
DEFINE_TEST_NEON_3DIFF_LONG(uaddl, Basic)
DEFINE_TEST_NEON_3DIFF_WIDE(uaddw, Basic)
DEFINE_TEST_NEON_3DIFF_LONG(usubl, Basic)
DEFINE_TEST_NEON_3DIFF_WIDE(usubw, Basic)
DEFINE_TEST_NEON_3DIFF_NARROW(raddhn, Basic)
DEFINE_TEST_NEON_3DIFF_LONG(uabal, Basic)
DEFINE_TEST_NEON_3DIFF_NARROW(rsubhn, Basic)
DEFINE_TEST_NEON_3DIFF_LONG(uabdl, Basic)
DEFINE_TEST_NEON_3DIFF_LONG(umlal, Basic)
DEFINE_TEST_NEON_3DIFF_LONG(umlsl, Basic)
DEFINE_TEST_NEON_3DIFF_LONG(umull, Basic)


// Advanced SIMD scalar three different.
DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_SD(sqdmlal, Basic)
DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_SD(sqdmlsl, Basic)
DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_SD(sqdmull, Basic)
4848
4849
// Advanced SIMD scalar pairwise.
// Scalar ADDP only exists as D <- 2D, so it is written out by hand rather
// than through a DEFINE_* macro.
TEST(addp_SCALAR) {
  CALL_TEST_NEON_HELPER_2DIFF(addp, D, 2D, kInput64bitsBasic);
}
DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(fmaxnmp, Basic)
// Scalar pairwise FP reductions (S <- 2S, D <- 2D).
DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(faddp, Basic)
DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(fmaxp, Basic)
DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(fminnmp, Basic)
DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(fminp, Basic)
4859
4860
// Advanced SIMD shift by immediate.
// The *_imm argument selects the immediate list: TypeWidth for right
// shifts (1..width), TypeWidthFromZero for left shifts (0..width-1), and
// TypeWidthFromZeroToWidth for the fixed-point conversions.
DEFINE_TEST_NEON_2OPIMM(sshr, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM(ssra, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM(srshr, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM(srsra, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM(shl, Basic, TypeWidthFromZero)
DEFINE_TEST_NEON_2OPIMM(sqshl, Basic, TypeWidthFromZero)
DEFINE_TEST_NEON_2OPIMM_NARROW(shrn, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_NARROW(rshrn, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_NARROW(sqshrn, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_NARROW(sqrshrn, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_LONG(sshll, Basic, TypeWidthFromZero)
DEFINE_TEST_NEON_2OPIMM_HSD(scvtf,
                            FixedPointConversions,
                            TypeWidthFromZeroToWidth)
DEFINE_TEST_NEON_2OPIMM_FP(fcvtzs, Conversions, TypeWidthFromZeroToWidth)
DEFINE_TEST_NEON_2OPIMM(ushr, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM(usra, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM(urshr, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM(ursra, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM(sri, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM(sli, Basic, TypeWidthFromZero)
DEFINE_TEST_NEON_2OPIMM(sqshlu, Basic, TypeWidthFromZero)
DEFINE_TEST_NEON_2OPIMM(uqshl, Basic, TypeWidthFromZero)
DEFINE_TEST_NEON_2OPIMM_NARROW(sqshrun, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_NARROW(sqrshrun, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_NARROW(uqshrn, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_NARROW(uqrshrn, Basic, TypeWidth)
DEFINE_TEST_NEON_2OPIMM_LONG(ushll, Basic, TypeWidthFromZero)
DEFINE_TEST_NEON_2OPIMM_HSD(ucvtf,
                            FixedPointConversions,
                            TypeWidthFromZeroToWidth)
DEFINE_TEST_NEON_2OPIMM_FP(fcvtzu, Conversions, TypeWidthFromZeroToWidth)
4894
4895
4896 // Advanced SIMD scalar shift by immediate.
// Scalar counterparts of the vector shift-by-immediate tests above; the
// _SCALAR_D variants exercise only the 64-bit (D) scalar form.
4897 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(sshr, Basic, TypeWidth)
4898 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(ssra, Basic, TypeWidth)
4899 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(srshr, Basic, TypeWidth)
4900 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(srsra, Basic, TypeWidth)
4901 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(shl, Basic, TypeWidthFromZero)
4902 DEFINE_TEST_NEON_2OPIMM_SCALAR(sqshl, Basic, TypeWidthFromZero)
4903 DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(sqshrn, Basic, TypeWidth)
4904 DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(sqrshrn, Basic, TypeWidth)
4905 DEFINE_TEST_NEON_2OPIMM_SCALAR_HSD(scvtf,
4906 FixedPointConversions,
4907 TypeWidthFromZeroToWidth)
4908 DEFINE_TEST_NEON_2OPIMM_FP_SCALAR(fcvtzs, Conversions, TypeWidthFromZeroToWidth)
4909 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(ushr, Basic, TypeWidth)
4910 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(usra, Basic, TypeWidth)
4911 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(urshr, Basic, TypeWidth)
4912 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(ursra, Basic, TypeWidth)
4913 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(sri, Basic, TypeWidth)
4914 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(sli, Basic, TypeWidthFromZero)
4915 DEFINE_TEST_NEON_2OPIMM_SCALAR(sqshlu, Basic, TypeWidthFromZero)
4916 DEFINE_TEST_NEON_2OPIMM_SCALAR(uqshl, Basic, TypeWidthFromZero)
4917 DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(sqshrun, Basic, TypeWidth)
4918 DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(sqrshrun, Basic, TypeWidth)
4919 DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(uqshrn, Basic, TypeWidth)
4920 DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(uqrshrn, Basic, TypeWidth)
4921 DEFINE_TEST_NEON_2OPIMM_SCALAR_HSD(ucvtf,
4922 FixedPointConversions,
4923 TypeWidthFromZeroToWidth)
4924 DEFINE_TEST_NEON_2OPIMM_FP_SCALAR(fcvtzu, Conversions, TypeWidthFromZeroToWidth)
4925
4926
4927 // Advanced SIMD two-register miscellaneous.
// Integer-from-FP conversions (scvtf/ucvtf/fcvtzs/fcvtzu, vector-integer
// forms) are deliberately absent here: the fixed-point-immediate tests above
// cover them with fbits == 0, as the inline comments below note.
4928 DEFINE_TEST_NEON_2SAME_NO2D(rev64, Basic)
4929 DEFINE_TEST_NEON_2SAME_8B_16B(rev16, Basic)
4930 DEFINE_TEST_NEON_2DIFF_LONG(saddlp, Basic)
4931 DEFINE_TEST_NEON_2SAME(suqadd, Basic)
4932 DEFINE_TEST_NEON_2SAME_NO2D(cls, Basic)
4933 DEFINE_TEST_NEON_2SAME_8B_16B(cnt, Basic)
4934 DEFINE_TEST_NEON_2DIFF_LONG(sadalp, Basic)
4935 DEFINE_TEST_NEON_2SAME(sqabs, Basic)
4936 DEFINE_TEST_NEON_2OPIMM(cmgt, Basic, Zero)
4937 DEFINE_TEST_NEON_2OPIMM(cmeq, Basic, Zero)
4938 DEFINE_TEST_NEON_2OPIMM(cmlt, Basic, Zero)
4939 DEFINE_TEST_NEON_2SAME(abs, Basic)
4940 DEFINE_TEST_NEON_2DIFF_NARROW(xtn, Basic)
4941 DEFINE_TEST_NEON_2DIFF_NARROW(sqxtn, Basic)
4942 DEFINE_TEST_NEON_2DIFF_FP_NARROW(fcvtn, Conversions)
4943 DEFINE_TEST_NEON_2DIFF_FP_LONG(fcvtl, Conversions)
4944 DEFINE_TEST_NEON_2SAME_FP_FP16(frintn, Conversions)
4945 DEFINE_TEST_NEON_2SAME_FP_FP16(frintm, Conversions)
4946 DEFINE_TEST_NEON_2SAME_FP_FP16(fcvtns, Conversions)
4947 DEFINE_TEST_NEON_2SAME_FP_FP16(fcvtms, Conversions)
4948 DEFINE_TEST_NEON_2SAME_FP_FP16(fcvtas, Conversions)
4949 // SCVTF (vector, integer) covered by SCVTF(vector, fixed point) with fbits 0.
4950 DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(fcmgt, Basic, Zero)
4951 DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(fcmeq, Basic, Zero)
4952 DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(fcmlt, Basic, Zero)
4953 DEFINE_TEST_NEON_2SAME_FP_FP16(fabs, Basic)
4954 DEFINE_TEST_NEON_2SAME_FP_FP16(frintp, Conversions)
4955 DEFINE_TEST_NEON_2SAME_FP_FP16(frintz, Conversions)
4956 DEFINE_TEST_NEON_2SAME_FP_FP16(fcvtps, Conversions)
4957 // FCVTZS(vector, integer) covered by FCVTZS(vector, fixed point) with fbits 0.
4958 DEFINE_TEST_NEON_2SAME_2S_4S(urecpe, Basic)
4959 DEFINE_TEST_NEON_2SAME_FP_FP16(frecpe, Basic)
4960 DEFINE_TEST_NEON_2SAME_BH(rev32, Basic)
4961 DEFINE_TEST_NEON_2DIFF_LONG(uaddlp, Basic)
4962 DEFINE_TEST_NEON_2SAME(usqadd, Basic)
4963 DEFINE_TEST_NEON_2SAME_NO2D(clz, Basic)
4964 DEFINE_TEST_NEON_2DIFF_LONG(uadalp, Basic)
4965 DEFINE_TEST_NEON_2SAME(sqneg, Basic)
4966 DEFINE_TEST_NEON_2OPIMM(cmge, Basic, Zero)
4967 DEFINE_TEST_NEON_2OPIMM(cmle, Basic, Zero)
4968 DEFINE_TEST_NEON_2SAME(neg, Basic)
4969 DEFINE_TEST_NEON_2DIFF_NARROW(sqxtun, Basic)
4970 DEFINE_TEST_NEON_2OPIMM_LONG(shll, Basic, SHLL)
4971 DEFINE_TEST_NEON_2DIFF_NARROW(uqxtn, Basic)
4972 DEFINE_TEST_NEON_2DIFF_FP_NARROW_2S(fcvtxn, Conversions)
4973 DEFINE_TEST_NEON_2SAME_FP(frint32x, Conversions)
4974 DEFINE_TEST_NEON_2SAME_FP(frint64x, Conversions)
4975 DEFINE_TEST_NEON_2SAME_FP(frint32z, Conversions)
4976 DEFINE_TEST_NEON_2SAME_FP(frint64z, Conversions)
4977 DEFINE_TEST_NEON_2SAME_FP_FP16(frinta, Conversions)
4978 DEFINE_TEST_NEON_2SAME_FP_FP16(frintx, Conversions)
4979 DEFINE_TEST_NEON_2SAME_FP_FP16(fcvtnu, Conversions)
4980 DEFINE_TEST_NEON_2SAME_FP_FP16(fcvtmu, Conversions)
4981 DEFINE_TEST_NEON_2SAME_FP_FP16(fcvtau, Conversions)
4982 // UCVTF (vector, integer) covered by UCVTF(vector, fixed point) with fbits 0.
4983 DEFINE_TEST_NEON_2SAME_8B_16B(not_, Basic)
4984 DEFINE_TEST_NEON_2SAME_8B_16B(rbit, Basic)
4985 DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(fcmge, Basic, Zero)
4986 DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(fcmle, Basic, Zero)
4987 DEFINE_TEST_NEON_2SAME_FP_FP16(fneg, Basic)
4988 DEFINE_TEST_NEON_2SAME_FP_FP16(frinti, Conversions)
4989 DEFINE_TEST_NEON_2SAME_FP_FP16(fcvtpu, Conversions)
4990 // FCVTZU(vector, integer) covered by FCVTZU(vector, fixed point) with fbits 0.
4991 DEFINE_TEST_NEON_2SAME_2S_4S(ursqrte, Basic)
4992 DEFINE_TEST_NEON_2SAME_FP_FP16(frsqrte, Basic)
4993 DEFINE_TEST_NEON_2SAME_FP_FP16(fsqrt, Basic)
4994
4995
4996 // Advanced SIMD scalar two-register miscellaneous.
// Scalar counterparts of the two-register miscellaneous tests above; the
// vector-integer conversion forms are again covered by the fixed-point tests
// with fbits 0 (see the inline notes).
4997 DEFINE_TEST_NEON_2SAME_SCALAR(suqadd, Basic)
4998 DEFINE_TEST_NEON_2SAME_SCALAR(sqabs, Basic)
4999 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(cmgt, Basic, Zero)
5000 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(cmeq, Basic, Zero)
5001 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(cmlt, Basic, Zero)
5002 DEFINE_TEST_NEON_2SAME_SCALAR_D(abs, Basic)
5003 DEFINE_TEST_NEON_2DIFF_SCALAR_NARROW(sqxtn, Basic)
5004 DEFINE_TEST_NEON_2SAME_FP_FP16_SCALAR(fcvtns, Conversions)
5005 DEFINE_TEST_NEON_2SAME_FP_FP16_SCALAR(fcvtms, Conversions)
5006 DEFINE_TEST_NEON_2SAME_FP_FP16_SCALAR(fcvtas, Conversions)
5007 // SCVTF (vector, integer) covered by SCVTF(vector, fixed point) with fbits 0.
5008 DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_HSD(fcmgt, Basic, Zero)
5009 DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_HSD(fcmeq, Basic, Zero)
5010 DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_HSD(fcmlt, Basic, Zero)
5011 DEFINE_TEST_NEON_2SAME_FP_FP16_SCALAR(fcvtps, Conversions)
5012 // FCVTZS(vector, integer) covered by FCVTZS(vector, fixed point) with fbits 0.
5013 DEFINE_TEST_NEON_2SAME_FP_FP16_SCALAR(frecpe, Basic)
5014 DEFINE_TEST_NEON_2SAME_FP_FP16_SCALAR(frecpx, Basic)
5015 DEFINE_TEST_NEON_2SAME_SCALAR(usqadd, Basic)
5016 DEFINE_TEST_NEON_2SAME_SCALAR(sqneg, Basic)
5017 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(cmge, Basic, Zero)
5018 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(cmle, Basic, Zero)
5019 DEFINE_TEST_NEON_2SAME_SCALAR_D(neg, Basic)
5020 DEFINE_TEST_NEON_2DIFF_SCALAR_NARROW(sqxtun, Basic)
5021 DEFINE_TEST_NEON_2DIFF_SCALAR_NARROW(uqxtn, Basic)
// fcvtxn is tested directly because only the S <- D scalar narrowing form is
// exercised, driven with the double-precision conversion input table.
5022 TEST(fcvtxn_SCALAR) {
5023 CALL_TEST_NEON_HELPER_2DIFF(fcvtxn, S, D, kInputDoubleConversions);
5024 }
DEFINE_TEST_NEON_2SAME_FP_FP16_SCALAR(fcvtnu,Conversions)5025 DEFINE_TEST_NEON_2SAME_FP_FP16_SCALAR(fcvtnu, Conversions)
// Tail of the scalar two-register miscellaneous section (unsigned
// conversions and reciprocal square-root estimate).
5026 DEFINE_TEST_NEON_2SAME_FP_FP16_SCALAR(fcvtmu, Conversions)
5027 DEFINE_TEST_NEON_2SAME_FP_FP16_SCALAR(fcvtau, Conversions)
5028 // UCVTF (vector, integer) covered by UCVTF(vector, fixed point) with fbits 0.
5029 DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_HSD(fcmge, Basic, Zero)
5030 DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_HSD(fcmle, Basic, Zero)
5031 DEFINE_TEST_NEON_2SAME_FP_FP16_SCALAR(fcvtpu, Conversions)
5032 // FCVTZU(vector, integer) covered by FCVTZU(vector, fixed point) with fbits 0.
5033 DEFINE_TEST_NEON_2SAME_FP_FP16_SCALAR(frsqrte, Basic)
5034
5035
5036 // Advanced SIMD across lanes.
// Horizontal reductions; the _LONG variants produce a widened scalar result.
5037 DEFINE_TEST_NEON_ACROSS_LONG(saddlv, Basic)
5038 DEFINE_TEST_NEON_ACROSS(smaxv, Basic)
5039 DEFINE_TEST_NEON_ACROSS(sminv, Basic)
5040 DEFINE_TEST_NEON_ACROSS(addv, Basic)
5041 DEFINE_TEST_NEON_ACROSS_LONG(uaddlv, Basic)
5042 DEFINE_TEST_NEON_ACROSS(umaxv, Basic)
5043 DEFINE_TEST_NEON_ACROSS(uminv, Basic)
5044 DEFINE_TEST_NEON_ACROSS_FP(fmaxnmv, Basic)
5045 DEFINE_TEST_NEON_ACROSS_FP(fmaxv, Basic)
5046 DEFINE_TEST_NEON_ACROSS_FP(fminnmv, Basic)
5047 DEFINE_TEST_NEON_ACROSS_FP(fminv, Basic)
5048
5049
5050 // Advanced SIMD permute.
// Unzip, transpose and zip — all element-shuffle operations using the
// three-same-operand macro.
5051 DEFINE_TEST_NEON_3SAME(uzp1, Basic)
5052 DEFINE_TEST_NEON_3SAME(trn1, Basic)
5053 DEFINE_TEST_NEON_3SAME(zip1, Basic)
5054 DEFINE_TEST_NEON_3SAME(uzp2, Basic)
5055 DEFINE_TEST_NEON_3SAME(trn2, Basic)
5056 DEFINE_TEST_NEON_3SAME(zip2, Basic)
5057
5058
5059 // Advanced SIMD vector x indexed element.
// The three Basic arguments select the input tables for the destination and
// the two source operands respectively.
5060 DEFINE_TEST_NEON_BYELEMENT_DIFF(smlal, Basic, Basic, Basic)
5061 DEFINE_TEST_NEON_BYELEMENT_DIFF(sqdmlal, Basic, Basic, Basic)
5062 DEFINE_TEST_NEON_BYELEMENT_DIFF(smlsl, Basic, Basic, Basic)
5063 DEFINE_TEST_NEON_BYELEMENT_DIFF(sqdmlsl, Basic, Basic, Basic)
5064 DEFINE_TEST_NEON_BYELEMENT(mul, Basic, Basic, Basic)
5065 DEFINE_TEST_NEON_BYELEMENT_DIFF(smull, Basic, Basic, Basic)
5066 DEFINE_TEST_NEON_BYELEMENT_DIFF(sqdmull, Basic, Basic, Basic)
5067 DEFINE_TEST_NEON_BYELEMENT(sqdmulh, Basic, Basic, Basic)
5068 DEFINE_TEST_NEON_BYELEMENT(sqrdmulh, Basic, Basic, Basic)
5069 DEFINE_TEST_NEON_BYELEMENT(sqrdmlah, Basic, Basic, Basic)
5070 DEFINE_TEST_NEON_BYELEMENT(sqrdmlsh, Basic, Basic, Basic)
5071 DEFINE_TEST_NEON_BYELEMENT_DOT_PRODUCT(udot, Basic, Basic, Basic)
5072 DEFINE_TEST_NEON_BYELEMENT_DOT_PRODUCT(sdot, Basic, Basic, Basic)
5073 DEFINE_TEST_NEON_FP_BYELEMENT(fmla, Basic, Basic, Basic)
5074 DEFINE_TEST_NEON_FP_BYELEMENT(fmls, Basic, Basic, Basic)
5075 DEFINE_TEST_NEON_FP_BYELEMENT(fmul, Basic, Basic, Basic)
5076 DEFINE_TEST_NEON_BYELEMENT(mla, Basic, Basic, Basic)
5077 DEFINE_TEST_NEON_BYELEMENT_DIFF(umlal, Basic, Basic, Basic)
5078 DEFINE_TEST_NEON_BYELEMENT(mls, Basic, Basic, Basic)
5079 DEFINE_TEST_NEON_BYELEMENT_DIFF(umlsl, Basic, Basic, Basic)
5080 DEFINE_TEST_NEON_BYELEMENT_DIFF(umull, Basic, Basic, Basic)
5081 DEFINE_TEST_NEON_FP_BYELEMENT(fmulx, Basic, Basic, Basic)
5082
5083
5084 // Advanced SIMD scalar x indexed element.
// Scalar counterparts of the vector-by-element tests above.
5085 DEFINE_TEST_NEON_BYELEMENT_DIFF_SCALAR(sqdmlal, Basic, Basic, Basic)
5086 DEFINE_TEST_NEON_BYELEMENT_DIFF_SCALAR(sqdmlsl, Basic, Basic, Basic)
5087 DEFINE_TEST_NEON_BYELEMENT_DIFF_SCALAR(sqdmull, Basic, Basic, Basic)
5088 DEFINE_TEST_NEON_BYELEMENT_SCALAR(sqdmulh, Basic, Basic, Basic)
5089 DEFINE_TEST_NEON_BYELEMENT_SCALAR(sqrdmulh, Basic, Basic, Basic)
5090 DEFINE_TEST_NEON_BYELEMENT_SCALAR(sqrdmlah, Basic, Basic, Basic)
5091 DEFINE_TEST_NEON_BYELEMENT_SCALAR(sqrdmlsh, Basic, Basic, Basic)
5092 DEFINE_TEST_NEON_FP_BYELEMENT_SCALAR(fmla, Basic, Basic, Basic)
5093 DEFINE_TEST_NEON_FP_BYELEMENT_SCALAR(fmls, Basic, Basic, Basic)
5094 DEFINE_TEST_NEON_FP_BYELEMENT_SCALAR(fmul, Basic, Basic, Basic)
5095 DEFINE_TEST_NEON_FP_BYELEMENT_SCALAR(fmulx, Basic, Basic, Basic)
5096
5097
// FHM by-element tests (fmlal/fmlsl and their "2" upper-half variants) —
// presumably the FP16 multiply-accumulate-long extension; confirm against the
// DEFINE_TEST_NEON_FHM_BYELEMENT macro definition.
5098 DEFINE_TEST_NEON_FHM_BYELEMENT(fmlal, Basic, Basic, Basic)
5099 DEFINE_TEST_NEON_FHM_BYELEMENT(fmlal2, Basic, Basic, Basic)
5100 DEFINE_TEST_NEON_FHM_BYELEMENT(fmlsl, Basic, Basic, Basic)
5101 DEFINE_TEST_NEON_FHM_BYELEMENT(fmlsl2, Basic, Basic, Basic)
5102
5103
5104 #ifdef VIXL_ENABLE_IMPLICIT_CHECKS
// Drives every plain load/store form at an invalid address ("bad_memory") to
// exercise the implicit memory-access checking machinery. Register numbers
// are varied across the calls; the EmissionCheckScope reserves the remaining
// buffer so the long instruction sequence cannot overflow it.
5105 TEST(ImplicitCheck) {
5106 SETUP_WITH_FEATURES(CPUFeatures::kNEON);
5107 START_IMPLICIT_CHECK();
5108
5109 EmissionCheckScope guard(&masm, masm.GetBuffer()->GetRemainingBytes());
5110 // Invalid memory reads.
5111 __ ldar(w3, bad_memory);
5112 __ ldar(x4, bad_memory);
5113 __ ldarb(w5, bad_memory);
5114 __ ldarb(x6, bad_memory);
5115 __ ldarh(w7, bad_memory);
5116 __ ldarh(x8, bad_memory);
5117 __ ldaxp(w9, w10, bad_memory);
5118 __ ldaxp(x11, x12, bad_memory);
5119 __ ldaxr(w13, bad_memory);
5120 __ ldaxr(x14, bad_memory);
5121 __ ldaxrb(w15, bad_memory);
5122 __ ldaxrb(x16, bad_memory);
5123 __ ldaxrh(w17, bad_memory);
5124 __ ldaxrh(x18, bad_memory);
5125 __ ldnp(w19, w20, bad_memory);
5126 __ ldnp(x21, x22, bad_memory);
5127 __ ldp(w23, w24, bad_memory);
5128 __ ldp(x25, x26, bad_memory);
5129 __ ldpsw(x27, x28, bad_memory);
5130 __ ldr(w29, bad_memory);
5131 __ ldr(x2, bad_memory);
5132 __ ldrb(w3, bad_memory);
5133 __ ldrb(x4, bad_memory);
5134 __ ldrh(w5, bad_memory);
5135 __ ldrh(x6, bad_memory);
5136 __ ldrsb(w7, bad_memory);
5137 __ ldrsb(x8, bad_memory);
5138 __ ldrsh(w9, bad_memory);
5139 __ ldrsh(x10, bad_memory);
5140 __ ldrsw(x11, bad_memory);
5141 __ ldur(w12, bad_memory);
5142 __ ldur(x13, bad_memory);
5143 __ ldurb(w14, bad_memory);
5144 __ ldurb(x15, bad_memory);
5145 __ ldurh(w16, bad_memory);
5146 __ ldurh(x17, bad_memory);
5147 __ ldursb(w18, bad_memory);
5148 __ ldursb(x19, bad_memory);
5149 __ ldursh(w20, bad_memory);
5150 __ ldursh(x21, bad_memory);
5151 __ ldursw(x22, bad_memory);
5152 __ ldxp(w23, w24, bad_memory);
5153 __ ldxp(x25, x26, bad_memory);
5154 __ ldxr(w27, bad_memory);
5155 __ ldxr(x28, bad_memory);
5156 __ ldxrb(w29, bad_memory);
5157 __ ldxrb(x2, bad_memory);
5158 __ ldxrh(w3, bad_memory);
5159 __ ldxrh(x4, bad_memory);
5160
5161 // Invalid memory writes. Note: exclusive store instructions are not tested
5162 // because they can fail due to the global monitor before trying to perform a
5163 // memory store.
5164 __ stlr(w18, bad_memory);
5165 __ stlr(x19, bad_memory);
5166 __ stlrb(w20, bad_memory);
5167 __ stlrb(x21, bad_memory);
5168 __ stlrh(w22, bad_memory);
5169 __ stlrh(x23, bad_memory);
5170 __ stnp(w14, w15, bad_memory);
5171 __ stnp(x16, x17, bad_memory);
5172 __ stp(w18, w19, bad_memory);
5173 __ stp(x20, x21, bad_memory);
5174 __ str(w22, bad_memory);
5175 __ str(x23, bad_memory);
5176 __ strb(w24, bad_memory);
5177 __ strb(x25, bad_memory);
5178 __ strh(w26, bad_memory);
5179 __ strh(x27, bad_memory);
5180 __ stur(w28, bad_memory);
5181 __ stur(x29, bad_memory);
5182 __ sturb(w2, bad_memory);
5183 __ sturb(x3, bad_memory);
5184 __ sturh(w4, bad_memory);
5185 __ sturh(x5, bad_memory);
5186
5187 END_IMPLICIT_CHECK();
5188 TRY_RUN_IMPLICIT_CHECK();
5189 }
5190
TEST(ImplicitCheckNeon)5191 TEST(ImplicitCheckNeon) {
5192 SETUP_WITH_FEATURES(CPUFeatures::kNEON);
5193 START_IMPLICIT_CHECK();
5194
5195 EmissionCheckScope guard(&masm, masm.GetBuffer()->GetRemainingBytes());
5196 __ ld1(v18.V16B(), v19.V16B(), v20.V16B(), v21.V16B(), bad_memory);
5197 __ ld1(v23.V16B(), v24.V16B(), v25.V16B(), v26.V16B(), bad_memory);
5198 __ ld1(v5.V16B(), v6.V16B(), v7.V16B(), v8.V16B(), bad_memory);
5199 __ ld1(v18.V16B(), v19.V16B(), v20.V16B(), bad_memory);
5200 __ ld1(v13.V16B(), v14.V16B(), v15.V16B(), bad_memory);
5201 __ ld1(v19.V16B(), v20.V16B(), v21.V16B(), bad_memory);
5202 __ ld1(v17.V16B(), v18.V16B(), bad_memory);
5203 __ ld1(v20.V16B(), v21.V16B(), bad_memory);
5204 __ ld1(v28.V16B(), v29.V16B(), bad_memory);
5205 __ ld1(v29.V16B(), bad_memory);
5206 __ ld1(v21.V16B(), bad_memory);
5207 __ ld1(v4.V16B(), bad_memory);
5208 __ ld1(v4.V1D(), v5.V1D(), v6.V1D(), v7.V1D(), bad_memory);
5209 __ ld1(v17.V1D(), v18.V1D(), v19.V1D(), v20.V1D(), bad_memory);
5210 __ ld1(v28.V1D(), v29.V1D(), v30.V1D(), v31.V1D(), bad_memory);
5211 __ ld1(v20.V1D(), v21.V1D(), v22.V1D(), bad_memory);
5212 __ ld1(v19.V1D(), v20.V1D(), v21.V1D(), bad_memory);
5213 __ ld1(v12.V1D(), v13.V1D(), v14.V1D(), bad_memory);
5214 __ ld1(v29.V1D(), v30.V1D(), bad_memory);
5215 __ ld1(v31.V1D(), v0.V1D(), bad_memory);
5216 __ ld1(v3.V1D(), v4.V1D(), bad_memory);
5217 __ ld1(v28.V1D(), bad_memory);
5218 __ ld1(v11.V1D(), bad_memory);
5219 __ ld1(v29.V1D(), bad_memory);
5220 __ ld1(v28.V2D(), v29.V2D(), v30.V2D(), v31.V2D(), bad_memory);
5221 __ ld1(v8.V2D(), v9.V2D(), v10.V2D(), v11.V2D(), bad_memory);
5222 __ ld1(v14.V2D(), v15.V2D(), v16.V2D(), v17.V2D(), bad_memory);
5223 __ ld1(v26.V2D(), v27.V2D(), v28.V2D(), bad_memory);
5224 __ ld1(v5.V2D(), v6.V2D(), v7.V2D(), bad_memory);
5225 __ ld1(v26.V2D(), v27.V2D(), v28.V2D(), bad_memory);
5226 __ ld1(v18.V2D(), v19.V2D(), bad_memory);
5227 __ ld1(v21.V2D(), v22.V2D(), bad_memory);
5228 __ ld1(v17.V2D(), v18.V2D(), bad_memory);
5229 __ ld1(v5.V2D(), bad_memory);
5230 __ ld1(v6.V2D(), bad_memory);
5231 __ ld1(v15.V2D(), bad_memory);
5232 __ ld1(v30.V2S(), v31.V2S(), v0.V2S(), v1.V2S(), bad_memory);
5233 __ ld1(v24.V2S(), v25.V2S(), v26.V2S(), v27.V2S(), bad_memory);
5234 __ ld1(v27.V2S(), v28.V2S(), v29.V2S(), v30.V2S(), bad_memory);
5235 __ ld1(v11.V2S(), v12.V2S(), v13.V2S(), bad_memory);
5236 __ ld1(v8.V2S(), v9.V2S(), v10.V2S(), bad_memory);
5237 __ ld1(v31.V2S(), v0.V2S(), v1.V2S(), bad_memory);
5238 __ ld1(v0.V2S(), v1.V2S(), bad_memory);
5239 __ ld1(v13.V2S(), v14.V2S(), bad_memory);
5240 __ ld1(v3.V2S(), v4.V2S(), bad_memory);
5241 __ ld1(v26.V2S(), bad_memory);
5242 __ ld1(v0.V2S(), bad_memory);
5243 __ ld1(v11.V2S(), bad_memory);
5244 __ ld1(v16.V4H(), v17.V4H(), v18.V4H(), v19.V4H(), bad_memory);
5245 __ ld1(v24.V4H(), v25.V4H(), v26.V4H(), v27.V4H(), bad_memory);
5246 __ ld1(v1.V4H(), v2.V4H(), v3.V4H(), v4.V4H(), bad_memory);
5247 __ ld1(v30.V4H(), v31.V4H(), v0.V4H(), bad_memory);
5248 __ ld1(v25.V4H(), v26.V4H(), v27.V4H(), bad_memory);
5249 __ ld1(v3.V4H(), v4.V4H(), v5.V4H(), bad_memory);
5250 __ ld1(v3.V4H(), v4.V4H(), bad_memory);
5251 __ ld1(v3.V4H(), v4.V4H(), bad_memory);
5252 __ ld1(v23.V4H(), v24.V4H(), bad_memory);
5253 __ ld1(v26.V4H(), bad_memory);
5254 __ ld1(v1.V4H(), bad_memory);
5255 __ ld1(v14.V4H(), bad_memory);
5256 __ ld1(v26.V4S(), v27.V4S(), v28.V4S(), v29.V4S(), bad_memory);
5257 __ ld1(v28.V4S(), v29.V4S(), v30.V4S(), v31.V4S(), bad_memory);
5258 __ ld1(v4.V4S(), v5.V4S(), v6.V4S(), v7.V4S(), bad_memory);
5259 __ ld1(v2.V4S(), v3.V4S(), v4.V4S(), bad_memory);
5260 __ ld1(v22.V4S(), v23.V4S(), v24.V4S(), bad_memory);
5261 __ ld1(v15.V4S(), v16.V4S(), v17.V4S(), bad_memory);
5262 __ ld1(v20.V4S(), v21.V4S(), bad_memory);
5263 __ ld1(v30.V4S(), v31.V4S(), bad_memory);
5264 __ ld1(v11.V4S(), v12.V4S(), bad_memory);
5265 __ ld1(v15.V4S(), bad_memory);
5266 __ ld1(v12.V4S(), bad_memory);
5267 __ ld1(v0.V4S(), bad_memory);
5268 __ ld1(v17.V8B(), v18.V8B(), v19.V8B(), v20.V8B(), bad_memory);
5269 __ ld1(v5.V8B(), v6.V8B(), v7.V8B(), v8.V8B(), bad_memory);
5270 __ ld1(v9.V8B(), v10.V8B(), v11.V8B(), v12.V8B(), bad_memory);
5271 __ ld1(v4.V8B(), v5.V8B(), v6.V8B(), bad_memory);
5272 __ ld1(v2.V8B(), v3.V8B(), v4.V8B(), bad_memory);
5273 __ ld1(v12.V8B(), v13.V8B(), v14.V8B(), bad_memory);
5274 __ ld1(v10.V8B(), v11.V8B(), bad_memory);
5275 __ ld1(v11.V8B(), v12.V8B(), bad_memory);
5276 __ ld1(v27.V8B(), v28.V8B(), bad_memory);
5277 __ ld1(v31.V8B(), bad_memory);
5278 __ ld1(v10.V8B(), bad_memory);
5279 __ ld1(v28.V8B(), bad_memory);
5280 __ ld1(v5.V8H(), v6.V8H(), v7.V8H(), v8.V8H(), bad_memory);
5281 __ ld1(v2.V8H(), v3.V8H(), v4.V8H(), v5.V8H(), bad_memory);
5282 __ ld1(v10.V8H(), v11.V8H(), v12.V8H(), v13.V8H(), bad_memory);
5283 __ ld1(v26.V8H(), v27.V8H(), v28.V8H(), bad_memory);
5284 __ ld1(v3.V8H(), v4.V8H(), v5.V8H(), bad_memory);
5285 __ ld1(v17.V8H(), v18.V8H(), v19.V8H(), bad_memory);
5286 __ ld1(v4.V8H(), v5.V8H(), bad_memory);
5287 __ ld1(v21.V8H(), v22.V8H(), bad_memory);
5288 __ ld1(v4.V8H(), v5.V8H(), bad_memory);
5289 __ ld1(v9.V8H(), bad_memory);
5290 __ ld1(v27.V8H(), bad_memory);
5291 __ ld1(v26.V8H(), bad_memory);
5292 __ ld1(v19.B(), 1, bad_memory);
5293 __ ld1(v12.B(), 3, bad_memory);
5294 __ ld1(v27.B(), 12, bad_memory);
5295 __ ld1(v10.D(), 1, bad_memory);
5296 __ ld1(v26.D(), 1, bad_memory);
5297 __ ld1(v7.D(), 1, bad_memory);
5298 __ ld1(v19.H(), 5, bad_memory);
5299 __ ld1(v10.H(), 1, bad_memory);
5300 __ ld1(v5.H(), 4, bad_memory);
5301 __ ld1(v21.S(), 2, bad_memory);
5302 __ ld1(v13.S(), 2, bad_memory);
5303 __ ld1(v1.S(), 2, bad_memory);
5304 __ ld1r(v2.V16B(), bad_memory);
5305 __ ld1r(v2.V16B(), bad_memory);
5306 __ ld1r(v22.V16B(), bad_memory);
5307 __ ld1r(v25.V1D(), bad_memory);
5308 __ ld1r(v9.V1D(), bad_memory);
5309 __ ld1r(v23.V1D(), bad_memory);
5310 __ ld1r(v19.V2D(), bad_memory);
5311 __ ld1r(v21.V2D(), bad_memory);
5312 __ ld1r(v30.V2D(), bad_memory);
5313 __ ld1r(v24.V2S(), bad_memory);
5314 __ ld1r(v26.V2S(), bad_memory);
5315 __ ld1r(v28.V2S(), bad_memory);
5316 __ ld1r(v19.V4H(), bad_memory);
5317 __ ld1r(v1.V4H(), bad_memory);
5318 __ ld1r(v21.V4H(), bad_memory);
5319 __ ld1r(v15.V4S(), bad_memory);
5320 __ ld1r(v21.V4S(), bad_memory);
5321 __ ld1r(v23.V4S(), bad_memory);
5322 __ ld1r(v26.V8B(), bad_memory);
5323 __ ld1r(v14.V8B(), bad_memory);
5324 __ ld1r(v19.V8B(), bad_memory);
5325 __ ld1r(v13.V8H(), bad_memory);
5326 __ ld1r(v30.V8H(), bad_memory);
5327 __ ld1r(v27.V8H(), bad_memory);
5328 __ ld2(v21.V16B(), v22.V16B(), bad_memory);
5329 __ ld2(v21.V16B(), v22.V16B(), bad_memory);
5330 __ ld2(v12.V16B(), v13.V16B(), bad_memory);
5331 __ ld2(v14.V2D(), v15.V2D(), bad_memory);
5332 __ ld2(v0.V2D(), v1.V2D(), bad_memory);
5333 __ ld2(v12.V2D(), v13.V2D(), bad_memory);
5334 __ ld2(v27.V2S(), v28.V2S(), bad_memory);
5335 __ ld2(v2.V2S(), v3.V2S(), bad_memory);
5336 __ ld2(v12.V2S(), v13.V2S(), bad_memory);
5337 __ ld2(v9.V4H(), v10.V4H(), bad_memory);
5338 __ ld2(v23.V4H(), v24.V4H(), bad_memory);
5339 __ ld2(v1.V4H(), v2.V4H(), bad_memory);
5340 __ ld2(v20.V4S(), v21.V4S(), bad_memory);
5341 __ ld2(v10.V4S(), v11.V4S(), bad_memory);
5342 __ ld2(v24.V4S(), v25.V4S(), bad_memory);
5343 __ ld2(v17.V8B(), v18.V8B(), bad_memory);
5344 __ ld2(v13.V8B(), v14.V8B(), bad_memory);
5345 __ ld2(v7.V8B(), v8.V8B(), bad_memory);
5346 __ ld2(v30.V8H(), v31.V8H(), bad_memory);
5347 __ ld2(v4.V8H(), v5.V8H(), bad_memory);
5348 __ ld2(v13.V8H(), v14.V8H(), bad_memory);
5349 __ ld2(v5.B(), v6.B(), 12, bad_memory);
5350 __ ld2(v16.B(), v17.B(), 7, bad_memory);
5351 __ ld2(v29.B(), v30.B(), 2, bad_memory);
5352 __ ld2(v11.D(), v12.D(), 1, bad_memory);
5353 __ ld2(v26.D(), v27.D(), 0, bad_memory);
5354 __ ld2(v25.D(), v26.D(), 0, bad_memory);
5355 __ ld2(v18.H(), v19.H(), 7, bad_memory);
5356 __ ld2(v17.H(), v18.H(), 5, bad_memory);
5357 __ ld2(v30.H(), v31.H(), 2, bad_memory);
5358 __ ld2(v29.S(), v30.S(), 3, bad_memory);
5359 __ ld2(v28.S(), v29.S(), 0, bad_memory);
5360 __ ld2(v6.S(), v7.S(), 1, bad_memory);
5361 __ ld2r(v26.V16B(), v27.V16B(), bad_memory);
5362 __ ld2r(v21.V16B(), v22.V16B(), bad_memory);
5363 __ ld2r(v5.V16B(), v6.V16B(), bad_memory);
5364 __ ld2r(v26.V1D(), v27.V1D(), bad_memory);
5365 __ ld2r(v14.V1D(), v15.V1D(), bad_memory);
5366 __ ld2r(v23.V1D(), v24.V1D(), bad_memory);
5367 __ ld2r(v11.V2D(), v12.V2D(), bad_memory);
5368 __ ld2r(v29.V2D(), v30.V2D(), bad_memory);
5369 __ ld2r(v15.V2D(), v16.V2D(), bad_memory);
5370 __ ld2r(v26.V2S(), v27.V2S(), bad_memory);
5371 __ ld2r(v22.V2S(), v23.V2S(), bad_memory);
5372 __ ld2r(v2.V2S(), v3.V2S(), bad_memory);
5373 __ ld2r(v2.V4H(), v3.V4H(), bad_memory);
5374 __ ld2r(v9.V4H(), v10.V4H(), bad_memory);
5375 __ ld2r(v6.V4H(), v7.V4H(), bad_memory);
5376 __ ld2r(v7.V4S(), v8.V4S(), bad_memory);
5377 __ ld2r(v19.V4S(), v20.V4S(), bad_memory);
5378 __ ld2r(v21.V4S(), v22.V4S(), bad_memory);
5379 __ ld2r(v26.V8B(), v27.V8B(), bad_memory);
5380 __ ld2r(v20.V8B(), v21.V8B(), bad_memory);
5381 __ ld2r(v11.V8B(), v12.V8B(), bad_memory);
5382 __ ld2r(v12.V8H(), v13.V8H(), bad_memory);
5383 __ ld2r(v6.V8H(), v7.V8H(), bad_memory);
5384 __ ld2r(v25.V8H(), v26.V8H(), bad_memory);
5385 __ ld3(v20.V16B(), v21.V16B(), v22.V16B(), bad_memory);
5386 __ ld3(v28.V16B(), v29.V16B(), v30.V16B(), bad_memory);
5387 __ ld3(v20.V16B(), v21.V16B(), v22.V16B(), bad_memory);
5388 __ ld3(v21.V2D(), v22.V2D(), v23.V2D(), bad_memory);
5389 __ ld3(v18.V2D(), v19.V2D(), v20.V2D(), bad_memory);
5390 __ ld3(v27.V2D(), v28.V2D(), v29.V2D(), bad_memory);
5391 __ ld3(v7.V2S(), v8.V2S(), v9.V2S(), bad_memory);
5392 __ ld3(v20.V2S(), v21.V2S(), v22.V2S(), bad_memory);
5393 __ ld3(v26.V2S(), v27.V2S(), v28.V2S(), bad_memory);
5394 __ ld3(v27.V4H(), v28.V4H(), v29.V4H(), bad_memory);
5395 __ ld3(v28.V4H(), v29.V4H(), v30.V4H(), bad_memory);
5396 __ ld3(v7.V4H(), v8.V4H(), v9.V4H(), bad_memory);
5397 __ ld3(v2.V4S(), v3.V4S(), v4.V4S(), bad_memory);
5398 __ ld3(v24.V4S(), v25.V4S(), v26.V4S(), bad_memory);
5399 __ ld3(v11.V4S(), v12.V4S(), v13.V4S(), bad_memory);
5400 __ ld3(v29.V8B(), v30.V8B(), v31.V8B(), bad_memory);
5401 __ ld3(v1.V8B(), v2.V8B(), v3.V8B(), bad_memory);
5402 __ ld3(v12.V8B(), v13.V8B(), v14.V8B(), bad_memory);
5403 __ ld3(v22.V8H(), v23.V8H(), v24.V8H(), bad_memory);
5404 __ ld3(v13.V8H(), v14.V8H(), v15.V8H(), bad_memory);
5405 __ ld3(v28.V8H(), v29.V8H(), v30.V8H(), bad_memory);
5406 __ ld3(v21.B(), v22.B(), v23.B(), 11, bad_memory);
5407 __ ld3(v5.B(), v6.B(), v7.B(), 9, bad_memory);
5408 __ ld3(v23.B(), v24.B(), v25.B(), 0, bad_memory);
5409 __ ld3(v16.D(), v17.D(), v18.D(), 0, bad_memory);
5410 __ ld3(v30.D(), v31.D(), v0.D(), 0, bad_memory);
5411 __ ld3(v28.D(), v29.D(), v30.D(), 1, bad_memory);
5412 __ ld3(v13.H(), v14.H(), v15.H(), 2, bad_memory);
5413 __ ld3(v22.H(), v23.H(), v24.H(), 7, bad_memory);
5414 __ ld3(v14.H(), v15.H(), v16.H(), 3, bad_memory);
5415 __ ld3(v22.S(), v23.S(), v24.S(), 3, bad_memory);
5416 __ ld3(v30.S(), v31.S(), v0.S(), 2, bad_memory);
5417 __ ld3(v12.S(), v13.S(), v14.S(), 1, bad_memory);
5418 __ ld3r(v24.V16B(), v25.V16B(), v26.V16B(), bad_memory);
5419 __ ld3r(v24.V16B(), v25.V16B(), v26.V16B(), bad_memory);
5420 __ ld3r(v3.V16B(), v4.V16B(), v5.V16B(), bad_memory);
5421 __ ld3r(v4.V1D(), v5.V1D(), v6.V1D(), bad_memory);
5422 __ ld3r(v7.V1D(), v8.V1D(), v9.V1D(), bad_memory);
5423 __ ld3r(v17.V1D(), v18.V1D(), v19.V1D(), bad_memory);
5424 __ ld3r(v16.V2D(), v17.V2D(), v18.V2D(), bad_memory);
5425 __ ld3r(v20.V2D(), v21.V2D(), v22.V2D(), bad_memory);
5426 __ ld3r(v14.V2D(), v15.V2D(), v16.V2D(), bad_memory);
5427 __ ld3r(v10.V2S(), v11.V2S(), v12.V2S(), bad_memory);
5428 __ ld3r(v0.V2S(), v1.V2S(), v2.V2S(), bad_memory);
5429 __ ld3r(v23.V2S(), v24.V2S(), v25.V2S(), bad_memory);
5430 __ ld3r(v22.V4H(), v23.V4H(), v24.V4H(), bad_memory);
5431 __ ld3r(v6.V4H(), v7.V4H(), v8.V4H(), bad_memory);
5432 __ ld3r(v7.V4H(), v8.V4H(), v9.V4H(), bad_memory);
5433 __ ld3r(v26.V4S(), v27.V4S(), v28.V4S(), bad_memory);
5434 __ ld3r(v0.V4S(), v1.V4S(), v2.V4S(), bad_memory);
5435 __ ld3r(v30.V4S(), v31.V4S(), v0.V4S(), bad_memory);
5436 __ ld3r(v2.V8B(), v3.V8B(), v4.V8B(), bad_memory);
5437 __ ld3r(v10.V8B(), v11.V8B(), v12.V8B(), bad_memory);
5438 __ ld3r(v28.V8B(), v29.V8B(), v30.V8B(), bad_memory);
5439 __ ld3r(v6.V8H(), v7.V8H(), v8.V8H(), bad_memory);
5440 __ ld3r(v29.V8H(), v30.V8H(), v31.V8H(), bad_memory);
5441 __ ld3r(v7.V8H(), v8.V8H(), v9.V8H(), bad_memory);
5442 __ ld4(v3.V16B(), v4.V16B(), v5.V16B(), v6.V16B(), bad_memory);
5443 __ ld4(v2.V16B(), v3.V16B(), v4.V16B(), v5.V16B(), bad_memory);
5444 __ ld4(v5.V16B(), v6.V16B(), v7.V16B(), v8.V16B(), bad_memory);
5445 __ ld4(v18.V2D(), v19.V2D(), v20.V2D(), v21.V2D(), bad_memory);
5446 __ ld4(v4.V2D(), v5.V2D(), v6.V2D(), v7.V2D(), bad_memory);
5447 __ ld4(v29.V2D(), v30.V2D(), v31.V2D(), v0.V2D(), bad_memory);
5448 __ ld4(v27.V2S(), v28.V2S(), v29.V2S(), v30.V2S(), bad_memory);
5449 __ ld4(v24.V2S(), v25.V2S(), v26.V2S(), v27.V2S(), bad_memory);
5450 __ ld4(v4.V2S(), v5.V2S(), v6.V2S(), v7.V2S(), bad_memory);
5451 __ ld4(v16.V4H(), v17.V4H(), v18.V4H(), v19.V4H(), bad_memory);
5452 __ ld4(v23.V4H(), v24.V4H(), v25.V4H(), v26.V4H(), bad_memory);
5453 __ ld4(v2.V4H(), v3.V4H(), v4.V4H(), v5.V4H(), bad_memory);
5454 __ ld4(v7.V4S(), v8.V4S(), v9.V4S(), v10.V4S(), bad_memory);
5455 __ ld4(v28.V4S(), v29.V4S(), v30.V4S(), v31.V4S(), bad_memory);
5456 __ ld4(v29.V4S(), v30.V4S(), v31.V4S(), v0.V4S(), bad_memory);
5457 __ ld4(v15.V8B(), v16.V8B(), v17.V8B(), v18.V8B(), bad_memory);
5458 __ ld4(v27.V8B(), v28.V8B(), v29.V8B(), v30.V8B(), bad_memory);
5459 __ ld4(v5.V8B(), v6.V8B(), v7.V8B(), v8.V8B(), bad_memory);
5460 __ ld4(v25.V8H(), v26.V8H(), v27.V8H(), v28.V8H(), bad_memory);
5461 __ ld4(v2.V8H(), v3.V8H(), v4.V8H(), v5.V8H(), bad_memory);
5462 __ ld4(v20.V8H(), v21.V8H(), v22.V8H(), v23.V8H(), bad_memory);
5463 __ ld4(v20.B(), v21.B(), v22.B(), v23.B(), 3, bad_memory);
5464 __ ld4(v12.B(), v13.B(), v14.B(), v15.B(), 3, bad_memory);
5465 __ ld4(v27.B(), v28.B(), v29.B(), v30.B(), 6, bad_memory);
5466 __ ld4(v28.D(), v29.D(), v30.D(), v31.D(), 1, bad_memory);
5467 __ ld4(v15.D(), v16.D(), v17.D(), v18.D(), 1, bad_memory);
5468 __ ld4(v16.D(), v17.D(), v18.D(), v19.D(), 1, bad_memory);
5469 __ ld4(v2.H(), v3.H(), v4.H(), v5.H(), 6, bad_memory);
5470 __ ld4(v5.H(), v6.H(), v7.H(), v8.H(), 3, bad_memory);
5471 __ ld4(v7.H(), v8.H(), v9.H(), v10.H(), 6, bad_memory);
5472 __ ld4(v6.S(), v7.S(), v8.S(), v9.S(), 1, bad_memory);
5473 __ ld4(v25.S(), v26.S(), v27.S(), v28.S(), 2, bad_memory);
5474 __ ld4(v8.S(), v9.S(), v10.S(), v11.S(), 3, bad_memory);
5475 __ ld4r(v14.V16B(), v15.V16B(), v16.V16B(), v17.V16B(), bad_memory);
5476 __ ld4r(v13.V16B(), v14.V16B(), v15.V16B(), v16.V16B(), bad_memory);
5477 __ ld4r(v9.V16B(), v10.V16B(), v11.V16B(), v12.V16B(), bad_memory);
5478 __ ld4r(v8.V1D(), v9.V1D(), v10.V1D(), v11.V1D(), bad_memory);
5479 __ ld4r(v4.V1D(), v5.V1D(), v6.V1D(), v7.V1D(), bad_memory);
5480 __ ld4r(v26.V1D(), v27.V1D(), v28.V1D(), v29.V1D(), bad_memory);
5481 __ ld4r(v19.V2D(), v20.V2D(), v21.V2D(), v22.V2D(), bad_memory);
5482 __ ld4r(v28.V2D(), v29.V2D(), v30.V2D(), v31.V2D(), bad_memory);
5483 __ ld4r(v15.V2D(), v16.V2D(), v17.V2D(), v18.V2D(), bad_memory);
5484 __ ld4r(v31.V2S(), v0.V2S(), v1.V2S(), v2.V2S(), bad_memory);
5485 __ ld4r(v28.V2S(), v29.V2S(), v30.V2S(), v31.V2S(), bad_memory);
5486 __ ld4r(v11.V2S(), v12.V2S(), v13.V2S(), v14.V2S(), bad_memory);
5487 __ ld4r(v19.V4H(), v20.V4H(), v21.V4H(), v22.V4H(), bad_memory);
5488 __ ld4r(v22.V4H(), v23.V4H(), v24.V4H(), v25.V4H(), bad_memory);
5489 __ ld4r(v20.V4H(), v21.V4H(), v22.V4H(), v23.V4H(), bad_memory);
5490 __ ld4r(v16.V4S(), v17.V4S(), v18.V4S(), v19.V4S(), bad_memory);
5491 __ ld4r(v25.V4S(), v26.V4S(), v27.V4S(), v28.V4S(), bad_memory);
5492 __ ld4r(v23.V4S(), v24.V4S(), v25.V4S(), v26.V4S(), bad_memory);
5493 __ ld4r(v22.V8B(), v23.V8B(), v24.V8B(), v25.V8B(), bad_memory);
5494 __ ld4r(v27.V8B(), v28.V8B(), v29.V8B(), v30.V8B(), bad_memory);
5495 __ ld4r(v29.V8B(), v30.V8B(), v31.V8B(), v0.V8B(), bad_memory);
5496 __ ld4r(v28.V8H(), v29.V8H(), v30.V8H(), v31.V8H(), bad_memory);
5497 __ ld4r(v25.V8H(), v26.V8H(), v27.V8H(), v28.V8H(), bad_memory);
5498 __ ld4r(v22.V8H(), v23.V8H(), v24.V8H(), v25.V8H(), bad_memory);
5499
5500 __ st1(v18.V16B(), v19.V16B(), v20.V16B(), v21.V16B(), bad_memory);
5501 __ st1(v10.V16B(), v11.V16B(), v12.V16B(), v13.V16B(), bad_memory);
5502 __ st1(v27.V16B(), v28.V16B(), v29.V16B(), v30.V16B(), bad_memory);
5503 __ st1(v16.V16B(), v17.V16B(), v18.V16B(), bad_memory);
5504 __ st1(v21.V16B(), v22.V16B(), v23.V16B(), bad_memory);
5505 __ st1(v9.V16B(), v10.V16B(), v11.V16B(), bad_memory);
5506 __ st1(v7.V16B(), v8.V16B(), bad_memory);
5507 __ st1(v26.V16B(), v27.V16B(), bad_memory);
5508 __ st1(v22.V16B(), v23.V16B(), bad_memory);
5509 __ st1(v23.V16B(), bad_memory);
5510 __ st1(v28.V16B(), bad_memory);
5511 __ st1(v2.V16B(), bad_memory);
5512 __ st1(v29.V1D(), v30.V1D(), v31.V1D(), v0.V1D(), bad_memory);
5513 __ st1(v12.V1D(), v13.V1D(), v14.V1D(), v15.V1D(), bad_memory);
5514 __ st1(v30.V1D(), v31.V1D(), v0.V1D(), v1.V1D(), bad_memory);
5515 __ st1(v16.V1D(), v17.V1D(), v18.V1D(), bad_memory);
5516 __ st1(v3.V1D(), v4.V1D(), v5.V1D(), bad_memory);
5517 __ st1(v14.V1D(), v15.V1D(), v16.V1D(), bad_memory);
5518 __ st1(v18.V1D(), v19.V1D(), bad_memory);
5519 __ st1(v5.V1D(), v6.V1D(), bad_memory);
5520 __ st1(v2.V1D(), v3.V1D(), bad_memory);
5521 __ st1(v4.V1D(), bad_memory);
5522 __ st1(v27.V1D(), bad_memory);
5523 __ st1(v23.V1D(), bad_memory);
5524 __ st1(v2.V2D(), v3.V2D(), v4.V2D(), v5.V2D(), bad_memory);
5525 __ st1(v22.V2D(), v23.V2D(), v24.V2D(), v25.V2D(), bad_memory);
5526 __ st1(v28.V2D(), v29.V2D(), v30.V2D(), v31.V2D(), bad_memory);
5527 __ st1(v17.V2D(), v18.V2D(), v19.V2D(), bad_memory);
5528 __ st1(v16.V2D(), v17.V2D(), v18.V2D(), bad_memory);
5529 __ st1(v22.V2D(), v23.V2D(), v24.V2D(), bad_memory);
5530 __ st1(v21.V2D(), v22.V2D(), bad_memory);
5531 __ st1(v6.V2D(), v7.V2D(), bad_memory);
5532 __ st1(v27.V2D(), v28.V2D(), bad_memory);
5533 __ st1(v21.V2D(), bad_memory);
5534 __ st1(v29.V2D(), bad_memory);
5535 __ st1(v20.V2D(), bad_memory);
5536 __ st1(v22.V2S(), v23.V2S(), v24.V2S(), v25.V2S(), bad_memory);
5537 __ st1(v8.V2S(), v9.V2S(), v10.V2S(), v11.V2S(), bad_memory);
5538 __ st1(v15.V2S(), v16.V2S(), v17.V2S(), v18.V2S(), bad_memory);
5539 __ st1(v2.V2S(), v3.V2S(), v4.V2S(), bad_memory);
5540 __ st1(v23.V2S(), v24.V2S(), v25.V2S(), bad_memory);
5541 __ st1(v7.V2S(), v8.V2S(), v9.V2S(), bad_memory);
5542 __ st1(v28.V2S(), v29.V2S(), bad_memory);
5543 __ st1(v29.V2S(), v30.V2S(), bad_memory);
5544 __ st1(v23.V2S(), v24.V2S(), bad_memory);
5545 __ st1(v6.V2S(), bad_memory);
5546 __ st1(v11.V2S(), bad_memory);
5547 __ st1(v17.V2S(), bad_memory);
5548 __ st1(v6.V4H(), v7.V4H(), v8.V4H(), v9.V4H(), bad_memory);
5549 __ st1(v9.V4H(), v10.V4H(), v11.V4H(), v12.V4H(), bad_memory);
5550 __ st1(v25.V4H(), v26.V4H(), v27.V4H(), v28.V4H(), bad_memory);
5551 __ st1(v11.V4H(), v12.V4H(), v13.V4H(), bad_memory);
5552 __ st1(v10.V4H(), v11.V4H(), v12.V4H(), bad_memory);
5553 __ st1(v12.V4H(), v13.V4H(), v14.V4H(), bad_memory);
5554 __ st1(v13.V4H(), v14.V4H(), bad_memory);
5555 __ st1(v15.V4H(), v16.V4H(), bad_memory);
5556 __ st1(v21.V4H(), v22.V4H(), bad_memory);
5557 __ st1(v16.V4H(), bad_memory);
5558 __ st1(v8.V4H(), bad_memory);
5559 __ st1(v30.V4H(), bad_memory);
5560 __ st1(v3.V4S(), v4.V4S(), v5.V4S(), v6.V4S(), bad_memory);
5561 __ st1(v25.V4S(), v26.V4S(), v27.V4S(), v28.V4S(), bad_memory);
5562 __ st1(v5.V4S(), v6.V4S(), v7.V4S(), v8.V4S(), bad_memory);
5563 __ st1(v31.V4S(), v0.V4S(), v1.V4S(), bad_memory);
5564 __ st1(v30.V4S(), v31.V4S(), v0.V4S(), bad_memory);
5565 __ st1(v6.V4S(), v7.V4S(), v8.V4S(), bad_memory);
5566 __ st1(v17.V4S(), v18.V4S(), bad_memory);
5567 __ st1(v31.V4S(), v0.V4S(), bad_memory);
5568 __ st1(v1.V4S(), v2.V4S(), bad_memory);
5569 __ st1(v26.V4S(), bad_memory);
5570 __ st1(v15.V4S(), bad_memory);
5571 __ st1(v13.V4S(), bad_memory);
5572 __ st1(v26.V8B(), v27.V8B(), v28.V8B(), v29.V8B(), bad_memory);
5573 __ st1(v10.V8B(), v11.V8B(), v12.V8B(), v13.V8B(), bad_memory);
5574 __ st1(v15.V8B(), v16.V8B(), v17.V8B(), v18.V8B(), bad_memory);
5575 __ st1(v19.V8B(), v20.V8B(), v21.V8B(), bad_memory);
5576 __ st1(v31.V8B(), v0.V8B(), v1.V8B(), bad_memory);
5577 __ st1(v9.V8B(), v10.V8B(), v11.V8B(), bad_memory);
5578 __ st1(v12.V8B(), v13.V8B(), bad_memory);
5579 __ st1(v2.V8B(), v3.V8B(), bad_memory);
5580 __ st1(v0.V8B(), v1.V8B(), bad_memory);
5581 __ st1(v16.V8B(), bad_memory);
5582 __ st1(v25.V8B(), bad_memory);
5583 __ st1(v31.V8B(), bad_memory);
5584 __ st1(v4.V8H(), v5.V8H(), v6.V8H(), v7.V8H(), bad_memory);
5585 __ st1(v3.V8H(), v4.V8H(), v5.V8H(), v6.V8H(), bad_memory);
5586 __ st1(v26.V8H(), v27.V8H(), v28.V8H(), v29.V8H(), bad_memory);
5587 __ st1(v10.V8H(), v11.V8H(), v12.V8H(), bad_memory);
5588 __ st1(v21.V8H(), v22.V8H(), v23.V8H(), bad_memory);
5589 __ st1(v18.V8H(), v19.V8H(), v20.V8H(), bad_memory);
5590 __ st1(v26.V8H(), v27.V8H(), bad_memory);
5591 __ st1(v24.V8H(), v25.V8H(), bad_memory);
5592 __ st1(v17.V8H(), v18.V8H(), bad_memory);
5593 __ st1(v29.V8H(), bad_memory);
5594 __ st1(v19.V8H(), bad_memory);
5595 __ st1(v23.V8H(), bad_memory);
5596 __ st1(v19.B(), 15, bad_memory);
5597 __ st1(v25.B(), 9, bad_memory);
5598 __ st1(v4.B(), 8, bad_memory);
5599 __ st1(v13.D(), 0, bad_memory);
5600 __ st1(v30.D(), 0, bad_memory);
5601 __ st1(v3.D(), 0, bad_memory);
5602 __ st1(v22.H(), 0, bad_memory);
5603 __ st1(v31.H(), 7, bad_memory);
5604 __ st1(v23.H(), 3, bad_memory);
5605 __ st1(v0.S(), 0, bad_memory);
5606 __ st1(v11.S(), 3, bad_memory);
5607 __ st1(v24.S(), 3, bad_memory);
5608 __ st2(v7.V16B(), v8.V16B(), bad_memory);
5609 __ st2(v5.V16B(), v6.V16B(), bad_memory);
5610 __ st2(v18.V16B(), v19.V16B(), bad_memory);
5611 __ st2(v14.V2D(), v15.V2D(), bad_memory);
5612 __ st2(v7.V2D(), v8.V2D(), bad_memory);
5613 __ st2(v24.V2D(), v25.V2D(), bad_memory);
5614 __ st2(v22.V2S(), v23.V2S(), bad_memory);
5615 __ st2(v4.V2S(), v5.V2S(), bad_memory);
5616 __ st2(v2.V2S(), v3.V2S(), bad_memory);
5617 __ st2(v23.V4H(), v24.V4H(), bad_memory);
5618 __ st2(v8.V4H(), v9.V4H(), bad_memory);
5619 __ st2(v7.V4H(), v8.V4H(), bad_memory);
5620 __ st2(v17.V4S(), v18.V4S(), bad_memory);
5621 __ st2(v6.V4S(), v7.V4S(), bad_memory);
5622 __ st2(v26.V4S(), v27.V4S(), bad_memory);
5623 __ st2(v31.V8B(), v0.V8B(), bad_memory);
5624 __ st2(v0.V8B(), v1.V8B(), bad_memory);
5625 __ st2(v21.V8B(), v22.V8B(), bad_memory);
5626 __ st2(v7.V8H(), v8.V8H(), bad_memory);
5627 __ st2(v22.V8H(), v23.V8H(), bad_memory);
5628 __ st2(v4.V8H(), v5.V8H(), bad_memory);
5629 __ st2(v8.B(), v9.B(), 15, bad_memory);
5630 __ st2(v8.B(), v9.B(), 15, bad_memory);
5631 __ st2(v7.B(), v8.B(), 4, bad_memory);
5632 __ st2(v25.D(), v26.D(), 0, bad_memory);
5633 __ st2(v17.D(), v18.D(), 1, bad_memory);
5634 __ st2(v3.D(), v4.D(), 1, bad_memory);
5635 __ st2(v4.H(), v5.H(), 3, bad_memory);
5636 __ st2(v0.H(), v1.H(), 5, bad_memory);
5637 __ st2(v22.H(), v23.H(), 2, bad_memory);
5638 __ st2(v14.S(), v15.S(), 3, bad_memory);
5639 __ st2(v23.S(), v24.S(), 3, bad_memory);
5640 __ st2(v0.S(), v1.S(), 2, bad_memory);
5641 __ st3(v26.V16B(), v27.V16B(), v28.V16B(), bad_memory);
5642 __ st3(v21.V16B(), v22.V16B(), v23.V16B(), bad_memory);
5643 __ st3(v24.V16B(), v25.V16B(), v26.V16B(), bad_memory);
5644 __ st3(v17.V2D(), v18.V2D(), v19.V2D(), bad_memory);
5645 __ st3(v23.V2D(), v24.V2D(), v25.V2D(), bad_memory);
5646 __ st3(v10.V2D(), v11.V2D(), v12.V2D(), bad_memory);
5647 __ st3(v9.V2S(), v10.V2S(), v11.V2S(), bad_memory);
5648 __ st3(v13.V2S(), v14.V2S(), v15.V2S(), bad_memory);
5649 __ st3(v22.V2S(), v23.V2S(), v24.V2S(), bad_memory);
5650 __ st3(v31.V4H(), v0.V4H(), v1.V4H(), bad_memory);
5651 __ st3(v8.V4H(), v9.V4H(), v10.V4H(), bad_memory);
5652 __ st3(v19.V4H(), v20.V4H(), v21.V4H(), bad_memory);
5653 __ st3(v18.V4S(), v19.V4S(), v20.V4S(), bad_memory);
5654 __ st3(v25.V4S(), v26.V4S(), v27.V4S(), bad_memory);
5655 __ st3(v16.V4S(), v17.V4S(), v18.V4S(), bad_memory);
5656 __ st3(v27.V8B(), v28.V8B(), v29.V8B(), bad_memory);
5657 __ st3(v29.V8B(), v30.V8B(), v31.V8B(), bad_memory);
5658 __ st3(v30.V8B(), v31.V8B(), v0.V8B(), bad_memory);
5659 __ st3(v8.V8H(), v9.V8H(), v10.V8H(), bad_memory);
5660 __ st3(v18.V8H(), v19.V8H(), v20.V8H(), bad_memory);
5661 __ st3(v18.V8H(), v19.V8H(), v20.V8H(), bad_memory);
5662 __ st3(v31.B(), v0.B(), v1.B(), 10, bad_memory);
5663 __ st3(v4.B(), v5.B(), v6.B(), 5, bad_memory);
5664 __ st3(v5.B(), v6.B(), v7.B(), 1, bad_memory);
5665 __ st3(v5.D(), v6.D(), v7.D(), 0, bad_memory);
5666 __ st3(v6.D(), v7.D(), v8.D(), 0, bad_memory);
5667 __ st3(v0.D(), v1.D(), v2.D(), 0, bad_memory);
5668 __ st3(v31.H(), v0.H(), v1.H(), 2, bad_memory);
5669 __ st3(v14.H(), v15.H(), v16.H(), 5, bad_memory);
5670 __ st3(v21.H(), v22.H(), v23.H(), 6, bad_memory);
5671 __ st3(v21.S(), v22.S(), v23.S(), 0, bad_memory);
5672 __ st3(v11.S(), v12.S(), v13.S(), 1, bad_memory);
5673 __ st3(v15.S(), v16.S(), v17.S(), 0, bad_memory);
5674 __ st4(v22.V16B(), v23.V16B(), v24.V16B(), v25.V16B(), bad_memory);
5675 __ st4(v24.V16B(), v25.V16B(), v26.V16B(), v27.V16B(), bad_memory);
5676 __ st4(v15.V16B(), v16.V16B(), v17.V16B(), v18.V16B(), bad_memory);
5677 __ st4(v16.V2D(), v17.V2D(), v18.V2D(), v19.V2D(), bad_memory);
5678 __ st4(v17.V2D(), v18.V2D(), v19.V2D(), v20.V2D(), bad_memory);
5679 __ st4(v9.V2D(), v10.V2D(), v11.V2D(), v12.V2D(), bad_memory);
5680 __ st4(v23.V2S(), v24.V2S(), v25.V2S(), v26.V2S(), bad_memory);
5681 __ st4(v15.V2S(), v16.V2S(), v17.V2S(), v18.V2S(), bad_memory);
5682 __ st4(v24.V2S(), v25.V2S(), v26.V2S(), v27.V2S(), bad_memory);
5683 __ st4(v14.V4H(), v15.V4H(), v16.V4H(), v17.V4H(), bad_memory);
5684 __ st4(v18.V4H(), v19.V4H(), v20.V4H(), v21.V4H(), bad_memory);
5685 __ st4(v1.V4H(), v2.V4H(), v3.V4H(), v4.V4H(), bad_memory);
5686 __ st4(v13.V4S(), v14.V4S(), v15.V4S(), v16.V4S(), bad_memory);
5687 __ st4(v6.V4S(), v7.V4S(), v8.V4S(), v9.V4S(), bad_memory);
5688 __ st4(v15.V4S(), v16.V4S(), v17.V4S(), v18.V4S(), bad_memory);
5689 __ st4(v26.V8B(), v27.V8B(), v28.V8B(), v29.V8B(), bad_memory);
5690 __ st4(v25.V8B(), v26.V8B(), v27.V8B(), v28.V8B(), bad_memory);
5691 __ st4(v19.V8B(), v20.V8B(), v21.V8B(), v22.V8B(), bad_memory);
5692 __ st4(v19.V8H(), v20.V8H(), v21.V8H(), v22.V8H(), bad_memory);
5693 __ st4(v15.V8H(), v16.V8H(), v17.V8H(), v18.V8H(), bad_memory);
5694 __ st4(v31.V8H(), v0.V8H(), v1.V8H(), v2.V8H(), bad_memory);
5695 __ st4(v0.B(), v1.B(), v2.B(), v3.B(), 13, bad_memory);
5696 __ st4(v4.B(), v5.B(), v6.B(), v7.B(), 10, bad_memory);
5697 __ st4(v9.B(), v10.B(), v11.B(), v12.B(), 9, bad_memory);
5698 __ st4(v2.D(), v3.D(), v4.D(), v5.D(), 1, bad_memory);
5699 __ st4(v7.D(), v8.D(), v9.D(), v10.D(), 0, bad_memory);
5700 __ st4(v31.D(), v0.D(), v1.D(), v2.D(), 1, bad_memory);
5701 __ st4(v2.H(), v3.H(), v4.H(), v5.H(), 1, bad_memory);
5702 __ st4(v27.H(), v28.H(), v29.H(), v30.H(), 3, bad_memory);
5703 __ st4(v24.H(), v25.H(), v26.H(), v27.H(), 4, bad_memory);
5704 __ st4(v18.S(), v19.S(), v20.S(), v21.S(), 2, bad_memory);
5705 __ st4(v6.S(), v7.S(), v8.S(), v9.S(), 2, bad_memory);
5706 __ st4(v25.S(), v26.S(), v27.S(), v28.S(), 1, bad_memory);
5707
5708 END_IMPLICIT_CHECK();
5709 TRY_RUN_IMPLICIT_CHECK();
5710 }
5711
// Verify that SVE loads and stores through an invalid address are caught by
// the simulator's implicit memory-access checks. Every major class of SVE
// memory access is exercised: unpredicated LDR/STR (predicate and vector
// forms), predicated contiguous accesses, packed (narrow-element) accesses,
// and the interleaved multi-register LD2-LD4/ST2-ST4 forms.
TEST(ImplicitCheckSve) {
  SETUP_WITH_FEATURES(CPUFeatures::kSVE,
                      CPUFeatures::kSVE2,
                      CPUFeatures::kNEON);
  START_IMPLICIT_CHECK();

  // ip0 is used as the (bad) base address for every access below.
  // NOTE(review): presumably ip0 is pointed at inaccessible memory by the
  // START_IMPLICIT_CHECK harness — confirm against the macro's definition.
  SVEMemOperand bad_sve_memory = SVEMemOperand(ip0);

  // Keep the whole emission inside one scope so no veneer/literal pools are
  // emitted between the instructions under test.
  EmissionCheckScope guard(&masm, masm.GetBuffer()->GetRemainingBytes());
  // Simple, unpredicated loads and stores.
  __ Str(p12.VnD(), bad_sve_memory);
  __ Str(p13.VnS(), bad_sve_memory);
  __ Str(p14.VnH(), bad_sve_memory);
  __ Str(p15.VnB(), bad_sve_memory);
  __ Ldr(p8.VnD(), bad_sve_memory);
  __ Ldr(p9.VnS(), bad_sve_memory);
  __ Ldr(p10.VnH(), bad_sve_memory);
  __ Ldr(p11.VnB(), bad_sve_memory);

  __ Str(z0.VnD(), bad_sve_memory);
  __ Str(z1.VnS(), bad_sve_memory);
  __ Str(z2.VnH(), bad_sve_memory);
  __ Str(z3.VnB(), bad_sve_memory);
  __ Ldr(z20.VnD(), bad_sve_memory);
  __ Ldr(z21.VnS(), bad_sve_memory);
  __ Ldr(z22.VnH(), bad_sve_memory);
  __ Ldr(z23.VnB(), bad_sve_memory);

  // Structured accesses.
  __ St1b(z0.VnB(), p2, bad_sve_memory);
  __ St1h(z1.VnH(), p1, bad_sve_memory);
  __ St1w(z2.VnS(), p1, bad_sve_memory);
  __ St1d(z3.VnD(), p2, bad_sve_memory);
  __ Ld1b(z20.VnB(), p1.Zeroing(), bad_sve_memory);
  __ Ld1h(z21.VnH(), p2.Zeroing(), bad_sve_memory);
  __ Ld1w(z22.VnS(), p1.Zeroing(), bad_sve_memory);
  __ Ld1d(z23.VnD(), p1.Zeroing(), bad_sve_memory);

  // Structured, packed accesses (memory element narrower than the vector
  // lane, e.g. St1b of H/S/D lanes).
  __ St1b(z2.VnH(), p1, bad_sve_memory);
  __ St1b(z3.VnS(), p2, bad_sve_memory);
  __ St1b(z4.VnD(), p2, bad_sve_memory);
  __ St1h(z0.VnS(), p1, bad_sve_memory);
  __ St1h(z1.VnD(), p1, bad_sve_memory);
  __ St1w(z2.VnD(), p1, bad_sve_memory);
  __ Ld1b(z20.VnH(), p1.Zeroing(), bad_sve_memory);
  __ Ld1b(z21.VnS(), p1.Zeroing(), bad_sve_memory);
  __ Ld1b(z22.VnD(), p1.Zeroing(), bad_sve_memory);
  __ Ld1h(z23.VnS(), p2.Zeroing(), bad_sve_memory);
  __ Ld1h(z24.VnD(), p2.Zeroing(), bad_sve_memory);
  __ Ld1w(z20.VnD(), p1.Zeroing(), bad_sve_memory);
  __ Ld1sb(z21.VnH(), p1.Zeroing(), bad_sve_memory);
  __ Ld1sb(z22.VnS(), p1.Zeroing(), bad_sve_memory);
  __ Ld1sb(z23.VnD(), p2.Zeroing(), bad_sve_memory);
  __ Ld1sh(z24.VnS(), p2.Zeroing(), bad_sve_memory);
  __ Ld1sh(z20.VnD(), p1.Zeroing(), bad_sve_memory);
  __ Ld1sw(z21.VnD(), p1.Zeroing(), bad_sve_memory);

  // Structured, interleaved accesses (two, three and four registers).
  __ St2b(z0.VnB(), z1.VnB(), p4, bad_sve_memory);
  __ St2h(z1.VnH(), z2.VnH(), p4, bad_sve_memory);
  __ St2w(z2.VnS(), z3.VnS(), p3, bad_sve_memory);
  __ St2d(z3.VnD(), z4.VnD(), p4, bad_sve_memory);
  __ Ld2b(z20.VnB(), z21.VnB(), p5.Zeroing(), bad_sve_memory);
  __ Ld2h(z21.VnH(), z22.VnH(), p6.Zeroing(), bad_sve_memory);
  __ Ld2w(z22.VnS(), z23.VnS(), p6.Zeroing(), bad_sve_memory);
  __ Ld2d(z23.VnD(), z24.VnD(), p5.Zeroing(), bad_sve_memory);

  __ St3b(z4.VnB(), z5.VnB(), z6.VnB(), p4, bad_sve_memory);
  __ St3h(z5.VnH(), z6.VnH(), z7.VnH(), p4, bad_sve_memory);
  __ St3w(z6.VnS(), z7.VnS(), z8.VnS(), p3, bad_sve_memory);
  __ St3d(z7.VnD(), z8.VnD(), z9.VnD(), p4, bad_sve_memory);
  __ Ld3b(z24.VnB(), z25.VnB(), z26.VnB(), p5.Zeroing(), bad_sve_memory);
  __ Ld3h(z25.VnH(), z26.VnH(), z27.VnH(), p6.Zeroing(), bad_sve_memory);
  __ Ld3w(z26.VnS(), z27.VnS(), z28.VnS(), p6.Zeroing(), bad_sve_memory);
  __ Ld3d(z27.VnD(), z28.VnD(), z29.VnD(), p5.Zeroing(), bad_sve_memory);

  __ St4b(z31.VnB(), z0.VnB(), z1.VnB(), z2.VnB(), p4, bad_sve_memory);
  __ St4h(z0.VnH(), z1.VnH(), z2.VnH(), z3.VnH(), p4, bad_sve_memory);
  __ St4w(z1.VnS(), z2.VnS(), z3.VnS(), z4.VnS(), p3, bad_sve_memory);
  __ St4d(z2.VnD(), z3.VnD(), z4.VnD(), z5.VnD(), p4, bad_sve_memory);
  __ Ld4b(z25.VnB(),
          z26.VnB(),
          z27.VnB(),
          z28.VnB(),
          p5.Zeroing(),
          bad_sve_memory);
  __ Ld4h(z26.VnH(),
          z27.VnH(),
          z28.VnH(),
          z29.VnH(),
          p6.Zeroing(),
          bad_sve_memory);
  __ Ld4w(z27.VnS(),
          z28.VnS(),
          z29.VnS(),
          z30.VnS(),
          p6.Zeroing(),
          bad_sve_memory);
  __ Ld4d(z28.VnD(),
          z29.VnD(),
          z30.VnD(),
          z31.VnD(),
          p5.Zeroing(),
          bad_sve_memory);

  END_IMPLICIT_CHECK();
  TRY_RUN_IMPLICIT_CHECK();
}
5821
// Verify that atomic read-modify-write instructions through a bad address
// are caught by the simulator's implicit memory-access checks.
TEST(ImplicitCheckAtomics) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kAtomics);
  START_IMPLICIT_CHECK();

  // Keep the whole emission inside one scope so no veneer/literal pools are
  // emitted between the instructions under test.
  EmissionCheckScope guard(&masm, masm.GetBuffer()->GetRemainingBytes());
// For a given atomic operation OP (e.g. `add` -> LDADD/STADD), emit every
// access size (byte, halfword, word, doubleword) and every memory-ordering
// variant (plain, acquire `a`, release `l`, acquire-release `al`) of the
// load-OP and store-OP forms, all targeting `bad_memory`.
#define INST_LIST(OP)                  \
  __ Ld##OP##b(w0, w0, bad_memory);    \
  __ Ld##OP##ab(w0, w1, bad_memory);   \
  __ Ld##OP##lb(w0, w2, bad_memory);   \
  __ Ld##OP##alb(w0, w3, bad_memory);  \
  __ Ld##OP##h(w0, w0, bad_memory);    \
  __ Ld##OP##ah(w0, w1, bad_memory);   \
  __ Ld##OP##lh(w0, w2, bad_memory);   \
  __ Ld##OP##alh(w0, w3, bad_memory);  \
  __ Ld##OP(w0, w0, bad_memory);       \
  __ Ld##OP##a(w0, w1, bad_memory);    \
  __ Ld##OP##l(w0, w2, bad_memory);    \
  __ Ld##OP##al(w0, w3, bad_memory);   \
  __ Ld##OP(x0, x0, bad_memory);       \
  __ Ld##OP##a(x0, x1, bad_memory);    \
  __ Ld##OP##l(x0, x2, bad_memory);    \
  __ Ld##OP##al(x0, x3, bad_memory);   \
  __ St##OP##b(w0, bad_memory);        \
  __ St##OP##lb(w0, bad_memory);       \
  __ St##OP##h(w0, bad_memory);        \
  __ St##OP##lh(w0, bad_memory);       \
  __ St##OP(w0, bad_memory);           \
  __ St##OP##l(w0, bad_memory);        \
  __ St##OP(x0, bad_memory);           \
  __ St##OP##l(x0, bad_memory);

  // Instantiate the list for each atomic ALU operation.
  INST_LIST(add);
  INST_LIST(set);
  INST_LIST(eor);
  INST_LIST(smin);
  INST_LIST(smax);
  INST_LIST(umin);
  INST_LIST(umax);
  INST_LIST(clr);

#undef INST_LIST

  END_IMPLICIT_CHECK();
  TRY_RUN_IMPLICIT_CHECK();
}
5867
// Verify that the MOPS (memory set/copy) macro-instructions through bad
// addresses are caught by the simulator's implicit memory-access checks.
TEST(ImplicitCheckMops) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kMOPS);
  START_IMPLICIT_CHECK();

  // Keep the whole emission inside one scope so no veneer/literal pools are
  // emitted between the instructions under test.
  EmissionCheckScope guard(&masm, masm.GetBuffer()->GetRemainingBytes());
  // Memory-set variants (Set, non-temporal, with/without tag setting).
  // NOTE(review): ip0/ip1 presumably carry the bad address/size set up by
  // the harness — confirm against START_IMPLICIT_CHECK.
  __ Set(x15, ip1, ip0);
  __ Setn(x15, ip1, ip0);
  __ Setg(x15, ip1, ip0);
  __ Setgn(x15, ip1, ip0);

  // Memory-copy variants (forward-only `Cpyf` and direction-choosing `Cpy`,
  // plus their non-temporal read/write forms).
  __ Cpy(x15, ip0, ip1);
  __ Cpyn(x15, ip0, ip1);
  __ Cpyrn(x15, ip0, ip1);
  __ Cpywn(x15, ip0, ip1);
  __ Cpyf(x15, ip0, ip1);
  __ Cpyfn(x15, ip0, ip1);
  __ Cpyfrn(x15, ip0, ip1);
  __ Cpyfwn(x15, ip0, ip1);

  // The macro-assembler expands each instruction into prologue, main and
  // epilogue instructions where only the main instruction will fail. Increase
  // the counter to account for those additional instructions and the following
  // instructions.
  __ Mov(x0, 3);
  __ Mul(x1, x1, x0);
  __ Add(x1, x1, x0);

  END_IMPLICIT_CHECK();
  TRY_RUN_IMPLICIT_CHECK();
}
5898 #endif // VIXL_ENABLE_IMPLICIT_CHECKS
5899
5900 #undef __
5901 #define __ masm->
5902
5903 #if defined(VIXL_INCLUDE_SIMULATOR_AARCH64) && \
5904 defined(VIXL_HAS_ABI_SUPPORT) && __cplusplus >= 201103L && \
5905 (defined(__clang__) || GCC_VERSION_OR_NEWER(4, 9, 1))
5906
5907 // Generate a function that stores zero to a hard-coded address.
GenerateStoreZero(MacroAssembler * masm,int32_t * target)5908 Instruction* GenerateStoreZero(MacroAssembler* masm, int32_t* target) {
5909 masm->Reset();
5910
5911 UseScratchRegisterScope temps(masm);
5912 Register temp = temps.AcquireX();
5913 __ Mov(temp, reinterpret_cast<intptr_t>(target));
5914 __ Str(wzr, MemOperand(temp));
5915 __ Ret();
5916
5917 masm->FinalizeCode();
5918 return masm->GetBuffer()->GetStartAddress<Instruction*>();
5919 }
5920
5921
5922 // Generate a function that stores the `int32_t` argument to a hard-coded
5923 // address.
5924 // In this example and the other below, we use the `abi` object to retrieve
5925 // argument and return locations even though we could easily hard code them.
// This mirrors how a user of more generic (e.g. templated) code would use
// these mechanisms.
GenerateStoreInput(MacroAssembler * masm,int32_t * target)5928 Instruction* GenerateStoreInput(MacroAssembler* masm, int32_t* target) {
5929 masm->Reset();
5930
5931 ABI abi;
5932 Register input =
5933 Register(abi.GetNextParameterGenericOperand<int32_t>().GetCPURegister());
5934
5935 UseScratchRegisterScope temps(masm);
5936 Register temp = temps.AcquireX();
5937 __ Mov(temp, reinterpret_cast<intptr_t>(target));
5938 __ Str(input, MemOperand(temp));
5939 __ Ret();
5940
5941 masm->FinalizeCode();
5942 return masm->GetBuffer()->GetStartAddress<Instruction*>();
5943 }
5944
5945
5946 // A minimal implementation of a `pow` function.
GeneratePow(MacroAssembler * masm,unsigned pow)5947 Instruction* GeneratePow(MacroAssembler* masm, unsigned pow) {
5948 masm->Reset();
5949
5950 ABI abi;
5951 Register input =
5952 Register(abi.GetNextParameterGenericOperand<int64_t>().GetCPURegister());
5953 Register result =
5954 Register(abi.GetReturnGenericOperand<int64_t>().GetCPURegister());
5955 UseScratchRegisterScope temps(masm);
5956 Register temp = temps.AcquireX();
5957
5958 __ Mov(temp, 1);
5959 for (unsigned i = 0; i < pow; i++) {
5960 __ Mul(temp, temp, input);
5961 }
5962 __ Mov(result, temp);
5963 __ Ret();
5964
5965 masm->FinalizeCode();
5966 return masm->GetBuffer()->GetStartAddress<Instruction*>();
5967 }
5968
5969
GenerateSum(MacroAssembler * masm)5970 Instruction* GenerateSum(MacroAssembler* masm) {
5971 masm->Reset();
5972
5973 ABI abi;
5974 VRegister input_1 =
5975 VRegister(abi.GetNextParameterGenericOperand<float>().GetCPURegister());
5976 Register input_2 =
5977 Register(abi.GetNextParameterGenericOperand<int64_t>().GetCPURegister());
5978 VRegister input_3 =
5979 VRegister(abi.GetNextParameterGenericOperand<double>().GetCPURegister());
5980 VRegister result =
5981 VRegister(abi.GetReturnGenericOperand<double>().GetCPURegister());
5982
5983 UseScratchRegisterScope temps(masm);
5984 VRegister temp = temps.AcquireD();
5985
5986 __ Fcvt(input_1.D(), input_1);
5987 __ Scvtf(temp, input_2);
5988 __ Fadd(temp, temp, input_1.D());
5989 __ Fadd(result, temp, input_3);
5990 __ Ret();
5991
5992 masm->FinalizeCode();
5993 return masm->GetBuffer()->GetStartAddress<Instruction*>();
5994 }
5995
5996
// Exercise Simulator::RunFrom() with a variety of function signatures:
// no arguments, value arguments, and mixed integer/FP argument passing.
TEST(RunFrom) {
  SETUP_WITH_FEATURES(CPUFeatures::kFP);

  // A `void()` function: no arguments, no return value.
  int32_t stored = 0xbad;
  simulator.RunFrom(GenerateStoreZero(&masm, &stored));
  VIXL_CHECK(stored == 0);

  // A `void(int32_t)` function: one argument, no return value.
  int32_t argument = 0xf00d;
  simulator.RunFrom<void, int32_t>(GenerateStoreInput(&masm, &stored),
                                   argument);
  VIXL_CHECK(stored == 0xf00d);

  // An `int64_t(int64_t)` function: one argument and a return value.
  int64_t pow_result =
      simulator.RunFrom<int64_t, int64_t>(GeneratePow(&masm, 0), 0xbad);
  VIXL_CHECK(pow_result == 1);
  pow_result = simulator.RunFrom<int64_t, int64_t>(GeneratePow(&masm, 1), 123);
  VIXL_CHECK(pow_result == 123);
  pow_result = simulator.RunFrom<int64_t, int64_t>(GeneratePow(&masm, 10), 2);
  VIXL_CHECK(pow_result == 1024);

  // Multiple arguments, passed in both general-purpose and FP registers.
  double sum_result =
      simulator.RunFrom<double, float, int64_t, double>(GenerateSum(&masm),
                                                        1.0,
                                                        2,
                                                        3.0);
  VIXL_CHECK(sum_result == 6.0);
}
6028
6029 #endif
6030
6031
6032 } // namespace aarch64
6033 } // namespace vixl
6034