• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2015, VIXL authors
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are met:
6 //
7 //   * Redistributions of source code must retain the above copyright notice,
8 //     this list of conditions and the following disclaimer.
9 //   * Redistributions in binary form must reproduce the above copyright notice,
10 //     this list of conditions and the following disclaimer in the documentation
11 //     and/or other materials provided with the distribution.
12 //   * Neither the name of ARM Limited nor the names of its contributors may be
13 //     used to endorse or promote products derived from this software without
14 //     specific prior written permission.
15 //
16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
17 // ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 // WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
20 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 
27 #include <cfloat>
28 #include <cstdio>
29 #include <sstream>
30 
31 #include "test-runner.h"
32 #include "test-utils.h"
33 
34 #include "aarch64/cpu-features-auditor-aarch64.h"
35 #include "aarch64/macro-assembler-aarch64.h"
36 #include "aarch64/simulator-aarch64.h"
37 #include "aarch64/test-simulator-inputs-aarch64.h"
38 #include "aarch64/test-simulator-traces-aarch64.h"
39 #include "aarch64/test-utils-aarch64.h"
40 
41 namespace vixl {
42 namespace aarch64 {
43 
44 // ==== Simulator Tests ====
45 //
46 // These simulator tests check instruction behaviour against a trace taken from
47 // real AArch64 hardware. The same test code is used to generate the trace; the
48 // results are printed to stdout when the test is run with
49 // --generate_test_trace.
50 //
51 // The input lists and expected results are stored in test/traces. The expected
52 // results can be regenerated using tools/generate_simulator_traces.py. Adding a
53 // test for a new instruction is described at the top of
54 // test-simulator-traces-aarch64.h.
55 
// Shorthand used when emitting code: `__ Mov(...)` expands to `masm.Mov(...)`,
// so a variable named `masm` must be in scope wherever it is used.
#define __ masm.
// Declare a test; the name passed to TEST_ is prefixed with AARCH64_SIM_.
#define TEST(name) TEST_(AARCH64_SIM_##name)

// Set up a test using a default-constructed CPUFeatures object.
#define SETUP() SETUP_WITH_FEATURES(CPUFeatures())
60 
61 #ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
62 
// Simulator build: declare the `masm`, `decoder` and `simulator` locals that
// the other test macros refer to by name. The macro arguments are forwarded to
// the CPUFeatures constructor and the result is installed on the
// MacroAssembler. The expansion already ends with a semicolon.
#define SETUP_WITH_FEATURES(...)                 \
  MacroAssembler masm;                           \
  masm.SetCPUFeatures(CPUFeatures(__VA_ARGS__)); \
  Decoder decoder;                               \
  Simulator simulator(&decoder);                 \
  simulator.SetColouredTrace(Test::coloured_trace());
69 
// Simulator build: begin generating the code for a test. The assembler and the
// simulator state are reset, then the prologue is emitted: the callee-saved
// register push, an MTE marker when the assembler has kMTE enabled, and one
// trace-enable pseudo-instruction for each tracing option set on Test.
#define START()                                                         \
  masm.Reset();                                                         \
  simulator.ResetState();                                               \
  __ PushCalleeSavedRegisters();                                        \
  /* The infrastructure code hasn't been covered at the moment, e.g. */ \
  /* prologue/epilogue. Suppress tagging mis-match exception before  */ \
  /* this point. */                                                     \
  if (masm.GetCPUFeatures()->Has(CPUFeatures::kMTE)) {                  \
    __ Hlt(DebugHltOpcode::kMTEActive);                                 \
  }                                                                     \
  if (Test::trace_reg()) {                                              \
    __ Trace(LOG_STATE, TRACE_ENABLE);                                  \
  }                                                                     \
  if (Test::trace_write()) {                                            \
    __ Trace(LOG_WRITE, TRACE_ENABLE);                                  \
  }                                                                     \
  if (Test::trace_sim()) {                                              \
    __ Trace(LOG_DISASM, TRACE_ENABLE);                                 \
  }
89 
// Simulator build: emit the epilogue that mirrors START() and finalize the
// code. In order: the MTE-inactive marker (only when kMTE is enabled), a
// trace-disable for all log categories, the callee-saved register pop and the
// return. The expansion has no trailing semicolon; the caller supplies it.
#define END()                                          \
  if (masm.GetCPUFeatures()->Has(CPUFeatures::kMTE)) { \
    __ Hlt(DebugHltOpcode::kMTEInactive);              \
  }                                                    \
  __ Trace(LOG_ALL, TRACE_DISABLE);                    \
  __ PopCalleeSavedRegisters();                        \
  __ Ret();                                            \
  masm.FinalizeCode()
98 
// Simulator build: optionally disassemble the generated code, then run it on
// the simulator from the start of the code buffer. `skipped` is a bool* and is
// always set to false here. DISASSEMBLE() is defined later in the file, which
// is fine because it is only expanded where TRY_RUN is used.
#define TRY_RUN(skipped)                                                \
  DISASSEMBLE();                                                        \
  simulator.RunFrom(masm.GetBuffer()->GetStartAddress<Instruction*>()); \
  /* The simulator can run every test. */                               \
  *skipped = false
104 
105 #ifdef VIXL_ENABLE_IMPLICIT_CHECKS
106 // The signal handler needs access to the simulator.
107 Simulator* gImplicitCheckSim;
108 
109 #ifdef __x86_64__
110 #include <signal.h>
111 #include <ucontext.h>
HandleSegFault(int sig,siginfo_t * info,void * context)112 void HandleSegFault(int sig, siginfo_t* info, void* context) {
113   USE(sig);
114   USE(info);
115   Simulator* sim = gImplicitCheckSim;
116 
117   // Did the signal come from the simulator?
118   ucontext_t* uc = reinterpret_cast<ucontext_t*>(context);
119   uintptr_t fault_pc = uc->uc_mcontext.gregs[REG_RIP];
120   VIXL_CHECK(sim->IsSimulatedMemoryAccess(fault_pc));
121 
122   // Increment the counter (x1) each time we handle a signal.
123   int64_t counter = reinterpret_cast<int64_t>(sim->ReadXRegister(1));
124   sim->WriteXRegister(1, ++counter);
125 
126   // Return to the VIXL memory access continuation point, which is also the
127   // next instruction, after this handler.
128   uc->uc_mcontext.gregs[REG_RIP] = sim->GetSignalReturnAddress();
129   // Return that the memory access failed.
130   uc->uc_mcontext.gregs[REG_RAX] =
131       static_cast<greg_t>(MemoryAccessResult::Failure);
132 }
133 #endif  // __x86_64__
134 
// Start an implicit check test with a counter and start label so the number of
// faults can be counted. Note: each instruction after the start will be
// expected to fault.
//
// The macro publishes `simulator` through gImplicitCheckSim, installs
// HandleSegFault for SIGSEGV, runs START(), and then emits code that zeroes
// the fault counter (x1) and the bad base address registers (x15, ip0). It
// declares `sa`, `bad_memory`, `l_start` and `l_end` in the enclosing scope;
// `l_start` is bound at the end of the expansion.
#define START_IMPLICIT_CHECK()                                                \
  gImplicitCheckSim = &simulator;                                             \
  /* Set up a signal handler to count the number of faulting instructions. */ \
  /* Zero-initialise the struct and clear the mask: no field passed to     */ \
  /* sigaction() may be indeterminate. SA_SIGINFO selects the three-       */ \
  /* argument sa_sigaction handler form.                                   */ \
  struct sigaction sa = {};                                                   \
  sigemptyset(&sa.sa_mask);                                                   \
  sa.sa_flags = SA_SIGINFO;                                                   \
  sa.sa_sigaction = HandleSegFault;                                           \
  VIXL_CHECK(sigaction(SIGSEGV, &sa, nullptr) == 0);                          \
  START();                                                                    \
  /* Reset the counter. */                                                    \
  __ Mov(x1, 0);                                                              \
  /* Use a consistent bad address. */                                         \
  __ Mov(x15, xzr);                                                           \
  __ Mov(ip0, xzr);                                                           \
  /* Set the amount of data to load. */                                       \
  __ Mov(ip1, 4096);                                                          \
  [[maybe_unused]] MemOperand bad_memory = MemOperand(ip0);                   \
  if (masm.GetCPUFeatures()->Has(CPUFeatures::kSVE)) {                        \
    /* Turn on all lanes to ensure all loads/stores are tested. */            \
    __ Ptrue(p0.VnB());                                                       \
    __ Ptrue(p1.VnB());                                                       \
    __ Ptrue(p2.VnB());                                                       \
    __ Ptrue(p3.VnB());                                                       \
    __ Ptrue(p4.VnB());                                                       \
    __ Ptrue(p5.VnB());                                                       \
    __ Ptrue(p6.VnB());                                                       \
    __ Ptrue(p7.VnB());                                                       \
    __ Ptrue(p8.VnB());                                                       \
    __ Ptrue(p9.VnB());                                                       \
    __ Ptrue(p10.VnB());                                                      \
    __ Ptrue(p11.VnB());                                                      \
    __ Ptrue(p12.VnB());                                                      \
    __ Ptrue(p13.VnB());                                                      \
    __ Ptrue(p14.VnB());                                                      \
    __ Ptrue(p15.VnB());                                                      \
  }                                                                           \
  Label l_start, l_end;                                                       \
  __ Bind(&l_start);
174 
// Close the region opened by START_IMPLICIT_CHECK(): bind `l_end`, copy the
// fault counter from x1 into x0 so it can be read back after the run, and emit
// the normal END() epilogue. The expansion already ends with a semicolon.
#define END_IMPLICIT_CHECK() \
  __ Bind(&l_end);           \
  /* Return the counter. */  \
  __ Mov(x0, x1);            \
  END();
180 
// Run an implicit-check test and verify the fault count. The code size between
// `l_start` and `l_end` is computed as the difference of the two
// GetSizeOfCodeGeneratedSince() values; the counter read from x0, multiplied
// by kInstructionSize, must equal that size. Declares `skipped`, `result` and
// `num_of_faulting_instr` in the enclosing scope.
#define TRY_RUN_IMPLICIT_CHECK()                                              \
  bool skipped;                                                               \
  TRY_RUN(&skipped);                                                          \
  /* Implicit checks should only be used with the simulator. */               \
  VIXL_ASSERT(!skipped);                                                      \
  /* Check that each load/store instruction generated a segfault that was */  \
  /* raised and dealt with. */                                                \
  size_t result = simulator.ReadXRegister(0);                                 \
  size_t num_of_faulting_instr = masm.GetSizeOfCodeGeneratedSince(&l_start) - \
                                 masm.GetSizeOfCodeGeneratedSince(&l_end);    \
  VIXL_CHECK((result * kInstructionSize) == num_of_faulting_instr);
192 
193 #endif  // VIXL_ENABLE_IMPLICIT_CHECKS
194 
195 #else  // VIXL_INCLUDE_SIMULATOR_AARCH64
196 
// Native build (no simulator): declare `masm`, install the requested
// CPUFeatures on it and call CPU::SetUp(). The expansion has no trailing
// semicolon; the caller supplies it.
#define SETUP_WITH_FEATURES(...)                 \
  MacroAssembler masm;                           \
  masm.SetCPUFeatures(CPUFeatures(__VA_ARGS__)); \
  CPU::SetUp()
201 
// Native build: reset the assembler and emit the callee-saved register push.
// No MTE marker or trace pseudo-instructions are emitted in this variant.
#define START() \
  masm.Reset(); \
  __ PushCalleeSavedRegisters()
205 
// Native build: emit the callee-saved register pop and the return, then
// finalize the generated code.
#define END()                   \
  __ PopCalleeSavedRegisters(); \
  __ Ret();                     \
  masm.FinalizeCode()
210 
// Native build: optionally disassemble the generated code, then execute it
// directly only if it needs nothing beyond CPUFeatures::AArch64LegacyBaseline().
// The required features are collected by decoding the whole code buffer
// through a CPUFeaturesAuditor. `skipped` is a bool*: it is set to false when
// the code was executed, and to true (after printing the missing features)
// when it was not.
#define TRY_RUN(skipped)                                                      \
  DISASSEMBLE();                                                              \
  /* If the test uses features that the current CPU doesn't support, don't */ \
  /* attempt to run it natively.                                           */ \
  {                                                                           \
    Decoder decoder;                                                          \
    /* TODO: Once available, use runtime feature detection. The use of  */    \
    /* AArch64LegacyBaseline is a stopgap.                              */    \
    const CPUFeatures& this_machine = CPUFeatures::AArch64LegacyBaseline();   \
    CPUFeaturesAuditor auditor(&decoder, this_machine);                       \
    CodeBuffer* buffer = masm.GetBuffer();                                    \
    decoder.Decode(buffer->GetStartAddress<Instruction*>(),                   \
                   buffer->GetEndAddress<Instruction*>());                    \
    const CPUFeatures& requirements = auditor.GetSeenFeatures();              \
    if (this_machine.Has(requirements)) {                                     \
      masm.GetBuffer()->SetExecutable();                                      \
      ExecuteMemory(buffer->GetStartAddress<byte*>(),                         \
                    masm.GetSizeOfCodeGenerated());                           \
      masm.GetBuffer()->SetWritable();                                        \
      *skipped = false;                                                       \
    } else {                                                                  \
      std::stringstream os;                                                   \
      /* Note: This message needs to match REGEXP_MISSING_FEATURES from    */ \
      /* tools/threaded_test.py.                                           */ \
      os << "SKIPPED: Missing features: { ";                                  \
      os << requirements.Without(this_machine) << " }\n";                     \
      printf("%s", os.str().c_str());                                         \
      *skipped = true;                                                        \
    }                                                                         \
  }
241 
242 
243 #endif  // VIXL_INCLUDE_SIMULATOR_AARCH64
244 
245 
// When Test::disassemble() is set, print a disassembly of the whole code
// buffer of `masm` to stdout. Used by both variants of TRY_RUN; it is defined
// after them, which works because macros are expanded at the point of use.
#define DISASSEMBLE()                                             \
  if (Test::disassemble()) {                                      \
    PrintDisassembler disasm(stdout);                             \
    CodeBuffer* buffer = masm.GetBuffer();                        \
    Instruction* start = buffer->GetStartAddress<Instruction*>(); \
    Instruction* end = buffer->GetEndAddress<Instruction*>();     \
    disasm.DisassembleBuffer(start, end);                         \
  }
254 
// Per-test cap on the number of mismatches printed in full; any further
// mismatches are only counted.
static constexpr unsigned kErrorReportLimit = 8;
257 
258 
259 // Overloaded versions of RawbitsToDouble and RawbitsToFloat for use in the
260 // templated test functions.
rawbits_to_fp(uint32_t bits)261 static float rawbits_to_fp(uint32_t bits) { return RawbitsToFloat(bits); }
262 
rawbits_to_fp(uint64_t bits)263 static double rawbits_to_fp(uint64_t bits) { return RawbitsToDouble(bits); }
264 
265 // The rawbits_to_fp functions are only used for printing decimal values so we
266 // just approximate FP16 as double.
rawbits_to_fp(uint16_t bits)267 static double rawbits_to_fp(uint16_t bits) {
268   return FPToDouble(RawbitsToFloat16(bits), kIgnoreDefaultNaN);
269 }
270 
271 
272 // MacroAssembler member function pointers to pass to the test dispatchers.
273 typedef void (MacroAssembler::*Test1OpFPHelper_t)(const VRegister& fd,
274                                                   const VRegister& fn);
275 typedef void (MacroAssembler::*Test2OpFPHelper_t)(const VRegister& fd,
276                                                   const VRegister& fn,
277                                                   const VRegister& fm);
278 typedef void (MacroAssembler::*Test3OpFPHelper_t)(const VRegister& fd,
279                                                   const VRegister& fn,
280                                                   const VRegister& fm,
281                                                   const VRegister& fa);
282 typedef void (MacroAssembler::*TestFPCmpHelper_t)(const VRegister& fn,
283                                                   const VRegister& fm);
284 typedef void (MacroAssembler::*TestFPCmpZeroHelper_t)(const VRegister& fn,
285                                                       double value);
286 typedef void (MacroAssembler::*TestFPToIntHelper_t)(const Register& rd,
287                                                     const VRegister& fn);
288 typedef void (MacroAssembler::*TestFPToFixedHelper_t)(const Register& rd,
289                                                       const VRegister& fn,
290                                                       int fbits);
291 typedef void (MacroAssembler::*TestFixedToFPHelper_t)(const VRegister& fd,
292                                                       const Register& rn,
293                                                       int fbits);
294 // TODO: 'Test2OpNEONHelper_t' and 'Test2OpFPHelper_t' can be
295 //       consolidated into one routine.
296 typedef void (MacroAssembler::*Test1OpNEONHelper_t)(const VRegister& vd,
297                                                     const VRegister& vn);
298 typedef void (MacroAssembler::*Test2OpNEONHelper_t)(const VRegister& vd,
299                                                     const VRegister& vn,
300                                                     const VRegister& vm);
301 typedef void (MacroAssembler::*TestByElementNEONHelper_t)(const VRegister& vd,
302                                                           const VRegister& vn,
303                                                           const VRegister& vm,
304                                                           int vm_index);
305 typedef void (MacroAssembler::*TestOpImmOpImmVdUpdateNEONHelper_t)(
306     const VRegister& vd, int imm1, const VRegister& vn, int imm2);
307 
308 // This helps using the same typename for both the function pointer
309 // and the array of immediates passed to helper routines.
310 template <typename T>
311 class Test2OpImmediateNEONHelper_t {
312  public:
313   typedef void (MacroAssembler::*mnemonic)(const VRegister& vd,
314                                            const VRegister& vn,
315                                            T imm);
316 };
317 
318 
// Number of hexadecimal digits needed to print a value of the wider of the two
// template types (four bits per digit).
template <typename Ta, typename Tb>
static unsigned MaxHexCharCount() {
  const size_t widest_in_bytes =
      (sizeof(Ta) < sizeof(Tb)) ? sizeof(Tb) : sizeof(Ta);
  const unsigned widest_in_bits = static_cast<unsigned>(widest_in_bytes) * 8;
  return widest_in_bits / 4;
}
326 
327 
328 // Standard test dispatchers.
329 
330 
Test1Op_Helper(Test1OpFPHelper_t helper,uintptr_t inputs,unsigned inputs_length,uintptr_t results,unsigned d_size,unsigned n_size,bool * skipped)331 static void Test1Op_Helper(Test1OpFPHelper_t helper,
332                            uintptr_t inputs,
333                            unsigned inputs_length,
334                            uintptr_t results,
335                            unsigned d_size,
336                            unsigned n_size,
337                            bool* skipped) {
338   VIXL_ASSERT((d_size == kDRegSize) || (d_size == kSRegSize) ||
339               (d_size == kHRegSize));
340   VIXL_ASSERT((n_size == kDRegSize) || (n_size == kSRegSize) ||
341               (n_size == kHRegSize));
342 
343   CPUFeatures features;
344   features.Combine(CPUFeatures::kFP, CPUFeatures::kFPHalf);
345   // For frint{32,64}{x,y} variants.
346   features.Combine(CPUFeatures::kFrintToFixedSizedInt);
347   SETUP_WITH_FEATURES(features);
348   START();
349 
350   // Roll up the loop to keep the code size down.
351   Label loop_n;
352 
353   Register out = x0;
354   Register inputs_base = x1;
355   Register length = w2;
356   Register index_n = w3;
357 
358   int n_index_shift;
359   VRegister fd;
360   VRegister fn;
361   if (n_size == kDRegSize) {
362     n_index_shift = kDRegSizeInBytesLog2;
363     fn = d1;
364   } else if (n_size == kSRegSize) {
365     n_index_shift = kSRegSizeInBytesLog2;
366     fn = s1;
367   } else {
368     n_index_shift = kHRegSizeInBytesLog2;
369     fn = h1;
370   }
371 
372   if (d_size == kDRegSize) {
373     fd = d0;
374   } else if (d_size == kSRegSize) {
375     fd = s0;
376   } else {
377     fd = h0;
378   }
379 
380 
381   __ Mov(out, results);
382   __ Mov(inputs_base, inputs);
383   __ Mov(length, inputs_length);
384 
385   __ Mov(index_n, 0);
386   __ Bind(&loop_n);
387   __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, n_index_shift));
388 
389   {
390     SingleEmissionCheckScope guard(&masm);
391     (masm.*helper)(fd, fn);
392   }
393   __ Str(fd, MemOperand(out, fd.GetSizeInBytes(), PostIndex));
394 
395   __ Add(index_n, index_n, 1);
396   __ Cmp(index_n, inputs_length);
397   __ B(lo, &loop_n);
398 
399   END();
400   TRY_RUN(skipped);
401 }
402 
403 
404 // Test FP instructions. The inputs[] and expected[] arrays should be arrays of
405 // rawbits representations of doubles or floats. This ensures that exact bit
406 // comparisons can be performed.
407 template <typename Tn, typename Td>
Test1Op(const char * name,Test1OpFPHelper_t helper,const Tn inputs[],unsigned inputs_length,const Td expected[],unsigned expected_length)408 static void Test1Op(const char* name,
409                     Test1OpFPHelper_t helper,
410                     const Tn inputs[],
411                     unsigned inputs_length,
412                     const Td expected[],
413                     unsigned expected_length) {
414   VIXL_ASSERT(inputs_length > 0);
415 
416   const unsigned results_length = inputs_length;
417   Td* results = new Td[results_length];
418 
419   const unsigned d_bits = sizeof(Td) * 8;
420   const unsigned n_bits = sizeof(Tn) * 8;
421   bool skipped;
422 
423   Test1Op_Helper(helper,
424                  reinterpret_cast<uintptr_t>(inputs),
425                  inputs_length,
426                  reinterpret_cast<uintptr_t>(results),
427                  d_bits,
428                  n_bits,
429                  &skipped);
430 
431   if (Test::generate_test_trace()) {
432     // Print the results.
433     printf("const uint%u_t kExpected_%s[] = {\n", d_bits, name);
434     for (unsigned d = 0; d < results_length; d++) {
435       printf("  0x%0*" PRIx64 ",\n",
436              d_bits / 4,
437              static_cast<uint64_t>(results[d]));
438     }
439     printf("};\n");
440     printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
441   } else if (!skipped) {
442     // Check the results.
443     VIXL_CHECK(expected_length == results_length);
444     unsigned error_count = 0;
445     unsigned d = 0;
446     for (unsigned n = 0; n < inputs_length; n++, d++) {
447       if (results[d] != expected[d]) {
448         if (++error_count > kErrorReportLimit) continue;
449 
450         printf("%s 0x%0*" PRIx64 " (%s %g):\n",
451                name,
452                n_bits / 4,
453                static_cast<uint64_t>(inputs[n]),
454                name,
455                rawbits_to_fp(inputs[n]));
456         printf("  Expected: 0x%0*" PRIx64 " (%g)\n",
457                d_bits / 4,
458                static_cast<uint64_t>(expected[d]),
459                rawbits_to_fp(expected[d]));
460         printf("  Found:    0x%0*" PRIx64 " (%g)\n",
461                d_bits / 4,
462                static_cast<uint64_t>(results[d]),
463                rawbits_to_fp(results[d]));
464         printf("\n");
465       }
466     }
467     VIXL_ASSERT(d == expected_length);
468     if (error_count > kErrorReportLimit) {
469       printf("%u other errors follow.\n", error_count - kErrorReportLimit);
470     }
471     VIXL_CHECK(error_count == 0);
472   }
473   delete[] results;
474 }
475 
476 
Test2Op_Helper(Test2OpFPHelper_t helper,uintptr_t inputs,unsigned inputs_length,uintptr_t results,unsigned reg_size,bool * skipped)477 static void Test2Op_Helper(Test2OpFPHelper_t helper,
478                            uintptr_t inputs,
479                            unsigned inputs_length,
480                            uintptr_t results,
481                            unsigned reg_size,
482                            bool* skipped) {
483   VIXL_ASSERT((reg_size == kDRegSize) || (reg_size == kSRegSize) ||
484               (reg_size == kHRegSize));
485 
486   SETUP_WITH_FEATURES(CPUFeatures::kFP, CPUFeatures::kFPHalf);
487   START();
488 
489   // Roll up the loop to keep the code size down.
490   Label loop_n, loop_m;
491 
492   Register out = x0;
493   Register inputs_base = x1;
494   Register length = w2;
495   Register index_n = w3;
496   Register index_m = w4;
497 
498   bool double_op = reg_size == kDRegSize;
499   bool float_op = reg_size == kSRegSize;
500   int index_shift;
501   if (double_op) {
502     index_shift = kDRegSizeInBytesLog2;
503   } else if (float_op) {
504     index_shift = kSRegSizeInBytesLog2;
505   } else {
506     index_shift = kHRegSizeInBytesLog2;
507   }
508 
509   VRegister fd;
510   VRegister fn;
511   VRegister fm;
512 
513   if (double_op) {
514     fd = d0;
515     fn = d1;
516     fm = d2;
517   } else if (float_op) {
518     fd = s0;
519     fn = s1;
520     fm = s2;
521   } else {
522     fd = h0;
523     fn = h1;
524     fm = h2;
525   }
526 
527   __ Mov(out, results);
528   __ Mov(inputs_base, inputs);
529   __ Mov(length, inputs_length);
530 
531   __ Mov(index_n, 0);
532   __ Bind(&loop_n);
533   __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, index_shift));
534 
535   __ Mov(index_m, 0);
536   __ Bind(&loop_m);
537   __ Ldr(fm, MemOperand(inputs_base, index_m, UXTW, index_shift));
538 
539   {
540     SingleEmissionCheckScope guard(&masm);
541     (masm.*helper)(fd, fn, fm);
542   }
543   __ Str(fd, MemOperand(out, fd.GetSizeInBytes(), PostIndex));
544 
545   __ Add(index_m, index_m, 1);
546   __ Cmp(index_m, inputs_length);
547   __ B(lo, &loop_m);
548 
549   __ Add(index_n, index_n, 1);
550   __ Cmp(index_n, inputs_length);
551   __ B(lo, &loop_n);
552 
553   END();
554   TRY_RUN(skipped);
555 }
556 
557 
558 // Test FP instructions. The inputs[] and expected[] arrays should be arrays of
559 // rawbits representations of doubles or floats. This ensures that exact bit
560 // comparisons can be performed.
561 template <typename T>
Test2Op(const char * name,Test2OpFPHelper_t helper,const T inputs[],unsigned inputs_length,const T expected[],unsigned expected_length)562 static void Test2Op(const char* name,
563                     Test2OpFPHelper_t helper,
564                     const T inputs[],
565                     unsigned inputs_length,
566                     const T expected[],
567                     unsigned expected_length) {
568   VIXL_ASSERT(inputs_length > 0);
569 
570   const unsigned results_length = inputs_length * inputs_length;
571   T* results = new T[results_length];
572 
573   const unsigned bits = sizeof(T) * 8;
574   bool skipped;
575 
576   Test2Op_Helper(helper,
577                  reinterpret_cast<uintptr_t>(inputs),
578                  inputs_length,
579                  reinterpret_cast<uintptr_t>(results),
580                  bits,
581                  &skipped);
582 
583   if (Test::generate_test_trace()) {
584     // Print the results.
585     printf("const uint%u_t kExpected_%s[] = {\n", bits, name);
586     for (unsigned d = 0; d < results_length; d++) {
587       printf("  0x%0*" PRIx64 ",\n",
588              bits / 4,
589              static_cast<uint64_t>(results[d]));
590     }
591     printf("};\n");
592     printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
593   } else if (!skipped) {
594     // Check the results.
595     VIXL_CHECK(expected_length == results_length);
596     unsigned error_count = 0;
597     unsigned d = 0;
598     for (unsigned n = 0; n < inputs_length; n++) {
599       for (unsigned m = 0; m < inputs_length; m++, d++) {
600         if (results[d] != expected[d]) {
601           if (++error_count > kErrorReportLimit) continue;
602 
603           printf("%s 0x%0*" PRIx64 ", 0x%0*" PRIx64 " (%s %g %g):\n",
604                  name,
605                  bits / 4,
606                  static_cast<uint64_t>(inputs[n]),
607                  bits / 4,
608                  static_cast<uint64_t>(inputs[m]),
609                  name,
610                  rawbits_to_fp(inputs[n]),
611                  rawbits_to_fp(inputs[m]));
612           printf("  Expected: 0x%0*" PRIx64 " (%g)\n",
613                  bits / 4,
614                  static_cast<uint64_t>(expected[d]),
615                  rawbits_to_fp(expected[d]));
616           printf("  Found:    0x%0*" PRIx64 " (%g)\n",
617                  bits / 4,
618                  static_cast<uint64_t>(results[d]),
619                  rawbits_to_fp(results[d]));
620           printf("\n");
621         }
622       }
623     }
624     VIXL_ASSERT(d == expected_length);
625     if (error_count > kErrorReportLimit) {
626       printf("%u other errors follow.\n", error_count - kErrorReportLimit);
627     }
628     VIXL_CHECK(error_count == 0);
629   }
630   delete[] results;
631 }
632 
633 
Test3Op_Helper(Test3OpFPHelper_t helper,uintptr_t inputs,unsigned inputs_length,uintptr_t results,unsigned reg_size,bool * skipped)634 static void Test3Op_Helper(Test3OpFPHelper_t helper,
635                            uintptr_t inputs,
636                            unsigned inputs_length,
637                            uintptr_t results,
638                            unsigned reg_size,
639                            bool* skipped) {
640   VIXL_ASSERT((reg_size == kDRegSize) || (reg_size == kSRegSize) ||
641               (reg_size == kHRegSize));
642 
643   SETUP_WITH_FEATURES(CPUFeatures::kFP, CPUFeatures::kFPHalf);
644   START();
645 
646   // Roll up the loop to keep the code size down.
647   Label loop_n, loop_m, loop_a;
648 
649   Register out = x0;
650   Register inputs_base = x1;
651   Register length = w2;
652   Register index_n = w3;
653   Register index_m = w4;
654   Register index_a = w5;
655 
656   bool double_op = reg_size == kDRegSize;
657   bool single_op = reg_size == kSRegSize;
658   int index_shift;
659   VRegister fd(0, reg_size);
660   VRegister fn(1, reg_size);
661   VRegister fm(2, reg_size);
662   VRegister fa(3, reg_size);
663   if (double_op) {
664     index_shift = kDRegSizeInBytesLog2;
665   } else if (single_op) {
666     index_shift = kSRegSizeInBytesLog2;
667   } else {
668     index_shift = kHRegSizeInBytesLog2;
669   }
670 
671   __ Mov(out, results);
672   __ Mov(inputs_base, inputs);
673   __ Mov(length, inputs_length);
674 
675   __ Mov(index_n, 0);
676   __ Bind(&loop_n);
677   __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, index_shift));
678 
679   __ Mov(index_m, 0);
680   __ Bind(&loop_m);
681   __ Ldr(fm, MemOperand(inputs_base, index_m, UXTW, index_shift));
682 
683   __ Mov(index_a, 0);
684   __ Bind(&loop_a);
685   __ Ldr(fa, MemOperand(inputs_base, index_a, UXTW, index_shift));
686 
687   {
688     SingleEmissionCheckScope guard(&masm);
689     (masm.*helper)(fd, fn, fm, fa);
690   }
691   __ Str(fd, MemOperand(out, fd.GetSizeInBytes(), PostIndex));
692 
693   __ Add(index_a, index_a, 1);
694   __ Cmp(index_a, inputs_length);
695   __ B(lo, &loop_a);
696 
697   __ Add(index_m, index_m, 1);
698   __ Cmp(index_m, inputs_length);
699   __ B(lo, &loop_m);
700 
701   __ Add(index_n, index_n, 1);
702   __ Cmp(index_n, inputs_length);
703   __ B(lo, &loop_n);
704 
705   END();
706   TRY_RUN(skipped);
707 }
708 
709 
710 // Test FP instructions. The inputs[] and expected[] arrays should be arrays of
711 // rawbits representations of doubles or floats. This ensures that exact bit
712 // comparisons can be performed.
713 template <typename T>
Test3Op(const char * name,Test3OpFPHelper_t helper,const T inputs[],unsigned inputs_length,const T expected[],unsigned expected_length)714 static void Test3Op(const char* name,
715                     Test3OpFPHelper_t helper,
716                     const T inputs[],
717                     unsigned inputs_length,
718                     const T expected[],
719                     unsigned expected_length) {
720   VIXL_ASSERT(inputs_length > 0);
721 
722   const unsigned results_length = inputs_length * inputs_length * inputs_length;
723   T* results = new T[results_length];
724 
725   const unsigned bits = sizeof(T) * 8;
726   bool skipped;
727 
728   Test3Op_Helper(helper,
729                  reinterpret_cast<uintptr_t>(inputs),
730                  inputs_length,
731                  reinterpret_cast<uintptr_t>(results),
732                  bits,
733                  &skipped);
734 
735   if (Test::generate_test_trace()) {
736     // Print the results.
737     printf("const uint%u_t kExpected_%s[] = {\n", bits, name);
738     for (unsigned d = 0; d < results_length; d++) {
739       printf("  0x%0*" PRIx64 ",\n",
740              bits / 4,
741              static_cast<uint64_t>(results[d]));
742     }
743     printf("};\n");
744     printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
745   } else if (!skipped) {
746     // Check the results.
747     VIXL_CHECK(expected_length == results_length);
748     unsigned error_count = 0;
749     unsigned d = 0;
750     for (unsigned n = 0; n < inputs_length; n++) {
751       for (unsigned m = 0; m < inputs_length; m++) {
752         for (unsigned a = 0; a < inputs_length; a++, d++) {
753           if (results[d] != expected[d]) {
754             if (++error_count > kErrorReportLimit) continue;
755 
756             printf("%s 0x%0*" PRIx64 ", 0x%0*" PRIx64 ", 0x%0*" PRIx64
757                    " (%s %g %g %g):\n",
758                    name,
759                    bits / 4,
760                    static_cast<uint64_t>(inputs[n]),
761                    bits / 4,
762                    static_cast<uint64_t>(inputs[m]),
763                    bits / 4,
764                    static_cast<uint64_t>(inputs[a]),
765                    name,
766                    rawbits_to_fp(inputs[n]),
767                    rawbits_to_fp(inputs[m]),
768                    rawbits_to_fp(inputs[a]));
769             printf("  Expected: 0x%0*" PRIx64 " (%g)\n",
770                    bits / 4,
771                    static_cast<uint64_t>(expected[d]),
772                    rawbits_to_fp(expected[d]));
773             printf("  Found:    0x%0*" PRIx64 " (%g)\n",
774                    bits / 4,
775                    static_cast<uint64_t>(results[d]),
776                    rawbits_to_fp(results[d]));
777             printf("\n");
778           }
779         }
780       }
781     }
782     VIXL_ASSERT(d == expected_length);
783     if (error_count > kErrorReportLimit) {
784       printf("%u other errors follow.\n", error_count - kErrorReportLimit);
785     }
786     VIXL_CHECK(error_count == 0);
787   }
788   delete[] results;
789 }
790 
791 
TestCmp_Helper(TestFPCmpHelper_t helper,uintptr_t inputs,unsigned inputs_length,uintptr_t results,unsigned reg_size,bool * skipped)792 static void TestCmp_Helper(TestFPCmpHelper_t helper,
793                            uintptr_t inputs,
794                            unsigned inputs_length,
795                            uintptr_t results,
796                            unsigned reg_size,
797                            bool* skipped) {
798   VIXL_ASSERT((reg_size == kDRegSize) || (reg_size == kSRegSize));
799 
800   SETUP_WITH_FEATURES(CPUFeatures::kFP);
801   START();
802 
803   // Roll up the loop to keep the code size down.
804   Label loop_n, loop_m;
805 
806   Register out = x0;
807   Register inputs_base = x1;
808   Register length = w2;
809   Register index_n = w3;
810   Register index_m = w4;
811   Register flags = x5;
812 
813   bool double_op = reg_size == kDRegSize;
814   const int index_shift =
815       double_op ? kDRegSizeInBytesLog2 : kSRegSizeInBytesLog2;
816 
817   VRegister fn = double_op ? d1 : s1;
818   VRegister fm = double_op ? d2 : s2;
819 
820   __ Mov(out, results);
821   __ Mov(inputs_base, inputs);
822   __ Mov(length, inputs_length);
823 
824   __ Mov(index_n, 0);
825   __ Bind(&loop_n);
826   __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, index_shift));
827 
828   __ Mov(index_m, 0);
829   __ Bind(&loop_m);
830   __ Ldr(fm, MemOperand(inputs_base, index_m, UXTW, index_shift));
831 
832   {
833     SingleEmissionCheckScope guard(&masm);
834     (masm.*helper)(fn, fm);
835   }
836   __ Mrs(flags, NZCV);
837   __ Ubfx(flags, flags, 28, 4);
838   __ Strb(flags, MemOperand(out, 1, PostIndex));
839 
840   __ Add(index_m, index_m, 1);
841   __ Cmp(index_m, inputs_length);
842   __ B(lo, &loop_m);
843 
844   __ Add(index_n, index_n, 1);
845   __ Cmp(index_n, inputs_length);
846   __ B(lo, &loop_n);
847 
848   END();
849   TRY_RUN(skipped);
850 }
851 
852 
853 // Test FP instructions. The inputs[] and expected[] arrays should be arrays of
854 // rawbits representations of doubles or floats. This ensures that exact bit
855 // comparisons can be performed.
856 template <typename T>
TestCmp(const char * name,TestFPCmpHelper_t helper,const T inputs[],unsigned inputs_length,const uint8_t expected[],unsigned expected_length)857 static void TestCmp(const char* name,
858                     TestFPCmpHelper_t helper,
859                     const T inputs[],
860                     unsigned inputs_length,
861                     const uint8_t expected[],
862                     unsigned expected_length) {
863   VIXL_ASSERT(inputs_length > 0);
864 
865   const unsigned results_length = inputs_length * inputs_length;
866   uint8_t* results = new uint8_t[results_length];
867 
868   const unsigned bits = sizeof(T) * 8;
869   bool skipped;
870 
871   TestCmp_Helper(helper,
872                  reinterpret_cast<uintptr_t>(inputs),
873                  inputs_length,
874                  reinterpret_cast<uintptr_t>(results),
875                  bits,
876                  &skipped);
877 
878   if (Test::generate_test_trace()) {
879     // Print the results.
880     printf("const uint8_t kExpected_%s[] = {\n", name);
881     for (unsigned d = 0; d < results_length; d++) {
882       // Each NZCV result only requires 4 bits.
883       VIXL_ASSERT((results[d] & 0xf) == results[d]);
884       printf("  0x%" PRIx8 ",\n", results[d]);
885     }
886     printf("};\n");
887     printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
888   } else if (!skipped) {
889     // Check the results.
890     VIXL_CHECK(expected_length == results_length);
891     unsigned error_count = 0;
892     unsigned d = 0;
893     for (unsigned n = 0; n < inputs_length; n++) {
894       for (unsigned m = 0; m < inputs_length; m++, d++) {
895         if (results[d] != expected[d]) {
896           if (++error_count > kErrorReportLimit) continue;
897 
898           printf("%s 0x%0*" PRIx64 ", 0x%0*" PRIx64 " (%s %g %g):\n",
899                  name,
900                  bits / 4,
901                  static_cast<uint64_t>(inputs[n]),
902                  bits / 4,
903                  static_cast<uint64_t>(inputs[m]),
904                  name,
905                  rawbits_to_fp(inputs[n]),
906                  rawbits_to_fp(inputs[m]));
907           printf("  Expected: %c%c%c%c (0x%" PRIx8 ")\n",
908                  (expected[d] & 0x8) ? 'N' : 'n',
909                  (expected[d] & 0x4) ? 'Z' : 'z',
910                  (expected[d] & 0x2) ? 'C' : 'c',
911                  (expected[d] & 0x1) ? 'V' : 'v',
912                  expected[d]);
913           printf("  Found:    %c%c%c%c (0x%" PRIx8 ")\n",
914                  (results[d] & 0x8) ? 'N' : 'n',
915                  (results[d] & 0x4) ? 'Z' : 'z',
916                  (results[d] & 0x2) ? 'C' : 'c',
917                  (results[d] & 0x1) ? 'V' : 'v',
918                  results[d]);
919           printf("\n");
920         }
921       }
922     }
923     VIXL_ASSERT(d == expected_length);
924     if (error_count > kErrorReportLimit) {
925       printf("%u other errors follow.\n", error_count - kErrorReportLimit);
926     }
927     VIXL_CHECK(error_count == 0);
928   }
929   delete[] results;
930 }
931 
932 
TestCmpZero_Helper(TestFPCmpZeroHelper_t helper,uintptr_t inputs,unsigned inputs_length,uintptr_t results,unsigned reg_size,bool * skipped)933 static void TestCmpZero_Helper(TestFPCmpZeroHelper_t helper,
934                                uintptr_t inputs,
935                                unsigned inputs_length,
936                                uintptr_t results,
937                                unsigned reg_size,
938                                bool* skipped) {
939   VIXL_ASSERT((reg_size == kDRegSize) || (reg_size == kSRegSize));
940 
941   SETUP_WITH_FEATURES(CPUFeatures::kFP);
942   START();
943 
944   // Roll up the loop to keep the code size down.
945   Label loop_n, loop_m;
946 
947   Register out = x0;
948   Register inputs_base = x1;
949   Register length = w2;
950   Register index_n = w3;
951   Register flags = x4;
952 
953   bool double_op = reg_size == kDRegSize;
954   const int index_shift =
955       double_op ? kDRegSizeInBytesLog2 : kSRegSizeInBytesLog2;
956 
957   VRegister fn = double_op ? d1 : s1;
958 
959   __ Mov(out, results);
960   __ Mov(inputs_base, inputs);
961   __ Mov(length, inputs_length);
962 
963   __ Mov(index_n, 0);
964   __ Bind(&loop_n);
965   __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, index_shift));
966 
967   {
968     SingleEmissionCheckScope guard(&masm);
969     (masm.*helper)(fn, 0.0);
970   }
971   __ Mrs(flags, NZCV);
972   __ Ubfx(flags, flags, 28, 4);
973   __ Strb(flags, MemOperand(out, 1, PostIndex));
974 
975   __ Add(index_n, index_n, 1);
976   __ Cmp(index_n, inputs_length);
977   __ B(lo, &loop_n);
978 
979   END();
980   TRY_RUN(skipped);
981 }
982 
983 
984 // Test FP instructions. The inputs[] and expected[] arrays should be arrays of
985 // rawbits representations of doubles or floats. This ensures that exact bit
986 // comparisons can be performed.
987 template <typename T>
TestCmpZero(const char * name,TestFPCmpZeroHelper_t helper,const T inputs[],unsigned inputs_length,const uint8_t expected[],unsigned expected_length)988 static void TestCmpZero(const char* name,
989                         TestFPCmpZeroHelper_t helper,
990                         const T inputs[],
991                         unsigned inputs_length,
992                         const uint8_t expected[],
993                         unsigned expected_length) {
994   VIXL_ASSERT(inputs_length > 0);
995 
996   const unsigned results_length = inputs_length;
997   uint8_t* results = new uint8_t[results_length];
998 
999   const unsigned bits = sizeof(T) * 8;
1000   bool skipped;
1001 
1002   TestCmpZero_Helper(helper,
1003                      reinterpret_cast<uintptr_t>(inputs),
1004                      inputs_length,
1005                      reinterpret_cast<uintptr_t>(results),
1006                      bits,
1007                      &skipped);
1008 
1009   if (Test::generate_test_trace()) {
1010     // Print the results.
1011     printf("const uint8_t kExpected_%s[] = {\n", name);
1012     for (unsigned d = 0; d < results_length; d++) {
1013       // Each NZCV result only requires 4 bits.
1014       VIXL_ASSERT((results[d] & 0xf) == results[d]);
1015       printf("  0x%" PRIx8 ",\n", results[d]);
1016     }
1017     printf("};\n");
1018     printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
1019   } else if (!skipped) {
1020     // Check the results.
1021     VIXL_CHECK(expected_length == results_length);
1022     unsigned error_count = 0;
1023     unsigned d = 0;
1024     for (unsigned n = 0; n < inputs_length; n++, d++) {
1025       if (results[d] != expected[d]) {
1026         if (++error_count > kErrorReportLimit) continue;
1027 
1028         printf("%s 0x%0*" PRIx64 ", 0x%0*u (%s %g #0.0):\n",
1029                name,
1030                bits / 4,
1031                static_cast<uint64_t>(inputs[n]),
1032                bits / 4,
1033                0,
1034                name,
1035                rawbits_to_fp(inputs[n]));
1036         printf("  Expected: %c%c%c%c (0x%" PRIx8 ")\n",
1037                (expected[d] & 0x8) ? 'N' : 'n',
1038                (expected[d] & 0x4) ? 'Z' : 'z',
1039                (expected[d] & 0x2) ? 'C' : 'c',
1040                (expected[d] & 0x1) ? 'V' : 'v',
1041                expected[d]);
1042         printf("  Found:    %c%c%c%c (0x%" PRIx8 ")\n",
1043                (results[d] & 0x8) ? 'N' : 'n',
1044                (results[d] & 0x4) ? 'Z' : 'z',
1045                (results[d] & 0x2) ? 'C' : 'c',
1046                (results[d] & 0x1) ? 'V' : 'v',
1047                results[d]);
1048         printf("\n");
1049       }
1050     }
1051     VIXL_ASSERT(d == expected_length);
1052     if (error_count > kErrorReportLimit) {
1053       printf("%u other errors follow.\n", error_count - kErrorReportLimit);
1054     }
1055     VIXL_CHECK(error_count == 0);
1056   }
1057   delete[] results;
1058 }
1059 
1060 
TestFPToFixed_Helper(TestFPToFixedHelper_t helper,uintptr_t inputs,unsigned inputs_length,uintptr_t results,unsigned d_size,unsigned n_size,bool * skipped)1061 static void TestFPToFixed_Helper(TestFPToFixedHelper_t helper,
1062                                  uintptr_t inputs,
1063                                  unsigned inputs_length,
1064                                  uintptr_t results,
1065                                  unsigned d_size,
1066                                  unsigned n_size,
1067                                  bool* skipped) {
1068   VIXL_ASSERT((d_size == kXRegSize) || (d_size == kWRegSize));
1069   VIXL_ASSERT((n_size == kDRegSize) || (n_size == kSRegSize) ||
1070               (n_size == kHRegSize));
1071 
1072   SETUP_WITH_FEATURES(CPUFeatures::kFP, CPUFeatures::kFPHalf);
1073   START();
1074 
1075   // Roll up the loop to keep the code size down.
1076   Label loop_n;
1077 
1078   Register out = x0;
1079   Register inputs_base = x1;
1080   Register length = w2;
1081   Register index_n = w3;
1082 
1083   int n_index_shift;
1084   if (n_size == kDRegSize) {
1085     n_index_shift = kDRegSizeInBytesLog2;
1086   } else if (n_size == kSRegSize) {
1087     n_index_shift = kSRegSizeInBytesLog2;
1088   } else {
1089     n_index_shift = kHRegSizeInBytesLog2;
1090   }
1091 
1092   Register rd = (d_size == kXRegSize) ? Register(x10) : Register(w10);
1093   VRegister fn;
1094   if (n_size == kDRegSize) {
1095     fn = d1;
1096   } else if (n_size == kSRegSize) {
1097     fn = s1;
1098   } else {
1099     fn = h1;
1100   }
1101 
1102   __ Mov(out, results);
1103   __ Mov(inputs_base, inputs);
1104   __ Mov(length, inputs_length);
1105 
1106   __ Mov(index_n, 0);
1107   __ Bind(&loop_n);
1108   __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, n_index_shift));
1109 
1110   for (unsigned fbits = 0; fbits <= d_size; ++fbits) {
1111     {
1112       SingleEmissionCheckScope guard(&masm);
1113       (masm.*helper)(rd, fn, fbits);
1114     }
1115     __ Str(rd, MemOperand(out, rd.GetSizeInBytes(), PostIndex));
1116   }
1117 
1118   __ Add(index_n, index_n, 1);
1119   __ Cmp(index_n, inputs_length);
1120   __ B(lo, &loop_n);
1121 
1122   END();
1123   TRY_RUN(skipped);
1124 }
1125 
1126 
TestFPToInt_Helper(TestFPToIntHelper_t helper,uintptr_t inputs,unsigned inputs_length,uintptr_t results,unsigned d_size,unsigned n_size,bool * skipped)1127 static void TestFPToInt_Helper(TestFPToIntHelper_t helper,
1128                                uintptr_t inputs,
1129                                unsigned inputs_length,
1130                                uintptr_t results,
1131                                unsigned d_size,
1132                                unsigned n_size,
1133                                bool* skipped) {
1134   VIXL_ASSERT((d_size == kXRegSize) || (d_size == kWRegSize));
1135   VIXL_ASSERT((n_size == kDRegSize) || (n_size == kSRegSize) ||
1136               (n_size == kHRegSize));
1137 
1138   SETUP_WITH_FEATURES(CPUFeatures::kFP,
1139                       CPUFeatures::kFPHalf,
1140                       CPUFeatures::kJSCVT);
1141   START();
1142 
1143   // Roll up the loop to keep the code size down.
1144   Label loop_n;
1145 
1146   Register out = x0;
1147   Register inputs_base = x1;
1148   Register length = w2;
1149   Register index_n = w3;
1150 
1151   int n_index_shift;
1152   if (n_size == kDRegSize) {
1153     n_index_shift = kDRegSizeInBytesLog2;
1154   } else if (n_size == kSRegSize) {
1155     n_index_shift = kSRegSizeInBytesLog2;
1156   } else {
1157     n_index_shift = kHRegSizeInBytesLog2;
1158   }
1159 
1160   Register rd = (d_size == kXRegSize) ? Register(x10) : Register(w10);
1161   VRegister fn;
1162   if (n_size == kDRegSize) {
1163     fn = d1;
1164   } else if (n_size == kSRegSize) {
1165     fn = s1;
1166   } else {
1167     fn = h1;
1168   }
1169 
1170   __ Mov(out, results);
1171   __ Mov(inputs_base, inputs);
1172   __ Mov(length, inputs_length);
1173 
1174   __ Mov(index_n, 0);
1175   __ Bind(&loop_n);
1176   __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, n_index_shift));
1177 
1178   {
1179     SingleEmissionCheckScope guard(&masm);
1180     (masm.*helper)(rd, fn);
1181   }
1182   __ Str(rd, MemOperand(out, rd.GetSizeInBytes(), PostIndex));
1183 
1184   __ Add(index_n, index_n, 1);
1185   __ Cmp(index_n, inputs_length);
1186   __ B(lo, &loop_n);
1187 
1188   END();
1189   TRY_RUN(skipped);
1190 }
1191 
1192 
1193 // Test FP instructions.
1194 //  - The inputs[] array should be an array of rawbits representations of
1195 //    doubles or floats. This ensures that exact bit comparisons can be
1196 //    performed.
1197 //  - The expected[] array should be an array of signed integers.
1198 template <typename Tn, typename Td>
TestFPToS(const char * name,TestFPToIntHelper_t helper,const Tn inputs[],unsigned inputs_length,const Td expected[],unsigned expected_length)1199 static void TestFPToS(const char* name,
1200                       TestFPToIntHelper_t helper,
1201                       const Tn inputs[],
1202                       unsigned inputs_length,
1203                       const Td expected[],
1204                       unsigned expected_length) {
1205   VIXL_ASSERT(inputs_length > 0);
1206 
1207   const unsigned results_length = inputs_length;
1208   Td* results = new Td[results_length];
1209 
1210   const unsigned d_bits = sizeof(Td) * 8;
1211   const unsigned n_bits = sizeof(Tn) * 8;
1212   bool skipped;
1213 
1214   TestFPToInt_Helper(helper,
1215                      reinterpret_cast<uintptr_t>(inputs),
1216                      inputs_length,
1217                      reinterpret_cast<uintptr_t>(results),
1218                      d_bits,
1219                      n_bits,
1220                      &skipped);
1221 
1222   if (Test::generate_test_trace()) {
1223     // Print the results.
1224     printf("const int%u_t kExpected_%s[] = {\n", d_bits, name);
1225     // There is no simple C++ literal for INT*_MIN that doesn't produce
1226     // warnings, so we use an appropriate constant in that case instead.
1227     // Deriving int_d_min in this way (rather than just checking INT64_MIN and
1228     // the like) avoids warnings about comparing values with differing ranges.
1229     const int64_t int_d_max = (UINT64_C(1) << (d_bits - 1)) - 1;
1230     const int64_t int_d_min = -(int_d_max)-1;
1231     for (unsigned d = 0; d < results_length; d++) {
1232       if (results[d] == int_d_min) {
1233         printf("  -INT%u_C(%" PRId64 ") - 1,\n", d_bits, int_d_max);
1234       } else {
1235         // Some constants (such as those between INT32_MAX and UINT32_MAX)
1236         // trigger compiler warnings. To avoid these warnings, use an
1237         // appropriate macro to make the type explicit.
1238         int64_t result_int64 = static_cast<int64_t>(results[d]);
1239         if (result_int64 >= 0) {
1240           printf("  INT%u_C(%" PRId64 "),\n", d_bits, result_int64);
1241         } else {
1242           printf("  -INT%u_C(%" PRId64 "),\n", d_bits, -result_int64);
1243         }
1244       }
1245     }
1246     printf("};\n");
1247     printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
1248   } else if (!skipped) {
1249     // Check the results.
1250     VIXL_CHECK(expected_length == results_length);
1251     unsigned error_count = 0;
1252     unsigned d = 0;
1253     for (unsigned n = 0; n < inputs_length; n++, d++) {
1254       if (results[d] != expected[d]) {
1255         if (++error_count > kErrorReportLimit) continue;
1256 
1257         printf("%s 0x%0*" PRIx64 " (%s %g):\n",
1258                name,
1259                n_bits / 4,
1260                static_cast<uint64_t>(inputs[n]),
1261                name,
1262                rawbits_to_fp(inputs[n]));
1263         printf("  Expected: 0x%0*" PRIx64 " (%" PRId64 ")\n",
1264                d_bits / 4,
1265                static_cast<uint64_t>(expected[d]),
1266                static_cast<int64_t>(expected[d]));
1267         printf("  Found:    0x%0*" PRIx64 " (%" PRId64 ")\n",
1268                d_bits / 4,
1269                static_cast<uint64_t>(results[d]),
1270                static_cast<int64_t>(results[d]));
1271         printf("\n");
1272       }
1273     }
1274     VIXL_ASSERT(d == expected_length);
1275     if (error_count > kErrorReportLimit) {
1276       printf("%u other errors follow.\n", error_count - kErrorReportLimit);
1277     }
1278     VIXL_CHECK(error_count == 0);
1279   }
1280   delete[] results;
1281 }
1282 
1283 
1284 // Test FP instructions.
1285 //  - The inputs[] array should be an array of rawbits representations of
1286 //    doubles or floats. This ensures that exact bit comparisons can be
1287 //    performed.
1288 //  - The expected[] array should be an array of unsigned integers.
1289 template <typename Tn, typename Td>
TestFPToU(const char * name,TestFPToIntHelper_t helper,const Tn inputs[],unsigned inputs_length,const Td expected[],unsigned expected_length)1290 static void TestFPToU(const char* name,
1291                       TestFPToIntHelper_t helper,
1292                       const Tn inputs[],
1293                       unsigned inputs_length,
1294                       const Td expected[],
1295                       unsigned expected_length) {
1296   VIXL_ASSERT(inputs_length > 0);
1297 
1298   const unsigned results_length = inputs_length;
1299   Td* results = new Td[results_length];
1300 
1301   const unsigned d_bits = sizeof(Td) * 8;
1302   const unsigned n_bits = sizeof(Tn) * 8;
1303   bool skipped;
1304 
1305   TestFPToInt_Helper(helper,
1306                      reinterpret_cast<uintptr_t>(inputs),
1307                      inputs_length,
1308                      reinterpret_cast<uintptr_t>(results),
1309                      d_bits,
1310                      n_bits,
1311                      &skipped);
1312 
1313   if (Test::generate_test_trace()) {
1314     // Print the results.
1315     printf("const uint%u_t kExpected_%s[] = {\n", d_bits, name);
1316     for (unsigned d = 0; d < results_length; d++) {
1317       printf("  %" PRIu64 "u,\n", static_cast<uint64_t>(results[d]));
1318     }
1319     printf("};\n");
1320     printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
1321   } else if (!skipped) {
1322     // Check the results.
1323     VIXL_CHECK(expected_length == results_length);
1324     unsigned error_count = 0;
1325     unsigned d = 0;
1326     for (unsigned n = 0; n < inputs_length; n++, d++) {
1327       if (results[d] != expected[d]) {
1328         if (++error_count > kErrorReportLimit) continue;
1329 
1330         printf("%s 0x%0*" PRIx64 " (%s %g):\n",
1331                name,
1332                n_bits / 4,
1333                static_cast<uint64_t>(inputs[n]),
1334                name,
1335                rawbits_to_fp(inputs[n]));
1336         printf("  Expected: 0x%0*" PRIx64 " (%" PRIu64 ")\n",
1337                d_bits / 4,
1338                static_cast<uint64_t>(expected[d]),
1339                static_cast<uint64_t>(expected[d]));
1340         printf("  Found:    0x%0*" PRIx64 " (%" PRIu64 ")\n",
1341                d_bits / 4,
1342                static_cast<uint64_t>(results[d]),
1343                static_cast<uint64_t>(results[d]));
1344         printf("\n");
1345       }
1346     }
1347     VIXL_ASSERT(d == expected_length);
1348     if (error_count > kErrorReportLimit) {
1349       printf("%u other errors follow.\n", error_count - kErrorReportLimit);
1350     }
1351     VIXL_CHECK(error_count == 0);
1352   }
1353   delete[] results;
1354 }
1355 
1356 
1357 // Test FP instructions.
1358 //  - The inputs[] array should be an array of rawbits representations of
1359 //    doubles or floats. This ensures that exact bit comparisons can be
1360 //    performed.
1361 //  - The expected[] array should be an array of signed integers.
1362 template <typename Tn, typename Td>
TestFPToFixedS(const char * name,TestFPToFixedHelper_t helper,const Tn inputs[],unsigned inputs_length,const Td expected[],unsigned expected_length)1363 static void TestFPToFixedS(const char* name,
1364                            TestFPToFixedHelper_t helper,
1365                            const Tn inputs[],
1366                            unsigned inputs_length,
1367                            const Td expected[],
1368                            unsigned expected_length) {
1369   VIXL_ASSERT(inputs_length > 0);
1370 
1371   const unsigned d_bits = sizeof(Td) * 8;
1372   const unsigned n_bits = sizeof(Tn) * 8;
1373 
1374   const unsigned results_length = inputs_length * (d_bits + 1);
1375   Td* results = new Td[results_length];
1376 
1377   bool skipped;
1378 
1379   TestFPToFixed_Helper(helper,
1380                        reinterpret_cast<uintptr_t>(inputs),
1381                        inputs_length,
1382                        reinterpret_cast<uintptr_t>(results),
1383                        d_bits,
1384                        n_bits,
1385                        &skipped);
1386 
1387   if (Test::generate_test_trace()) {
1388     // Print the results.
1389     printf("const int%u_t kExpected_%s[] = {\n", d_bits, name);
1390     // There is no simple C++ literal for INT*_MIN that doesn't produce
1391     // warnings, so we use an appropriate constant in that case instead.
1392     // Deriving int_d_min in this way (rather than just checking INT64_MIN and
1393     // the like) avoids warnings about comparing values with differing ranges.
1394     const int64_t int_d_max = (UINT64_C(1) << (d_bits - 1)) - 1;
1395     const int64_t int_d_min = -(int_d_max)-1;
1396     for (unsigned d = 0; d < results_length; d++) {
1397       if (results[d] == int_d_min) {
1398         printf("  -INT%u_C(%" PRId64 ") - 1,\n", d_bits, int_d_max);
1399       } else {
1400         // Some constants (such as those between INT32_MAX and UINT32_MAX)
1401         // trigger compiler warnings. To avoid these warnings, use an
1402         // appropriate macro to make the type explicit.
1403         int64_t result_int64 = static_cast<int64_t>(results[d]);
1404         if (result_int64 >= 0) {
1405           printf("  INT%u_C(%" PRId64 "),\n", d_bits, result_int64);
1406         } else {
1407           printf("  -INT%u_C(%" PRId64 "),\n", d_bits, -result_int64);
1408         }
1409       }
1410     }
1411     printf("};\n");
1412     printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
1413   } else if (!skipped) {
1414     // Check the results.
1415     VIXL_CHECK(expected_length == results_length);
1416     unsigned error_count = 0;
1417     unsigned d = 0;
1418     for (unsigned n = 0; n < inputs_length; n++) {
1419       for (unsigned fbits = 0; fbits <= d_bits; ++fbits, d++) {
1420         if (results[d] != expected[d]) {
1421           if (++error_count > kErrorReportLimit) continue;
1422 
1423           printf("%s 0x%0*" PRIx64 " #%d (%s %g #%d):\n",
1424                  name,
1425                  n_bits / 4,
1426                  static_cast<uint64_t>(inputs[n]),
1427                  fbits,
1428                  name,
1429                  rawbits_to_fp(inputs[n]),
1430                  fbits);
1431           printf("  Expected: 0x%0*" PRIx64 " (%" PRId64 ")\n",
1432                  d_bits / 4,
1433                  static_cast<uint64_t>(expected[d]),
1434                  static_cast<int64_t>(expected[d]));
1435           printf("  Found:    0x%0*" PRIx64 " (%" PRId64 ")\n",
1436                  d_bits / 4,
1437                  static_cast<uint64_t>(results[d]),
1438                  static_cast<int64_t>(results[d]));
1439           printf("\n");
1440         }
1441       }
1442     }
1443     VIXL_ASSERT(d == expected_length);
1444     if (error_count > kErrorReportLimit) {
1445       printf("%u other errors follow.\n", error_count - kErrorReportLimit);
1446     }
1447     VIXL_CHECK(error_count == 0);
1448   }
1449   delete[] results;
1450 }
1451 
1452 
1453 // Test FP instructions.
1454 //  - The inputs[] array should be an array of rawbits representations of
1455 //    doubles or floats. This ensures that exact bit comparisons can be
1456 //    performed.
1457 //  - The expected[] array should be an array of unsigned integers.
1458 template <typename Tn, typename Td>
TestFPToFixedU(const char * name,TestFPToFixedHelper_t helper,const Tn inputs[],unsigned inputs_length,const Td expected[],unsigned expected_length)1459 static void TestFPToFixedU(const char* name,
1460                            TestFPToFixedHelper_t helper,
1461                            const Tn inputs[],
1462                            unsigned inputs_length,
1463                            const Td expected[],
1464                            unsigned expected_length) {
1465   VIXL_ASSERT(inputs_length > 0);
1466 
1467   const unsigned d_bits = sizeof(Td) * 8;
1468   const unsigned n_bits = sizeof(Tn) * 8;
1469 
1470   const unsigned results_length = inputs_length * (d_bits + 1);
1471   Td* results = new Td[results_length];
1472 
1473   bool skipped;
1474 
1475   TestFPToFixed_Helper(helper,
1476                        reinterpret_cast<uintptr_t>(inputs),
1477                        inputs_length,
1478                        reinterpret_cast<uintptr_t>(results),
1479                        d_bits,
1480                        n_bits,
1481                        &skipped);
1482 
1483   if (Test::generate_test_trace()) {
1484     // Print the results.
1485     printf("const uint%u_t kExpected_%s[] = {\n", d_bits, name);
1486     for (unsigned d = 0; d < results_length; d++) {
1487       printf("  %" PRIu64 "u,\n", static_cast<uint64_t>(results[d]));
1488     }
1489     printf("};\n");
1490     printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
1491   } else if (!skipped) {
1492     // Check the results.
1493     VIXL_CHECK(expected_length == results_length);
1494     unsigned error_count = 0;
1495     unsigned d = 0;
1496     for (unsigned n = 0; n < inputs_length; n++) {
1497       for (unsigned fbits = 0; fbits <= d_bits; ++fbits, d++) {
1498         if (results[d] != expected[d]) {
1499           if (++error_count > kErrorReportLimit) continue;
1500 
1501           printf("%s 0x%0*" PRIx64 " #%d (%s %g #%d):\n",
1502                  name,
1503                  n_bits / 4,
1504                  static_cast<uint64_t>(inputs[n]),
1505                  fbits,
1506                  name,
1507                  rawbits_to_fp(inputs[n]),
1508                  fbits);
1509           printf("  Expected: 0x%0*" PRIx64 " (%" PRIu64 ")\n",
1510                  d_bits / 4,
1511                  static_cast<uint64_t>(expected[d]),
1512                  static_cast<uint64_t>(expected[d]));
1513           printf("  Found:    0x%0*" PRIx64 " (%" PRIu64 ")\n",
1514                  d_bits / 4,
1515                  static_cast<uint64_t>(results[d]),
1516                  static_cast<uint64_t>(results[d]));
1517           printf("\n");
1518         }
1519       }
1520     }
1521     VIXL_ASSERT(d == expected_length);
1522     if (error_count > kErrorReportLimit) {
1523       printf("%u other errors follow.\n", error_count - kErrorReportLimit);
1524     }
1525     VIXL_CHECK(error_count == 0);
1526   }
1527   delete[] results;
1528 }
1529 
1530 
1531 // ==== Tests for instructions of the form <INST> VReg, VReg. ====
1532 
1533 
Test1OpNEON_Helper(Test1OpNEONHelper_t helper,uintptr_t inputs_n,unsigned inputs_n_length,uintptr_t results,VectorFormat vd_form,VectorFormat vn_form,bool * skipped)1534 static void Test1OpNEON_Helper(Test1OpNEONHelper_t helper,
1535                                uintptr_t inputs_n,
1536                                unsigned inputs_n_length,
1537                                uintptr_t results,
1538                                VectorFormat vd_form,
1539                                VectorFormat vn_form,
1540                                bool* skipped) {
1541   VIXL_ASSERT(vd_form != kFormatUndefined);
1542   VIXL_ASSERT(vn_form != kFormatUndefined);
1543 
1544   CPUFeatures features;
1545   features.Combine(CPUFeatures::kNEON,
1546                    CPUFeatures::kFP,
1547                    CPUFeatures::kRDM,
1548                    CPUFeatures::kNEONHalf);
1549   // For frint{32,64}{x,y} variants.
1550   features.Combine(CPUFeatures::kFrintToFixedSizedInt);
1551   SETUP_WITH_FEATURES(features);
1552   START();
1553 
1554   // Roll up the loop to keep the code size down.
1555   Label loop_n;
1556 
1557   Register out = x0;
1558   Register inputs_n_base = x1;
1559   Register inputs_n_last_16bytes = x3;
1560   Register index_n = x5;
1561 
1562   // TODO: Refactor duplicate definitions below with a VRegister::As() routine.
1563   const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form);
1564   const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
1565 
1566   const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form);
1567   const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
1568   const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
1569   const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form);
1570   const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form);
1571 
1572 
1573   // These will be either a D- or a Q-register form, with a single lane
1574   // (for use in scalar load and store operations).
1575   VRegister vd = VRegister(0, vd_bits);
1576   VRegister vn = v1.V16B();
1577   VRegister vntmp = v3.V16B();
1578 
1579   // These will have the correct format for use when calling 'helper'.
1580   VRegister vd_helper = VRegister(0, vd_bits, vd_lane_count);
1581   VRegister vn_helper = VRegister(1, vn_bits, vn_lane_count);
1582 
1583   // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'.
1584   VRegister vntmp_single = VRegister(3, vn_lane_bits);
1585 
1586   __ Mov(out, results);
1587 
1588   __ Mov(inputs_n_base, inputs_n);
1589   __ Mov(inputs_n_last_16bytes,
1590          inputs_n + (vn_lane_bytes * inputs_n_length) - 16);
1591 
1592   __ Ldr(vn, MemOperand(inputs_n_last_16bytes));
1593 
1594   __ Mov(index_n, 0);
1595   __ Bind(&loop_n);
1596 
1597   __ Ldr(vntmp_single,
1598          MemOperand(inputs_n_base, index_n, LSL, vn_lane_bytes_log2));
1599   __ Ext(vn, vn, vntmp, vn_lane_bytes);
1600 
1601   // Set the destination to zero.
1602   // TODO: Setting the destination to values other than zero
1603   //       might be a better test for instructions such as sqxtn2
1604   //       which may leave parts of V registers unchanged.
1605   __ Movi(vd.V16B(), 0);
1606 
1607   {
1608     SingleEmissionCheckScope guard(&masm);
1609     (masm.*helper)(vd_helper, vn_helper);
1610   }
1611   __ Str(vd, MemOperand(out, vd.GetSizeInBytes(), PostIndex));
1612 
1613   __ Add(index_n, index_n, 1);
1614   __ Cmp(index_n, inputs_n_length);
1615   __ B(lo, &loop_n);
1616 
1617   END();
1618   TRY_RUN(skipped);
1619 }
1620 
1621 
1622 // Test NEON instructions. The inputs_*[] and expected[] arrays should be
1623 // arrays of rawbit representation of input values. This ensures that
1624 // exact bit comparisons can be performed.
1625 template <typename Td, typename Tn>
Test1OpNEON(const char * name,Test1OpNEONHelper_t helper,const Tn inputs_n[],unsigned inputs_n_length,const Td expected[],unsigned expected_length,VectorFormat vd_form,VectorFormat vn_form)1626 static void Test1OpNEON(const char* name,
1627                         Test1OpNEONHelper_t helper,
1628                         const Tn inputs_n[],
1629                         unsigned inputs_n_length,
1630                         const Td expected[],
1631                         unsigned expected_length,
1632                         VectorFormat vd_form,
1633                         VectorFormat vn_form) {
1634   VIXL_ASSERT(inputs_n_length > 0);
1635 
1636   const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
1637   const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
1638   const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
1639 
1640   const unsigned results_length = inputs_n_length;
1641   Td* results = new Td[results_length * vd_lane_count];
1642   const unsigned lane_bit = sizeof(Td) * 8;
1643   const unsigned lane_len_in_hex = MaxHexCharCount<Td, Tn>();
1644 
1645   bool skipped;
1646 
1647   Test1OpNEON_Helper(helper,
1648                      reinterpret_cast<uintptr_t>(inputs_n),
1649                      inputs_n_length,
1650                      reinterpret_cast<uintptr_t>(results),
1651                      vd_form,
1652                      vn_form,
1653                      &skipped);
1654 
1655   if (Test::generate_test_trace()) {
1656     // Print the results.
1657     printf("const uint%u_t kExpected_NEON_%s[] = {\n", lane_bit, name);
1658     for (unsigned iteration = 0; iteration < results_length; iteration++) {
1659       printf(" ");
1660       // Output a separate result for each element of the result vector.
1661       for (unsigned lane = 0; lane < vd_lane_count; lane++) {
1662         unsigned index = lane + (iteration * vd_lane_count);
1663         printf(" 0x%0*" PRIx64 ",",
1664                lane_len_in_hex,
1665                static_cast<uint64_t>(results[index]));
1666       }
1667       printf("\n");
1668     }
1669 
1670     printf("};\n");
1671     printf("const unsigned kExpectedCount_NEON_%s = %u;\n",
1672            name,
1673            results_length);
1674   } else if (!skipped) {
1675     // Check the results.
1676     VIXL_CHECK(expected_length == results_length);
1677     unsigned error_count = 0;
1678     unsigned d = 0;
1679     const char* padding = "                    ";
1680     VIXL_ASSERT(strlen(padding) >= (lane_len_in_hex + 1));
1681     for (unsigned n = 0; n < inputs_n_length; n++, d++) {
1682       bool error_in_vector = false;
1683 
1684       for (unsigned lane = 0; lane < vd_lane_count; lane++) {
1685         unsigned output_index = (n * vd_lane_count) + lane;
1686 
1687         if (results[output_index] != expected[output_index]) {
1688           error_in_vector = true;
1689           break;
1690         }
1691       }
1692 
1693       if (error_in_vector && (++error_count <= kErrorReportLimit)) {
1694         printf("%s\n", name);
1695         printf(" Vn%.*s| Vd%.*s| Expected\n",
1696                lane_len_in_hex + 1,
1697                padding,
1698                lane_len_in_hex + 1,
1699                padding);
1700 
1701         const unsigned first_index_n =
1702             inputs_n_length - (16 / vn_lane_bytes) + n + 1;
1703 
1704         for (unsigned lane = 0; lane < std::max(vd_lane_count, vn_lane_count);
1705              lane++) {
1706           unsigned output_index = (n * vd_lane_count) + lane;
1707           unsigned input_index_n = (first_index_n + lane) % inputs_n_length;
1708 
1709           printf("%c0x%0*" PRIx64 " | 0x%0*" PRIx64
1710                  " "
1711                  "| 0x%0*" PRIx64 "\n",
1712                  results[output_index] != expected[output_index] ? '*' : ' ',
1713                  lane_len_in_hex,
1714                  static_cast<uint64_t>(inputs_n[input_index_n]),
1715                  lane_len_in_hex,
1716                  static_cast<uint64_t>(results[output_index]),
1717                  lane_len_in_hex,
1718                  static_cast<uint64_t>(expected[output_index]));
1719         }
1720       }
1721     }
1722     VIXL_ASSERT(d == expected_length);
1723     if (error_count > kErrorReportLimit) {
1724       printf("%u other errors follow.\n", error_count - kErrorReportLimit);
1725     }
1726     VIXL_CHECK(error_count == 0);
1727   }
1728   delete[] results;
1729 }
1730 
1731 
1732 // ==== Tests for instructions of the form <mnemonic> <V><d>, <Vn>.<T> ====
1733 //      where <V> is one of B, H, S or D registers.
1734 //      e.g. saddlv H1, v0.8B
1735 
1736 // TODO: Change tests to store all lanes of the resulting V register.
1737 //       Some tests store all 128 bits of the resulting V register to
1738 //       check the simulator's behaviour on the rest of the register.
1739 //       This is better than storing the affected lanes only.
1740 //       Change any tests such as the 'Across' template to do the same.
1741 
Test1OpAcrossNEON_Helper(Test1OpNEONHelper_t helper,uintptr_t inputs_n,unsigned inputs_n_length,uintptr_t results,VectorFormat vd_form,VectorFormat vn_form,bool * skipped)1742 static void Test1OpAcrossNEON_Helper(Test1OpNEONHelper_t helper,
1743                                      uintptr_t inputs_n,
1744                                      unsigned inputs_n_length,
1745                                      uintptr_t results,
1746                                      VectorFormat vd_form,
1747                                      VectorFormat vn_form,
1748                                      bool* skipped) {
1749   VIXL_ASSERT(vd_form != kFormatUndefined);
1750   VIXL_ASSERT(vn_form != kFormatUndefined);
1751 
1752   SETUP_WITH_FEATURES(CPUFeatures::kNEON,
1753                       CPUFeatures::kFP,
1754                       CPUFeatures::kNEONHalf);
1755   START();
1756 
1757   // Roll up the loop to keep the code size down.
1758   Label loop_n;
1759 
1760   Register out = x0;
1761   Register inputs_n_base = x1;
1762   Register inputs_n_last_vector = x3;
1763   Register index_n = x5;
1764 
1765   // TODO: Refactor duplicate definitions below with a VRegister::As() routine.
1766   const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form);
1767   const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form);
1768   const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
1769   const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
1770   const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form);
1771   const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form);
1772 
1773   // Test destructive operations by (arbitrarily) using the same register for
1774   // B and S lane sizes.
1775   bool destructive = (vd_bits == kBRegSize) || (vd_bits == kSRegSize);
1776 
1777   // Create two aliases for v0; the first is the destination for the tested
1778   // instruction, the second, the whole Q register to check the results.
1779   VRegister vd = VRegister(0, vd_bits);
1780   VRegister vdstr = VRegister(0, kQRegSize);
1781 
1782   VRegister vn = VRegister(1, vn_bits);
1783   VRegister vntmp = VRegister(3, vn_bits);
1784 
1785   // These will have the correct format for use when calling 'helper'.
1786   VRegister vd_helper = VRegister(0, vn_bits, vn_lane_count);
1787   VRegister vn_helper = VRegister(1, vn_bits, vn_lane_count);
1788 
1789   // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'.
1790   VRegister vntmp_single = VRegister(3, vn_lane_bits);
1791 
1792   // Same registers for use in the 'ext' instructions.
1793   VRegister vn_ext = (kDRegSize == vn_bits) ? vn.V8B() : vn.V16B();
1794   VRegister vntmp_ext = (kDRegSize == vn_bits) ? vntmp.V8B() : vntmp.V16B();
1795 
1796   __ Mov(out, results);
1797 
1798   __ Mov(inputs_n_base, inputs_n);
1799   __ Mov(inputs_n_last_vector,
1800          inputs_n + vn_lane_bytes * (inputs_n_length - vn_lane_count));
1801 
1802   __ Ldr(vn, MemOperand(inputs_n_last_vector));
1803 
1804   __ Mov(index_n, 0);
1805   __ Bind(&loop_n);
1806 
1807   __ Ldr(vntmp_single,
1808          MemOperand(inputs_n_base, index_n, LSL, vn_lane_bytes_log2));
1809   __ Ext(vn_ext, vn_ext, vntmp_ext, vn_lane_bytes);
1810 
1811   if (destructive) {
1812     __ Mov(vd_helper, vn_helper);
1813     SingleEmissionCheckScope guard(&masm);
1814     (masm.*helper)(vd, vd_helper);
1815   } else {
1816     SingleEmissionCheckScope guard(&masm);
1817     (masm.*helper)(vd, vn_helper);
1818   }
1819 
1820   __ Str(vdstr, MemOperand(out, kQRegSizeInBytes, PostIndex));
1821 
1822   __ Add(index_n, index_n, 1);
1823   __ Cmp(index_n, inputs_n_length);
1824   __ B(lo, &loop_n);
1825 
1826   END();
1827   TRY_RUN(skipped);
1828 }
1829 
1830 // Test NEON instructions. The inputs_*[] and expected[] arrays should be
1831 // arrays of rawbit representation of input values. This ensures that
1832 // exact bit comparisons can be performed.
1833 template <typename Td, typename Tn>
Test1OpAcrossNEON(const char * name,Test1OpNEONHelper_t helper,const Tn inputs_n[],unsigned inputs_n_length,const Td expected[],unsigned expected_length,VectorFormat vd_form,VectorFormat vn_form)1834 static void Test1OpAcrossNEON(const char* name,
1835                               Test1OpNEONHelper_t helper,
1836                               const Tn inputs_n[],
1837                               unsigned inputs_n_length,
1838                               const Td expected[],
1839                               unsigned expected_length,
1840                               VectorFormat vd_form,
1841                               VectorFormat vn_form) {
1842   VIXL_ASSERT(inputs_n_length > 0);
1843 
1844   const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
1845   const unsigned vd_lanes_per_q = MaxLaneCountFromFormat(vd_form);
1846 
1847   const unsigned results_length = inputs_n_length;
1848   Td* results = new Td[results_length * vd_lanes_per_q];
1849   const unsigned lane_bit = sizeof(Td) * 8;
1850   const unsigned lane_len_in_hex = MaxHexCharCount<Td, Tn>();
1851 
1852   bool skipped;
1853 
1854   Test1OpAcrossNEON_Helper(helper,
1855                            reinterpret_cast<uintptr_t>(inputs_n),
1856                            inputs_n_length,
1857                            reinterpret_cast<uintptr_t>(results),
1858                            vd_form,
1859                            vn_form,
1860                            &skipped);
1861 
1862   if (Test::generate_test_trace()) {
1863     // Print the results.
1864     printf("const uint%u_t kExpected_NEON_%s[] = {\n", lane_bit, name);
1865     for (unsigned iteration = 0; iteration < results_length; iteration++) {
1866       printf(" ");
1867       // Output a separate result for each element of the result vector.
1868       for (unsigned lane = 0; lane < vd_lane_count; lane++) {
1869         unsigned index = lane + (iteration * vd_lanes_per_q);
1870         printf(" 0x%0*" PRIx64 ",",
1871                lane_len_in_hex,
1872                static_cast<uint64_t>(results[index]));
1873       }
1874       printf("\n");
1875     }
1876 
1877     printf("};\n");
1878     printf("const unsigned kExpectedCount_NEON_%s = %u;\n",
1879            name,
1880            results_length);
1881   } else if (!skipped) {
1882     // Check the results.
1883     VIXL_CHECK(expected_length == results_length);
1884     unsigned error_count = 0;
1885     unsigned d = 0;
1886     const char* padding = "                    ";
1887     VIXL_ASSERT(strlen(padding) >= (lane_len_in_hex + 1));
1888     for (unsigned n = 0; n < inputs_n_length; n++, d++) {
1889       bool error_in_vector = false;
1890 
1891       for (unsigned lane = 0; lane < vd_lane_count; lane++) {
1892         unsigned expected_index = (n * vd_lane_count) + lane;
1893         unsigned results_index = (n * vd_lanes_per_q) + lane;
1894 
1895         if (results[results_index] != expected[expected_index]) {
1896           error_in_vector = true;
1897           break;
1898         }
1899       }
1900 
1901       // For across operations, the remaining lanes should be zero.
1902       for (unsigned lane = vd_lane_count; lane < vd_lanes_per_q; lane++) {
1903         unsigned results_index = (n * vd_lanes_per_q) + lane;
1904         if (results[results_index] != 0) {
1905           error_in_vector = true;
1906           break;
1907         }
1908       }
1909 
1910       if (error_in_vector && (++error_count <= kErrorReportLimit)) {
1911         const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
1912 
1913         printf("%s\n", name);
1914         printf(" Vn%.*s| Vd%.*s| Expected\n",
1915                lane_len_in_hex + 1,
1916                padding,
1917                lane_len_in_hex + 1,
1918                padding);
1919 
1920         // TODO: In case of an error, all tests print out as many elements as
1921         //       there are lanes in the output or input vectors. This way
1922         //       the viewer can read all the values that were needed for the
1923         //       operation but the output contains also unnecessary values.
1924         //       These prints can be improved according to the arguments
1925         //       passed to test functions.
1926         //       This output for the 'Across' category has the required
1927         //       modifications.
1928         for (unsigned lane = 0; lane < vn_lane_count; lane++) {
1929           unsigned results_index =
1930               (n * vd_lanes_per_q) + ((vn_lane_count - 1) - lane);
1931           unsigned input_index_n =
1932               (inputs_n_length - vn_lane_count + n + 1 + lane) %
1933               inputs_n_length;
1934 
1935           Td expect = 0;
1936           if ((vn_lane_count - 1) == lane) {
1937             // This is the last lane to be printed, ie. the least-significant
1938             // lane, so use the expected value; any other lane should be zero.
1939             unsigned expected_index = n * vd_lane_count;
1940             expect = expected[expected_index];
1941           }
1942           printf("%c0x%0*" PRIx64 " | 0x%0*" PRIx64 " | 0x%0*" PRIx64 "\n",
1943                  results[results_index] != expect ? '*' : ' ',
1944                  lane_len_in_hex,
1945                  static_cast<uint64_t>(inputs_n[input_index_n]),
1946                  lane_len_in_hex,
1947                  static_cast<uint64_t>(results[results_index]),
1948                  lane_len_in_hex,
1949                  static_cast<uint64_t>(expect));
1950         }
1951       }
1952     }
1953     VIXL_ASSERT(d == expected_length);
1954     if (error_count > kErrorReportLimit) {
1955       printf("%u other errors follow.\n", error_count - kErrorReportLimit);
1956     }
1957     VIXL_CHECK(error_count == 0);
1958   }
1959   delete[] results;
1960 }
1961 
1962 
1963 // ==== Tests for instructions of the form <INST> VReg, VReg, VReg. ====
1964 
1965 // TODO: Iterate over inputs_d once the traces file is split.
1966 
Test2OpNEON_Helper(Test2OpNEONHelper_t helper,uintptr_t inputs_d,uintptr_t inputs_n,unsigned inputs_n_length,uintptr_t inputs_m,unsigned inputs_m_length,uintptr_t results,VectorFormat vd_form,VectorFormat vn_form,VectorFormat vm_form,bool * skipped)1967 static void Test2OpNEON_Helper(Test2OpNEONHelper_t helper,
1968                                uintptr_t inputs_d,
1969                                uintptr_t inputs_n,
1970                                unsigned inputs_n_length,
1971                                uintptr_t inputs_m,
1972                                unsigned inputs_m_length,
1973                                uintptr_t results,
1974                                VectorFormat vd_form,
1975                                VectorFormat vn_form,
1976                                VectorFormat vm_form,
1977                                bool* skipped) {
1978   VIXL_ASSERT(vd_form != kFormatUndefined);
1979   VIXL_ASSERT(vn_form != kFormatUndefined);
1980   VIXL_ASSERT(vm_form != kFormatUndefined);
1981 
1982   CPUFeatures features;
1983   features.Combine(CPUFeatures::kNEON, CPUFeatures::kNEONHalf);
1984   features.Combine(CPUFeatures::kFP);
1985   features.Combine(CPUFeatures::kRDM);
1986   features.Combine(CPUFeatures::kDotProduct);
1987   features.Combine(CPUFeatures::kFHM);
1988   SETUP_WITH_FEATURES(features);
1989   START();
1990 
1991   // Roll up the loop to keep the code size down.
1992   Label loop_n, loop_m;
1993 
1994   Register out = x0;
1995   Register inputs_n_base = x1;
1996   Register inputs_m_base = x2;
1997   Register inputs_d_base = x3;
1998   Register inputs_n_last_16bytes = x4;
1999   Register inputs_m_last_16bytes = x5;
2000   Register index_n = x6;
2001   Register index_m = x7;
2002 
2003   // TODO: Refactor duplicate definitions below with a VRegister::As() routine.
2004   const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form);
2005   const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
2006 
2007   const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form);
2008   const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
2009   const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
2010   const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form);
2011   const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form);
2012 
2013   const unsigned vm_bits = RegisterSizeInBitsFromFormat(vm_form);
2014   const unsigned vm_lane_count = LaneCountFromFormat(vm_form);
2015   const unsigned vm_lane_bytes = LaneSizeInBytesFromFormat(vm_form);
2016   const unsigned vm_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vm_form);
2017   const unsigned vm_lane_bits = LaneSizeInBitsFromFormat(vm_form);
2018 
2019 
2020   // Always load and store 128 bits regardless of the format.
2021   VRegister vd = v0.V16B();
2022   VRegister vn = v1.V16B();
2023   VRegister vm = v2.V16B();
2024   VRegister vntmp = v3.V16B();
2025   VRegister vmtmp = v4.V16B();
2026   VRegister vres = v5.V16B();
2027 
2028   // These will have the correct format for calling the 'helper'.
2029   VRegister vn_helper = VRegister(1, vn_bits, vn_lane_count);
2030   VRegister vm_helper = VRegister(2, vm_bits, vm_lane_count);
2031   VRegister vres_helper = VRegister(5, vd_bits, vd_lane_count);
2032 
2033   // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'.
2034   VRegister vntmp_single = VRegister(3, vn_lane_bits);
2035   VRegister vmtmp_single = VRegister(4, vm_lane_bits);
2036 
2037   __ Mov(out, results);
2038 
2039   __ Mov(inputs_d_base, inputs_d);
2040 
2041   __ Mov(inputs_n_base, inputs_n);
2042   __ Mov(inputs_n_last_16bytes, inputs_n + (inputs_n_length - 16));
2043   __ Mov(inputs_m_base, inputs_m);
2044   __ Mov(inputs_m_last_16bytes, inputs_m + (inputs_m_length - 16));
2045 
2046   __ Ldr(vd, MemOperand(inputs_d_base));
2047   __ Ldr(vn, MemOperand(inputs_n_last_16bytes));
2048   __ Ldr(vm, MemOperand(inputs_m_last_16bytes));
2049 
2050   __ Mov(index_n, 0);
2051   __ Bind(&loop_n);
2052 
2053   __ Ldr(vntmp_single,
2054          MemOperand(inputs_n_base, index_n, LSL, vn_lane_bytes_log2));
2055   __ Ext(vn, vn, vntmp, vn_lane_bytes);
2056 
2057   __ Mov(index_m, 0);
2058   __ Bind(&loop_m);
2059 
2060   __ Ldr(vmtmp_single,
2061          MemOperand(inputs_m_base, index_m, LSL, vm_lane_bytes_log2));
2062   __ Ext(vm, vm, vmtmp, vm_lane_bytes);
2063 
2064   __ Mov(vres, vd);
2065   {
2066     SingleEmissionCheckScope guard(&masm);
2067     (masm.*helper)(vres_helper, vn_helper, vm_helper);
2068   }
2069   __ Str(vres, MemOperand(out, vd.GetSizeInBytes(), PostIndex));
2070 
2071   __ Add(index_m, index_m, 1);
2072   __ Cmp(index_m, inputs_m_length);
2073   __ B(lo, &loop_m);
2074 
2075   __ Add(index_n, index_n, 1);
2076   __ Cmp(index_n, inputs_n_length);
2077   __ B(lo, &loop_n);
2078 
2079   END();
2080   TRY_RUN(skipped);
2081 }
2082 
2083 
2084 // Test NEON instructions. The inputs_*[] and expected[] arrays should be
2085 // arrays of rawbit representation of input values. This ensures that
2086 // exact bit comparisons can be performed.
2087 template <typename Td, typename Tn, typename Tm>
Test2OpNEON(const char * name,Test2OpNEONHelper_t helper,const Td inputs_d[],const Tn inputs_n[],unsigned inputs_n_length,const Tm inputs_m[],unsigned inputs_m_length,const Td expected[],unsigned expected_length,VectorFormat vd_form,VectorFormat vn_form,VectorFormat vm_form)2088 static void Test2OpNEON(const char* name,
2089                         Test2OpNEONHelper_t helper,
2090                         const Td inputs_d[],
2091                         const Tn inputs_n[],
2092                         unsigned inputs_n_length,
2093                         const Tm inputs_m[],
2094                         unsigned inputs_m_length,
2095                         const Td expected[],
2096                         unsigned expected_length,
2097                         VectorFormat vd_form,
2098                         VectorFormat vn_form,
2099                         VectorFormat vm_form) {
2100   VIXL_ASSERT(inputs_n_length > 0 && inputs_m_length > 0);
2101 
2102   const unsigned vd_lane_count = MaxLaneCountFromFormat(vd_form);
2103 
2104   const unsigned results_length = inputs_n_length * inputs_m_length;
2105   Td* results = new Td[results_length * vd_lane_count];
2106   const unsigned lane_bit = sizeof(Td) * 8;
2107   const unsigned lane_len_in_hex = MaxHexCharCount<Td, Tm>();
2108 
2109   bool skipped;
2110 
2111   Test2OpNEON_Helper(helper,
2112                      reinterpret_cast<uintptr_t>(inputs_d),
2113                      reinterpret_cast<uintptr_t>(inputs_n),
2114                      inputs_n_length,
2115                      reinterpret_cast<uintptr_t>(inputs_m),
2116                      inputs_m_length,
2117                      reinterpret_cast<uintptr_t>(results),
2118                      vd_form,
2119                      vn_form,
2120                      vm_form,
2121                      &skipped);
2122 
2123   if (Test::generate_test_trace()) {
2124     // Print the results.
2125     printf("const uint%u_t kExpected_NEON_%s[] = {\n", lane_bit, name);
2126     for (unsigned iteration = 0; iteration < results_length; iteration++) {
2127       printf(" ");
2128       // Output a separate result for each element of the result vector.
2129       for (unsigned lane = 0; lane < vd_lane_count; lane++) {
2130         unsigned index = lane + (iteration * vd_lane_count);
2131         printf(" 0x%0*" PRIx64 ",",
2132                lane_len_in_hex,
2133                static_cast<uint64_t>(results[index]));
2134       }
2135       printf("\n");
2136     }
2137 
2138     printf("};\n");
2139     printf("const unsigned kExpectedCount_NEON_%s = %u;\n",
2140            name,
2141            results_length);
2142   } else if (!skipped) {
2143     // Check the results.
2144     VIXL_CHECK(expected_length == results_length);
2145     unsigned error_count = 0;
2146     unsigned d = 0;
2147     const char* padding = "                    ";
2148     VIXL_ASSERT(strlen(padding) >= (lane_len_in_hex + 1));
2149     for (unsigned n = 0; n < inputs_n_length; n++) {
2150       for (unsigned m = 0; m < inputs_m_length; m++, d++) {
2151         bool error_in_vector = false;
2152 
2153         for (unsigned lane = 0; lane < vd_lane_count; lane++) {
2154           unsigned output_index = (n * inputs_m_length * vd_lane_count) +
2155                                   (m * vd_lane_count) + lane;
2156 
2157           if (results[output_index] != expected[output_index]) {
2158             error_in_vector = true;
2159             break;
2160           }
2161         }
2162 
2163         if (error_in_vector && (++error_count <= kErrorReportLimit)) {
2164           printf("%s\n", name);
2165           printf(" Vd%.*s| Vn%.*s| Vm%.*s| Vd%.*s| Expected\n",
2166                  lane_len_in_hex + 1,
2167                  padding,
2168                  lane_len_in_hex + 1,
2169                  padding,
2170                  lane_len_in_hex + 1,
2171                  padding,
2172                  lane_len_in_hex + 1,
2173                  padding);
2174 
2175           for (unsigned lane = 0; lane < vd_lane_count; lane++) {
2176             unsigned output_index = (n * inputs_m_length * vd_lane_count) +
2177                                     (m * vd_lane_count) + lane;
2178             unsigned input_index_n =
2179                 (inputs_n_length - vd_lane_count + n + 1 + lane) %
2180                 inputs_n_length;
2181             unsigned input_index_m =
2182                 (inputs_m_length - vd_lane_count + m + 1 + lane) %
2183                 inputs_m_length;
2184 
2185             printf("%c0x%0*" PRIx64 " | 0x%0*" PRIx64 " | 0x%0*" PRIx64
2186                    " "
2187                    "| 0x%0*" PRIx64 " | 0x%0*" PRIx64 "\n",
2188                    results[output_index] != expected[output_index] ? '*' : ' ',
2189                    lane_len_in_hex,
2190                    static_cast<uint64_t>(inputs_d[lane]),
2191                    lane_len_in_hex,
2192                    static_cast<uint64_t>(inputs_n[input_index_n]),
2193                    lane_len_in_hex,
2194                    static_cast<uint64_t>(inputs_m[input_index_m]),
2195                    lane_len_in_hex,
2196                    static_cast<uint64_t>(results[output_index]),
2197                    lane_len_in_hex,
2198                    static_cast<uint64_t>(expected[output_index]));
2199           }
2200         }
2201       }
2202     }
2203     VIXL_ASSERT(d == expected_length);
2204     if (error_count > kErrorReportLimit) {
2205       printf("%u other errors follow.\n", error_count - kErrorReportLimit);
2206     }
2207     VIXL_CHECK(error_count == 0);
2208   }
2209   delete[] results;
2210 }
2211 
2212 
2213 // ==== Tests for instructions of the form <INST> Vd, Vn, Vm[<#index>]. ====
2214 
TestByElementNEON_Helper(TestByElementNEONHelper_t helper,uintptr_t inputs_d,uintptr_t inputs_n,unsigned inputs_n_length,uintptr_t inputs_m,unsigned inputs_m_length,const int indices[],unsigned indices_length,uintptr_t results,VectorFormat vd_form,VectorFormat vn_form,VectorFormat vm_form,unsigned vm_subvector_count,bool * skipped)2215 static void TestByElementNEON_Helper(TestByElementNEONHelper_t helper,
2216                                      uintptr_t inputs_d,
2217                                      uintptr_t inputs_n,
2218                                      unsigned inputs_n_length,
2219                                      uintptr_t inputs_m,
2220                                      unsigned inputs_m_length,
2221                                      const int indices[],
2222                                      unsigned indices_length,
2223                                      uintptr_t results,
2224                                      VectorFormat vd_form,
2225                                      VectorFormat vn_form,
2226                                      VectorFormat vm_form,
2227                                      unsigned vm_subvector_count,
2228                                      bool* skipped) {
2229   VIXL_ASSERT(vd_form != kFormatUndefined);
2230   VIXL_ASSERT(vn_form != kFormatUndefined);
2231   VIXL_ASSERT(vm_form != kFormatUndefined);
2232   VIXL_ASSERT((vm_subvector_count != 0) && IsPowerOf2(vm_subvector_count));
2233 
2234   CPUFeatures features;
2235   features.Combine(CPUFeatures::kNEON, CPUFeatures::kNEONHalf);
2236   features.Combine(CPUFeatures::kFP);
2237   features.Combine(CPUFeatures::kRDM);
2238   features.Combine(CPUFeatures::kDotProduct);
2239   features.Combine(CPUFeatures::kFHM);
2240   SETUP_WITH_FEATURES(features);
2241 
2242   START();
2243 
2244   // Roll up the loop to keep the code size down.
2245   Label loop_n, loop_m;
2246 
2247   Register out = x0;
2248   Register inputs_n_base = x1;
2249   Register inputs_m_base = x2;
2250   Register inputs_d_base = x3;
2251   Register inputs_n_last_16bytes = x4;
2252   Register inputs_m_last_16bytes = x5;
2253   Register index_n = x6;
2254   Register index_m = x7;
2255 
2256   // TODO: Refactor duplicate definitions below with a VRegister::As() routine.
2257   const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form);
2258   const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
2259 
2260   const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form);
2261   const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
2262   const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
2263   const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form);
2264   const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form);
2265 
2266   const unsigned vm_bits = RegisterSizeInBitsFromFormat(vm_form);
2267   const unsigned vm_lane_count = LaneCountFromFormat(vm_form);
2268   const unsigned vm_lane_bytes = LaneSizeInBytesFromFormat(vm_form);
2269   const unsigned vm_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vm_form);
2270   const unsigned vm_lane_bits = LaneSizeInBitsFromFormat(vm_form);
2271 
2272   VIXL_ASSERT((vm_bits * vm_subvector_count) <= kQRegSize);
2273 
2274   // Always load and store 128 bits regardless of the format.
2275   VRegister vd = v0.V16B();
2276   VRegister vn = v1.V16B();
2277   VRegister vm = v2.V16B();
2278   VRegister vntmp = v3.V16B();
2279   VRegister vmtmp = v4.V16B();
2280   VRegister vres = v5.V16B();
2281 
2282   // These will have the correct format for calling the 'helper'.
2283   VRegister vn_helper = VRegister(1, vn_bits, vn_lane_count);
2284   VRegister vm_helper =
2285       VRegister(2, vm_bits * vm_subvector_count, vm_lane_count);
2286   VRegister vres_helper = VRegister(5, vd_bits, vd_lane_count);
2287 
2288   // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'.
2289   VRegister vntmp_single = VRegister(3, vn_lane_bits);
2290   VRegister vmtmp_single = VRegister(4, vm_lane_bits);
2291 
2292   __ Mov(out, results);
2293 
2294   __ Mov(inputs_d_base, inputs_d);
2295 
2296   __ Mov(inputs_n_base, inputs_n);
2297   __ Mov(inputs_n_last_16bytes, inputs_n + (inputs_n_length - 16));
2298   __ Mov(inputs_m_base, inputs_m);
2299   __ Mov(inputs_m_last_16bytes, inputs_m + (inputs_m_length - 16));
2300 
2301   __ Ldr(vd, MemOperand(inputs_d_base));
2302   __ Ldr(vn, MemOperand(inputs_n_last_16bytes));
2303   __ Ldr(vm, MemOperand(inputs_m_last_16bytes));
2304 
2305   __ Mov(index_n, 0);
2306   __ Bind(&loop_n);
2307 
2308   __ Ldr(vntmp_single,
2309          MemOperand(inputs_n_base, index_n, LSL, vn_lane_bytes_log2));
2310   __ Ext(vn, vn, vntmp, vn_lane_bytes);
2311 
2312   __ Mov(index_m, 0);
2313   __ Bind(&loop_m);
2314 
2315   __ Ldr(vmtmp_single,
2316          MemOperand(inputs_m_base, index_m, LSL, vm_lane_bytes_log2));
2317   __ Ext(vm, vm, vmtmp, vm_lane_bytes);
2318 
2319   __ Mov(vres, vd);
2320   {
2321     for (unsigned i = 0; i < indices_length; i++) {
2322       {
2323         SingleEmissionCheckScope guard(&masm);
2324         (masm.*helper)(vres_helper, vn_helper, vm_helper, indices[i]);
2325       }
2326       __ Str(vres, MemOperand(out, vd.GetSizeInBytes(), PostIndex));
2327     }
2328   }
2329 
2330   __ Add(index_m, index_m, 1);
2331   __ Cmp(index_m, inputs_m_length);
2332   __ B(lo, &loop_m);
2333 
2334   __ Add(index_n, index_n, 1);
2335   __ Cmp(index_n, inputs_n_length);
2336   __ B(lo, &loop_n);
2337 
2338   END();
2339   TRY_RUN(skipped);
2340 }
2341 
2342 
2343 // Test NEON instructions. The inputs_*[] and expected[] arrays should be
2344 // arrays of rawbit representation of input values. This ensures that
2345 // exact bit comparisons can be performed.
2346 template <typename Td, typename Tn, typename Tm>
TestByElementNEON(const char * name,TestByElementNEONHelper_t helper,const Td inputs_d[],const Tn inputs_n[],unsigned inputs_n_length,const Tm inputs_m[],unsigned inputs_m_length,const int indices[],unsigned indices_length,const Td expected[],unsigned expected_length,VectorFormat vd_form,VectorFormat vn_form,VectorFormat vm_form,unsigned vm_subvector_count=1)2347 static void TestByElementNEON(const char* name,
2348                               TestByElementNEONHelper_t helper,
2349                               const Td inputs_d[],
2350                               const Tn inputs_n[],
2351                               unsigned inputs_n_length,
2352                               const Tm inputs_m[],
2353                               unsigned inputs_m_length,
2354                               const int indices[],
2355                               unsigned indices_length,
2356                               const Td expected[],
2357                               unsigned expected_length,
2358                               VectorFormat vd_form,
2359                               VectorFormat vn_form,
2360                               VectorFormat vm_form,
2361                               unsigned vm_subvector_count = 1) {
2362   VIXL_ASSERT(inputs_n_length > 0);
2363   VIXL_ASSERT(inputs_m_length > 0);
2364   VIXL_ASSERT(indices_length > 0);
2365 
2366   const unsigned vd_lane_count = MaxLaneCountFromFormat(vd_form);
2367 
2368   const unsigned results_length =
2369       inputs_n_length * inputs_m_length * indices_length;
2370   Td* results = new Td[results_length * vd_lane_count];
2371   const unsigned lane_bit = sizeof(Td) * 8;
2372   const unsigned lane_len_in_hex = MaxHexCharCount<Td, Tm>();
2373 
2374   bool skipped;
2375 
2376   TestByElementNEON_Helper(helper,
2377                            reinterpret_cast<uintptr_t>(inputs_d),
2378                            reinterpret_cast<uintptr_t>(inputs_n),
2379                            inputs_n_length,
2380                            reinterpret_cast<uintptr_t>(inputs_m),
2381                            inputs_m_length,
2382                            indices,
2383                            indices_length,
2384                            reinterpret_cast<uintptr_t>(results),
2385                            vd_form,
2386                            vn_form,
2387                            vm_form,
2388                            vm_subvector_count,
2389                            &skipped);
2390 
2391   if (Test::generate_test_trace()) {
2392     // Print the results.
2393     printf("const uint%u_t kExpected_NEON_%s[] = {\n", lane_bit, name);
2394     for (unsigned iteration = 0; iteration < results_length; iteration++) {
2395       printf(" ");
2396       // Output a separate result for each element of the result vector.
2397       for (unsigned lane = 0; lane < vd_lane_count; lane++) {
2398         unsigned index = lane + (iteration * vd_lane_count);
2399         printf(" 0x%0*" PRIx64 ",",
2400                lane_len_in_hex,
2401                static_cast<uint64_t>(results[index]));
2402       }
2403       printf("\n");
2404     }
2405 
2406     printf("};\n");
2407     printf("const unsigned kExpectedCount_NEON_%s = %u;\n",
2408            name,
2409            results_length);
2410   } else if (!skipped) {
2411     // Check the results.
2412     VIXL_CHECK(expected_length == results_length);
2413     unsigned error_count = 0;
2414     unsigned d = 0;
2415     const char* padding = "                    ";
2416     VIXL_ASSERT(strlen(padding) >= (lane_len_in_hex + 1));
2417     for (unsigned n = 0; n < inputs_n_length; n++) {
2418       for (unsigned m = 0; m < inputs_m_length; m++) {
2419         for (unsigned index = 0; index < indices_length; index++, d++) {
2420           bool error_in_vector = false;
2421 
2422           for (unsigned lane = 0; lane < vd_lane_count; lane++) {
2423             unsigned output_index =
2424                 (n * inputs_m_length * indices_length * vd_lane_count) +
2425                 (m * indices_length * vd_lane_count) + (index * vd_lane_count) +
2426                 lane;
2427 
2428             if (results[output_index] != expected[output_index]) {
2429               error_in_vector = true;
2430               break;
2431             }
2432           }
2433 
2434           if (error_in_vector && (++error_count <= kErrorReportLimit)) {
2435             printf("%s\n", name);
2436             printf(" Vd%.*s| Vn%.*s| Vm%.*s| Index | Vd%.*s| Expected\n",
2437                    lane_len_in_hex + 1,
2438                    padding,
2439                    lane_len_in_hex + 1,
2440                    padding,
2441                    lane_len_in_hex + 1,
2442                    padding,
2443                    lane_len_in_hex + 1,
2444                    padding);
2445 
2446             for (unsigned lane = 0; lane < vd_lane_count; lane++) {
2447               unsigned output_index =
2448                   (n * inputs_m_length * indices_length * vd_lane_count) +
2449                   (m * indices_length * vd_lane_count) +
2450                   (index * vd_lane_count) + lane;
2451               unsigned input_index_n =
2452                   (inputs_n_length - vd_lane_count + n + 1 + lane) %
2453                   inputs_n_length;
2454               unsigned input_index_m =
2455                   (inputs_m_length - vd_lane_count + m + 1 + lane) %
2456                   inputs_m_length;
2457 
2458               printf("%c0x%0*" PRIx64 " | 0x%0*" PRIx64 " | 0x%0*" PRIx64
2459                      " "
2460                      "| [%3d] | 0x%0*" PRIx64 " | 0x%0*" PRIx64 "\n",
2461                      results[output_index] != expected[output_index] ? '*'
2462                                                                      : ' ',
2463                      lane_len_in_hex,
2464                      static_cast<uint64_t>(inputs_d[lane]),
2465                      lane_len_in_hex,
2466                      static_cast<uint64_t>(inputs_n[input_index_n]),
2467                      lane_len_in_hex,
2468                      static_cast<uint64_t>(inputs_m[input_index_m]),
2469                      indices[index],
2470                      lane_len_in_hex,
2471                      static_cast<uint64_t>(results[output_index]),
2472                      lane_len_in_hex,
2473                      static_cast<uint64_t>(expected[output_index]));
2474             }
2475           }
2476         }
2477       }
2478     }
2479     VIXL_ASSERT(d == expected_length);
2480     if (error_count > kErrorReportLimit) {
2481       printf("%u other errors follow.\n", error_count - kErrorReportLimit);
2482     }
2483     VIXL_CHECK(error_count == 0);
2484   }
2485   delete[] results;
2486 }
2487 
2488 
2489 // ==== Tests for instructions of the form <INST> VReg, VReg, #Immediate. ====
2490 
2491 
2492 template <typename Tm>
Test2OpImmNEON_Helper(typename Test2OpImmediateNEONHelper_t<Tm>::mnemonic helper,uintptr_t inputs_n,unsigned inputs_n_length,const Tm inputs_m[],unsigned inputs_m_length,uintptr_t results,VectorFormat vd_form,VectorFormat vn_form,bool * skipped)2493 void Test2OpImmNEON_Helper(
2494     typename Test2OpImmediateNEONHelper_t<Tm>::mnemonic helper,
2495     uintptr_t inputs_n,
2496     unsigned inputs_n_length,
2497     const Tm inputs_m[],
2498     unsigned inputs_m_length,
2499     uintptr_t results,
2500     VectorFormat vd_form,
2501     VectorFormat vn_form,
2502     bool* skipped) {
2503   VIXL_ASSERT(vd_form != kFormatUndefined && vn_form != kFormatUndefined);
2504 
2505   SETUP_WITH_FEATURES(CPUFeatures::kNEON,
2506                       CPUFeatures::kFP,
2507                       CPUFeatures::kNEONHalf);
2508   START();
2509 
2510   // Roll up the loop to keep the code size down.
2511   Label loop_n;
2512 
2513   Register out = x0;
2514   Register inputs_n_base = x1;
2515   Register inputs_n_last_16bytes = x3;
2516   Register index_n = x5;
2517 
2518   // TODO: Refactor duplicate definitions below with a VRegister::As() routine.
2519   const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form);
2520   const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
2521 
2522   const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form);
2523   const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
2524   const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
2525   const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form);
2526   const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form);
2527 
2528 
2529   // These will be either a D- or a Q-register form, with a single lane
2530   // (for use in scalar load and store operations).
2531   VRegister vd = VRegister(0, vd_bits);
2532   VRegister vn = v1.V16B();
2533   VRegister vntmp = v3.V16B();
2534 
2535   // These will have the correct format for use when calling 'helper'.
2536   VRegister vd_helper = VRegister(0, vd_bits, vd_lane_count);
2537   VRegister vn_helper = VRegister(1, vn_bits, vn_lane_count);
2538 
2539   // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'.
2540   VRegister vntmp_single = VRegister(3, vn_lane_bits);
2541 
2542   __ Mov(out, results);
2543 
2544   __ Mov(inputs_n_base, inputs_n);
2545   __ Mov(inputs_n_last_16bytes,
2546          inputs_n + (vn_lane_bytes * inputs_n_length) - 16);
2547 
2548   __ Ldr(vn, MemOperand(inputs_n_last_16bytes));
2549 
2550   __ Mov(index_n, 0);
2551   __ Bind(&loop_n);
2552 
2553   __ Ldr(vntmp_single,
2554          MemOperand(inputs_n_base, index_n, LSL, vn_lane_bytes_log2));
2555   __ Ext(vn, vn, vntmp, vn_lane_bytes);
2556 
2557   // Set the destination to zero for tests such as '[r]shrn2'.
2558   // TODO: Setting the destination to values other than zero might be a better
2559   //       test for shift and accumulate instructions (srsra/ssra/usra/ursra).
2560   __ Movi(vd.V16B(), 0);
2561 
2562   {
2563     for (unsigned i = 0; i < inputs_m_length; i++) {
2564       {
2565         SingleEmissionCheckScope guard(&masm);
2566         (masm.*helper)(vd_helper, vn_helper, inputs_m[i]);
2567       }
2568       __ Str(vd, MemOperand(out, vd.GetSizeInBytes(), PostIndex));
2569     }
2570   }
2571 
2572   __ Add(index_n, index_n, 1);
2573   __ Cmp(index_n, inputs_n_length);
2574   __ B(lo, &loop_n);
2575 
2576   END();
2577   TRY_RUN(skipped);
2578 }
2579 
2580 
2581 // Test NEON instructions. The inputs_*[] and expected[] arrays should be
2582 // arrays of rawbit representation of input values. This ensures that
2583 // exact bit comparisons can be performed.
2584 template <typename Td, typename Tn, typename Tm>
Test2OpImmNEON(const char * name,typename Test2OpImmediateNEONHelper_t<Tm>::mnemonic helper,const Tn inputs_n[],unsigned inputs_n_length,const Tm inputs_m[],unsigned inputs_m_length,const Td expected[],unsigned expected_length,VectorFormat vd_form,VectorFormat vn_form)2585 static void Test2OpImmNEON(
2586     const char* name,
2587     typename Test2OpImmediateNEONHelper_t<Tm>::mnemonic helper,
2588     const Tn inputs_n[],
2589     unsigned inputs_n_length,
2590     const Tm inputs_m[],
2591     unsigned inputs_m_length,
2592     const Td expected[],
2593     unsigned expected_length,
2594     VectorFormat vd_form,
2595     VectorFormat vn_form) {
2596   VIXL_ASSERT(inputs_n_length > 0 && inputs_m_length > 0);
2597 
2598   const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
2599   const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
2600   const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
2601 
2602   const unsigned results_length = inputs_n_length * inputs_m_length;
2603   Td* results = new Td[results_length * vd_lane_count];
2604   const unsigned lane_bit = sizeof(Td) * 8;
2605   const unsigned lane_len_in_hex = MaxHexCharCount<Td, Tn>();
2606 
2607   bool skipped;
2608 
2609   Test2OpImmNEON_Helper(helper,
2610                         reinterpret_cast<uintptr_t>(inputs_n),
2611                         inputs_n_length,
2612                         inputs_m,
2613                         inputs_m_length,
2614                         reinterpret_cast<uintptr_t>(results),
2615                         vd_form,
2616                         vn_form,
2617                         &skipped);
2618 
2619   if (Test::generate_test_trace()) {
2620     // Print the results.
2621     printf("const uint%u_t kExpected_NEON_%s[] = {\n", lane_bit, name);
2622     for (unsigned iteration = 0; iteration < results_length; iteration++) {
2623       printf(" ");
2624       // Output a separate result for each element of the result vector.
2625       for (unsigned lane = 0; lane < vd_lane_count; lane++) {
2626         unsigned index = lane + (iteration * vd_lane_count);
2627         printf(" 0x%0*" PRIx64 ",",
2628                lane_len_in_hex,
2629                static_cast<uint64_t>(results[index]));
2630       }
2631       printf("\n");
2632     }
2633 
2634     printf("};\n");
2635     printf("const unsigned kExpectedCount_NEON_%s = %u;\n",
2636            name,
2637            results_length);
2638   } else if (!skipped) {
2639     // Check the results.
2640     VIXL_CHECK(expected_length == results_length);
2641     unsigned error_count = 0;
2642     unsigned d = 0;
2643     const char* padding = "                    ";
2644     VIXL_ASSERT(strlen(padding) >= (lane_len_in_hex + 1));
2645     for (unsigned n = 0; n < inputs_n_length; n++) {
2646       for (unsigned m = 0; m < inputs_m_length; m++, d++) {
2647         bool error_in_vector = false;
2648 
2649         for (unsigned lane = 0; lane < vd_lane_count; lane++) {
2650           unsigned output_index = (n * inputs_m_length * vd_lane_count) +
2651                                   (m * vd_lane_count) + lane;
2652 
2653           if (results[output_index] != expected[output_index]) {
2654             error_in_vector = true;
2655             break;
2656           }
2657         }
2658 
2659         if (error_in_vector && (++error_count <= kErrorReportLimit)) {
2660           printf("%s\n", name);
2661           printf(" Vn%.*s| Imm%.*s| Vd%.*s| Expected\n",
2662                  lane_len_in_hex + 1,
2663                  padding,
2664                  lane_len_in_hex,
2665                  padding,
2666                  lane_len_in_hex + 1,
2667                  padding);
2668 
2669           const unsigned first_index_n =
2670               inputs_n_length - (16 / vn_lane_bytes) + n + 1;
2671 
2672           for (unsigned lane = 0; lane < std::max(vd_lane_count, vn_lane_count);
2673                lane++) {
2674             unsigned output_index = (n * inputs_m_length * vd_lane_count) +
2675                                     (m * vd_lane_count) + lane;
2676             unsigned input_index_n = (first_index_n + lane) % inputs_n_length;
2677             unsigned input_index_m = m;
2678 
2679             printf("%c0x%0*" PRIx64 " | 0x%0*" PRIx64
2680                    " "
2681                    "| 0x%0*" PRIx64 " | 0x%0*" PRIx64 "\n",
2682                    results[output_index] != expected[output_index] ? '*' : ' ',
2683                    lane_len_in_hex,
2684                    static_cast<uint64_t>(inputs_n[input_index_n]),
2685                    lane_len_in_hex,
2686                    static_cast<uint64_t>(inputs_m[input_index_m]),
2687                    lane_len_in_hex,
2688                    static_cast<uint64_t>(results[output_index]),
2689                    lane_len_in_hex,
2690                    static_cast<uint64_t>(expected[output_index]));
2691           }
2692         }
2693       }
2694     }
2695     VIXL_ASSERT(d == expected_length);
2696     if (error_count > kErrorReportLimit) {
2697       printf("%u other errors follow.\n", error_count - kErrorReportLimit);
2698     }
2699     VIXL_CHECK(error_count == 0);
2700   }
2701   delete[] results;
2702 }
2703 
2704 
2705 // ==== Tests for instructions of the form <INST> VReg, #Imm, VReg, #Imm. ====
2706 
2707 
TestOpImmOpImmNEON_Helper(TestOpImmOpImmVdUpdateNEONHelper_t helper,uintptr_t inputs_d,const int inputs_imm1[],unsigned inputs_imm1_length,uintptr_t inputs_n,unsigned inputs_n_length,const int inputs_imm2[],unsigned inputs_imm2_length,uintptr_t results,VectorFormat vd_form,VectorFormat vn_form,bool * skipped)2708 static void TestOpImmOpImmNEON_Helper(TestOpImmOpImmVdUpdateNEONHelper_t helper,
2709                                       uintptr_t inputs_d,
2710                                       const int inputs_imm1[],
2711                                       unsigned inputs_imm1_length,
2712                                       uintptr_t inputs_n,
2713                                       unsigned inputs_n_length,
2714                                       const int inputs_imm2[],
2715                                       unsigned inputs_imm2_length,
2716                                       uintptr_t results,
2717                                       VectorFormat vd_form,
2718                                       VectorFormat vn_form,
2719                                       bool* skipped) {
2720   VIXL_ASSERT(vd_form != kFormatUndefined);
2721   VIXL_ASSERT(vn_form != kFormatUndefined);
2722 
2723   SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);
2724   START();
2725 
2726   // Roll up the loop to keep the code size down.
2727   Label loop_n;
2728 
2729   Register out = x0;
2730   Register inputs_d_base = x1;
2731   Register inputs_n_base = x2;
2732   Register inputs_n_last_vector = x4;
2733   Register index_n = x6;
2734 
2735   // TODO: Refactor duplicate definitions below with a VRegister::As() routine.
2736   const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form);
2737   const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
2738 
2739   const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form);
2740   const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
2741   const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
2742   const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form);
2743   const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form);
2744 
2745 
2746   // These will be either a D- or a Q-register form, with a single lane
2747   // (for use in scalar load and store operations).
2748   VRegister vd = VRegister(0, vd_bits);
2749   VRegister vn = VRegister(1, vn_bits);
2750   VRegister vntmp = VRegister(4, vn_bits);
2751   VRegister vres = VRegister(5, vn_bits);
2752 
2753   VRegister vn_helper = VRegister(1, vn_bits, vn_lane_count);
2754   VRegister vres_helper = VRegister(5, vd_bits, vd_lane_count);
2755 
2756   // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'.
2757   VRegister vntmp_single = VRegister(4, vn_lane_bits);
2758 
2759   // Same registers for use in the 'ext' instructions.
2760   VRegister vn_ext = (kDRegSize == vn_bits) ? vn.V8B() : vn.V16B();
2761   VRegister vntmp_ext = (kDRegSize == vn_bits) ? vntmp.V8B() : vntmp.V16B();
2762 
2763   __ Mov(out, results);
2764 
2765   __ Mov(inputs_d_base, inputs_d);
2766 
2767   __ Mov(inputs_n_base, inputs_n);
2768   __ Mov(inputs_n_last_vector,
2769          inputs_n + vn_lane_bytes * (inputs_n_length - vn_lane_count));
2770 
2771   __ Ldr(vd, MemOperand(inputs_d_base));
2772 
2773   __ Ldr(vn, MemOperand(inputs_n_last_vector));
2774 
2775   __ Mov(index_n, 0);
2776   __ Bind(&loop_n);
2777 
2778   __ Ldr(vntmp_single,
2779          MemOperand(inputs_n_base, index_n, LSL, vn_lane_bytes_log2));
2780   __ Ext(vn_ext, vn_ext, vntmp_ext, vn_lane_bytes);
2781 
2782   {
2783     EmissionCheckScope guard(&masm,
2784                              kInstructionSize * inputs_imm1_length *
2785                                  inputs_imm2_length * 3);
2786     for (unsigned i = 0; i < inputs_imm1_length; i++) {
2787       for (unsigned j = 0; j < inputs_imm2_length; j++) {
2788         __ Mov(vres, vd);
2789         (masm.*helper)(vres_helper, inputs_imm1[i], vn_helper, inputs_imm2[j]);
2790         __ Str(vres, MemOperand(out, vd.GetSizeInBytes(), PostIndex));
2791       }
2792     }
2793   }
2794 
2795   __ Add(index_n, index_n, 1);
2796   __ Cmp(index_n, inputs_n_length);
2797   __ B(lo, &loop_n);
2798 
2799   END();
2800   TRY_RUN(skipped);
2801 }
2802 
2803 
2804 // Test NEON instructions. The inputs_*[] and expected[] arrays should be
2805 // arrays of rawbit representation of input values. This ensures that
2806 // exact bit comparisons can be performed.
2807 template <typename Td, typename Tn>
TestOpImmOpImmNEON(const char * name,TestOpImmOpImmVdUpdateNEONHelper_t helper,const Td inputs_d[],const int inputs_imm1[],unsigned inputs_imm1_length,const Tn inputs_n[],unsigned inputs_n_length,const int inputs_imm2[],unsigned inputs_imm2_length,const Td expected[],unsigned expected_length,VectorFormat vd_form,VectorFormat vn_form)2808 static void TestOpImmOpImmNEON(const char* name,
2809                                TestOpImmOpImmVdUpdateNEONHelper_t helper,
2810                                const Td inputs_d[],
2811                                const int inputs_imm1[],
2812                                unsigned inputs_imm1_length,
2813                                const Tn inputs_n[],
2814                                unsigned inputs_n_length,
2815                                const int inputs_imm2[],
2816                                unsigned inputs_imm2_length,
2817                                const Td expected[],
2818                                unsigned expected_length,
2819                                VectorFormat vd_form,
2820                                VectorFormat vn_form) {
2821   VIXL_ASSERT(inputs_n_length > 0);
2822   VIXL_ASSERT(inputs_imm1_length > 0);
2823   VIXL_ASSERT(inputs_imm2_length > 0);
2824 
2825   const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
2826 
2827   const unsigned results_length =
2828       inputs_n_length * inputs_imm1_length * inputs_imm2_length;
2829 
2830   Td* results = new Td[results_length * vd_lane_count];
2831   const unsigned lane_bit = sizeof(Td) * 8;
2832   const unsigned lane_len_in_hex = MaxHexCharCount<Td, Tn>();
2833 
2834   bool skipped;
2835 
2836   TestOpImmOpImmNEON_Helper(helper,
2837                             reinterpret_cast<uintptr_t>(inputs_d),
2838                             inputs_imm1,
2839                             inputs_imm1_length,
2840                             reinterpret_cast<uintptr_t>(inputs_n),
2841                             inputs_n_length,
2842                             inputs_imm2,
2843                             inputs_imm2_length,
2844                             reinterpret_cast<uintptr_t>(results),
2845                             vd_form,
2846                             vn_form,
2847                             &skipped);
2848 
2849   if (Test::generate_test_trace()) {
2850     // Print the results.
2851     printf("const uint%u_t kExpected_NEON_%s[] = {\n", lane_bit, name);
2852     for (unsigned iteration = 0; iteration < results_length; iteration++) {
2853       printf(" ");
2854       // Output a separate result for each element of the result vector.
2855       for (unsigned lane = 0; lane < vd_lane_count; lane++) {
2856         unsigned index = lane + (iteration * vd_lane_count);
2857         printf(" 0x%0*" PRIx64 ",",
2858                lane_len_in_hex,
2859                static_cast<uint64_t>(results[index]));
2860       }
2861       printf("\n");
2862     }
2863 
2864     printf("};\n");
2865     printf("const unsigned kExpectedCount_NEON_%s = %u;\n",
2866            name,
2867            results_length);
2868   } else if (!skipped) {
2869     // Check the results.
2870     VIXL_CHECK(expected_length == results_length);
2871     unsigned error_count = 0;
2872     unsigned counted_length = 0;
2873     const char* padding = "                    ";
2874     VIXL_ASSERT(strlen(padding) >= (lane_len_in_hex + 1));
2875     for (unsigned n = 0; n < inputs_n_length; n++) {
2876       for (unsigned imm1 = 0; imm1 < inputs_imm1_length; imm1++) {
2877         for (unsigned imm2 = 0; imm2 < inputs_imm2_length; imm2++) {
2878           bool error_in_vector = false;
2879 
2880           counted_length++;
2881 
2882           for (unsigned lane = 0; lane < vd_lane_count; lane++) {
2883             unsigned output_index =
2884                 (n * inputs_imm1_length * inputs_imm2_length * vd_lane_count) +
2885                 (imm1 * inputs_imm2_length * vd_lane_count) +
2886                 (imm2 * vd_lane_count) + lane;
2887 
2888             if (results[output_index] != expected[output_index]) {
2889               error_in_vector = true;
2890               break;
2891             }
2892           }
2893 
2894           if (error_in_vector && (++error_count <= kErrorReportLimit)) {
2895             printf("%s\n", name);
2896             printf(" Vd%.*s| Imm%.*s| Vn%.*s| Imm%.*s| Vd%.*s| Expected\n",
2897                    lane_len_in_hex + 1,
2898                    padding,
2899                    lane_len_in_hex,
2900                    padding,
2901                    lane_len_in_hex + 1,
2902                    padding,
2903                    lane_len_in_hex,
2904                    padding,
2905                    lane_len_in_hex + 1,
2906                    padding);
2907 
2908             for (unsigned lane = 0; lane < vd_lane_count; lane++) {
2909               unsigned output_index =
2910                   (n * inputs_imm1_length * inputs_imm2_length *
2911                    vd_lane_count) +
2912                   (imm1 * inputs_imm2_length * vd_lane_count) +
2913                   (imm2 * vd_lane_count) + lane;
2914               unsigned input_index_n =
2915                   (inputs_n_length - vd_lane_count + n + 1 + lane) %
2916                   inputs_n_length;
2917               unsigned input_index_imm1 = imm1;
2918               unsigned input_index_imm2 = imm2;
2919 
2920               printf("%c0x%0*" PRIx64 " | 0x%0*" PRIx64 " | 0x%0*" PRIx64
2921                      " "
2922                      "| 0x%0*" PRIx64 " | 0x%0*" PRIx64 " | 0x%0*" PRIx64 "\n",
2923                      results[output_index] != expected[output_index] ? '*'
2924                                                                      : ' ',
2925                      lane_len_in_hex,
2926                      static_cast<uint64_t>(inputs_d[lane]),
2927                      lane_len_in_hex,
2928                      static_cast<uint64_t>(inputs_imm1[input_index_imm1]),
2929                      lane_len_in_hex,
2930                      static_cast<uint64_t>(inputs_n[input_index_n]),
2931                      lane_len_in_hex,
2932                      static_cast<uint64_t>(inputs_imm2[input_index_imm2]),
2933                      lane_len_in_hex,
2934                      static_cast<uint64_t>(results[output_index]),
2935                      lane_len_in_hex,
2936                      static_cast<uint64_t>(expected[output_index]));
2937             }
2938           }
2939         }
2940       }
2941     }
2942     VIXL_CHECK(counted_length == expected_length);
2943     if (error_count > kErrorReportLimit) {
2944       printf("%u other errors follow.\n", error_count - kErrorReportLimit);
2945     }
2946     VIXL_CHECK(error_count == 0);
2947   }
2948   delete[] results;
2949 }
2950 
2951 
2952 // ==== Floating-point tests. ====
2953 
2954 
2955 // Standard floating-point test expansion for both double- and single-precision
2956 // operations.
2957 #define STRINGIFY(s) #s
2958 
// Expands to a call of Test<type>() for the given mnemonic and variant:
//   - the test name is the string "<mnemonic>_<variant>";
//   - the code generator is &MacroAssembler::<mnemonic>;
//   - `input` must be a true array (not a pointer), because its element count
//     is computed with sizeof;
//   - kExpected_<mnemonic>_<variant> and kExpectedCount_<mnemonic>_<variant>
//     are passed as the last two arguments.
// The element-count expression is parenthesised, as in the NEON helpers.
#define CALL_TEST_FP_HELPER(mnemonic, variant, type, input) \
  Test##type(STRINGIFY(mnemonic) "_" STRINGIFY(variant),    \
             &MacroAssembler::mnemonic,                     \
             input,                                         \
             (sizeof(input) / sizeof(input[0])),            \
             kExpected_##mnemonic##_##variant,              \
             kExpectedCount_##mnemonic##_##variant)
2966 
// Defines two tests, <mnemonic>_d and <mnemonic>_s. Each forwards to
// CALL_TEST_FP_HELPER with the "d" or "s" variant and the matching input
// array, kInputDouble<input> or kInputFloat<input>.
#define DEFINE_TEST_FP(mnemonic, type, input)                    \
  TEST(mnemonic##_d) {                                           \
    CALL_TEST_FP_HELPER(mnemonic, d, type, kInputDouble##input); \
  }                                                              \
  TEST(mnemonic##_s) {                                           \
    CALL_TEST_FP_HELPER(mnemonic, s, type, kInputFloat##input);  \
  }
2974 
// Like DEFINE_TEST_FP, but additionally defines <mnemonic>_h, which uses the
// "h" variant with the kInputFloat16<input> array.
#define DEFINE_TEST_FP_FP16(mnemonic, type, input)                \
  TEST(mnemonic##_d) {                                            \
    CALL_TEST_FP_HELPER(mnemonic, d, type, kInputDouble##input);  \
  }                                                               \
  TEST(mnemonic##_s) {                                            \
    CALL_TEST_FP_HELPER(mnemonic, s, type, kInputFloat##input);   \
  }                                                               \
  TEST(mnemonic##_h) {                                            \
    CALL_TEST_FP_HELPER(mnemonic, h, type, kInputFloat16##input); \
  }
2985 
2986 
// TODO: Test with a newer version of valgrind.
//
// Note: valgrind-3.10.0 does not properly interpret libm's fma() on x86_64.
// Therefore this test will be exiting through an ASSERT and thus leaking
// memory.
2992 DEFINE_TEST_FP_FP16(fmadd, 3Op, Basic)
2993 DEFINE_TEST_FP_FP16(fmsub, 3Op, Basic)
2994 DEFINE_TEST_FP_FP16(fnmadd, 3Op, Basic)
2995 DEFINE_TEST_FP_FP16(fnmsub, 3Op, Basic)
2996 
2997 DEFINE_TEST_FP_FP16(fadd, 2Op, Basic)
2998 DEFINE_TEST_FP_FP16(fdiv, 2Op, Basic)
2999 DEFINE_TEST_FP_FP16(fmax, 2Op, Basic)
3000 DEFINE_TEST_FP_FP16(fmaxnm, 2Op, Basic)
3001 DEFINE_TEST_FP_FP16(fmin, 2Op, Basic)
3002 DEFINE_TEST_FP_FP16(fminnm, 2Op, Basic)
3003 DEFINE_TEST_FP_FP16(fmul, 2Op, Basic)
3004 DEFINE_TEST_FP_FP16(fsub, 2Op, Basic)
3005 DEFINE_TEST_FP_FP16(fnmul, 2Op, Basic)
3006 
3007 DEFINE_TEST_FP_FP16(fabs, 1Op, Basic)
3008 DEFINE_TEST_FP_FP16(fmov, 1Op, Basic)
3009 DEFINE_TEST_FP_FP16(fneg, 1Op, Basic)
3010 DEFINE_TEST_FP_FP16(fsqrt, 1Op, Basic)
3011 DEFINE_TEST_FP(frint32x, 1Op, Conversions)
3012 DEFINE_TEST_FP(frint64x, 1Op, Conversions)
3013 DEFINE_TEST_FP(frint32z, 1Op, Conversions)
3014 DEFINE_TEST_FP(frint64z, 1Op, Conversions)
3015 DEFINE_TEST_FP_FP16(frinta, 1Op, Conversions)
3016 DEFINE_TEST_FP_FP16(frinti, 1Op, Conversions)
3017 DEFINE_TEST_FP_FP16(frintm, 1Op, Conversions)
3018 DEFINE_TEST_FP_FP16(frintn, 1Op, Conversions)
3019 DEFINE_TEST_FP_FP16(frintp, 1Op, Conversions)
3020 DEFINE_TEST_FP_FP16(frintx, 1Op, Conversions)
3021 DEFINE_TEST_FP_FP16(frintz, 1Op, Conversions)
3022 
// Floating-point compare tests. The "dz"/"sz" variants go through TestCmpZero
// rather than TestCmp.
TEST(fcmp_d) { CALL_TEST_FP_HELPER(fcmp, d, Cmp, kInputDoubleBasic); }
TEST(fcmp_s) { CALL_TEST_FP_HELPER(fcmp, s, Cmp, kInputFloatBasic); }
TEST(fcmp_dz) { CALL_TEST_FP_HELPER(fcmp, dz, CmpZero, kInputDoubleBasic); }
TEST(fcmp_sz) { CALL_TEST_FP_HELPER(fcmp, sz, CmpZero, kInputFloatBasic); }

// fcvt tests: the "sd" variant is fed double inputs and the "ds" variant is
// fed float inputs.
TEST(fcvt_sd) { CALL_TEST_FP_HELPER(fcvt, sd, 1Op, kInputDoubleConversions); }
TEST(fcvt_ds) { CALL_TEST_FP_HELPER(fcvt, ds, 1Op, kInputFloatConversions); }
3030 
// Defines six tests per mnemonic: <mnemonic>_{xd,xs,xh,wd,ws,wh}. The second
// letter of the variant selects the input array (d: kInputDouble<input>,
// s: kInputFloat<input>, h: kInputFloat16<input>); the full variant name is
// what selects the kExpected_* tables inside CALL_TEST_FP_HELPER.
#define DEFINE_TEST_FP_TO_INT(mnemonic, type, input)               \
  TEST(mnemonic##_xd) {                                            \
    CALL_TEST_FP_HELPER(mnemonic, xd, type, kInputDouble##input);  \
  }                                                                \
  TEST(mnemonic##_xs) {                                            \
    CALL_TEST_FP_HELPER(mnemonic, xs, type, kInputFloat##input);   \
  }                                                                \
  TEST(mnemonic##_xh) {                                            \
    CALL_TEST_FP_HELPER(mnemonic, xh, type, kInputFloat16##input); \
  }                                                                \
  TEST(mnemonic##_wd) {                                            \
    CALL_TEST_FP_HELPER(mnemonic, wd, type, kInputDouble##input);  \
  }                                                                \
  TEST(mnemonic##_ws) {                                            \
    CALL_TEST_FP_HELPER(mnemonic, ws, type, kInputFloat##input);   \
  }                                                                \
  TEST(mnemonic##_wh) {                                            \
    CALL_TEST_FP_HELPER(mnemonic, wh, type, kInputFloat16##input); \
  }
3050 
// FP-to-integer conversion tests. fcvtzs/fcvtzu are routed through the
// TestFPToFixedS/TestFPToFixedU helpers; the others use TestFPToS/TestFPToU.
DEFINE_TEST_FP_TO_INT(fcvtas, FPToS, Conversions)
DEFINE_TEST_FP_TO_INT(fcvtau, FPToU, Conversions)
DEFINE_TEST_FP_TO_INT(fcvtms, FPToS, Conversions)
DEFINE_TEST_FP_TO_INT(fcvtmu, FPToU, Conversions)
DEFINE_TEST_FP_TO_INT(fcvtns, FPToS, Conversions)
DEFINE_TEST_FP_TO_INT(fcvtnu, FPToU, Conversions)
DEFINE_TEST_FP_TO_INT(fcvtzs, FPToFixedS, Conversions)
DEFINE_TEST_FP_TO_INT(fcvtzu, FPToFixedU, Conversions)
3059 
// Defines a single test, <mnemonic>_wd, using the "wd" variant with the
// kInputDouble<input> array.
#define DEFINE_TEST_FP_TO_JS_INT(mnemonic, type, input)           \
  TEST(mnemonic##_wd) {                                           \
    CALL_TEST_FP_HELPER(mnemonic, wd, type, kInputDouble##input); \
  }
3064 
3065 DEFINE_TEST_FP_TO_JS_INT(fjcvtzs, FPToS, Conversions)
3066 
3067 // TODO: Scvtf-fixed-point
3068 // TODO: Scvtf-integer
3069 // TODO: Ucvtf-fixed-point
3070 // TODO: Ucvtf-integer
3071 
3072 // TODO: Fccmp
3073 // TODO: Fcsel
3074 
3075 
3076 // ==== NEON Tests. ====
3077 
// Expands to a Test1OpNEON() call named "<mnemonic>_<vdform>". `input_n` must
// be a true array so that its element count can be computed with sizeof. The
// expected-data symbols are keyed on the destination form only
// (kExpected_NEON_<mnemonic>_<vdform>), while both kFormat<vdform> and
// kFormat<vnform> are forwarded.
#define CALL_TEST_NEON_HELPER_1Op(mnemonic, vdform, vnform, input_n) \
  Test1OpNEON(STRINGIFY(mnemonic) "_" STRINGIFY(vdform),             \
              &MacroAssembler::mnemonic,                             \
              input_n,                                               \
              (sizeof(input_n) / sizeof(input_n[0])),                \
              kExpected_NEON_##mnemonic##_##vdform,                  \
              kExpectedCount_NEON_##mnemonic##_##vdform,             \
              kFormat##vdform,                                       \
              kFormat##vnform)
3087 
// Expands to a Test1OpAcrossNEON() call. Unlike the plain 1Op helper, both the
// test name ("<mnemonic>_<vdform>_<vnform>") and the expected-data symbols
// (kExpected_NEON_<mnemonic>_<vdform>_<vnform>) include the source form.
// `input_n` must be a true array (its length is taken with sizeof).
#define CALL_TEST_NEON_HELPER_1OpAcross(mnemonic, vdform, vnform, input_n)   \
  Test1OpAcrossNEON(STRINGIFY(mnemonic) "_" STRINGIFY(vdform) "_" STRINGIFY( \
                        vnform),                                             \
                    &MacroAssembler::mnemonic,                               \
                    input_n,                                                 \
                    (sizeof(input_n) / sizeof(input_n[0])),                  \
                    kExpected_NEON_##mnemonic##_##vdform##_##vnform,         \
                    kExpectedCount_NEON_##mnemonic##_##vdform##_##vnform,    \
                    kFormat##vdform,                                         \
                    kFormat##vnform)
3098 
// Expands to a Test2OpNEON() call named "<mnemonic>_<vdform>". `input_d` is
// forwarded as-is (no length is computed for it); `input_n` and `input_m`
// must be true arrays because their lengths are taken with sizeof. The
// expected-data symbols are keyed on <mnemonic> and <vdform> only.
#define CALL_TEST_NEON_HELPER_2Op(mnemonic,              \
                                  vdform,                \
                                  vnform,                \
                                  vmform,                \
                                  input_d,               \
                                  input_n,               \
                                  input_m)               \
  Test2OpNEON(STRINGIFY(mnemonic) "_" STRINGIFY(vdform), \
              &MacroAssembler::mnemonic,                 \
              input_d,                                   \
              input_n,                                   \
              (sizeof(input_n) / sizeof(input_n[0])),    \
              input_m,                                   \
              (sizeof(input_m) / sizeof(input_m[0])),    \
              kExpected_NEON_##mnemonic##_##vdform,      \
              kExpectedCount_NEON_##mnemonic##_##vdform, \
              kFormat##vdform,                           \
              kFormat##vnform,                           \
              kFormat##vmform)
3118 
// Expands to a Test2OpImmNEON() call. The test name and the expected-data
// symbols carry a "_2OPIMM" suffix after <mnemonic>_<vdform>. Both `input_n`
// and `input_m` must be true arrays (their lengths are taken with sizeof).
#define CALL_TEST_NEON_HELPER_2OpImm(mnemonic,                        \
                                     vdform,                          \
                                     vnform,                          \
                                     input_n,                         \
                                     input_m)                         \
  Test2OpImmNEON(STRINGIFY(mnemonic) "_" STRINGIFY(vdform) "_2OPIMM", \
                 &MacroAssembler::mnemonic,                           \
                 input_n,                                             \
                 (sizeof(input_n) / sizeof(input_n[0])),              \
                 input_m,                                             \
                 (sizeof(input_m) / sizeof(input_m[0])),              \
                 kExpected_NEON_##mnemonic##_##vdform##_2OPIMM,       \
                 kExpectedCount_NEON_##mnemonic##_##vdform##_2OPIMM,  \
                 kFormat##vdform,                                     \
                 kFormat##vnform)
3134 
// Expands to a TestByElementNEON() call. The test name and the expected-data
// symbols are keyed on all three forms: <mnemonic>_<vdform>_<vnform>_<vmform>.
// `input_d` is forwarded as-is; `input_n`, `input_m` and `indices` must be
// true arrays because their lengths are taken with sizeof.
#define CALL_TEST_NEON_HELPER_ByElement(mnemonic,                      \
                                        vdform,                        \
                                        vnform,                        \
                                        vmform,                        \
                                        input_d,                       \
                                        input_n,                       \
                                        input_m,                       \
                                        indices)                       \
  TestByElementNEON(                                                   \
      STRINGIFY(mnemonic) "_" STRINGIFY(vdform) "_" STRINGIFY(         \
          vnform) "_" STRINGIFY(vmform),                               \
      &MacroAssembler::mnemonic,                                       \
      input_d,                                                         \
      input_n,                                                         \
      (sizeof(input_n) / sizeof(input_n[0])),                          \
      input_m,                                                         \
      (sizeof(input_m) / sizeof(input_m[0])),                          \
      indices,                                                         \
      (sizeof(indices) / sizeof(indices[0])),                          \
      kExpected_NEON_##mnemonic##_##vdform##_##vnform##_##vmform,      \
      kExpectedCount_NEON_##mnemonic##_##vdform##_##vnform##_##vmform, \
      kFormat##vdform,                                                 \
      kFormat##vnform,                                                 \
      kFormat##vmform)
3159 
// Same expansion as CALL_TEST_NEON_HELPER_ByElement, with one extra trailing
// argument, `vm_subvector_count`, forwarded to TestByElementNEON() after the
// three format arguments.
#define CALL_TEST_NEON_HELPER_ByElement_Dot_Product(mnemonic,           \
                                                    vdform,             \
                                                    vnform,             \
                                                    vmform,             \
                                                    input_d,            \
                                                    input_n,            \
                                                    input_m,            \
                                                    indices,            \
                                                    vm_subvector_count) \
  TestByElementNEON(                                                    \
      STRINGIFY(mnemonic) "_" STRINGIFY(vdform) "_" STRINGIFY(          \
          vnform) "_" STRINGIFY(vmform),                                \
      &MacroAssembler::mnemonic,                                        \
      input_d,                                                          \
      input_n,                                                          \
      (sizeof(input_n) / sizeof(input_n[0])),                           \
      input_m,                                                          \
      (sizeof(input_m) / sizeof(input_m[0])),                           \
      indices,                                                          \
      (sizeof(indices) / sizeof(indices[0])),                           \
      kExpected_NEON_##mnemonic##_##vdform##_##vnform##_##vmform,       \
      kExpectedCount_NEON_##mnemonic##_##vdform##_##vnform##_##vmform,  \
      kFormat##vdform,                                                  \
      kFormat##vnform,                                                  \
      kFormat##vmform,                                                  \
      vm_subvector_count)
3186 
// Expands to a TestOpImmOpImmNEON() call named "<mnemonic>_<vdform>". Unlike
// the other helpers, the code generator is supplied by the caller as `helper`
// instead of being derived from the mnemonic. `input_d` is forwarded as-is;
// `input_imm1`, `input_n` and `input_imm2` must be true arrays because their
// lengths are taken with sizeof.
#define CALL_TEST_NEON_HELPER_OpImmOpImm(helper,                   \
                                         mnemonic,                 \
                                         vdform,                   \
                                         vnform,                   \
                                         input_d,                  \
                                         input_imm1,               \
                                         input_n,                  \
                                         input_imm2)               \
  TestOpImmOpImmNEON(STRINGIFY(mnemonic) "_" STRINGIFY(vdform),    \
                     helper,                                       \
                     input_d,                                      \
                     input_imm1,                                   \
                     (sizeof(input_imm1) / sizeof(input_imm1[0])), \
                     input_n,                                      \
                     (sizeof(input_n) / sizeof(input_n[0])),       \
                     input_imm2,                                   \
                     (sizeof(input_imm2) / sizeof(input_imm2[0])), \
                     kExpected_NEON_##mnemonic##_##vdform,         \
                     kExpectedCount_NEON_##mnemonic##_##vdform,    \
                     kFormat##vdform,                              \
                     kFormat##vnform)
3208 
// One-operand NEON test where the destination and source use the same form:
// forwards to CALL_TEST_NEON_HELPER_1Op with `variant` for both.
#define CALL_TEST_NEON_HELPER_2SAME(mnemonic, variant, input) \
  CALL_TEST_NEON_HELPER_1Op(mnemonic, variant, variant, input)
3211 
// Per-lane-size building blocks for 2SAME tests. Each macro defines two tests
// for a pair of forms sharing one input array:
//   _8B / _16B use kInput8bits<input>,
//   _4H / _8H  use kInput16bits<input>,
//   _2S / _4S  use kInput32bits<input>.
#define DEFINE_TEST_NEON_2SAME_8B_16B(mnemonic, input)              \
  TEST(mnemonic##_8B) {                                             \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, 8B, kInput8bits##input);  \
  }                                                                 \
  TEST(mnemonic##_16B) {                                            \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, 16B, kInput8bits##input); \
  }

#define DEFINE_TEST_NEON_2SAME_4H_8H(mnemonic, input)               \
  TEST(mnemonic##_4H) {                                             \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, 4H, kInput16bits##input); \
  }                                                                 \
  TEST(mnemonic##_8H) {                                             \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, 8H, kInput16bits##input); \
  }

#define DEFINE_TEST_NEON_2SAME_2S_4S(mnemonic, input)               \
  TEST(mnemonic##_2S) {                                             \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, 2S, kInput32bits##input); \
  }                                                                 \
  TEST(mnemonic##_4S) {                                             \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, 4S, kInput32bits##input); \
  }
3235 
// Byte and halfword forms only: 8B, 16B, 4H and 8H.
#define DEFINE_TEST_NEON_2SAME_BH(mnemonic, input) \
  DEFINE_TEST_NEON_2SAME_8B_16B(mnemonic, input)   \
  DEFINE_TEST_NEON_2SAME_4H_8H(mnemonic, input)

// Everything in DEFINE_TEST_NEON_2SAME_BH plus the 2S and 4S forms (no 2D).
#define DEFINE_TEST_NEON_2SAME_NO2D(mnemonic, input) \
  DEFINE_TEST_NEON_2SAME_BH(mnemonic, input)         \
  DEFINE_TEST_NEON_2SAME_2S_4S(mnemonic, input)
3243 
// All vector forms: everything in DEFINE_TEST_NEON_2SAME_NO2D plus a _2D test
// that uses kInput64bits<input>.
#define DEFINE_TEST_NEON_2SAME(mnemonic, input)                     \
  DEFINE_TEST_NEON_2SAME_NO2D(mnemonic, input)                      \
  TEST(mnemonic##_2D) {                                             \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, 2D, kInput64bits##input); \
  }
// Word and doubleword forms only: 2S, 4S and 2D.
#define DEFINE_TEST_NEON_2SAME_SD(mnemonic, input)                  \
  DEFINE_TEST_NEON_2SAME_2S_4S(mnemonic, input)                     \
  TEST(mnemonic##_2D) {                                             \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, 2D, kInput64bits##input); \
  }
3254 
// Floating-point 2SAME tests: _2S and _4S use kInputFloat<input>, _2D uses
// kInputDouble<input>.
#define DEFINE_TEST_NEON_2SAME_FP(mnemonic, input)                  \
  TEST(mnemonic##_2S) {                                             \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, 2S, kInputFloat##input);  \
  }                                                                 \
  TEST(mnemonic##_4S) {                                             \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, 4S, kInputFloat##input);  \
  }                                                                 \
  TEST(mnemonic##_2D) {                                             \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, 2D, kInputDouble##input); \
  }
3265 
// Everything in DEFINE_TEST_NEON_2SAME_FP plus _4H and _8H tests that use
// kInputFloat16<input>.
#define DEFINE_TEST_NEON_2SAME_FP_FP16(mnemonic, input)              \
  DEFINE_TEST_NEON_2SAME_FP(mnemonic, input)                         \
  TEST(mnemonic##_4H) {                                              \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, 4H, kInputFloat16##input); \
  }                                                                  \
  TEST(mnemonic##_8H) {                                              \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, 8H, kInputFloat16##input); \
  }
3274 
// Scalar floating-point 2SAME tests: _H uses kInputFloat16<input>, _S uses
// kInputFloat<input> and _D uses kInputDouble<input>.
#define DEFINE_TEST_NEON_2SAME_FP_FP16_SCALAR(mnemonic, input)      \
  TEST(mnemonic##_H) {                                              \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, H, kInputFloat16##input); \
  }                                                                 \
  TEST(mnemonic##_S) {                                              \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, S, kInputFloat##input);   \
  }                                                                 \
  TEST(mnemonic##_D) {                                              \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, D, kInputDouble##input);  \
  }
3285 
// Single-form scalar 2SAME tests. Each macro defines one test whose input
// array matches the scalar size: B -> kInput8bits<input>,
// H -> kInput16bits<input>, S -> kInput32bits<input>,
// D -> kInput64bits<input>.
#define DEFINE_TEST_NEON_2SAME_SCALAR_B(mnemonic, input)          \
  TEST(mnemonic##_B) {                                            \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, B, kInput8bits##input); \
  }
#define DEFINE_TEST_NEON_2SAME_SCALAR_H(mnemonic, input)           \
  TEST(mnemonic##_H) {                                             \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, H, kInput16bits##input); \
  }
#define DEFINE_TEST_NEON_2SAME_SCALAR_S(mnemonic, input)           \
  TEST(mnemonic##_S) {                                             \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, S, kInput32bits##input); \
  }
#define DEFINE_TEST_NEON_2SAME_SCALAR_D(mnemonic, input)           \
  TEST(mnemonic##_D) {                                             \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, D, kInput64bits##input); \
  }
3302 
// All four scalar forms: B, H, S and D.
#define DEFINE_TEST_NEON_2SAME_SCALAR(mnemonic, input) \
  DEFINE_TEST_NEON_2SAME_SCALAR_B(mnemonic, input)     \
  DEFINE_TEST_NEON_2SAME_SCALAR_H(mnemonic, input)     \
  DEFINE_TEST_NEON_2SAME_SCALAR_S(mnemonic, input)     \
  DEFINE_TEST_NEON_2SAME_SCALAR_D(mnemonic, input)

// Only the S and D scalar forms.
#define DEFINE_TEST_NEON_2SAME_SCALAR_SD(mnemonic, input) \
  DEFINE_TEST_NEON_2SAME_SCALAR_S(mnemonic, input)        \
  DEFINE_TEST_NEON_2SAME_SCALAR_D(mnemonic, input)
3312 
3313 
// Alias used by the DEFINE_TEST_NEON_ACROSS* macros; forwards all arguments
// unchanged to CALL_TEST_NEON_HELPER_1OpAcross.
#define CALL_TEST_NEON_HELPER_ACROSS(mnemonic, vd_form, vn_form, input_n) \
  CALL_TEST_NEON_HELPER_1OpAcross(mnemonic, vd_form, vn_form, input_n)
3316 
// Across-lanes tests where the scalar destination has the same lane size as
// the source vector: B<-8B, B<-16B, H<-4H, H<-8H and S<-4S. The input array
// is selected by the source lane size (kInput8bits/16bits/32bits<input>).
#define DEFINE_TEST_NEON_ACROSS(mnemonic, input)                        \
  TEST(mnemonic##_B_8B) {                                               \
    CALL_TEST_NEON_HELPER_ACROSS(mnemonic, B, 8B, kInput8bits##input);  \
  }                                                                     \
  TEST(mnemonic##_B_16B) {                                              \
    CALL_TEST_NEON_HELPER_ACROSS(mnemonic, B, 16B, kInput8bits##input); \
  }                                                                     \
  TEST(mnemonic##_H_4H) {                                               \
    CALL_TEST_NEON_HELPER_ACROSS(mnemonic, H, 4H, kInput16bits##input); \
  }                                                                     \
  TEST(mnemonic##_H_8H) {                                               \
    CALL_TEST_NEON_HELPER_ACROSS(mnemonic, H, 8H, kInput16bits##input); \
  }                                                                     \
  TEST(mnemonic##_S_4S) {                                               \
    CALL_TEST_NEON_HELPER_ACROSS(mnemonic, S, 4S, kInput32bits##input); \
  }
3333 
// Across-lanes tests where the scalar destination is one size step wider
// than the source lanes: H<-8B, H<-16B, S<-4H, S<-8H and D<-4S. The input
// array is still selected by the source lane size.
#define DEFINE_TEST_NEON_ACROSS_LONG(mnemonic, input)                   \
  TEST(mnemonic##_H_8B) {                                               \
    CALL_TEST_NEON_HELPER_ACROSS(mnemonic, H, 8B, kInput8bits##input);  \
  }                                                                     \
  TEST(mnemonic##_H_16B) {                                              \
    CALL_TEST_NEON_HELPER_ACROSS(mnemonic, H, 16B, kInput8bits##input); \
  }                                                                     \
  TEST(mnemonic##_S_4H) {                                               \
    CALL_TEST_NEON_HELPER_ACROSS(mnemonic, S, 4H, kInput16bits##input); \
  }                                                                     \
  TEST(mnemonic##_S_8H) {                                               \
    CALL_TEST_NEON_HELPER_ACROSS(mnemonic, S, 8H, kInput16bits##input); \
  }                                                                     \
  TEST(mnemonic##_D_4S) {                                               \
    CALL_TEST_NEON_HELPER_ACROSS(mnemonic, D, 4S, kInput32bits##input); \
  }
3350 
// Floating-point across-lanes tests: H<-4H and H<-8H use
// kInputFloat16<input>; S<-4S uses kInputFloat<input>.
#define DEFINE_TEST_NEON_ACROSS_FP(mnemonic, input)                      \
  TEST(mnemonic##_H_4H) {                                                \
    CALL_TEST_NEON_HELPER_ACROSS(mnemonic, H, 4H, kInputFloat16##input); \
  }                                                                      \
  TEST(mnemonic##_H_8H) {                                                \
    CALL_TEST_NEON_HELPER_ACROSS(mnemonic, H, 8H, kInputFloat16##input); \
  }                                                                      \
  TEST(mnemonic##_S_4S) {                                                \
    CALL_TEST_NEON_HELPER_ACROSS(mnemonic, S, 4S, kInputFloat##input);   \
  }
3361 
// One-operand NEON test where destination and source forms differ; forwards
// all arguments unchanged to CALL_TEST_NEON_HELPER_1Op.
#define CALL_TEST_NEON_HELPER_2DIFF(mnemonic, vdform, vnform, input_n) \
  CALL_TEST_NEON_HELPER_1Op(mnemonic, vdform, vnform, input_n)
3364 
// Tests named after the destination form, where the destination has half as
// many lanes of twice the width as the source: 4H<-8B, 8H<-16B, 2S<-4H,
// 4S<-8H, 1D<-2S and 2D<-4S. The input array matches the source lane size.
#define DEFINE_TEST_NEON_2DIFF_LONG(mnemonic, input)                    \
  TEST(mnemonic##_4H) {                                                 \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 4H, 8B, kInput8bits##input);  \
  }                                                                     \
  TEST(mnemonic##_8H) {                                                 \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 8H, 16B, kInput8bits##input); \
  }                                                                     \
  TEST(mnemonic##_2S) {                                                 \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 2S, 4H, kInput16bits##input); \
  }                                                                     \
  TEST(mnemonic##_4S) {                                                 \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 4S, 8H, kInput16bits##input); \
  }                                                                     \
  TEST(mnemonic##_1D) {                                                 \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 1D, 2S, kInput32bits##input); \
  }                                                                     \
  TEST(mnemonic##_2D) {                                                 \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 2D, 4S, kInput32bits##input); \
  }
3384 
// Narrowing tests. For the plain mnemonic: 8B<-8H, 4H<-4S and 2S<-2D. For the
// "<mnemonic>2" instruction: 16B<-8H, 8H<-4S and 4S<-2D. The "2" is pasted
// onto the mnemonic, so both the MacroAssembler method and the expected-data
// symbols are looked up under <mnemonic>2. Inputs match the source lane size.
#define DEFINE_TEST_NEON_2DIFF_NARROW(mnemonic, input)                      \
  TEST(mnemonic##_8B) {                                                     \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 8B, 8H, kInput16bits##input);     \
  }                                                                         \
  TEST(mnemonic##_4H) {                                                     \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 4H, 4S, kInput32bits##input);     \
  }                                                                         \
  TEST(mnemonic##_2S) {                                                     \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 2S, 2D, kInput64bits##input);     \
  }                                                                         \
  TEST(mnemonic##2_16B) {                                                   \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 16B, 8H, kInput16bits##input); \
  }                                                                         \
  TEST(mnemonic##2_8H) {                                                    \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 8H, 4S, kInput32bits##input);  \
  }                                                                         \
  TEST(mnemonic##2_4S) {                                                    \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 4S, 2D, kInput64bits##input);  \
  }
3404 
// Floating-point lengthening tests: 4S<-4H and 2D<-2S for the plain mnemonic,
// 4S<-8H and 2D<-4S for "<mnemonic>2". Half-precision sources use
// kInputFloat16<input>; single-precision sources use kInputFloat<input>.
#define DEFINE_TEST_NEON_2DIFF_FP_LONG(mnemonic, input)                     \
  TEST(mnemonic##_4S) {                                                     \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 4S, 4H, kInputFloat16##input);    \
  }                                                                         \
  TEST(mnemonic##_2D) {                                                     \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 2D, 2S, kInputFloat##input);      \
  }                                                                         \
  TEST(mnemonic##2_4S) {                                                    \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 4S, 8H, kInputFloat16##input); \
  }                                                                         \
  TEST(mnemonic##2_2D) {                                                    \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 2D, 4S, kInputFloat##input);   \
  }
3418 
// Floating-point narrowing tests: 4H<-4S and 2S<-2D for the plain mnemonic,
// 8H<-4S and 4S<-2D for "<mnemonic>2". Single-precision sources use
// kInputFloat<input>; double-precision sources use kInputDouble<input>.
#define DEFINE_TEST_NEON_2DIFF_FP_NARROW(mnemonic, input)                  \
  TEST(mnemonic##_4H) {                                                    \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 4H, 4S, kInputFloat##input);     \
  }                                                                        \
  TEST(mnemonic##_2S) {                                                    \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 2S, 2D, kInputDouble##input);    \
  }                                                                        \
  TEST(mnemonic##2_8H) {                                                   \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 8H, 4S, kInputFloat##input);  \
  }                                                                        \
  TEST(mnemonic##2_4S) {                                                   \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 4S, 2D, kInputDouble##input); \
  }
3432 
// Subset of DEFINE_TEST_NEON_2DIFF_FP_NARROW limited to double-precision
// sources: 2S<-2D for the plain mnemonic and 4S<-2D for "<mnemonic>2".
#define DEFINE_TEST_NEON_2DIFF_FP_NARROW_2S(mnemonic, input)               \
  TEST(mnemonic##_2S) {                                                    \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 2S, 2D, kInputDouble##input);    \
  }                                                                        \
  TEST(mnemonic##2_4S) {                                                   \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 4S, 2D, kInputDouble##input); \
  }
3440 
// Scalar narrowing tests: B<-H, H<-S and S<-D. The input array matches the
// source size (kInput16bits/32bits/64bits<input>).
#define DEFINE_TEST_NEON_2DIFF_SCALAR_NARROW(mnemonic, input)         \
  TEST(mnemonic##_B) {                                                \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, B, H, kInput16bits##input); \
  }                                                                   \
  TEST(mnemonic##_H) {                                                \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, H, S, kInput32bits##input); \
  }                                                                   \
  TEST(mnemonic##_S) {                                                \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, S, D, kInput64bits##input); \
  }
3451 
// Floating-point tests with a scalar destination and a two-lane source:
// S<-2S (kInputFloat<input>), D<-2D (kInputDouble<input>) and H<-2H
// (kInputFloat16<input>). Note that, despite the "_SD" suffix in the macro
// name, an _H test is defined as well.
#define DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(mnemonic, input)            \
  TEST(mnemonic##_S) {                                                  \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, S, 2S, kInputFloat##input);   \
  }                                                                     \
  TEST(mnemonic##_D) {                                                  \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, D, 2D, kInputDouble##input);  \
  }                                                                     \
  TEST(mnemonic##_H) {                                                  \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, H, 2H, kInputFloat16##input); \
  }
3462 
// Two-operand NEON test where the destination and both sources share one
// form. Forwards to CALL_TEST_NEON_HELPER_2Op with `variant` used three times
// and `input_nm` used for both the n and m inputs. The expansion is a braced
// block, so a trailing semicolon at the call site becomes an empty statement.
#define CALL_TEST_NEON_HELPER_3SAME(mnemonic, variant, input_d, input_nm) \
  {                                                                       \
    CALL_TEST_NEON_HELPER_2Op(mnemonic,                                   \
                              variant,                                    \
                              variant,                                    \
                              variant,                                    \
                              input_d,                                    \
                              input_nm,                                   \
                              input_nm);                                  \
  }
3473 
// Defines <mnemonic>_8B and <mnemonic>_16B 3SAME tests. Both pass
// kInput8bitsAccDestination as the destination input and kInput8bits<input>
// as the source input.
#define DEFINE_TEST_NEON_3SAME_8B_16B(mnemonic, input)     \
  TEST(mnemonic##_8B) {                                    \
    CALL_TEST_NEON_HELPER_3SAME(mnemonic,                  \
                                8B,                        \
                                kInput8bitsAccDestination, \
                                kInput8bits##input);       \
  }                                                        \
  TEST(mnemonic##_16B) {                                   \
    CALL_TEST_NEON_HELPER_3SAME(mnemonic,                  \
                                16B,                       \
                                kInput8bitsAccDestination, \
                                kInput8bits##input);       \
  }
3487 
// Generates the _4H, _8H, _2S and _4S tests for `mnemonic`. The H forms pass
// kInput16bitsAccDestination / kInput16bits##input to
// CALL_TEST_NEON_HELPER_3SAME; the S forms pass the kInput32bits equivalents.
#define DEFINE_TEST_NEON_3SAME_HS(mnemonic, input) \
  TEST(mnemonic##_4H) { \
    CALL_TEST_NEON_HELPER_3SAME(mnemonic, 4H, kInput16bitsAccDestination, \
                                kInput16bits##input); \
  } \
  TEST(mnemonic##_8H) { \
    CALL_TEST_NEON_HELPER_3SAME(mnemonic, 8H, kInput16bitsAccDestination, \
                                kInput16bits##input); \
  } \
  TEST(mnemonic##_2S) { \
    CALL_TEST_NEON_HELPER_3SAME(mnemonic, 2S, kInput32bitsAccDestination, \
                                kInput32bits##input); \
  } \
  TEST(mnemonic##_4S) { \
    CALL_TEST_NEON_HELPER_3SAME(mnemonic, 4S, kInput32bitsAccDestination, \
                                kInput32bits##input); \
  }
3513 
// Generates every test produced by DEFINE_TEST_NEON_3SAME_8B_16B followed by
// those of DEFINE_TEST_NEON_3SAME_HS; no _2D test is generated here.
#define DEFINE_TEST_NEON_3SAME_NO2D(mnemonic, input) \
  DEFINE_TEST_NEON_3SAME_8B_16B(mnemonic, input) \
  DEFINE_TEST_NEON_3SAME_HS(mnemonic, input)
3517 
// Generates the DEFINE_TEST_NEON_3SAME_NO2D tests plus a _2D test that passes
// kInput64bitsAccDestination / kInput64bits##input to
// CALL_TEST_NEON_HELPER_3SAME.
#define DEFINE_TEST_NEON_3SAME(mnemonic, input) \
  DEFINE_TEST_NEON_3SAME_NO2D(mnemonic, input) \
  TEST(mnemonic##_2D) { \
    CALL_TEST_NEON_HELPER_3SAME(mnemonic, 2D, kInput64bitsAccDestination, \
                                kInput64bits##input); \
  }
3526 
// Generates the _4H, _8H, _2S, _4S and _2D tests for `mnemonic`. Input tables
// are chosen per form: kInputFloat16* for 4H/8H, kInputFloat* for 2S/4S and
// kInputDouble* for 2D, with the matching *AccDestination table as input_d.
#define DEFINE_TEST_NEON_3SAME_FP(mnemonic, input) \
  TEST(mnemonic##_4H) { \
    CALL_TEST_NEON_HELPER_3SAME(mnemonic, 4H, kInputFloat16AccDestination, \
                                kInputFloat16##input); \
  } \
  TEST(mnemonic##_8H) { \
    CALL_TEST_NEON_HELPER_3SAME(mnemonic, 8H, kInputFloat16AccDestination, \
                                kInputFloat16##input); \
  } \
  TEST(mnemonic##_2S) { \
    CALL_TEST_NEON_HELPER_3SAME(mnemonic, 2S, kInputFloatAccDestination, \
                                kInputFloat##input); \
  } \
  TEST(mnemonic##_4S) { \
    CALL_TEST_NEON_HELPER_3SAME(mnemonic, 4S, kInputFloatAccDestination, \
                                kInputFloat##input); \
  } \
  TEST(mnemonic##_2D) { \
    CALL_TEST_NEON_HELPER_3SAME(mnemonic, 2D, kInputDoubleAccDestination, \
                                kInputDouble##input); \
  }
3558 
// Generates a single _D test that passes kInput64bitsAccDestination and
// kInput64bits##input to CALL_TEST_NEON_HELPER_3SAME with the D form.
#define DEFINE_TEST_NEON_3SAME_SCALAR_D(mnemonic, input) \
  TEST(mnemonic##_D) { \
    CALL_TEST_NEON_HELPER_3SAME(mnemonic, D, kInput64bitsAccDestination, \
                                kInput64bits##input); \
  }
3566 
// Generates the _H and _S tests for `mnemonic`, using the kInput16bits* and
// kInput32bits* tables respectively with CALL_TEST_NEON_HELPER_3SAME.
#define DEFINE_TEST_NEON_3SAME_SCALAR_HS(mnemonic, input) \
  TEST(mnemonic##_H) { \
    CALL_TEST_NEON_HELPER_3SAME(mnemonic, H, kInput16bitsAccDestination, \
                                kInput16bits##input); \
  } \
  TEST(mnemonic##_S) { \
    CALL_TEST_NEON_HELPER_3SAME(mnemonic, S, kInput32bitsAccDestination, \
                                kInput32bits##input); \
  }
3580 
// Generates the _B, _H, _S and _D tests for `mnemonic`. The _B test is
// spelled out here; the _H/_S and _D tests come from
// DEFINE_TEST_NEON_3SAME_SCALAR_HS and DEFINE_TEST_NEON_3SAME_SCALAR_D, which
// expand to the same definitions, in the same order.
#define DEFINE_TEST_NEON_3SAME_SCALAR(mnemonic, input) \
  TEST(mnemonic##_B) { \
    CALL_TEST_NEON_HELPER_3SAME(mnemonic, B, kInput8bitsAccDestination, \
                                kInput8bits##input); \
  } \
  DEFINE_TEST_NEON_3SAME_SCALAR_HS(mnemonic, input) \
  DEFINE_TEST_NEON_3SAME_SCALAR_D(mnemonic, input)
3606 
// Generates the _H, _S and _D tests for `mnemonic`, using the kInputFloat16*,
// kInputFloat* and kInputDouble* tables respectively with
// CALL_TEST_NEON_HELPER_3SAME.
#define DEFINE_TEST_NEON_3SAME_FP_SCALAR(mnemonic, input) \
  TEST(mnemonic##_H) { \
    CALL_TEST_NEON_HELPER_3SAME(mnemonic, H, kInputFloat16AccDestination, \
                                kInputFloat16##input); \
  } \
  TEST(mnemonic##_S) { \
    CALL_TEST_NEON_HELPER_3SAME(mnemonic, S, kInputFloatAccDestination, \
                                kInputFloat##input); \
  } \
  TEST(mnemonic##_D) { \
    CALL_TEST_NEON_HELPER_3SAME(mnemonic, D, kInputDoubleAccDestination, \
                                kInputDouble##input); \
  }
3626 
// Generates the _2S (2S, 2H, 2H) and _4S (4S, 4H, 4H) tests for `mnemonic`
// via CALL_TEST_NEON_HELPER_3DIFF, with kInputFloat16##input_n and
// kInputFloat16##input_m as the n and m inputs.
// Note: `input_d` is accepted but never referenced; input_d is always
// kInputFloatAccDestination.
#define DEFINE_TEST_NEON_FHM(mnemonic, input_d, input_n, input_m) \
  TEST(mnemonic##_2S) { \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic, 2S, 2H, 2H, \
                                kInputFloatAccDestination, \
                                kInputFloat16##input_n, \
                                kInputFloat16##input_m); \
  } \
  TEST(mnemonic##_4S) { \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic, 4S, 4H, 4H, \
                                kInputFloatAccDestination, \
                                kInputFloat16##input_n, \
                                kInputFloat16##input_m); \
  }
3646 
// Forwards all seven arguments, unchanged and in order, to
// CALL_TEST_NEON_HELPER_2Op inside its own brace-delimited block.
#define CALL_TEST_NEON_HELPER_3DIFF( \
    mnemonic, vdform, vnform, vmform, input_d, input_n, input_m) \
  { \
    CALL_TEST_NEON_HELPER_2Op( \
        mnemonic, vdform, vnform, vmform, input_d, input_n, input_m); \
  }
3663 
// Generates two tests with an 8H first form: `mnemonic` with 8B n/m forms and
// `mnemonic##2` with 16B n/m forms. Both use kInput16bitsAccDestination as
// input_d and kInput8bits##input for input_n and input_m.
#define DEFINE_TEST_NEON_3DIFF_LONG_8H(mnemonic, input) \
  TEST(mnemonic##_8H) { \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic, 8H, 8B, 8B, \
                                kInput16bitsAccDestination, \
                                kInput8bits##input, kInput8bits##input); \
  } \
  TEST(mnemonic##2_8H) { \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, 8H, 16B, 16B, \
                                kInput16bitsAccDestination, \
                                kInput8bits##input, kInput8bits##input); \
  }
3683 
// Generates two tests with a 4S first form: `mnemonic` with 4H n/m forms and
// `mnemonic##2` with 8H n/m forms. Both use kInput32bitsAccDestination as
// input_d and kInput16bits##input for input_n and input_m.
#define DEFINE_TEST_NEON_3DIFF_LONG_4S(mnemonic, input) \
  TEST(mnemonic##_4S) { \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic, 4S, 4H, 4H, \
                                kInput32bitsAccDestination, \
                                kInput16bits##input, kInput16bits##input); \
  } \
  TEST(mnemonic##2_4S) { \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, 4S, 8H, 8H, \
                                kInput32bitsAccDestination, \
                                kInput16bits##input, kInput16bits##input); \
  }
3703 
// Generates two tests with a 2D first form: `mnemonic` with 2S n/m forms and
// `mnemonic##2` with 4S n/m forms. Both use kInput64bitsAccDestination as
// input_d and kInput32bits##input for input_n and input_m.
#define DEFINE_TEST_NEON_3DIFF_LONG_2D(mnemonic, input) \
  TEST(mnemonic##_2D) { \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic, 2D, 2S, 2S, \
                                kInput64bitsAccDestination, \
                                kInput32bits##input, kInput32bits##input); \
  } \
  TEST(mnemonic##2_2D) { \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, 2D, 4S, 4S, \
                                kInput64bitsAccDestination, \
                                kInput32bits##input, kInput32bits##input); \
  }
3723 
// Generates the DEFINE_TEST_NEON_3DIFF_LONG_4S tests followed by the
// DEFINE_TEST_NEON_3DIFF_LONG_2D tests (no 8H variants).
#define DEFINE_TEST_NEON_3DIFF_LONG_SD(mnemonic, input) \
  DEFINE_TEST_NEON_3DIFF_LONG_4S(mnemonic, input) \
  DEFINE_TEST_NEON_3DIFF_LONG_2D(mnemonic, input)
3727 
// Generates the 8H, 4S and 2D long-form tests, in that order. The 4S and 2D
// tests are obtained through DEFINE_TEST_NEON_3DIFF_LONG_SD, which expands to
// exactly the _4S and _2D generators.
#define DEFINE_TEST_NEON_3DIFF_LONG(mnemonic, input) \
  DEFINE_TEST_NEON_3DIFF_LONG_8H(mnemonic, input) \
  DEFINE_TEST_NEON_3DIFF_LONG_SD(mnemonic, input)
3732 
// Generates a single _S test: first form S, n/m forms H, with
// kInput32bitsAccDestination as input_d and kInput16bits##input for input_n
// and input_m.
#define DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_S(mnemonic, input) \
  TEST(mnemonic##_S) { \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic, S, H, H, \
                                kInput32bitsAccDestination, \
                                kInput16bits##input, kInput16bits##input); \
  }
3743 
// Generates a single _D test: first form D, n/m forms S, with
// kInput64bitsAccDestination as input_d and kInput32bits##input for input_n
// and input_m.
#define DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_D(mnemonic, input) \
  TEST(mnemonic##_D) { \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic, D, S, S, \
                                kInput64bitsAccDestination, \
                                kInput32bits##input, kInput32bits##input); \
  }
3754 
// Generates the DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_S test followed by the
// DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_D test.
#define DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_SD(mnemonic, input) \
  DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_S(mnemonic, input) \
  DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_D(mnemonic, input)
3758 
// Generates six tests: _8H, _4S and _2D for `mnemonic`, then the same three
// for `mnemonic##2`. In every test the first and n forms are identical and the
// m form has elements of half that width (8B/4H/2S for `mnemonic`, 16B/8H/4S
// for `mnemonic##2`). input_n is taken from the table matching the first
// form's element width and input_m from the table matching the m form's.
#define DEFINE_TEST_NEON_3DIFF_WIDE(mnemonic, input) \
  TEST(mnemonic##_8H) { \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic, 8H, 8H, 8B, \
                                kInput16bitsAccDestination, \
                                kInput16bits##input, kInput8bits##input); \
  } \
  TEST(mnemonic##_4S) { \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic, 4S, 4S, 4H, \
                                kInput32bitsAccDestination, \
                                kInput32bits##input, kInput16bits##input); \
  } \
  TEST(mnemonic##_2D) { \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic, 2D, 2D, 2S, \
                                kInput64bitsAccDestination, \
                                kInput64bits##input, kInput32bits##input); \
  } \
  TEST(mnemonic##2_8H) { \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, 8H, 8H, 16B, \
                                kInput16bitsAccDestination, \
                                kInput16bits##input, kInput8bits##input); \
  } \
  TEST(mnemonic##2_4S) { \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, 4S, 4S, 8H, \
                                kInput32bitsAccDestination, \
                                kInput32bits##input, kInput16bits##input); \
  } \
  TEST(mnemonic##2_2D) { \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, 2D, 2D, 4S, \
                                kInput64bitsAccDestination, \
                                kInput64bits##input, kInput32bits##input); \
  }
3814 
// Generates six tests: _8B, _4H and _2S for `mnemonic`, then _16B, _8H and
// _4S for `mnemonic##2`. In every test the n and m forms are identical and
// have elements twice as wide as the first form's. input_d comes from the
// *AccDestination table matching the first form's element width; input_n and
// input_m both come from the table matching the n/m element width.
#define DEFINE_TEST_NEON_3DIFF_NARROW(mnemonic, input) \
  TEST(mnemonic##_8B) { \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic, 8B, 8H, 8H, \
                                kInput8bitsAccDestination, \
                                kInput16bits##input, kInput16bits##input); \
  } \
  TEST(mnemonic##_4H) { \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic, 4H, 4S, 4S, \
                                kInput16bitsAccDestination, \
                                kInput32bits##input, kInput32bits##input); \
  } \
  TEST(mnemonic##_2S) { \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic, 2S, 2D, 2D, \
                                kInput32bitsAccDestination, \
                                kInput64bits##input, kInput64bits##input); \
  } \
  TEST(mnemonic##2_16B) { \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, 16B, 8H, 8H, \
                                kInput8bitsAccDestination, \
                                kInput16bits##input, kInput16bits##input); \
  } \
  TEST(mnemonic##2_8H) { \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, 8H, 4S, 4S, \
                                kInput16bitsAccDestination, \
                                kInput32bits##input, kInput32bits##input); \
  } \
  TEST(mnemonic##2_4S) { \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, 4S, 2D, 2D, \
                                kInput32bitsAccDestination, \
                                kInput64bits##input, kInput64bits##input); \
  }
3870 
// Generates the _2S (2S, 8B, 8B) and _4S (4S, 16B, 16B) tests for `mnemonic`.
// Both use kInput32bitsAccDestination as input_d and kInput8bits##input for
// input_n and input_m.
#define DEFINE_TEST_NEON_3DIFF_DOUBLE_WIDE(mnemonic, input) \
  TEST(mnemonic##_2S) { \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic, 2S, 8B, 8B, \
                                kInput32bitsAccDestination, \
                                kInput8bits##input, kInput8bits##input); \
  } \
  TEST(mnemonic##_4S) { \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic, 4S, 16B, 16B, \
                                kInput32bitsAccDestination, \
                                kInput8bits##input, kInput8bits##input); \
  }
3890 
3891 
// Forwards all five arguments, unchanged and in order, to
// CALL_TEST_NEON_HELPER_2OpImm inside its own brace-delimited block.
#define CALL_TEST_NEON_HELPER_2OPIMM( \
    mnemonic, vdform, vnform, input_n, input_imm) \
  { \
    CALL_TEST_NEON_HELPER_2OpImm( \
        mnemonic, vdform, vnform, input_n, input_imm); \
  }
3904 
// Generates the _8B, _16B, _4H, _8H, _2S, _4S and _2D "_2OPIMM" tests for
// `mnemonic`. Each test uses the same form for vdform and vnform, and picks
// the kInput<N>bits##input and kInput<N>bitsImm##input_imm tables whose width
// N matches the form's element size.
#define DEFINE_TEST_NEON_2OPIMM(mnemonic, input, input_imm) \
  TEST(mnemonic##_8B_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 8B, 8B, kInput8bits##input, \
                                 kInput8bitsImm##input_imm); \
  } \
  TEST(mnemonic##_16B_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 16B, 16B, kInput8bits##input, \
                                 kInput8bitsImm##input_imm); \
  } \
  TEST(mnemonic##_4H_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 4H, 4H, kInput16bits##input, \
                                 kInput16bitsImm##input_imm); \
  } \
  TEST(mnemonic##_8H_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 8H, 8H, kInput16bits##input, \
                                 kInput16bitsImm##input_imm); \
  } \
  TEST(mnemonic##_2S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 2S, 2S, kInput32bits##input, \
                                 kInput32bitsImm##input_imm); \
  } \
  TEST(mnemonic##_4S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 4S, 4S, kInput32bits##input, \
                                 kInput32bitsImm##input_imm); \
  } \
  TEST(mnemonic##_2D_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 2D, 2D, kInput64bits##input, \
                                 kInput64bitsImm##input_imm); \
  }
3955 
// Generates the _8B, _16B, _4H, _8H, _2S, _4S and _2D "_2OPIMM" tests for
// `mnemonic`. Unlike DEFINE_TEST_NEON_2OPIMM, vnform is the scalar form
// (B, H, S or D) with the same element size as vdform. The input and
// immediate tables are chosen by that element size.
#define DEFINE_TEST_NEON_2OPIMM_COPY(mnemonic, input, input_imm) \
  TEST(mnemonic##_8B_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 8B, B, kInput8bits##input, \
                                 kInput8bitsImm##input_imm); \
  } \
  TEST(mnemonic##_16B_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 16B, B, kInput8bits##input, \
                                 kInput8bitsImm##input_imm); \
  } \
  TEST(mnemonic##_4H_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 4H, H, kInput16bits##input, \
                                 kInput16bitsImm##input_imm); \
  } \
  TEST(mnemonic##_8H_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 8H, H, kInput16bits##input, \
                                 kInput16bitsImm##input_imm); \
  } \
  TEST(mnemonic##_2S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 2S, S, kInput32bits##input, \
                                 kInput32bitsImm##input_imm); \
  } \
  TEST(mnemonic##_4S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 4S, S, kInput32bits##input, \
                                 kInput32bitsImm##input_imm); \
  } \
  TEST(mnemonic##_2D_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 2D, D, kInput64bits##input, \
                                 kInput64bitsImm##input_imm); \
  }
4006 
// Defines six 2OPIMM tests in which the second format argument is half the
// lane width of the third: 8B/8H, 4H/4S and 2S/2D for `mnemonic`, followed by
// 16B/8H, 8H/4S and 4S/2D for `mnemonic##2`.
//   mnemonic:  instruction name; also the prefix of every generated TEST name.
//   input:     suffix pasted onto kInput{16,32,64}bits, i.e. the input list is
//              chosen by the wider (third) format.
//   input_imm: suffix pasted onto kInput{8,16,32}bitsImm, i.e. the immediate
//              list is chosen by the narrower (second) format.
// NOTE(review): the two format arguments are presumably destination then
// source; confirm against CALL_TEST_NEON_HELPER_2OPIMM.
#define DEFINE_TEST_NEON_2OPIMM_NARROW(mnemonic, input, input_imm) \
  TEST(mnemonic##_8B_2OPIMM) {                                     \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                         \
                                 8B,                               \
                                 8H,                               \
                                 kInput16bits##input,              \
                                 kInput8bitsImm##input_imm);       \
  }                                                                \
  TEST(mnemonic##_4H_2OPIMM) {                                     \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                         \
                                 4H,                               \
                                 4S,                               \
                                 kInput32bits##input,              \
                                 kInput16bitsImm##input_imm);      \
  }                                                                \
  TEST(mnemonic##_2S_2OPIMM) {                                     \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                         \
                                 2S,                               \
                                 2D,                               \
                                 kInput64bits##input,              \
                                 kInput32bitsImm##input_imm);      \
  }                                                                \
  TEST(mnemonic##2_16B_2OPIMM) {                                   \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic##2,                      \
                                 16B,                              \
                                 8H,                               \
                                 kInput16bits##input,              \
                                 kInput8bitsImm##input_imm);       \
  }                                                                \
  TEST(mnemonic##2_8H_2OPIMM) {                                    \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic##2,                      \
                                 8H,                               \
                                 4S,                               \
                                 kInput32bits##input,              \
                                 kInput16bitsImm##input_imm);      \
  }                                                                \
  TEST(mnemonic##2_4S_2OPIMM) {                                    \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic##2,                      \
                                 4S,                               \
                                 2D,                               \
                                 kInput64bits##input,              \
                                 kInput32bitsImm##input_imm);      \
  }
4050 
// Scalar counterpart of the narrowing 2OPIMM tests: defines three tests with
// format pairs B/H, H/S and S/D.
//   mnemonic:  instruction name; also the prefix of every generated TEST name.
//   input:     suffix pasted onto kInput{16,32,64}bits (sized by the wider,
//              third format argument).
//   input_imm: suffix pasted onto kInput{8,16,32}bitsImm (sized by the
//              narrower, second format argument).
#define DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(mnemonic, input, input_imm) \
  TEST(mnemonic##_B_2OPIMM) {                                             \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                                \
                                 B,                                       \
                                 H,                                       \
                                 kInput16bits##input,                     \
                                 kInput8bitsImm##input_imm);              \
  }                                                                       \
  TEST(mnemonic##_H_2OPIMM) {                                             \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                                \
                                 H,                                       \
                                 S,                                       \
                                 kInput32bits##input,                     \
                                 kInput16bitsImm##input_imm);             \
  }                                                                       \
  TEST(mnemonic##_S_2OPIMM) {                                             \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                                \
                                 S,                                       \
                                 D,                                       \
                                 kInput64bits##input,                     \
                                 kInput32bitsImm##input_imm);             \
  }
4073 
// Defines five 2OPIMM tests (4H, 8H, 2S, 4S, 2D; both format arguments equal)
// that read floating-point inputs and take every immediate from
// kInputDoubleImm##input_imm.
//   mnemonic:  instruction name; also the prefix of every generated TEST name.
//   input:     suffix pasted onto kInputFloat16 (4H, 8H), kInputFloat (4S) and
//              kInputDouble (2D).
//   input_imm: suffix pasted onto kInputDoubleImm for all five tests.
// NOTE(review): the 2S test pastes the literal `Basic` (kInputFloatBasic)
// rather than `input`, so `input` has no effect on that test. Confirm whether
// this is intentional before changing it, since it selects a different input
// list.
#define DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(mnemonic, input, input_imm) \
  TEST(mnemonic##_4H_2OPIMM) {                                        \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                            \
                                 4H,                                  \
                                 4H,                                  \
                                 kInputFloat16##input,                \
                                 kInputDoubleImm##input_imm);         \
  }                                                                   \
  TEST(mnemonic##_8H_2OPIMM) {                                        \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                            \
                                 8H,                                  \
                                 8H,                                  \
                                 kInputFloat16##input,                \
                                 kInputDoubleImm##input_imm);         \
  }                                                                   \
  TEST(mnemonic##_2S_2OPIMM) {                                        \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                            \
                                 2S,                                  \
                                 2S,                                  \
                                 kInputFloat##Basic,                  \
                                 kInputDoubleImm##input_imm);         \
  }                                                                   \
  TEST(mnemonic##_4S_2OPIMM) {                                        \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                            \
                                 4S,                                  \
                                 4S,                                  \
                                 kInputFloat##input,                  \
                                 kInputDoubleImm##input_imm);         \
  }                                                                   \
  TEST(mnemonic##_2D_2OPIMM) {                                        \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                            \
                                 2D,                                  \
                                 2D,                                  \
                                 kInputDouble##input,                 \
                                 kInputDoubleImm##input_imm);         \
  }
4110 
// Defines five 2OPIMM tests (4H, 8H, 2S, 4S, 2D; both format arguments equal)
// that read floating-point inputs and integer immediates sized to the lane.
//   mnemonic:  instruction name; also the prefix of every generated TEST name.
//   input:     suffix pasted onto kInputFloat16 (4H, 8H), kInputFloat (4S) and
//              kInputDouble (2D).
//   input_imm: suffix pasted onto kInput16bitsImm (4H, 8H), kInput32bitsImm
//              (2S, 4S) and kInput64bitsImm (2D).
// NOTE(review): the 2S test pastes the literal `Basic` (kInputFloatBasic)
// rather than `input`, so `input` has no effect on that test. Confirm whether
// this is intentional before changing it, since it selects a different input
// list.
#define DEFINE_TEST_NEON_2OPIMM_FP(mnemonic, input, input_imm) \
  TEST(mnemonic##_4H_2OPIMM) {                                 \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                     \
                                 4H,                           \
                                 4H,                           \
                                 kInputFloat16##input,         \
                                 kInput16bitsImm##input_imm);  \
  }                                                            \
  TEST(mnemonic##_8H_2OPIMM) {                                 \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                     \
                                 8H,                           \
                                 8H,                           \
                                 kInputFloat16##input,         \
                                 kInput16bitsImm##input_imm);  \
  }                                                            \
  TEST(mnemonic##_2S_2OPIMM) {                                 \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                     \
                                 2S,                           \
                                 2S,                           \
                                 kInputFloat##Basic,           \
                                 kInput32bitsImm##input_imm);  \
  }                                                            \
  TEST(mnemonic##_4S_2OPIMM) {                                 \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                     \
                                 4S,                           \
                                 4S,                           \
                                 kInputFloat##input,           \
                                 kInput32bitsImm##input_imm);  \
  }                                                            \
  TEST(mnemonic##_2D_2OPIMM) {                                 \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                     \
                                 2D,                           \
                                 2D,                           \
                                 kInputDouble##input,          \
                                 kInput64bitsImm##input_imm);  \
  }
4147 
// Defines three scalar 2OPIMM tests (H, S, D; both format arguments equal)
// that read floating-point inputs and integer immediates sized to the lane.
//   mnemonic:  instruction name; also the prefix of every generated TEST name.
//   input:     suffix pasted onto kInputDouble for the D test only.
//   input_imm: suffix pasted onto kInput16bitsImm (H), kInput32bitsImm (S) and
//              kInput64bitsImm (D).
// NOTE(review): the H and S tests paste the literal `Basic`
// (kInputFloat16Basic, kInputFloatBasic) rather than `input`, so `input` only
// affects the D test. Confirm whether this is intentional before changing it,
// since it selects different input lists.
#define DEFINE_TEST_NEON_2OPIMM_FP_SCALAR(mnemonic, input, input_imm) \
  TEST(mnemonic##_H_2OPIMM) {                                         \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                            \
                                 H,                                   \
                                 H,                                   \
                                 kInputFloat16##Basic,                \
                                 kInput16bitsImm##input_imm);         \
  }                                                                   \
  TEST(mnemonic##_S_2OPIMM) {                                         \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                            \
                                 S,                                   \
                                 S,                                   \
                                 kInputFloat##Basic,                  \
                                 kInput32bitsImm##input_imm);         \
  }                                                                   \
  TEST(mnemonic##_D_2OPIMM) {                                         \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                            \
                                 D,                                   \
                                 D,                                   \
                                 kInputDouble##input,                 \
                                 kInput64bitsImm##input_imm);         \
  }
4170 
// Defines five integer 2OPIMM tests for the H, S and D lane sizes: 4H, 8H, 2S,
// 4S and 2D, with both format arguments equal.
//   mnemonic:  instruction name; also the prefix of every generated TEST name.
//   input:     suffix pasted onto kInput{16,32,64}bits to pick the input list.
//   input_imm: suffix pasted onto kInput{16,32,64}bitsImm to pick the
//              immediate list.
#define DEFINE_TEST_NEON_2OPIMM_HSD(mnemonic, input, input_imm) \
  TEST(mnemonic##_4H_2OPIMM) {                                  \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                      \
                                 4H,                            \
                                 4H,                            \
                                 kInput16bits##input,           \
                                 kInput16bitsImm##input_imm);   \
  }                                                             \
  TEST(mnemonic##_8H_2OPIMM) {                                  \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                      \
                                 8H,                            \
                                 8H,                            \
                                 kInput16bits##input,           \
                                 kInput16bitsImm##input_imm);   \
  }                                                             \
  TEST(mnemonic##_2S_2OPIMM) {                                  \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                      \
                                 2S,                            \
                                 2S,                            \
                                 kInput32bits##input,           \
                                 kInput32bitsImm##input_imm);   \
  }                                                             \
  TEST(mnemonic##_4S_2OPIMM) {                                  \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                      \
                                 4S,                            \
                                 4S,                            \
                                 kInput32bits##input,           \
                                 kInput32bitsImm##input_imm);   \
  }                                                             \
  TEST(mnemonic##_2D_2OPIMM) {                                  \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                      \
                                 2D,                            \
                                 2D,                            \
                                 kInput64bits##input,           \
                                 kInput64bitsImm##input_imm);   \
  }
4207 
// Defines the single scalar D-sized integer 2OPIMM test, mnemonic##_D_2OPIMM.
//   mnemonic:  instruction name; also the prefix of the generated TEST name.
//   input:     suffix pasted onto kInput64bits to pick the input list.
//   input_imm: suffix pasted onto kInput64bitsImm to pick the immediate list.
#define DEFINE_TEST_NEON_2OPIMM_SCALAR_D(mnemonic, input, input_imm) \
  TEST(mnemonic##_D_2OPIMM) {                                        \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                           \
                                 D,                                  \
                                 D,                                  \
                                 kInput64bits##input,                \
                                 kInput64bitsImm##input_imm);        \
  }
4216 
// Defines the scalar H and S integer 2OPIMM tests, then delegates the D test
// to DEFINE_TEST_NEON_2OPIMM_SCALAR_D with the same three arguments.
//   mnemonic:  instruction name; also the prefix of every generated TEST name.
//   input:     suffix pasted onto kInput{16,32}bits here; forwarded unchanged
//              for the D test.
//   input_imm: suffix pasted onto kInput{16,32}bitsImm here; forwarded
//              unchanged for the D test.
#define DEFINE_TEST_NEON_2OPIMM_SCALAR_HSD(mnemonic, input, input_imm) \
  TEST(mnemonic##_H_2OPIMM) {                                          \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                             \
                                 H,                                    \
                                 H,                                    \
                                 kInput16bits##input,                  \
                                 kInput16bitsImm##input_imm);          \
  }                                                                    \
  TEST(mnemonic##_S_2OPIMM) {                                          \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                             \
                                 S,                                    \
                                 S,                                    \
                                 kInput32bits##input,                  \
                                 kInput32bitsImm##input_imm);          \
  }                                                                    \
  DEFINE_TEST_NEON_2OPIMM_SCALAR_D(mnemonic, input, input_imm)
4233 
// Defines the single scalar D-sized floating-point 2OPIMM test,
// mnemonic##_D_2OPIMM, whose immediate list is a kInputDoubleImm array.
//   mnemonic:  instruction name; also the prefix of the generated TEST name.
//   input:     suffix pasted onto kInputDouble to pick the input list.
//   input_imm: suffix pasted onto kInputDoubleImm to pick the immediate list.
#define DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_D(mnemonic, input, input_imm) \
  TEST(mnemonic##_D_2OPIMM) {                                           \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                              \
                                 D,                                     \
                                 D,                                     \
                                 kInputDouble##input,                   \
                                 kInputDoubleImm##input_imm);           \
  }
4242 
// Defines the scalar H and S floating-point 2OPIMM tests (immediates from
// kInputDoubleImm##input_imm), then delegates the D test to
// DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_D with the same three arguments.
//   mnemonic:  instruction name; also the prefix of every generated TEST name.
//   input:     suffix pasted onto kInputFloat16 (H) and kInputFloat (S);
//              forwarded unchanged for the D test.
//   input_imm: suffix pasted onto kInputDoubleImm; forwarded unchanged for the
//              D test.
#define DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_HSD(mnemonic, input, input_imm) \
  TEST(mnemonic##_H_2OPIMM) {                                             \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                                \
                                 H,                                       \
                                 H,                                       \
                                 kInputFloat16##input,                    \
                                 kInputDoubleImm##input_imm);             \
  }                                                                       \
  TEST(mnemonic##_S_2OPIMM) {                                             \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                                \
                                 S,                                       \
                                 S,                                       \
                                 kInputFloat##input,                      \
                                 kInputDoubleImm##input_imm);             \
  }                                                                       \
  DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_D(mnemonic, input, input_imm)
4259 
// Defines the scalar B integer 2OPIMM test, then delegates the H, S and D
// tests to DEFINE_TEST_NEON_2OPIMM_SCALAR_HSD with the same three arguments.
//   mnemonic:  instruction name; also the prefix of every generated TEST name.
//   input:     suffix pasted onto kInput8bits here; forwarded unchanged for
//              the wider tests.
//   input_imm: suffix pasted onto kInput8bitsImm here; forwarded unchanged for
//              the wider tests.
#define DEFINE_TEST_NEON_2OPIMM_SCALAR(mnemonic, input, input_imm) \
  TEST(mnemonic##_B_2OPIMM) {                                      \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                         \
                                 B,                                \
                                 B,                                \
                                 kInput8bits##input,               \
                                 kInput8bitsImm##input_imm);       \
  }                                                                \
  DEFINE_TEST_NEON_2OPIMM_SCALAR_HSD(mnemonic, input, input_imm)
4269 
// Defines six 2OPIMM tests in which the second format argument is twice the
// lane width of the third: 8H/8B, 4S/4H and 2D/2S for `mnemonic`, followed by
// 8H/16B, 4S/8H and 2D/4S for `mnemonic##2`.
//   mnemonic:  instruction name; also the prefix of every generated TEST name.
//   input:     suffix pasted onto kInput{8,16,32}bits, i.e. the input list is
//              chosen by the narrower (third) format.
//   input_imm: suffix pasted onto kInput{8,16,32}bitsImm, likewise chosen by
//              the narrower (third) format.
// NOTE(review): the two format arguments are presumably destination then
// source; confirm against CALL_TEST_NEON_HELPER_2OPIMM.
#define DEFINE_TEST_NEON_2OPIMM_LONG(mnemonic, input, input_imm) \
  TEST(mnemonic##_8H_2OPIMM) {                                   \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                       \
                                 8H,                             \
                                 8B,                             \
                                 kInput8bits##input,             \
                                 kInput8bitsImm##input_imm);     \
  }                                                              \
  TEST(mnemonic##_4S_2OPIMM) {                                   \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                       \
                                 4S,                             \
                                 4H,                             \
                                 kInput16bits##input,            \
                                 kInput16bitsImm##input_imm);    \
  }                                                              \
  TEST(mnemonic##_2D_2OPIMM) {                                   \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                       \
                                 2D,                             \
                                 2S,                             \
                                 kInput32bits##input,            \
                                 kInput32bitsImm##input_imm);    \
  }                                                              \
  TEST(mnemonic##2_8H_2OPIMM) {                                  \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic##2,                    \
                                 8H,                             \
                                 16B,                            \
                                 kInput8bits##input,             \
                                 kInput8bitsImm##input_imm);     \
  }                                                              \
  TEST(mnemonic##2_4S_2OPIMM) {                                  \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic##2,                    \
                                 4S,                             \
                                 8H,                             \
                                 kInput16bits##input,            \
                                 kInput16bitsImm##input_imm);    \
  }                                                              \
  TEST(mnemonic##2_2D_2OPIMM) {                                  \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic##2,                    \
                                 2D,                             \
                                 4S,                             \
                                 kInput32bits##input,            \
                                 kInput32bitsImm##input_imm);    \
  }
4313 
// Upper-case alias for CALL_TEST_NEON_HELPER_ByElement_Dot_Product: forwards
// all nine arguments unchanged and in order, wrapping the call in its own
// brace-delimited block.
#define CALL_TEST_NEON_HELPER_BYELEMENT_DOT_PRODUCT(mnemonic,           \
                                                    vdform,             \
                                                    vnform,             \
                                                    vmform,             \
                                                    input_d,            \
                                                    input_n,            \
                                                    input_m,            \
                                                    indices,            \
                                                    vm_subvector_count) \
  {                                                                     \
    CALL_TEST_NEON_HELPER_ByElement_Dot_Product(mnemonic,               \
                                                vdform,                 \
                                                vnform,                 \
                                                vmform,                 \
                                                input_d,                \
                                                input_n,                \
                                                input_m,                \
                                                indices,                \
                                                vm_subvector_count);    \
  }
4334 
// Defines the two by-element dot-product tests, mnemonic##_2S_8B_B and
// mnemonic##_4S_16B_B. Both pass kInputSIndices as the index list and 4 as
// vm_subvector_count.
//   mnemonic: instruction name; also the prefix of every generated TEST name.
//   input_d:  suffix pasted onto kInput32bits (the vdform operand's inputs).
//   input_n:  suffix pasted onto kInput8bits (the vnform operand's inputs).
//   input_m:  suffix pasted onto kInput8bits (the vmform operand's inputs).
#define DEFINE_TEST_NEON_BYELEMENT_DOT_PRODUCT(mnemonic,               \
                                               input_d,                \
                                               input_n,                \
                                               input_m)                \
  TEST(mnemonic##_2S_8B_B) {                                           \
    CALL_TEST_NEON_HELPER_BYELEMENT_DOT_PRODUCT(mnemonic,              \
                                                2S,                    \
                                                8B,                    \
                                                B,                     \
                                                kInput32bits##input_d, \
                                                kInput8bits##input_n,  \
                                                kInput8bits##input_m,  \
                                                kInputSIndices,        \
                                                4);                    \
  }                                                                    \
  TEST(mnemonic##_4S_16B_B) {                                          \
    CALL_TEST_NEON_HELPER_BYELEMENT_DOT_PRODUCT(mnemonic,              \
                                                4S,                    \
                                                16B,                   \
                                                B,                     \
                                                kInput32bits##input_d, \
                                                kInput8bits##input_n,  \
                                                kInput8bits##input_m,  \
                                                kInputSIndices,        \
                                                4);                    \
  }
4361 
// Upper-case alias for CALL_TEST_NEON_HELPER_ByElement: forwards all eight
// arguments unchanged and in order, wrapping the call in its own
// brace-delimited block.
#define CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, \
                                        vdform,   \
                                        vnform,   \
                                        vmform,   \
                                        input_d,  \
                                        input_n,  \
                                        input_m,  \
                                        indices)  \
  {                                               \
    CALL_TEST_NEON_HELPER_ByElement(mnemonic,     \
                                    vdform,       \
                                    vnform,       \
                                    vmform,       \
                                    input_d,      \
                                    input_n,      \
                                    input_m,      \
                                    indices);     \
  }
4380 
// Defines four integer by-element tests: 4H/4H/H and 8H/8H/H (index list
// kInputHIndices), then 2S/2S/S and 4S/4S/S (index list kInputSIndices).
//   mnemonic: instruction name; also the prefix of every generated TEST name.
//   input_d:  suffix pasted onto kInput{16,32}bits for the vdform operand.
//   input_n:  suffix pasted onto kInput{16,32}bits for the vnform operand.
//   input_m:  suffix pasted onto kInput{16,32}bits for the vmform operand.
#define DEFINE_TEST_NEON_BYELEMENT(mnemonic, input_d, input_n, input_m) \
  TEST(mnemonic##_4H_4H_H) {                                            \
    CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic,                           \
                                    4H,                                 \
                                    4H,                                 \
                                    H,                                  \
                                    kInput16bits##input_d,              \
                                    kInput16bits##input_n,              \
                                    kInput16bits##input_m,              \
                                    kInputHIndices);                    \
  }                                                                     \
  TEST(mnemonic##_8H_8H_H) {                                            \
    CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic,                           \
                                    8H,                                 \
                                    8H,                                 \
                                    H,                                  \
                                    kInput16bits##input_d,              \
                                    kInput16bits##input_n,              \
                                    kInput16bits##input_m,              \
                                    kInputHIndices);                    \
  }                                                                     \
  TEST(mnemonic##_2S_2S_S) {                                            \
    CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic,                           \
                                    2S,                                 \
                                    2S,                                 \
                                    S,                                  \
                                    kInput32bits##input_d,              \
                                    kInput32bits##input_n,              \
                                    kInput32bits##input_m,              \
                                    kInputSIndices);                    \
  }                                                                     \
  TEST(mnemonic##_4S_4S_S) {                                            \
    CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic,                           \
                                    4S,                                 \
                                    4S,                                 \
                                    S,                                  \
                                    kInput32bits##input_d,              \
                                    kInput32bits##input_n,              \
                                    kInput32bits##input_m,              \
                                    kInputSIndices);                    \
  }
4422 
// Defines two scalar integer by-element tests: H/H/H (index list
// kInputHIndices) and S/S/S (index list kInputSIndices).
//   mnemonic: instruction name; also the prefix of every generated TEST name.
//   input_d:  suffix pasted onto kInput{16,32}bits for the vdform operand.
//   input_n:  suffix pasted onto kInput{16,32}bits for the vnform operand.
//   input_m:  suffix pasted onto kInput{16,32}bits for the vmform operand.
#define DEFINE_TEST_NEON_BYELEMENT_SCALAR(mnemonic, input_d, input_n, input_m) \
  TEST(mnemonic##_H_H_H) {                                                     \
    CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic,                                  \
                                    H,                                         \
                                    H,                                         \
                                    H,                                         \
                                    kInput16bits##input_d,                     \
                                    kInput16bits##input_n,                     \
                                    kInput16bits##input_m,                     \
                                    kInputHIndices);                           \
  }                                                                            \
  TEST(mnemonic##_S_S_S) {                                                     \
    CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic,                                  \
                                    S,                                         \
                                    S,                                         \
                                    S,                                         \
                                    kInput32bits##input_d,                     \
                                    kInput32bits##input_n,                     \
                                    kInput32bits##input_m,                     \
                                    kInputSIndices);                           \
  }
4444 
4445 #define DEFINE_TEST_NEON_FP_BYELEMENT(mnemonic, input_d, input_n, input_m) \
4446   TEST(mnemonic##_4H_4H_H) {                                               \
4447     CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic,                              \
4448                                     4H,                                    \
4449                                     4H,                                    \
4450                                     H,                                     \
4451                                     kInputFloat16##input_d,                \
4452                                     kInputFloat16##input_n,                \
4453                                     kInputFloat16##input_m,                \
4454                                     kInputHIndices);                       \
4455   }                                                                        \
4456   TEST(mnemonic##_8H_8H_H) {                                               \
4457     CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic,                              \
4458                                     8H,                                    \
4459                                     8H,                                    \
4460                                     H,                                     \
4461                                     kInputFloat16##input_d,                \
4462                                     kInputFloat16##input_n,                \
4463                                     kInputFloat16##input_m,                \
4464                                     kInputHIndices);                       \
4465   }                                                                        \
4466   TEST(mnemonic##_2S_2S_S) {                                               \
4467     CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic,                              \
4468                                     2S,                                    \
4469                                     2S,                                    \
4470                                     S,                                     \
4471                                     kInputFloat##input_d,                  \
4472                                     kInputFloat##input_n,                  \
4473                                     kInputFloat##input_m,                  \
4474                                     kInputSIndices);                       \
4475   }                                                                        \
4476   TEST(mnemonic##_4S_4S_S) {                                               \
4477     CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic,                              \
4478                                     4S,                                    \
4479                                     4S,                                    \
4480                                     S,                                     \
4481                                     kInputFloat##input_d,                  \
4482                                     kInputFloat##input_n,                  \
4483                                     kInputFloat##input_m,                  \
4484                                     kInputSIndices);                       \
4485   }                                                                        \
4486   TEST(mnemonic##_2D_2D_D) {                                               \
4487     CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic,                              \
4488                                     2D,                                    \
4489                                     2D,                                    \
4490                                     D,                                     \
4491                                     kInputDouble##input_d,                 \
4492                                     kInputDouble##input_n,                 \
4493                                     kInputDouble##input_m,                 \
4494                                     kInputDIndices);                       \
4495   }
4496 
// FHM "by element" test generator.
//
// Emits <insn>_2S_2H_H and <insn>_4S_4H_H. Both sources come from
// kInputFloat16<set_n> / kInputFloat16<set_m> and are indexed with
// kInputHIndices. The destination input is always kInputFloatAccDestination:
// `set_d` is accepted so the signature matches the other BYELEMENT generators,
// but it is not referenced in the expansion.
4497 #define DEFINE_TEST_NEON_FHM_BYELEMENT(insn, set_d, set_n, set_m) \
4498   TEST(insn##_2S_2H_H) { \
4499     CALL_TEST_NEON_HELPER_BYELEMENT(insn, \
4500                                     2S, \
4501                                     2H, \
4502                                     H, \
4503                                     kInputFloatAccDestination, \
4504                                     kInputFloat16##set_n, \
4505                                     kInputFloat16##set_m, \
4506                                     kInputHIndices); \
4507   } \
4508   TEST(insn##_4S_4H_H) { \
4509     CALL_TEST_NEON_HELPER_BYELEMENT(insn, \
4510                                     4S, \
4511                                     4H, \
4512                                     H, \
4513                                     kInputFloatAccDestination, \
4514                                     kInputFloat16##set_n, \
4515                                     kInputFloat16##set_m, \
4516                                     kInputHIndices); \
4517   }
4518 
4519 #define DEFINE_TEST_NEON_FP_BYELEMENT_SCALAR(mnemonic, inp_d, inp_n, inp_m) \
4520   TEST(mnemonic##_H_H_H) {                                                  \
4521     CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic,                               \
4522                                     H,                                      \
4523                                     H,                                      \
4524                                     H,                                      \
4525                                     kInputFloat16##inp_d,                   \
4526                                     kInputFloat16##inp_n,                   \
4527                                     kInputFloat16##inp_m,                   \
4528                                     kInputHIndices);                        \
4529   }                                                                         \
4530   TEST(mnemonic##_S_S_S) {                                                  \
4531     CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic,                               \
4532                                     S,                                      \
4533                                     S,                                      \
4534                                     S,                                      \
4535                                     kInputFloat##inp_d,                     \
4536                                     kInputFloat##inp_n,                     \
4537                                     kInputFloat##inp_m,                     \
4538                                     kInputSIndices);                        \
4539   }                                                                         \
4540   TEST(mnemonic##_D_D_D) {                                                  \
4541     CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic,                               \
4542                                     D,                                      \
4543                                     D,                                      \
4544                                     D,                                      \
4545                                     kInputDouble##inp_d,                    \
4546                                     kInputDouble##inp_n,                    \
4547                                     kInputDouble##inp_m,                    \
4548                                     kInputDIndices);                        \
4549   }
4550 
4551 
4552 #define DEFINE_TEST_NEON_BYELEMENT_DIFF(mnemonic, input_d, input_n, input_m) \
4553   TEST(mnemonic##_4S_4H_H) {                                                 \
4554     CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic,                                \
4555                                     4S,                                      \
4556                                     4H,                                      \
4557                                     H,                                       \
4558                                     kInput32bits##input_d,                   \
4559                                     kInput16bits##input_n,                   \
4560                                     kInput16bits##input_m,                   \
4561                                     kInputHIndices);                         \
4562   }                                                                          \
4563   TEST(mnemonic##2_4S_8H_H) {                                                \
4564     CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic##2,                             \
4565                                     4S,                                      \
4566                                     8H,                                      \
4567                                     H,                                       \
4568                                     kInput32bits##input_d,                   \
4569                                     kInput16bits##input_n,                   \
4570                                     kInput16bits##input_m,                   \
4571                                     kInputHIndices);                         \
4572   }                                                                          \
4573   TEST(mnemonic##_2D_2S_S) {                                                 \
4574     CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic,                                \
4575                                     2D,                                      \
4576                                     2S,                                      \
4577                                     S,                                       \
4578                                     kInput64bits##input_d,                   \
4579                                     kInput32bits##input_n,                   \
4580                                     kInput32bits##input_m,                   \
4581                                     kInputSIndices);                         \
4582   }                                                                          \
4583   TEST(mnemonic##2_2D_4S_S) {                                                \
4584     CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic##2,                             \
4585                                     2D,                                      \
4586                                     4S,                                      \
4587                                     S,                                       \
4588                                     kInput64bits##input_d,                   \
4589                                     kInput32bits##input_n,                   \
4590                                     kInput32bits##input_m,                   \
4591                                     kInputSIndices);                         \
4592   }
4593 
// Scalar counterpart of the widening "by element" generator.
//
// Emits <insn>_S_H_H (32-bit destination inputs, 16-bit sources,
// kInputHIndices) and <insn>_D_S_S (64-bit destination inputs, 32-bit sources,
// kInputSIndices). set_d / set_n / set_m are pasted onto the kInput<N>bits
// prefixes to pick the input arrays.
4594 #define DEFINE_TEST_NEON_BYELEMENT_DIFF_SCALAR(insn, \
4595                                                set_d, \
4596                                                set_n, \
4597                                                set_m) \
4598   TEST(insn##_S_H_H) { \
4599     CALL_TEST_NEON_HELPER_BYELEMENT(insn, \
4600                                     S, \
4601                                     H, \
4602                                     H, \
4603                                     kInput32bits##set_d, \
4604                                     kInput16bits##set_n, \
4605                                     kInput16bits##set_m, \
4606                                     kInputHIndices); \
4607   } \
4608   TEST(insn##_D_S_S) { \
4609     CALL_TEST_NEON_HELPER_BYELEMENT(insn, \
4610                                     D, \
4611                                     S, \
4612                                     S, \
4613                                     kInput64bits##set_d, \
4614                                     kInput32bits##set_n, \
4615                                     kInput32bits##set_m, \
4616                                     kInputSIndices); \
4617   }
4618 
4619 
// Convenience wrapper around CALL_TEST_NEON_HELPER_OpImmOpImm for instructions
// whose two register operands share one format.
//
// It passes &MacroAssembler::<insn> and the bare mnemonic, repeats `form` for
// both format arguments, and forwards the two data arrays and the two
// immediate arrays unchanged. The expansion is wrapped in its own brace scope.
4620 #define CALL_TEST_NEON_HELPER_2OP2IMM(insn, \
4621                                       form, \
4622                                       data_d, \
4623                                       imm_set1, \
4624                                       data_n, \
4625                                       imm_set2) \
4626   { \
4627     CALL_TEST_NEON_HELPER_OpImmOpImm(&MacroAssembler::insn, \
4628                                      insn, \
4629                                      form, \
4630                                      form, \
4631                                      data_d, \
4632                                      imm_set1, \
4633                                      data_n, \
4634                                      imm_set2); \
4635   }
4636 
4637 #define DEFINE_TEST_NEON_2OP2IMM(mnemonic,                      \
4638                                  input_d,                       \
4639                                  input_imm1,                    \
4640                                  input_n,                       \
4641                                  input_imm2)                    \
4642   TEST(mnemonic##_B) {                                          \
4643     CALL_TEST_NEON_HELPER_2OP2IMM(mnemonic,                     \
4644                                   16B,                          \
4645                                   kInput8bits##input_d,         \
4646                                   kInput8bitsImm##input_imm1,   \
4647                                   kInput8bits##input_n,         \
4648                                   kInput8bitsImm##input_imm2);  \
4649   }                                                             \
4650   TEST(mnemonic##_H) {                                          \
4651     CALL_TEST_NEON_HELPER_2OP2IMM(mnemonic,                     \
4652                                   8H,                           \
4653                                   kInput16bits##input_d,        \
4654                                   kInput16bitsImm##input_imm1,  \
4655                                   kInput16bits##input_n,        \
4656                                   kInput16bitsImm##input_imm2); \
4657   }                                                             \
4658   TEST(mnemonic##_S) {                                          \
4659     CALL_TEST_NEON_HELPER_2OP2IMM(mnemonic,                     \
4660                                   4S,                           \
4661                                   kInput32bits##input_d,        \
4662                                   kInput32bitsImm##input_imm1,  \
4663                                   kInput32bits##input_n,        \
4664                                   kInput32bitsImm##input_imm2); \
4665   }                                                             \
4666   TEST(mnemonic##_D) {                                          \
4667     CALL_TEST_NEON_HELPER_2OP2IMM(mnemonic,                     \
4668                                   2D,                           \
4669                                   kInput64bits##input_d,        \
4670                                   kInput64bitsImm##input_imm1,  \
4671                                   kInput64bits##input_n,        \
4672                                   kInput64bitsImm##input_imm2); \
4673   }
4674 
4675 
4676 // Advanced SIMD copy.
// `ins` goes through DEFINE_TEST_NEON_2OP2IMM, which emits one TEST per lane
// size (_B, _H, _S, _D); both register inputs use the "Basic" suffix and both
// immediates the "LaneCountFromZero" suffix. DEFINE_TEST_NEON_2OPIMM_COPY is
// defined elsewhere.
4677 DEFINE_TEST_NEON_2OP2IMM(
4678     ins, Basic, LaneCountFromZero, Basic, LaneCountFromZero)
4679 DEFINE_TEST_NEON_2OPIMM_COPY(dup, Basic, LaneCountFromZero)
4680 
4681 
4682 // Advanced SIMD scalar copy.
// Scalar `dup`, using the "Basic" input suffix and the "LaneCountFromZero"
// immediate suffix. DEFINE_TEST_NEON_2OPIMM_SCALAR is defined elsewhere.
4683 DEFINE_TEST_NEON_2OPIMM_SCALAR(dup, Basic, LaneCountFromZero)
4684 
4685 
4686 // Advanced SIMD three same.
// One generator invocation per instruction; every entry uses the "Basic"
// input-set suffix. The generator macros are defined elsewhere; presumably the
// macro-name suffix (_NO2D, _HS, _FP, _8B_16B, ...) selects which vector
// arrangements get a TEST. `and_` carries a trailing underscore because `and`
// is a reserved alternative token in C++.
4687 DEFINE_TEST_NEON_3SAME_NO2D(shadd, Basic)
4688 DEFINE_TEST_NEON_3SAME(sqadd, Basic)
4689 DEFINE_TEST_NEON_3SAME_NO2D(srhadd, Basic)
4690 DEFINE_TEST_NEON_3SAME_NO2D(shsub, Basic)
4691 DEFINE_TEST_NEON_3SAME(sqsub, Basic)
4692 DEFINE_TEST_NEON_3SAME(cmgt, Basic)
4693 DEFINE_TEST_NEON_3SAME(cmge, Basic)
4694 DEFINE_TEST_NEON_3SAME(sshl, Basic)
4695 DEFINE_TEST_NEON_3SAME(sqshl, Basic)
4696 DEFINE_TEST_NEON_3SAME(srshl, Basic)
4697 DEFINE_TEST_NEON_3SAME(sqrshl, Basic)
4698 DEFINE_TEST_NEON_3SAME_NO2D(smax, Basic)
4699 DEFINE_TEST_NEON_3SAME_NO2D(smin, Basic)
4700 DEFINE_TEST_NEON_3SAME_NO2D(sabd, Basic)
4701 DEFINE_TEST_NEON_3SAME_NO2D(saba, Basic)
4702 DEFINE_TEST_NEON_3SAME(add, Basic)
4703 DEFINE_TEST_NEON_3SAME(cmtst, Basic)
4704 DEFINE_TEST_NEON_3SAME_NO2D(mla, Basic)
4705 DEFINE_TEST_NEON_3SAME_NO2D(mul, Basic)
4706 DEFINE_TEST_NEON_3SAME_NO2D(smaxp, Basic)
4707 DEFINE_TEST_NEON_3SAME_NO2D(sminp, Basic)
4708 DEFINE_TEST_NEON_3SAME_HS(sqdmulh, Basic)
4709 DEFINE_TEST_NEON_3SAME(addp, Basic)
4710 DEFINE_TEST_NEON_3SAME_FP(fmaxnm, Basic)
4711 DEFINE_TEST_NEON_3SAME_FP(fmla, Basic)
4712 DEFINE_TEST_NEON_3SAME_FP(fadd, Basic)
4713 DEFINE_TEST_NEON_3SAME_FP(fmulx, Basic)
4714 DEFINE_TEST_NEON_3SAME_FP(fcmeq, Basic)
4715 DEFINE_TEST_NEON_3SAME_FP(fmax, Basic)
4716 DEFINE_TEST_NEON_3SAME_FP(frecps, Basic)
4717 DEFINE_TEST_NEON_3SAME_8B_16B(and_, Basic)
4718 DEFINE_TEST_NEON_3SAME_8B_16B(bic, Basic)
4719 DEFINE_TEST_NEON_3SAME_FP(fminnm, Basic)
4720 DEFINE_TEST_NEON_3SAME_FP(fmls, Basic)
4721 DEFINE_TEST_NEON_3SAME_FP(fsub, Basic)
4722 DEFINE_TEST_NEON_3SAME_FP(fmin, Basic)
4723 DEFINE_TEST_NEON_3SAME_FP(frsqrts, Basic)
4724 DEFINE_TEST_NEON_3SAME_8B_16B(orr, Basic)
4725 DEFINE_TEST_NEON_3SAME_8B_16B(orn, Basic)
4726 DEFINE_TEST_NEON_3SAME_NO2D(uhadd, Basic)
4727 DEFINE_TEST_NEON_3SAME(uqadd, Basic)
4728 DEFINE_TEST_NEON_3SAME_NO2D(urhadd, Basic)
4729 DEFINE_TEST_NEON_3SAME_NO2D(uhsub, Basic)
4730 DEFINE_TEST_NEON_3SAME(uqsub, Basic)
4731 DEFINE_TEST_NEON_3SAME(cmhi, Basic)
4732 DEFINE_TEST_NEON_3SAME(cmhs, Basic)
4733 DEFINE_TEST_NEON_3SAME(ushl, Basic)
4734 DEFINE_TEST_NEON_3SAME(uqshl, Basic)
4735 DEFINE_TEST_NEON_3SAME(urshl, Basic)
4736 DEFINE_TEST_NEON_3SAME(uqrshl, Basic)
4737 DEFINE_TEST_NEON_3SAME_NO2D(umax, Basic)
4738 DEFINE_TEST_NEON_3SAME_NO2D(umin, Basic)
4739 DEFINE_TEST_NEON_3SAME_NO2D(uabd, Basic)
4740 DEFINE_TEST_NEON_3SAME_NO2D(uaba, Basic)
4741 DEFINE_TEST_NEON_3SAME(sub, Basic)
4742 DEFINE_TEST_NEON_3SAME(cmeq, Basic)
4743 DEFINE_TEST_NEON_3SAME_NO2D(mls, Basic)
4744 DEFINE_TEST_NEON_3SAME_8B_16B(pmul, Basic)
4745 DEFINE_TEST_NEON_3SAME_NO2D(uminp, Basic)
4746 DEFINE_TEST_NEON_3SAME_NO2D(umaxp, Basic)
4747 DEFINE_TEST_NEON_3SAME_HS(sqrdmulh, Basic)
4748 DEFINE_TEST_NEON_3SAME_HS(sqrdmlah, Basic)
4749 DEFINE_TEST_NEON_3SAME_HS(sqrdmlsh, Basic)
4750 DEFINE_TEST_NEON_3DIFF_DOUBLE_WIDE(udot, Basic)
4751 DEFINE_TEST_NEON_3DIFF_DOUBLE_WIDE(sdot, Basic)
4752 DEFINE_TEST_NEON_3SAME_FP(fmaxnmp, Basic)
4753 DEFINE_TEST_NEON_3SAME_FP(faddp, Basic)
4754 DEFINE_TEST_NEON_3SAME_FP(fmul, Basic)
4755 DEFINE_TEST_NEON_3SAME_FP(fcmge, Basic)
4756 DEFINE_TEST_NEON_3SAME_FP(facge, Basic)
4757 DEFINE_TEST_NEON_3SAME_FP(fmaxp, Basic)
4758 DEFINE_TEST_NEON_3SAME_FP(fdiv, Basic)
4759 DEFINE_TEST_NEON_3SAME_8B_16B(eor, Basic)
4760 DEFINE_TEST_NEON_3SAME_8B_16B(bsl, Basic)
4761 DEFINE_TEST_NEON_3SAME_FP(fminnmp, Basic)
4762 DEFINE_TEST_NEON_3SAME_FP(fabd, Basic)
4763 DEFINE_TEST_NEON_3SAME_FP(fcmgt, Basic)
4764 DEFINE_TEST_NEON_3SAME_FP(facgt, Basic)
4765 DEFINE_TEST_NEON_3SAME_FP(fminp, Basic)
4766 DEFINE_TEST_NEON_3SAME_8B_16B(bit, Basic)
4767 DEFINE_TEST_NEON_3SAME_8B_16B(bif, Basic)
4768 
4769 
4770 // Advanced SIMD scalar three same.
// Scalar forms of the three-same instructions, all with the "Basic" input-set
// suffix. The generators are defined elsewhere; presumably the _D / _HS
// suffixes restrict the register sizes tested and the unsuffixed
// DEFINE_TEST_NEON_3SAME_SCALAR covers every size.
4771 DEFINE_TEST_NEON_3SAME_SCALAR(sqadd, Basic)
4772 DEFINE_TEST_NEON_3SAME_SCALAR(sqsub, Basic)
4773 DEFINE_TEST_NEON_3SAME_SCALAR_D(cmgt, Basic)
4774 DEFINE_TEST_NEON_3SAME_SCALAR_D(cmge, Basic)
4775 DEFINE_TEST_NEON_3SAME_SCALAR_D(sshl, Basic)
4776 DEFINE_TEST_NEON_3SAME_SCALAR(sqshl, Basic)
4777 DEFINE_TEST_NEON_3SAME_SCALAR_D(srshl, Basic)
4778 DEFINE_TEST_NEON_3SAME_SCALAR(sqrshl, Basic)
4779 DEFINE_TEST_NEON_3SAME_SCALAR_D(add, Basic)
4780 DEFINE_TEST_NEON_3SAME_SCALAR_D(cmtst, Basic)
4781 DEFINE_TEST_NEON_3SAME_SCALAR_HS(sqdmulh, Basic)
4782 DEFINE_TEST_NEON_3SAME_FP_SCALAR(fmulx, Basic)
4783 DEFINE_TEST_NEON_3SAME_FP_SCALAR(fcmeq, Basic)
4784 DEFINE_TEST_NEON_3SAME_FP_SCALAR(frecps, Basic)
4785 DEFINE_TEST_NEON_3SAME_FP_SCALAR(frsqrts, Basic)
4786 DEFINE_TEST_NEON_3SAME_SCALAR_D(uqadd, Basic)
4787 DEFINE_TEST_NEON_3SAME_SCALAR_D(uqsub, Basic)
4788 DEFINE_TEST_NEON_3SAME_SCALAR_D(cmhi, Basic)
4789 DEFINE_TEST_NEON_3SAME_SCALAR_D(cmhs, Basic)
4790 DEFINE_TEST_NEON_3SAME_SCALAR_D(ushl, Basic)
4791 DEFINE_TEST_NEON_3SAME_SCALAR(uqshl, Basic)
4792 DEFINE_TEST_NEON_3SAME_SCALAR_D(urshl, Basic)
4793 DEFINE_TEST_NEON_3SAME_SCALAR(uqrshl, Basic)
4794 DEFINE_TEST_NEON_3SAME_SCALAR_D(sub, Basic)
4795 DEFINE_TEST_NEON_3SAME_SCALAR_D(cmeq, Basic)
4796 DEFINE_TEST_NEON_3SAME_SCALAR_HS(sqrdmulh, Basic)
4797 DEFINE_TEST_NEON_3SAME_SCALAR_HS(sqrdmlah, Basic)
4798 DEFINE_TEST_NEON_3SAME_SCALAR_HS(sqrdmlsh, Basic)
4799 DEFINE_TEST_NEON_3SAME_FP_SCALAR(fcmge, Basic)
4800 DEFINE_TEST_NEON_3SAME_FP_SCALAR(facge, Basic)
4801 DEFINE_TEST_NEON_3SAME_FP_SCALAR(fabd, Basic)
4802 DEFINE_TEST_NEON_3SAME_FP_SCALAR(fcmgt, Basic)
4803 DEFINE_TEST_NEON_3SAME_FP_SCALAR(facgt, Basic)
4804 
4805 
4806 // Advanced SIMD FHM instructions (FMLAL, FMLSL).
4807 // These are oddballs: they are encoded under the 3SAME group but behave
4808 // quite differently.
// The "2" mnemonics are passed as separate instructions rather than derived
// by the generator. All three input-set suffixes are "Basic".
// NOTE(review): DEFINE_TEST_NEON_FHM is defined elsewhere; the order of its
// three suffix arguments is presumably (d, n, m) - confirm at the definition.
4809 DEFINE_TEST_NEON_FHM(fmlal, Basic, Basic, Basic)
4810 DEFINE_TEST_NEON_FHM(fmlal2, Basic, Basic, Basic)
4811 DEFINE_TEST_NEON_FHM(fmlsl, Basic, Basic, Basic)
4812 DEFINE_TEST_NEON_FHM(fmlsl2, Basic, Basic, Basic)
4813 
4814 
4815 // Advanced SIMD three different.
// Instructions whose operands do not all share one lane width, all tested with
// the "Basic" input-set suffix. The LONG / WIDE / NARROW generators (and the
// _SD / _8H restricted variants) are defined elsewhere.
4816 DEFINE_TEST_NEON_3DIFF_LONG(saddl, Basic)
4817 DEFINE_TEST_NEON_3DIFF_WIDE(saddw, Basic)
4818 DEFINE_TEST_NEON_3DIFF_LONG(ssubl, Basic)
4819 DEFINE_TEST_NEON_3DIFF_WIDE(ssubw, Basic)
4820 DEFINE_TEST_NEON_3DIFF_NARROW(addhn, Basic)
4821 DEFINE_TEST_NEON_3DIFF_LONG(sabal, Basic)
4822 DEFINE_TEST_NEON_3DIFF_NARROW(subhn, Basic)
4823 DEFINE_TEST_NEON_3DIFF_LONG(sabdl, Basic)
4824 DEFINE_TEST_NEON_3DIFF_LONG(smlal, Basic)
4825 DEFINE_TEST_NEON_3DIFF_LONG_SD(sqdmlal, Basic)
4826 DEFINE_TEST_NEON_3DIFF_LONG(smlsl, Basic)
4827 DEFINE_TEST_NEON_3DIFF_LONG_SD(sqdmlsl, Basic)
4828 DEFINE_TEST_NEON_3DIFF_LONG(smull, Basic)
4829 DEFINE_TEST_NEON_3DIFF_LONG_SD(sqdmull, Basic)
4830 DEFINE_TEST_NEON_3DIFF_LONG_8H(pmull, Basic)
4831 DEFINE_TEST_NEON_3DIFF_LONG(uaddl, Basic)
4832 DEFINE_TEST_NEON_3DIFF_WIDE(uaddw, Basic)
4833 DEFINE_TEST_NEON_3DIFF_LONG(usubl, Basic)
4834 DEFINE_TEST_NEON_3DIFF_WIDE(usubw, Basic)
4835 DEFINE_TEST_NEON_3DIFF_NARROW(raddhn, Basic)
4836 DEFINE_TEST_NEON_3DIFF_LONG(uabal, Basic)
4837 DEFINE_TEST_NEON_3DIFF_NARROW(rsubhn, Basic)
4838 DEFINE_TEST_NEON_3DIFF_LONG(uabdl, Basic)
4839 DEFINE_TEST_NEON_3DIFF_LONG(umlal, Basic)
4840 DEFINE_TEST_NEON_3DIFF_LONG(umlsl, Basic)
4841 DEFINE_TEST_NEON_3DIFF_LONG(umull, Basic)
4842 
4843 
4844 // Advanced SIMD scalar three different.
// Only the saturating doubling multiply family appears here, with the "Basic"
// input-set suffix. DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_SD is defined
// elsewhere.
4845 DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_SD(sqdmlal, Basic)
4846 DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_SD(sqdmlsl, Basic)
4847 DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_SD(sqdmull, Basic)
4848 
4849 
4850 // Advanced SIMD scalar pairwise.
// Scalar ADDP is written out by hand instead of through a generator macro:
// only one combination is exercised, with register-format tokens D and 2D and
// the kInput64bitsBasic input array.
// NOTE(review): the meaning of each CALL_TEST_NEON_HELPER_2DIFF argument is
// defined elsewhere; presumably D is the destination and 2D the source format.
4851 TEST(addp_SCALAR) {
4852   CALL_TEST_NEON_HELPER_2DIFF(addp, D, 2D, kInput64bitsBasic);
4853 }
DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(fmaxnmp,Basic)4854 DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(fmaxnmp, Basic)
4855 DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(faddp, Basic)
4856 DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(fmaxp, Basic)
4857 DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(fminnmp, Basic)
4858 DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(fminp, Basic)
4859 
4860 
4861 // Advanced SIMD shift by immediate.
// Arguments are (mnemonic, input-set suffix, immediate-set suffix). The
// immediate suffix varies by entry: "TypeWidth", "TypeWidthFromZero" or
// "TypeWidthFromZeroToWidth". The generators are defined elsewhere; presumably
// the immediate suffix is pasted onto a kInput<N>bitsImm prefix, as
// DEFINE_TEST_NEON_2OP2IMM does.
4862 DEFINE_TEST_NEON_2OPIMM(sshr, Basic, TypeWidth)
4863 DEFINE_TEST_NEON_2OPIMM(ssra, Basic, TypeWidth)
4864 DEFINE_TEST_NEON_2OPIMM(srshr, Basic, TypeWidth)
4865 DEFINE_TEST_NEON_2OPIMM(srsra, Basic, TypeWidth)
4866 DEFINE_TEST_NEON_2OPIMM(shl, Basic, TypeWidthFromZero)
4867 DEFINE_TEST_NEON_2OPIMM(sqshl, Basic, TypeWidthFromZero)
4868 DEFINE_TEST_NEON_2OPIMM_NARROW(shrn, Basic, TypeWidth)
4869 DEFINE_TEST_NEON_2OPIMM_NARROW(rshrn, Basic, TypeWidth)
4870 DEFINE_TEST_NEON_2OPIMM_NARROW(sqshrn, Basic, TypeWidth)
4871 DEFINE_TEST_NEON_2OPIMM_NARROW(sqrshrn, Basic, TypeWidth)
4872 DEFINE_TEST_NEON_2OPIMM_LONG(sshll, Basic, TypeWidthFromZero)
4873 DEFINE_TEST_NEON_2OPIMM_HSD(scvtf,
4874                             FixedPointConversions,
4875                             TypeWidthFromZeroToWidth)
4876 DEFINE_TEST_NEON_2OPIMM_FP(fcvtzs, Conversions, TypeWidthFromZeroToWidth)
4877 DEFINE_TEST_NEON_2OPIMM(ushr, Basic, TypeWidth)
4878 DEFINE_TEST_NEON_2OPIMM(usra, Basic, TypeWidth)
4879 DEFINE_TEST_NEON_2OPIMM(urshr, Basic, TypeWidth)
4880 DEFINE_TEST_NEON_2OPIMM(ursra, Basic, TypeWidth)
4881 DEFINE_TEST_NEON_2OPIMM(sri, Basic, TypeWidth)
4882 DEFINE_TEST_NEON_2OPIMM(sli, Basic, TypeWidthFromZero)
4883 DEFINE_TEST_NEON_2OPIMM(sqshlu, Basic, TypeWidthFromZero)
4884 DEFINE_TEST_NEON_2OPIMM(uqshl, Basic, TypeWidthFromZero)
4885 DEFINE_TEST_NEON_2OPIMM_NARROW(sqshrun, Basic, TypeWidth)
4886 DEFINE_TEST_NEON_2OPIMM_NARROW(sqrshrun, Basic, TypeWidth)
4887 DEFINE_TEST_NEON_2OPIMM_NARROW(uqshrn, Basic, TypeWidth)
4888 DEFINE_TEST_NEON_2OPIMM_NARROW(uqrshrn, Basic, TypeWidth)
4889 DEFINE_TEST_NEON_2OPIMM_LONG(ushll, Basic, TypeWidthFromZero)
4890 DEFINE_TEST_NEON_2OPIMM_HSD(ucvtf,
4891                             FixedPointConversions,
4892                             TypeWidthFromZeroToWidth)
4893 DEFINE_TEST_NEON_2OPIMM_FP(fcvtzu, Conversions, TypeWidthFromZeroToWidth)
4894 
4895 
4896 // Advanced SIMD scalar shift by immediate.
// Scalar counterparts of the vector shift-by-immediate tests. Arguments are
// (mnemonic, input-set suffix, immediate-set suffix); the generators are
// defined elsewhere.
4897 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(sshr, Basic, TypeWidth)
4898 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(ssra, Basic, TypeWidth)
4899 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(srshr, Basic, TypeWidth)
4900 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(srsra, Basic, TypeWidth)
4901 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(shl, Basic, TypeWidthFromZero)
4902 DEFINE_TEST_NEON_2OPIMM_SCALAR(sqshl, Basic, TypeWidthFromZero)
4903 DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(sqshrn, Basic, TypeWidth)
4904 DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(sqrshrn, Basic, TypeWidth)
4905 DEFINE_TEST_NEON_2OPIMM_SCALAR_HSD(scvtf,
4906                                    FixedPointConversions,
4907                                    TypeWidthFromZeroToWidth)
4908 DEFINE_TEST_NEON_2OPIMM_FP_SCALAR(fcvtzs, Conversions, TypeWidthFromZeroToWidth)
4909 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(ushr, Basic, TypeWidth)
4910 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(usra, Basic, TypeWidth)
4911 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(urshr, Basic, TypeWidth)
4912 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(ursra, Basic, TypeWidth)
4913 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(sri, Basic, TypeWidth)
4914 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(sli, Basic, TypeWidthFromZero)
4915 DEFINE_TEST_NEON_2OPIMM_SCALAR(sqshlu, Basic, TypeWidthFromZero)
4916 DEFINE_TEST_NEON_2OPIMM_SCALAR(uqshl, Basic, TypeWidthFromZero)
4917 DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(sqshrun, Basic, TypeWidth)
4918 DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(sqrshrun, Basic, TypeWidth)
4919 DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(uqshrn, Basic, TypeWidth)
4920 DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(uqrshrn, Basic, TypeWidth)
4921 DEFINE_TEST_NEON_2OPIMM_SCALAR_HSD(ucvtf,
4922                                    FixedPointConversions,
4923                                    TypeWidthFromZeroToWidth)
4924 DEFINE_TEST_NEON_2OPIMM_FP_SCALAR(fcvtzu, Conversions, TypeWidthFromZeroToWidth)
4925 
4926 
4927 // Advanced SIMD two-register miscellaneous.
// Two-argument entries are (mnemonic, input-set suffix). Three-argument
// entries add an immediate-set suffix: "Zero" for the compare-with-zero forms
// and "SHLL" for shll. Conversion and rounding instructions use the
// "Conversions" inputs; everything else uses "Basic". The generators are
// defined elsewhere. `not_` carries a trailing underscore because `not` is a
// reserved alternative token in C++.
4928 DEFINE_TEST_NEON_2SAME_NO2D(rev64, Basic)
4929 DEFINE_TEST_NEON_2SAME_8B_16B(rev16, Basic)
4930 DEFINE_TEST_NEON_2DIFF_LONG(saddlp, Basic)
4931 DEFINE_TEST_NEON_2SAME(suqadd, Basic)
4932 DEFINE_TEST_NEON_2SAME_NO2D(cls, Basic)
4933 DEFINE_TEST_NEON_2SAME_8B_16B(cnt, Basic)
4934 DEFINE_TEST_NEON_2DIFF_LONG(sadalp, Basic)
4935 DEFINE_TEST_NEON_2SAME(sqabs, Basic)
4936 DEFINE_TEST_NEON_2OPIMM(cmgt, Basic, Zero)
4937 DEFINE_TEST_NEON_2OPIMM(cmeq, Basic, Zero)
4938 DEFINE_TEST_NEON_2OPIMM(cmlt, Basic, Zero)
4939 DEFINE_TEST_NEON_2SAME(abs, Basic)
4940 DEFINE_TEST_NEON_2DIFF_NARROW(xtn, Basic)
4941 DEFINE_TEST_NEON_2DIFF_NARROW(sqxtn, Basic)
4942 DEFINE_TEST_NEON_2DIFF_FP_NARROW(fcvtn, Conversions)
4943 DEFINE_TEST_NEON_2DIFF_FP_LONG(fcvtl, Conversions)
4944 DEFINE_TEST_NEON_2SAME_FP_FP16(frintn, Conversions)
4945 DEFINE_TEST_NEON_2SAME_FP_FP16(frintm, Conversions)
4946 DEFINE_TEST_NEON_2SAME_FP_FP16(fcvtns, Conversions)
4947 DEFINE_TEST_NEON_2SAME_FP_FP16(fcvtms, Conversions)
4948 DEFINE_TEST_NEON_2SAME_FP_FP16(fcvtas, Conversions)
4949 // SCVTF (vector, integer) covered by SCVTF(vector, fixed point) with fbits 0.
4950 DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(fcmgt, Basic, Zero)
4951 DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(fcmeq, Basic, Zero)
4952 DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(fcmlt, Basic, Zero)
4953 DEFINE_TEST_NEON_2SAME_FP_FP16(fabs, Basic)
4954 DEFINE_TEST_NEON_2SAME_FP_FP16(frintp, Conversions)
4955 DEFINE_TEST_NEON_2SAME_FP_FP16(frintz, Conversions)
4956 DEFINE_TEST_NEON_2SAME_FP_FP16(fcvtps, Conversions)
4957 // FCVTZS(vector, integer) covered by FCVTZS(vector, fixed point) with fbits 0.
4958 DEFINE_TEST_NEON_2SAME_2S_4S(urecpe, Basic)
4959 DEFINE_TEST_NEON_2SAME_FP_FP16(frecpe, Basic)
4960 DEFINE_TEST_NEON_2SAME_BH(rev32, Basic)
4961 DEFINE_TEST_NEON_2DIFF_LONG(uaddlp, Basic)
4962 DEFINE_TEST_NEON_2SAME(usqadd, Basic)
4963 DEFINE_TEST_NEON_2SAME_NO2D(clz, Basic)
4964 DEFINE_TEST_NEON_2DIFF_LONG(uadalp, Basic)
4965 DEFINE_TEST_NEON_2SAME(sqneg, Basic)
4966 DEFINE_TEST_NEON_2OPIMM(cmge, Basic, Zero)
4967 DEFINE_TEST_NEON_2OPIMM(cmle, Basic, Zero)
4968 DEFINE_TEST_NEON_2SAME(neg, Basic)
4969 DEFINE_TEST_NEON_2DIFF_NARROW(sqxtun, Basic)
4970 DEFINE_TEST_NEON_2OPIMM_LONG(shll, Basic, SHLL)
4971 DEFINE_TEST_NEON_2DIFF_NARROW(uqxtn, Basic)
4972 DEFINE_TEST_NEON_2DIFF_FP_NARROW_2S(fcvtxn, Conversions)
4973 DEFINE_TEST_NEON_2SAME_FP(frint32x, Conversions)
4974 DEFINE_TEST_NEON_2SAME_FP(frint64x, Conversions)
4975 DEFINE_TEST_NEON_2SAME_FP(frint32z, Conversions)
4976 DEFINE_TEST_NEON_2SAME_FP(frint64z, Conversions)
4977 DEFINE_TEST_NEON_2SAME_FP_FP16(frinta, Conversions)
4978 DEFINE_TEST_NEON_2SAME_FP_FP16(frintx, Conversions)
4979 DEFINE_TEST_NEON_2SAME_FP_FP16(fcvtnu, Conversions)
4980 DEFINE_TEST_NEON_2SAME_FP_FP16(fcvtmu, Conversions)
4981 DEFINE_TEST_NEON_2SAME_FP_FP16(fcvtau, Conversions)
4982 // UCVTF (vector, integer) covered by UCVTF(vector, fixed point) with fbits 0.
4983 DEFINE_TEST_NEON_2SAME_8B_16B(not_, Basic)
4984 DEFINE_TEST_NEON_2SAME_8B_16B(rbit, Basic)
4985 DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(fcmge, Basic, Zero)
4986 DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(fcmle, Basic, Zero)
4987 DEFINE_TEST_NEON_2SAME_FP_FP16(fneg, Basic)
4988 DEFINE_TEST_NEON_2SAME_FP_FP16(frinti, Conversions)
4989 DEFINE_TEST_NEON_2SAME_FP_FP16(fcvtpu, Conversions)
4990 // FCVTZU(vector, integer) covered by FCVTZU(vector, fixed point) with fbits 0.
4991 DEFINE_TEST_NEON_2SAME_2S_4S(ursqrte, Basic)
4992 DEFINE_TEST_NEON_2SAME_FP_FP16(frsqrte, Basic)
4993 DEFINE_TEST_NEON_2SAME_FP_FP16(fsqrt, Basic)
4994 
4995 
4996 // Advanced SIMD scalar two-register miscellaneous.
// Each line below names the instruction under test followed by the input set
// to use (Basic or Conversions); the 2OPIMM variants take one extra argument
// (Zero here).
// NOTE(review): the DEFINE_TEST_NEON_* macros are defined outside this section;
// presumably each expands to one TEST per supported scalar format -- confirm
// against the macro definitions.
4997 DEFINE_TEST_NEON_2SAME_SCALAR(suqadd, Basic)
4998 DEFINE_TEST_NEON_2SAME_SCALAR(sqabs, Basic)
4999 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(cmgt, Basic, Zero)
5000 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(cmeq, Basic, Zero)
5001 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(cmlt, Basic, Zero)
5002 DEFINE_TEST_NEON_2SAME_SCALAR_D(abs, Basic)
5003 DEFINE_TEST_NEON_2DIFF_SCALAR_NARROW(sqxtn, Basic)
5004 DEFINE_TEST_NEON_2SAME_FP_FP16_SCALAR(fcvtns, Conversions)
5005 DEFINE_TEST_NEON_2SAME_FP_FP16_SCALAR(fcvtms, Conversions)
5006 DEFINE_TEST_NEON_2SAME_FP_FP16_SCALAR(fcvtas, Conversions)
5007 // No dedicated SCVTF (vector, integer) test: it is covered by the
// SCVTF (vector, fixed point) tests with fbits 0.
5008 DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_HSD(fcmgt, Basic, Zero)
5009 DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_HSD(fcmeq, Basic, Zero)
5010 DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_HSD(fcmlt, Basic, Zero)
5011 DEFINE_TEST_NEON_2SAME_FP_FP16_SCALAR(fcvtps, Conversions)
5012 // No dedicated FCVTZS (vector, integer) test: it is covered by the
// FCVTZS (vector, fixed point) tests with fbits 0.
5013 DEFINE_TEST_NEON_2SAME_FP_FP16_SCALAR(frecpe, Basic)
5014 DEFINE_TEST_NEON_2SAME_FP_FP16_SCALAR(frecpx, Basic)
5015 DEFINE_TEST_NEON_2SAME_SCALAR(usqadd, Basic)
5016 DEFINE_TEST_NEON_2SAME_SCALAR(sqneg, Basic)
5017 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(cmge, Basic, Zero)
5018 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(cmle, Basic, Zero)
5019 DEFINE_TEST_NEON_2SAME_SCALAR_D(neg, Basic)
5020 DEFINE_TEST_NEON_2DIFF_SCALAR_NARROW(sqxtun, Basic)
5021 DEFINE_TEST_NEON_2DIFF_SCALAR_NARROW(uqxtn, Basic)
// Scalar FCVTXN test, spelled out by hand instead of through one of the
// DEFINE_TEST_NEON_* macros used by its neighbours. It invokes the 2DIFF helper
// directly with the S and D formats and the kInputDoubleConversions input set.
// NOTE(review): presumably S is the destination format and D the source
// format -- confirm against CALL_TEST_NEON_HELPER_2DIFF.
5022 TEST(fcvtxn_SCALAR) {
5023   CALL_TEST_NEON_HELPER_2DIFF(fcvtxn, S, D, kInputDoubleConversions);
5024 }
DEFINE_TEST_NEON_2SAME_FP_FP16_SCALAR(fcvtnu,Conversions)5025 DEFINE_TEST_NEON_2SAME_FP_FP16_SCALAR(fcvtnu, Conversions)
5026 DEFINE_TEST_NEON_2SAME_FP_FP16_SCALAR(fcvtmu, Conversions)
5027 DEFINE_TEST_NEON_2SAME_FP_FP16_SCALAR(fcvtau, Conversions)
5028 // UCVTF (vector, integer) covered by UCVTF(vector, fixed point) with fbits 0.
5029 DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_HSD(fcmge, Basic, Zero)
5030 DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_HSD(fcmle, Basic, Zero)
5031 DEFINE_TEST_NEON_2SAME_FP_FP16_SCALAR(fcvtpu, Conversions)
5032 // FCVTZU(vector, integer) covered by FCVTZU(vector, fixed point) with fbits 0.
5033 DEFINE_TEST_NEON_2SAME_FP_FP16_SCALAR(frsqrte, Basic)
5034 
5035 
5036 // Advanced SIMD across lanes.
// Each line names the instruction under test and the input set (Basic). Three
// macro flavours are used: ACROSS, ACROSS_LONG (saddlv/uaddlv) and ACROSS_FP
// (the fmax*/fmin* forms).
// NOTE(review): the formats each flavour covers are set by the macro
// definitions, which are outside this section -- confirm there.
5037 DEFINE_TEST_NEON_ACROSS_LONG(saddlv, Basic)
5038 DEFINE_TEST_NEON_ACROSS(smaxv, Basic)
5039 DEFINE_TEST_NEON_ACROSS(sminv, Basic)
5040 DEFINE_TEST_NEON_ACROSS(addv, Basic)
5041 DEFINE_TEST_NEON_ACROSS_LONG(uaddlv, Basic)
5042 DEFINE_TEST_NEON_ACROSS(umaxv, Basic)
5043 DEFINE_TEST_NEON_ACROSS(uminv, Basic)
5044 DEFINE_TEST_NEON_ACROSS_FP(fmaxnmv, Basic)
5045 DEFINE_TEST_NEON_ACROSS_FP(fmaxv, Basic)
5046 DEFINE_TEST_NEON_ACROSS_FP(fminnmv, Basic)
5047 DEFINE_TEST_NEON_ACROSS_FP(fminv, Basic)
5048 
5049 
5050 // Advanced SIMD permute.
// The uzp/trn/zip instructions are all instantiated through the same 3SAME
// macro with the Basic input set.
5051 DEFINE_TEST_NEON_3SAME(uzp1, Basic)
5052 DEFINE_TEST_NEON_3SAME(trn1, Basic)
5053 DEFINE_TEST_NEON_3SAME(zip1, Basic)
5054 DEFINE_TEST_NEON_3SAME(uzp2, Basic)
5055 DEFINE_TEST_NEON_3SAME(trn2, Basic)
5056 DEFINE_TEST_NEON_3SAME(zip2, Basic)
5057 
5058 
5059 // Advanced SIMD vector x indexed element.
// Each line names the instruction under test followed by three input-set names
// (all Basic here).
// NOTE(review): presumably the three sets feed the destination/accumulator,
// the vector source and the indexed source respectively -- confirm against the
// BYELEMENT macro definitions, which are outside this section.
5060 DEFINE_TEST_NEON_BYELEMENT_DIFF(smlal, Basic, Basic, Basic)
5061 DEFINE_TEST_NEON_BYELEMENT_DIFF(sqdmlal, Basic, Basic, Basic)
5062 DEFINE_TEST_NEON_BYELEMENT_DIFF(smlsl, Basic, Basic, Basic)
5063 DEFINE_TEST_NEON_BYELEMENT_DIFF(sqdmlsl, Basic, Basic, Basic)
5064 DEFINE_TEST_NEON_BYELEMENT(mul, Basic, Basic, Basic)
5065 DEFINE_TEST_NEON_BYELEMENT_DIFF(smull, Basic, Basic, Basic)
5066 DEFINE_TEST_NEON_BYELEMENT_DIFF(sqdmull, Basic, Basic, Basic)
5067 DEFINE_TEST_NEON_BYELEMENT(sqdmulh, Basic, Basic, Basic)
5068 DEFINE_TEST_NEON_BYELEMENT(sqrdmulh, Basic, Basic, Basic)
5069 DEFINE_TEST_NEON_BYELEMENT(sqrdmlah, Basic, Basic, Basic)
5070 DEFINE_TEST_NEON_BYELEMENT(sqrdmlsh, Basic, Basic, Basic)
5071 DEFINE_TEST_NEON_BYELEMENT_DOT_PRODUCT(udot, Basic, Basic, Basic)
5072 DEFINE_TEST_NEON_BYELEMENT_DOT_PRODUCT(sdot, Basic, Basic, Basic)
5073 DEFINE_TEST_NEON_FP_BYELEMENT(fmla, Basic, Basic, Basic)
5074 DEFINE_TEST_NEON_FP_BYELEMENT(fmls, Basic, Basic, Basic)
5075 DEFINE_TEST_NEON_FP_BYELEMENT(fmul, Basic, Basic, Basic)
5076 DEFINE_TEST_NEON_BYELEMENT(mla, Basic, Basic, Basic)
5077 DEFINE_TEST_NEON_BYELEMENT_DIFF(umlal, Basic, Basic, Basic)
5078 DEFINE_TEST_NEON_BYELEMENT(mls, Basic, Basic, Basic)
5079 DEFINE_TEST_NEON_BYELEMENT_DIFF(umlsl, Basic, Basic, Basic)
5080 DEFINE_TEST_NEON_BYELEMENT_DIFF(umull, Basic, Basic, Basic)
5081 DEFINE_TEST_NEON_FP_BYELEMENT(fmulx, Basic, Basic, Basic)
5082 
5083 
5084 // Advanced SIMD scalar x indexed element.
// Scalar counterparts of the by-element tests: each line names the instruction
// under test followed by three input-set names (all Basic here).
// NOTE(review): the meaning of the three input-set arguments is fixed by the
// *_SCALAR macro definitions, which are outside this section -- confirm there.
5085 DEFINE_TEST_NEON_BYELEMENT_DIFF_SCALAR(sqdmlal, Basic, Basic, Basic)
5086 DEFINE_TEST_NEON_BYELEMENT_DIFF_SCALAR(sqdmlsl, Basic, Basic, Basic)
5087 DEFINE_TEST_NEON_BYELEMENT_DIFF_SCALAR(sqdmull, Basic, Basic, Basic)
5088 DEFINE_TEST_NEON_BYELEMENT_SCALAR(sqdmulh, Basic, Basic, Basic)
5089 DEFINE_TEST_NEON_BYELEMENT_SCALAR(sqrdmulh, Basic, Basic, Basic)
5090 DEFINE_TEST_NEON_BYELEMENT_SCALAR(sqrdmlah, Basic, Basic, Basic)
5091 DEFINE_TEST_NEON_BYELEMENT_SCALAR(sqrdmlsh, Basic, Basic, Basic)
5092 DEFINE_TEST_NEON_FP_BYELEMENT_SCALAR(fmla, Basic, Basic, Basic)
5093 DEFINE_TEST_NEON_FP_BYELEMENT_SCALAR(fmls, Basic, Basic, Basic)
5094 DEFINE_TEST_NEON_FP_BYELEMENT_SCALAR(fmul, Basic, Basic, Basic)
5095 DEFINE_TEST_NEON_FP_BYELEMENT_SCALAR(fmulx, Basic, Basic, Basic)
5096 
5097 
// By-element tests for fmlal, fmlal2, fmlsl and fmlsl2. These go through their
// own FHM_BYELEMENT macro rather than the FP_BYELEMENT macro used for the other
// floating-point by-element tests in this file; each takes three input-set
// names (all Basic here).
// NOTE(review): presumably the separate macro exists because these
// instructions need a different CPU feature / operand layout -- confirm against
// the macro definition.
5098 DEFINE_TEST_NEON_FHM_BYELEMENT(fmlal, Basic, Basic, Basic)
5099 DEFINE_TEST_NEON_FHM_BYELEMENT(fmlal2, Basic, Basic, Basic)
5100 DEFINE_TEST_NEON_FHM_BYELEMENT(fmlsl, Basic, Basic, Basic)
5101 DEFINE_TEST_NEON_FHM_BYELEMENT(fmlsl2, Basic, Basic, Basic)
5102 
5103 
5104 #ifdef VIXL_ENABLE_IMPLICIT_CHECKS
// Emits a sequence of general-purpose-register loads and stores that all
// address `bad_memory`, bracketed by START_IMPLICIT_CHECK() and
// END_IMPLICIT_CHECK(), and then runs the result through
// TRY_RUN_IMPLICIT_CHECK(). The test sits under
// #ifdef VIXL_ENABLE_IMPLICIT_CHECKS, so it is only built when that macro is
// defined.
// NOTE(review): `bad_memory` is not declared in this body; presumably
// START_IMPLICIT_CHECK() provides it -- confirm against the macro definition.
// NOTE(review): CPUFeatures::kNEON is requested although no vector register is
// used below -- confirm whether the surrounding macros need it.
5105 TEST(ImplicitCheck) {
5106   SETUP_WITH_FEATURES(CPUFeatures::kNEON);
5107   START_IMPLICIT_CHECK();
5108 
// Open a single emission scope sized to every byte still free in the buffer;
// all instructions below are emitted inside it.
5109   EmissionCheckScope guard(&masm, masm.GetBuffer()->GetRemainingBytes());
5110   // Invalid memory reads.
// Covers acquire, exclusive, pair, non-temporal pair, register, sign-extending
// and unscaled load forms, each with W and X destination registers where the
// mnemonic allows both.
5111   __ ldar(w3, bad_memory);
5112   __ ldar(x4, bad_memory);
5113   __ ldarb(w5, bad_memory);
5114   __ ldarb(x6, bad_memory);
5115   __ ldarh(w7, bad_memory);
5116   __ ldarh(x8, bad_memory);
5117   __ ldaxp(w9, w10, bad_memory);
5118   __ ldaxp(x11, x12, bad_memory);
5119   __ ldaxr(w13, bad_memory);
5120   __ ldaxr(x14, bad_memory);
5121   __ ldaxrb(w15, bad_memory);
5122   __ ldaxrb(x16, bad_memory);
5123   __ ldaxrh(w17, bad_memory);
5124   __ ldaxrh(x18, bad_memory);
5125   __ ldnp(w19, w20, bad_memory);
5126   __ ldnp(x21, x22, bad_memory);
5127   __ ldp(w23, w24, bad_memory);
5128   __ ldp(x25, x26, bad_memory);
5129   __ ldpsw(x27, x28, bad_memory);
5130   __ ldr(w29, bad_memory);
5131   __ ldr(x2, bad_memory);
5132   __ ldrb(w3, bad_memory);
5133   __ ldrb(x4, bad_memory);
5134   __ ldrh(w5, bad_memory);
5135   __ ldrh(x6, bad_memory);
5136   __ ldrsb(w7, bad_memory);
5137   __ ldrsb(x8, bad_memory);
5138   __ ldrsh(w9, bad_memory);
5139   __ ldrsh(x10, bad_memory);
5140   __ ldrsw(x11, bad_memory);
5141   __ ldur(w12, bad_memory);
5142   __ ldur(x13, bad_memory);
5143   __ ldurb(w14, bad_memory);
5144   __ ldurb(x15, bad_memory);
5145   __ ldurh(w16, bad_memory);
5146   __ ldurh(x17, bad_memory);
5147   __ ldursb(w18, bad_memory);
5148   __ ldursb(x19, bad_memory);
5149   __ ldursh(w20, bad_memory);
5150   __ ldursh(x21, bad_memory);
5151   __ ldursw(x22, bad_memory);
5152   __ ldxp(w23, w24, bad_memory);
5153   __ ldxp(x25, x26, bad_memory);
5154   __ ldxr(w27, bad_memory);
5155   __ ldxr(x28, bad_memory);
5156   __ ldxrb(w29, bad_memory);
5157   __ ldxrb(x2, bad_memory);
5158   __ ldxrh(w3, bad_memory);
5159   __ ldxrh(x4, bad_memory);
5160 
5161   // Invalid memory writes. Note: exclusive store instructions are not tested
5162   // because they can fail due to the global monitor before trying to perform a
5163   // memory store.
5164   __ stlr(w18, bad_memory);
5165   __ stlr(x19, bad_memory);
5166   __ stlrb(w20, bad_memory);
5167   __ stlrb(x21, bad_memory);
5168   __ stlrh(w22, bad_memory);
5169   __ stlrh(x23, bad_memory);
5170   __ stnp(w14, w15, bad_memory);
5171   __ stnp(x16, x17, bad_memory);
5172   __ stp(w18, w19, bad_memory);
5173   __ stp(x20, x21, bad_memory);
5174   __ str(w22, bad_memory);
5175   __ str(x23, bad_memory);
5176   __ strb(w24, bad_memory);
5177   __ strb(x25, bad_memory);
5178   __ strh(w26, bad_memory);
5179   __ strh(x27, bad_memory);
5180   __ stur(w28, bad_memory);
5181   __ stur(x29, bad_memory);
5182   __ sturb(w2, bad_memory);
5183   __ sturb(x3, bad_memory);
5184   __ sturh(w4, bad_memory);
5185   __ sturh(x5, bad_memory);
5186 
// Close the checked region, then execute what was emitted.
5187   END_IMPLICIT_CHECK();
5188   TRY_RUN_IMPLICIT_CHECK();
5189 }
5190 
5191 TEST(ImplicitCheckNeon) {
5192   SETUP_WITH_FEATURES(CPUFeatures::kNEON);
5193   START_IMPLICIT_CHECK();
5194 
5195   EmissionCheckScope guard(&masm, masm.GetBuffer()->GetRemainingBytes());
5196   __ ld1(v18.V16B(), v19.V16B(), v20.V16B(), v21.V16B(), bad_memory);
5197   __ ld1(v23.V16B(), v24.V16B(), v25.V16B(), v26.V16B(), bad_memory);
5198   __ ld1(v5.V16B(), v6.V16B(), v7.V16B(), v8.V16B(), bad_memory);
5199   __ ld1(v18.V16B(), v19.V16B(), v20.V16B(), bad_memory);
5200   __ ld1(v13.V16B(), v14.V16B(), v15.V16B(), bad_memory);
5201   __ ld1(v19.V16B(), v20.V16B(), v21.V16B(), bad_memory);
5202   __ ld1(v17.V16B(), v18.V16B(), bad_memory);
5203   __ ld1(v20.V16B(), v21.V16B(), bad_memory);
5204   __ ld1(v28.V16B(), v29.V16B(), bad_memory);
5205   __ ld1(v29.V16B(), bad_memory);
5206   __ ld1(v21.V16B(), bad_memory);
5207   __ ld1(v4.V16B(), bad_memory);
5208   __ ld1(v4.V1D(), v5.V1D(), v6.V1D(), v7.V1D(), bad_memory);
5209   __ ld1(v17.V1D(), v18.V1D(), v19.V1D(), v20.V1D(), bad_memory);
5210   __ ld1(v28.V1D(), v29.V1D(), v30.V1D(), v31.V1D(), bad_memory);
5211   __ ld1(v20.V1D(), v21.V1D(), v22.V1D(), bad_memory);
5212   __ ld1(v19.V1D(), v20.V1D(), v21.V1D(), bad_memory);
5213   __ ld1(v12.V1D(), v13.V1D(), v14.V1D(), bad_memory);
5214   __ ld1(v29.V1D(), v30.V1D(), bad_memory);
5215   __ ld1(v31.V1D(), v0.V1D(), bad_memory);
5216   __ ld1(v3.V1D(), v4.V1D(), bad_memory);
5217   __ ld1(v28.V1D(), bad_memory);
5218   __ ld1(v11.V1D(), bad_memory);
5219   __ ld1(v29.V1D(), bad_memory);
5220   __ ld1(v28.V2D(), v29.V2D(), v30.V2D(), v31.V2D(), bad_memory);
5221   __ ld1(v8.V2D(), v9.V2D(), v10.V2D(), v11.V2D(), bad_memory);
5222   __ ld1(v14.V2D(), v15.V2D(), v16.V2D(), v17.V2D(), bad_memory);
5223   __ ld1(v26.V2D(), v27.V2D(), v28.V2D(), bad_memory);
5224   __ ld1(v5.V2D(), v6.V2D(), v7.V2D(), bad_memory);
5225   __ ld1(v26.V2D(), v27.V2D(), v28.V2D(), bad_memory);
5226   __ ld1(v18.V2D(), v19.V2D(), bad_memory);
5227   __ ld1(v21.V2D(), v22.V2D(), bad_memory);
5228   __ ld1(v17.V2D(), v18.V2D(), bad_memory);
5229   __ ld1(v5.V2D(), bad_memory);
5230   __ ld1(v6.V2D(), bad_memory);
5231   __ ld1(v15.V2D(), bad_memory);
5232   __ ld1(v30.V2S(), v31.V2S(), v0.V2S(), v1.V2S(), bad_memory);
5233   __ ld1(v24.V2S(), v25.V2S(), v26.V2S(), v27.V2S(), bad_memory);
5234   __ ld1(v27.V2S(), v28.V2S(), v29.V2S(), v30.V2S(), bad_memory);
5235   __ ld1(v11.V2S(), v12.V2S(), v13.V2S(), bad_memory);
5236   __ ld1(v8.V2S(), v9.V2S(), v10.V2S(), bad_memory);
5237   __ ld1(v31.V2S(), v0.V2S(), v1.V2S(), bad_memory);
5238   __ ld1(v0.V2S(), v1.V2S(), bad_memory);
5239   __ ld1(v13.V2S(), v14.V2S(), bad_memory);
5240   __ ld1(v3.V2S(), v4.V2S(), bad_memory);
5241   __ ld1(v26.V2S(), bad_memory);
5242   __ ld1(v0.V2S(), bad_memory);
5243   __ ld1(v11.V2S(), bad_memory);
5244   __ ld1(v16.V4H(), v17.V4H(), v18.V4H(), v19.V4H(), bad_memory);
5245   __ ld1(v24.V4H(), v25.V4H(), v26.V4H(), v27.V4H(), bad_memory);
5246   __ ld1(v1.V4H(), v2.V4H(), v3.V4H(), v4.V4H(), bad_memory);
5247   __ ld1(v30.V4H(), v31.V4H(), v0.V4H(), bad_memory);
5248   __ ld1(v25.V4H(), v26.V4H(), v27.V4H(), bad_memory);
5249   __ ld1(v3.V4H(), v4.V4H(), v5.V4H(), bad_memory);
5250   __ ld1(v3.V4H(), v4.V4H(), bad_memory);
5251   __ ld1(v3.V4H(), v4.V4H(), bad_memory);
5252   __ ld1(v23.V4H(), v24.V4H(), bad_memory);
5253   __ ld1(v26.V4H(), bad_memory);
5254   __ ld1(v1.V4H(), bad_memory);
5255   __ ld1(v14.V4H(), bad_memory);
5256   __ ld1(v26.V4S(), v27.V4S(), v28.V4S(), v29.V4S(), bad_memory);
5257   __ ld1(v28.V4S(), v29.V4S(), v30.V4S(), v31.V4S(), bad_memory);
5258   __ ld1(v4.V4S(), v5.V4S(), v6.V4S(), v7.V4S(), bad_memory);
5259   __ ld1(v2.V4S(), v3.V4S(), v4.V4S(), bad_memory);
5260   __ ld1(v22.V4S(), v23.V4S(), v24.V4S(), bad_memory);
5261   __ ld1(v15.V4S(), v16.V4S(), v17.V4S(), bad_memory);
5262   __ ld1(v20.V4S(), v21.V4S(), bad_memory);
5263   __ ld1(v30.V4S(), v31.V4S(), bad_memory);
5264   __ ld1(v11.V4S(), v12.V4S(), bad_memory);
5265   __ ld1(v15.V4S(), bad_memory);
5266   __ ld1(v12.V4S(), bad_memory);
5267   __ ld1(v0.V4S(), bad_memory);
5268   __ ld1(v17.V8B(), v18.V8B(), v19.V8B(), v20.V8B(), bad_memory);
5269   __ ld1(v5.V8B(), v6.V8B(), v7.V8B(), v8.V8B(), bad_memory);
5270   __ ld1(v9.V8B(), v10.V8B(), v11.V8B(), v12.V8B(), bad_memory);
5271   __ ld1(v4.V8B(), v5.V8B(), v6.V8B(), bad_memory);
5272   __ ld1(v2.V8B(), v3.V8B(), v4.V8B(), bad_memory);
5273   __ ld1(v12.V8B(), v13.V8B(), v14.V8B(), bad_memory);
5274   __ ld1(v10.V8B(), v11.V8B(), bad_memory);
5275   __ ld1(v11.V8B(), v12.V8B(), bad_memory);
5276   __ ld1(v27.V8B(), v28.V8B(), bad_memory);
5277   __ ld1(v31.V8B(), bad_memory);
5278   __ ld1(v10.V8B(), bad_memory);
5279   __ ld1(v28.V8B(), bad_memory);
5280   __ ld1(v5.V8H(), v6.V8H(), v7.V8H(), v8.V8H(), bad_memory);
5281   __ ld1(v2.V8H(), v3.V8H(), v4.V8H(), v5.V8H(), bad_memory);
5282   __ ld1(v10.V8H(), v11.V8H(), v12.V8H(), v13.V8H(), bad_memory);
5283   __ ld1(v26.V8H(), v27.V8H(), v28.V8H(), bad_memory);
5284   __ ld1(v3.V8H(), v4.V8H(), v5.V8H(), bad_memory);
5285   __ ld1(v17.V8H(), v18.V8H(), v19.V8H(), bad_memory);
5286   __ ld1(v4.V8H(), v5.V8H(), bad_memory);
5287   __ ld1(v21.V8H(), v22.V8H(), bad_memory);
5288   __ ld1(v4.V8H(), v5.V8H(), bad_memory);
5289   __ ld1(v9.V8H(), bad_memory);
5290   __ ld1(v27.V8H(), bad_memory);
5291   __ ld1(v26.V8H(), bad_memory);
5292   __ ld1(v19.B(), 1, bad_memory);
5293   __ ld1(v12.B(), 3, bad_memory);
5294   __ ld1(v27.B(), 12, bad_memory);
5295   __ ld1(v10.D(), 1, bad_memory);
5296   __ ld1(v26.D(), 1, bad_memory);
5297   __ ld1(v7.D(), 1, bad_memory);
5298   __ ld1(v19.H(), 5, bad_memory);
5299   __ ld1(v10.H(), 1, bad_memory);
5300   __ ld1(v5.H(), 4, bad_memory);
5301   __ ld1(v21.S(), 2, bad_memory);
5302   __ ld1(v13.S(), 2, bad_memory);
5303   __ ld1(v1.S(), 2, bad_memory);
5304   __ ld1r(v2.V16B(), bad_memory);
5305   __ ld1r(v2.V16B(), bad_memory);
5306   __ ld1r(v22.V16B(), bad_memory);
5307   __ ld1r(v25.V1D(), bad_memory);
5308   __ ld1r(v9.V1D(), bad_memory);
5309   __ ld1r(v23.V1D(), bad_memory);
5310   __ ld1r(v19.V2D(), bad_memory);
5311   __ ld1r(v21.V2D(), bad_memory);
5312   __ ld1r(v30.V2D(), bad_memory);
5313   __ ld1r(v24.V2S(), bad_memory);
5314   __ ld1r(v26.V2S(), bad_memory);
5315   __ ld1r(v28.V2S(), bad_memory);
5316   __ ld1r(v19.V4H(), bad_memory);
5317   __ ld1r(v1.V4H(), bad_memory);
5318   __ ld1r(v21.V4H(), bad_memory);
5319   __ ld1r(v15.V4S(), bad_memory);
5320   __ ld1r(v21.V4S(), bad_memory);
5321   __ ld1r(v23.V4S(), bad_memory);
5322   __ ld1r(v26.V8B(), bad_memory);
5323   __ ld1r(v14.V8B(), bad_memory);
5324   __ ld1r(v19.V8B(), bad_memory);
5325   __ ld1r(v13.V8H(), bad_memory);
5326   __ ld1r(v30.V8H(), bad_memory);
5327   __ ld1r(v27.V8H(), bad_memory);
5328   __ ld2(v21.V16B(), v22.V16B(), bad_memory);
5329   __ ld2(v21.V16B(), v22.V16B(), bad_memory);
5330   __ ld2(v12.V16B(), v13.V16B(), bad_memory);
5331   __ ld2(v14.V2D(), v15.V2D(), bad_memory);
5332   __ ld2(v0.V2D(), v1.V2D(), bad_memory);
5333   __ ld2(v12.V2D(), v13.V2D(), bad_memory);
5334   __ ld2(v27.V2S(), v28.V2S(), bad_memory);
5335   __ ld2(v2.V2S(), v3.V2S(), bad_memory);
5336   __ ld2(v12.V2S(), v13.V2S(), bad_memory);
5337   __ ld2(v9.V4H(), v10.V4H(), bad_memory);
5338   __ ld2(v23.V4H(), v24.V4H(), bad_memory);
5339   __ ld2(v1.V4H(), v2.V4H(), bad_memory);
5340   __ ld2(v20.V4S(), v21.V4S(), bad_memory);
5341   __ ld2(v10.V4S(), v11.V4S(), bad_memory);
5342   __ ld2(v24.V4S(), v25.V4S(), bad_memory);
5343   __ ld2(v17.V8B(), v18.V8B(), bad_memory);
5344   __ ld2(v13.V8B(), v14.V8B(), bad_memory);
5345   __ ld2(v7.V8B(), v8.V8B(), bad_memory);
5346   __ ld2(v30.V8H(), v31.V8H(), bad_memory);
5347   __ ld2(v4.V8H(), v5.V8H(), bad_memory);
5348   __ ld2(v13.V8H(), v14.V8H(), bad_memory);
5349   __ ld2(v5.B(), v6.B(), 12, bad_memory);
5350   __ ld2(v16.B(), v17.B(), 7, bad_memory);
5351   __ ld2(v29.B(), v30.B(), 2, bad_memory);
5352   __ ld2(v11.D(), v12.D(), 1, bad_memory);
5353   __ ld2(v26.D(), v27.D(), 0, bad_memory);
5354   __ ld2(v25.D(), v26.D(), 0, bad_memory);
5355   __ ld2(v18.H(), v19.H(), 7, bad_memory);
5356   __ ld2(v17.H(), v18.H(), 5, bad_memory);
5357   __ ld2(v30.H(), v31.H(), 2, bad_memory);
5358   __ ld2(v29.S(), v30.S(), 3, bad_memory);
5359   __ ld2(v28.S(), v29.S(), 0, bad_memory);
5360   __ ld2(v6.S(), v7.S(), 1, bad_memory);
5361   __ ld2r(v26.V16B(), v27.V16B(), bad_memory);
5362   __ ld2r(v21.V16B(), v22.V16B(), bad_memory);
5363   __ ld2r(v5.V16B(), v6.V16B(), bad_memory);
5364   __ ld2r(v26.V1D(), v27.V1D(), bad_memory);
5365   __ ld2r(v14.V1D(), v15.V1D(), bad_memory);
5366   __ ld2r(v23.V1D(), v24.V1D(), bad_memory);
5367   __ ld2r(v11.V2D(), v12.V2D(), bad_memory);
5368   __ ld2r(v29.V2D(), v30.V2D(), bad_memory);
5369   __ ld2r(v15.V2D(), v16.V2D(), bad_memory);
5370   __ ld2r(v26.V2S(), v27.V2S(), bad_memory);
5371   __ ld2r(v22.V2S(), v23.V2S(), bad_memory);
5372   __ ld2r(v2.V2S(), v3.V2S(), bad_memory);
5373   __ ld2r(v2.V4H(), v3.V4H(), bad_memory);
5374   __ ld2r(v9.V4H(), v10.V4H(), bad_memory);
5375   __ ld2r(v6.V4H(), v7.V4H(), bad_memory);
5376   __ ld2r(v7.V4S(), v8.V4S(), bad_memory);
5377   __ ld2r(v19.V4S(), v20.V4S(), bad_memory);
5378   __ ld2r(v21.V4S(), v22.V4S(), bad_memory);
5379   __ ld2r(v26.V8B(), v27.V8B(), bad_memory);
5380   __ ld2r(v20.V8B(), v21.V8B(), bad_memory);
5381   __ ld2r(v11.V8B(), v12.V8B(), bad_memory);
5382   __ ld2r(v12.V8H(), v13.V8H(), bad_memory);
5383   __ ld2r(v6.V8H(), v7.V8H(), bad_memory);
5384   __ ld2r(v25.V8H(), v26.V8H(), bad_memory);
5385   __ ld3(v20.V16B(), v21.V16B(), v22.V16B(), bad_memory);
5386   __ ld3(v28.V16B(), v29.V16B(), v30.V16B(), bad_memory);
5387   __ ld3(v20.V16B(), v21.V16B(), v22.V16B(), bad_memory);
5388   __ ld3(v21.V2D(), v22.V2D(), v23.V2D(), bad_memory);
5389   __ ld3(v18.V2D(), v19.V2D(), v20.V2D(), bad_memory);
5390   __ ld3(v27.V2D(), v28.V2D(), v29.V2D(), bad_memory);
5391   __ ld3(v7.V2S(), v8.V2S(), v9.V2S(), bad_memory);
5392   __ ld3(v20.V2S(), v21.V2S(), v22.V2S(), bad_memory);
5393   __ ld3(v26.V2S(), v27.V2S(), v28.V2S(), bad_memory);
5394   __ ld3(v27.V4H(), v28.V4H(), v29.V4H(), bad_memory);
5395   __ ld3(v28.V4H(), v29.V4H(), v30.V4H(), bad_memory);
5396   __ ld3(v7.V4H(), v8.V4H(), v9.V4H(), bad_memory);
5397   __ ld3(v2.V4S(), v3.V4S(), v4.V4S(), bad_memory);
5398   __ ld3(v24.V4S(), v25.V4S(), v26.V4S(), bad_memory);
5399   __ ld3(v11.V4S(), v12.V4S(), v13.V4S(), bad_memory);
5400   __ ld3(v29.V8B(), v30.V8B(), v31.V8B(), bad_memory);
5401   __ ld3(v1.V8B(), v2.V8B(), v3.V8B(), bad_memory);
5402   __ ld3(v12.V8B(), v13.V8B(), v14.V8B(), bad_memory);
5403   __ ld3(v22.V8H(), v23.V8H(), v24.V8H(), bad_memory);
5404   __ ld3(v13.V8H(), v14.V8H(), v15.V8H(), bad_memory);
5405   __ ld3(v28.V8H(), v29.V8H(), v30.V8H(), bad_memory);
5406   __ ld3(v21.B(), v22.B(), v23.B(), 11, bad_memory);
5407   __ ld3(v5.B(), v6.B(), v7.B(), 9, bad_memory);
5408   __ ld3(v23.B(), v24.B(), v25.B(), 0, bad_memory);
5409   __ ld3(v16.D(), v17.D(), v18.D(), 0, bad_memory);
5410   __ ld3(v30.D(), v31.D(), v0.D(), 0, bad_memory);
5411   __ ld3(v28.D(), v29.D(), v30.D(), 1, bad_memory);
5412   __ ld3(v13.H(), v14.H(), v15.H(), 2, bad_memory);
5413   __ ld3(v22.H(), v23.H(), v24.H(), 7, bad_memory);
5414   __ ld3(v14.H(), v15.H(), v16.H(), 3, bad_memory);
5415   __ ld3(v22.S(), v23.S(), v24.S(), 3, bad_memory);
5416   __ ld3(v30.S(), v31.S(), v0.S(), 2, bad_memory);
5417   __ ld3(v12.S(), v13.S(), v14.S(), 1, bad_memory);
5418   __ ld3r(v24.V16B(), v25.V16B(), v26.V16B(), bad_memory);
5419   __ ld3r(v24.V16B(), v25.V16B(), v26.V16B(), bad_memory);
5420   __ ld3r(v3.V16B(), v4.V16B(), v5.V16B(), bad_memory);
5421   __ ld3r(v4.V1D(), v5.V1D(), v6.V1D(), bad_memory);
5422   __ ld3r(v7.V1D(), v8.V1D(), v9.V1D(), bad_memory);
5423   __ ld3r(v17.V1D(), v18.V1D(), v19.V1D(), bad_memory);
5424   __ ld3r(v16.V2D(), v17.V2D(), v18.V2D(), bad_memory);
5425   __ ld3r(v20.V2D(), v21.V2D(), v22.V2D(), bad_memory);
5426   __ ld3r(v14.V2D(), v15.V2D(), v16.V2D(), bad_memory);
5427   __ ld3r(v10.V2S(), v11.V2S(), v12.V2S(), bad_memory);
5428   __ ld3r(v0.V2S(), v1.V2S(), v2.V2S(), bad_memory);
5429   __ ld3r(v23.V2S(), v24.V2S(), v25.V2S(), bad_memory);
5430   __ ld3r(v22.V4H(), v23.V4H(), v24.V4H(), bad_memory);
5431   __ ld3r(v6.V4H(), v7.V4H(), v8.V4H(), bad_memory);
5432   __ ld3r(v7.V4H(), v8.V4H(), v9.V4H(), bad_memory);
5433   __ ld3r(v26.V4S(), v27.V4S(), v28.V4S(), bad_memory);
5434   __ ld3r(v0.V4S(), v1.V4S(), v2.V4S(), bad_memory);
5435   __ ld3r(v30.V4S(), v31.V4S(), v0.V4S(), bad_memory);
5436   __ ld3r(v2.V8B(), v3.V8B(), v4.V8B(), bad_memory);
5437   __ ld3r(v10.V8B(), v11.V8B(), v12.V8B(), bad_memory);
5438   __ ld3r(v28.V8B(), v29.V8B(), v30.V8B(), bad_memory);
5439   __ ld3r(v6.V8H(), v7.V8H(), v8.V8H(), bad_memory);
5440   __ ld3r(v29.V8H(), v30.V8H(), v31.V8H(), bad_memory);
5441   __ ld3r(v7.V8H(), v8.V8H(), v9.V8H(), bad_memory);
5442   __ ld4(v3.V16B(), v4.V16B(), v5.V16B(), v6.V16B(), bad_memory);
5443   __ ld4(v2.V16B(), v3.V16B(), v4.V16B(), v5.V16B(), bad_memory);
5444   __ ld4(v5.V16B(), v6.V16B(), v7.V16B(), v8.V16B(), bad_memory);
5445   __ ld4(v18.V2D(), v19.V2D(), v20.V2D(), v21.V2D(), bad_memory);
5446   __ ld4(v4.V2D(), v5.V2D(), v6.V2D(), v7.V2D(), bad_memory);
5447   __ ld4(v29.V2D(), v30.V2D(), v31.V2D(), v0.V2D(), bad_memory);
5448   __ ld4(v27.V2S(), v28.V2S(), v29.V2S(), v30.V2S(), bad_memory);
5449   __ ld4(v24.V2S(), v25.V2S(), v26.V2S(), v27.V2S(), bad_memory);
5450   __ ld4(v4.V2S(), v5.V2S(), v6.V2S(), v7.V2S(), bad_memory);
5451   __ ld4(v16.V4H(), v17.V4H(), v18.V4H(), v19.V4H(), bad_memory);
5452   __ ld4(v23.V4H(), v24.V4H(), v25.V4H(), v26.V4H(), bad_memory);
5453   __ ld4(v2.V4H(), v3.V4H(), v4.V4H(), v5.V4H(), bad_memory);
5454   __ ld4(v7.V4S(), v8.V4S(), v9.V4S(), v10.V4S(), bad_memory);
5455   __ ld4(v28.V4S(), v29.V4S(), v30.V4S(), v31.V4S(), bad_memory);
5456   __ ld4(v29.V4S(), v30.V4S(), v31.V4S(), v0.V4S(), bad_memory);
5457   __ ld4(v15.V8B(), v16.V8B(), v17.V8B(), v18.V8B(), bad_memory);
5458   __ ld4(v27.V8B(), v28.V8B(), v29.V8B(), v30.V8B(), bad_memory);
5459   __ ld4(v5.V8B(), v6.V8B(), v7.V8B(), v8.V8B(), bad_memory);
5460   __ ld4(v25.V8H(), v26.V8H(), v27.V8H(), v28.V8H(), bad_memory);
5461   __ ld4(v2.V8H(), v3.V8H(), v4.V8H(), v5.V8H(), bad_memory);
5462   __ ld4(v20.V8H(), v21.V8H(), v22.V8H(), v23.V8H(), bad_memory);
5463   __ ld4(v20.B(), v21.B(), v22.B(), v23.B(), 3, bad_memory);
5464   __ ld4(v12.B(), v13.B(), v14.B(), v15.B(), 3, bad_memory);
5465   __ ld4(v27.B(), v28.B(), v29.B(), v30.B(), 6, bad_memory);
5466   __ ld4(v28.D(), v29.D(), v30.D(), v31.D(), 1, bad_memory);
5467   __ ld4(v15.D(), v16.D(), v17.D(), v18.D(), 1, bad_memory);
5468   __ ld4(v16.D(), v17.D(), v18.D(), v19.D(), 1, bad_memory);
5469   __ ld4(v2.H(), v3.H(), v4.H(), v5.H(), 6, bad_memory);
5470   __ ld4(v5.H(), v6.H(), v7.H(), v8.H(), 3, bad_memory);
5471   __ ld4(v7.H(), v8.H(), v9.H(), v10.H(), 6, bad_memory);
5472   __ ld4(v6.S(), v7.S(), v8.S(), v9.S(), 1, bad_memory);
5473   __ ld4(v25.S(), v26.S(), v27.S(), v28.S(), 2, bad_memory);
5474   __ ld4(v8.S(), v9.S(), v10.S(), v11.S(), 3, bad_memory);
5475   __ ld4r(v14.V16B(), v15.V16B(), v16.V16B(), v17.V16B(), bad_memory);
5476   __ ld4r(v13.V16B(), v14.V16B(), v15.V16B(), v16.V16B(), bad_memory);
5477   __ ld4r(v9.V16B(), v10.V16B(), v11.V16B(), v12.V16B(), bad_memory);
5478   __ ld4r(v8.V1D(), v9.V1D(), v10.V1D(), v11.V1D(), bad_memory);
5479   __ ld4r(v4.V1D(), v5.V1D(), v6.V1D(), v7.V1D(), bad_memory);
5480   __ ld4r(v26.V1D(), v27.V1D(), v28.V1D(), v29.V1D(), bad_memory);
5481   __ ld4r(v19.V2D(), v20.V2D(), v21.V2D(), v22.V2D(), bad_memory);
5482   __ ld4r(v28.V2D(), v29.V2D(), v30.V2D(), v31.V2D(), bad_memory);
5483   __ ld4r(v15.V2D(), v16.V2D(), v17.V2D(), v18.V2D(), bad_memory);
5484   __ ld4r(v31.V2S(), v0.V2S(), v1.V2S(), v2.V2S(), bad_memory);
5485   __ ld4r(v28.V2S(), v29.V2S(), v30.V2S(), v31.V2S(), bad_memory);
5486   __ ld4r(v11.V2S(), v12.V2S(), v13.V2S(), v14.V2S(), bad_memory);
5487   __ ld4r(v19.V4H(), v20.V4H(), v21.V4H(), v22.V4H(), bad_memory);
5488   __ ld4r(v22.V4H(), v23.V4H(), v24.V4H(), v25.V4H(), bad_memory);
5489   __ ld4r(v20.V4H(), v21.V4H(), v22.V4H(), v23.V4H(), bad_memory);
5490   __ ld4r(v16.V4S(), v17.V4S(), v18.V4S(), v19.V4S(), bad_memory);
5491   __ ld4r(v25.V4S(), v26.V4S(), v27.V4S(), v28.V4S(), bad_memory);
5492   __ ld4r(v23.V4S(), v24.V4S(), v25.V4S(), v26.V4S(), bad_memory);
5493   __ ld4r(v22.V8B(), v23.V8B(), v24.V8B(), v25.V8B(), bad_memory);
5494   __ ld4r(v27.V8B(), v28.V8B(), v29.V8B(), v30.V8B(), bad_memory);
5495   __ ld4r(v29.V8B(), v30.V8B(), v31.V8B(), v0.V8B(), bad_memory);
5496   __ ld4r(v28.V8H(), v29.V8H(), v30.V8H(), v31.V8H(), bad_memory);
5497   __ ld4r(v25.V8H(), v26.V8H(), v27.V8H(), v28.V8H(), bad_memory);
5498   __ ld4r(v22.V8H(), v23.V8H(), v24.V8H(), v25.V8H(), bad_memory);
5499 
5500   __ st1(v18.V16B(), v19.V16B(), v20.V16B(), v21.V16B(), bad_memory);
5501   __ st1(v10.V16B(), v11.V16B(), v12.V16B(), v13.V16B(), bad_memory);
5502   __ st1(v27.V16B(), v28.V16B(), v29.V16B(), v30.V16B(), bad_memory);
5503   __ st1(v16.V16B(), v17.V16B(), v18.V16B(), bad_memory);
5504   __ st1(v21.V16B(), v22.V16B(), v23.V16B(), bad_memory);
5505   __ st1(v9.V16B(), v10.V16B(), v11.V16B(), bad_memory);
5506   __ st1(v7.V16B(), v8.V16B(), bad_memory);
5507   __ st1(v26.V16B(), v27.V16B(), bad_memory);
5508   __ st1(v22.V16B(), v23.V16B(), bad_memory);
5509   __ st1(v23.V16B(), bad_memory);
5510   __ st1(v28.V16B(), bad_memory);
5511   __ st1(v2.V16B(), bad_memory);
5512   __ st1(v29.V1D(), v30.V1D(), v31.V1D(), v0.V1D(), bad_memory);
5513   __ st1(v12.V1D(), v13.V1D(), v14.V1D(), v15.V1D(), bad_memory);
5514   __ st1(v30.V1D(), v31.V1D(), v0.V1D(), v1.V1D(), bad_memory);
5515   __ st1(v16.V1D(), v17.V1D(), v18.V1D(), bad_memory);
5516   __ st1(v3.V1D(), v4.V1D(), v5.V1D(), bad_memory);
5517   __ st1(v14.V1D(), v15.V1D(), v16.V1D(), bad_memory);
5518   __ st1(v18.V1D(), v19.V1D(), bad_memory);
5519   __ st1(v5.V1D(), v6.V1D(), bad_memory);
5520   __ st1(v2.V1D(), v3.V1D(), bad_memory);
5521   __ st1(v4.V1D(), bad_memory);
5522   __ st1(v27.V1D(), bad_memory);
5523   __ st1(v23.V1D(), bad_memory);
5524   __ st1(v2.V2D(), v3.V2D(), v4.V2D(), v5.V2D(), bad_memory);
5525   __ st1(v22.V2D(), v23.V2D(), v24.V2D(), v25.V2D(), bad_memory);
5526   __ st1(v28.V2D(), v29.V2D(), v30.V2D(), v31.V2D(), bad_memory);
5527   __ st1(v17.V2D(), v18.V2D(), v19.V2D(), bad_memory);
5528   __ st1(v16.V2D(), v17.V2D(), v18.V2D(), bad_memory);
5529   __ st1(v22.V2D(), v23.V2D(), v24.V2D(), bad_memory);
5530   __ st1(v21.V2D(), v22.V2D(), bad_memory);
5531   __ st1(v6.V2D(), v7.V2D(), bad_memory);
5532   __ st1(v27.V2D(), v28.V2D(), bad_memory);
5533   __ st1(v21.V2D(), bad_memory);
5534   __ st1(v29.V2D(), bad_memory);
5535   __ st1(v20.V2D(), bad_memory);
5536   __ st1(v22.V2S(), v23.V2S(), v24.V2S(), v25.V2S(), bad_memory);
5537   __ st1(v8.V2S(), v9.V2S(), v10.V2S(), v11.V2S(), bad_memory);
5538   __ st1(v15.V2S(), v16.V2S(), v17.V2S(), v18.V2S(), bad_memory);
5539   __ st1(v2.V2S(), v3.V2S(), v4.V2S(), bad_memory);
5540   __ st1(v23.V2S(), v24.V2S(), v25.V2S(), bad_memory);
5541   __ st1(v7.V2S(), v8.V2S(), v9.V2S(), bad_memory);
5542   __ st1(v28.V2S(), v29.V2S(), bad_memory);
5543   __ st1(v29.V2S(), v30.V2S(), bad_memory);
5544   __ st1(v23.V2S(), v24.V2S(), bad_memory);
5545   __ st1(v6.V2S(), bad_memory);
5546   __ st1(v11.V2S(), bad_memory);
5547   __ st1(v17.V2S(), bad_memory);
5548   __ st1(v6.V4H(), v7.V4H(), v8.V4H(), v9.V4H(), bad_memory);
5549   __ st1(v9.V4H(), v10.V4H(), v11.V4H(), v12.V4H(), bad_memory);
5550   __ st1(v25.V4H(), v26.V4H(), v27.V4H(), v28.V4H(), bad_memory);
5551   __ st1(v11.V4H(), v12.V4H(), v13.V4H(), bad_memory);
5552   __ st1(v10.V4H(), v11.V4H(), v12.V4H(), bad_memory);
5553   __ st1(v12.V4H(), v13.V4H(), v14.V4H(), bad_memory);
5554   __ st1(v13.V4H(), v14.V4H(), bad_memory);
5555   __ st1(v15.V4H(), v16.V4H(), bad_memory);
5556   __ st1(v21.V4H(), v22.V4H(), bad_memory);
5557   __ st1(v16.V4H(), bad_memory);
5558   __ st1(v8.V4H(), bad_memory);
5559   __ st1(v30.V4H(), bad_memory);
5560   __ st1(v3.V4S(), v4.V4S(), v5.V4S(), v6.V4S(), bad_memory);
5561   __ st1(v25.V4S(), v26.V4S(), v27.V4S(), v28.V4S(), bad_memory);
5562   __ st1(v5.V4S(), v6.V4S(), v7.V4S(), v8.V4S(), bad_memory);
5563   __ st1(v31.V4S(), v0.V4S(), v1.V4S(), bad_memory);
5564   __ st1(v30.V4S(), v31.V4S(), v0.V4S(), bad_memory);
5565   __ st1(v6.V4S(), v7.V4S(), v8.V4S(), bad_memory);
5566   __ st1(v17.V4S(), v18.V4S(), bad_memory);
5567   __ st1(v31.V4S(), v0.V4S(), bad_memory);
5568   __ st1(v1.V4S(), v2.V4S(), bad_memory);
5569   __ st1(v26.V4S(), bad_memory);
5570   __ st1(v15.V4S(), bad_memory);
5571   __ st1(v13.V4S(), bad_memory);
5572   __ st1(v26.V8B(), v27.V8B(), v28.V8B(), v29.V8B(), bad_memory);
5573   __ st1(v10.V8B(), v11.V8B(), v12.V8B(), v13.V8B(), bad_memory);
5574   __ st1(v15.V8B(), v16.V8B(), v17.V8B(), v18.V8B(), bad_memory);
5575   __ st1(v19.V8B(), v20.V8B(), v21.V8B(), bad_memory);
5576   __ st1(v31.V8B(), v0.V8B(), v1.V8B(), bad_memory);
5577   __ st1(v9.V8B(), v10.V8B(), v11.V8B(), bad_memory);
5578   __ st1(v12.V8B(), v13.V8B(), bad_memory);
5579   __ st1(v2.V8B(), v3.V8B(), bad_memory);
5580   __ st1(v0.V8B(), v1.V8B(), bad_memory);
5581   __ st1(v16.V8B(), bad_memory);
5582   __ st1(v25.V8B(), bad_memory);
5583   __ st1(v31.V8B(), bad_memory);
5584   __ st1(v4.V8H(), v5.V8H(), v6.V8H(), v7.V8H(), bad_memory);
5585   __ st1(v3.V8H(), v4.V8H(), v5.V8H(), v6.V8H(), bad_memory);
5586   __ st1(v26.V8H(), v27.V8H(), v28.V8H(), v29.V8H(), bad_memory);
5587   __ st1(v10.V8H(), v11.V8H(), v12.V8H(), bad_memory);
5588   __ st1(v21.V8H(), v22.V8H(), v23.V8H(), bad_memory);
5589   __ st1(v18.V8H(), v19.V8H(), v20.V8H(), bad_memory);
5590   __ st1(v26.V8H(), v27.V8H(), bad_memory);
5591   __ st1(v24.V8H(), v25.V8H(), bad_memory);
5592   __ st1(v17.V8H(), v18.V8H(), bad_memory);
5593   __ st1(v29.V8H(), bad_memory);
5594   __ st1(v19.V8H(), bad_memory);
5595   __ st1(v23.V8H(), bad_memory);
5596   __ st1(v19.B(), 15, bad_memory);
5597   __ st1(v25.B(), 9, bad_memory);
5598   __ st1(v4.B(), 8, bad_memory);
5599   __ st1(v13.D(), 0, bad_memory);
5600   __ st1(v30.D(), 0, bad_memory);
5601   __ st1(v3.D(), 0, bad_memory);
5602   __ st1(v22.H(), 0, bad_memory);
5603   __ st1(v31.H(), 7, bad_memory);
5604   __ st1(v23.H(), 3, bad_memory);
5605   __ st1(v0.S(), 0, bad_memory);
5606   __ st1(v11.S(), 3, bad_memory);
5607   __ st1(v24.S(), 3, bad_memory);
5608   __ st2(v7.V16B(), v8.V16B(), bad_memory);
5609   __ st2(v5.V16B(), v6.V16B(), bad_memory);
5610   __ st2(v18.V16B(), v19.V16B(), bad_memory);
5611   __ st2(v14.V2D(), v15.V2D(), bad_memory);
5612   __ st2(v7.V2D(), v8.V2D(), bad_memory);
5613   __ st2(v24.V2D(), v25.V2D(), bad_memory);
5614   __ st2(v22.V2S(), v23.V2S(), bad_memory);
5615   __ st2(v4.V2S(), v5.V2S(), bad_memory);
5616   __ st2(v2.V2S(), v3.V2S(), bad_memory);
5617   __ st2(v23.V4H(), v24.V4H(), bad_memory);
5618   __ st2(v8.V4H(), v9.V4H(), bad_memory);
5619   __ st2(v7.V4H(), v8.V4H(), bad_memory);
5620   __ st2(v17.V4S(), v18.V4S(), bad_memory);
5621   __ st2(v6.V4S(), v7.V4S(), bad_memory);
5622   __ st2(v26.V4S(), v27.V4S(), bad_memory);
5623   __ st2(v31.V8B(), v0.V8B(), bad_memory);
5624   __ st2(v0.V8B(), v1.V8B(), bad_memory);
5625   __ st2(v21.V8B(), v22.V8B(), bad_memory);
5626   __ st2(v7.V8H(), v8.V8H(), bad_memory);
5627   __ st2(v22.V8H(), v23.V8H(), bad_memory);
5628   __ st2(v4.V8H(), v5.V8H(), bad_memory);
5629   __ st2(v8.B(), v9.B(), 15, bad_memory);
5630   __ st2(v8.B(), v9.B(), 15, bad_memory);
5631   __ st2(v7.B(), v8.B(), 4, bad_memory);
5632   __ st2(v25.D(), v26.D(), 0, bad_memory);
5633   __ st2(v17.D(), v18.D(), 1, bad_memory);
5634   __ st2(v3.D(), v4.D(), 1, bad_memory);
5635   __ st2(v4.H(), v5.H(), 3, bad_memory);
5636   __ st2(v0.H(), v1.H(), 5, bad_memory);
5637   __ st2(v22.H(), v23.H(), 2, bad_memory);
5638   __ st2(v14.S(), v15.S(), 3, bad_memory);
5639   __ st2(v23.S(), v24.S(), 3, bad_memory);
5640   __ st2(v0.S(), v1.S(), 2, bad_memory);
5641   __ st3(v26.V16B(), v27.V16B(), v28.V16B(), bad_memory);
5642   __ st3(v21.V16B(), v22.V16B(), v23.V16B(), bad_memory);
5643   __ st3(v24.V16B(), v25.V16B(), v26.V16B(), bad_memory);
5644   __ st3(v17.V2D(), v18.V2D(), v19.V2D(), bad_memory);
5645   __ st3(v23.V2D(), v24.V2D(), v25.V2D(), bad_memory);
5646   __ st3(v10.V2D(), v11.V2D(), v12.V2D(), bad_memory);
5647   __ st3(v9.V2S(), v10.V2S(), v11.V2S(), bad_memory);
5648   __ st3(v13.V2S(), v14.V2S(), v15.V2S(), bad_memory);
5649   __ st3(v22.V2S(), v23.V2S(), v24.V2S(), bad_memory);
5650   __ st3(v31.V4H(), v0.V4H(), v1.V4H(), bad_memory);
5651   __ st3(v8.V4H(), v9.V4H(), v10.V4H(), bad_memory);
5652   __ st3(v19.V4H(), v20.V4H(), v21.V4H(), bad_memory);
5653   __ st3(v18.V4S(), v19.V4S(), v20.V4S(), bad_memory);
5654   __ st3(v25.V4S(), v26.V4S(), v27.V4S(), bad_memory);
5655   __ st3(v16.V4S(), v17.V4S(), v18.V4S(), bad_memory);
5656   __ st3(v27.V8B(), v28.V8B(), v29.V8B(), bad_memory);
5657   __ st3(v29.V8B(), v30.V8B(), v31.V8B(), bad_memory);
5658   __ st3(v30.V8B(), v31.V8B(), v0.V8B(), bad_memory);
5659   __ st3(v8.V8H(), v9.V8H(), v10.V8H(), bad_memory);
5660   __ st3(v18.V8H(), v19.V8H(), v20.V8H(), bad_memory);
5661   __ st3(v18.V8H(), v19.V8H(), v20.V8H(), bad_memory);
5662   __ st3(v31.B(), v0.B(), v1.B(), 10, bad_memory);
5663   __ st3(v4.B(), v5.B(), v6.B(), 5, bad_memory);
5664   __ st3(v5.B(), v6.B(), v7.B(), 1, bad_memory);
5665   __ st3(v5.D(), v6.D(), v7.D(), 0, bad_memory);
5666   __ st3(v6.D(), v7.D(), v8.D(), 0, bad_memory);
5667   __ st3(v0.D(), v1.D(), v2.D(), 0, bad_memory);
5668   __ st3(v31.H(), v0.H(), v1.H(), 2, bad_memory);
5669   __ st3(v14.H(), v15.H(), v16.H(), 5, bad_memory);
5670   __ st3(v21.H(), v22.H(), v23.H(), 6, bad_memory);
5671   __ st3(v21.S(), v22.S(), v23.S(), 0, bad_memory);
5672   __ st3(v11.S(), v12.S(), v13.S(), 1, bad_memory);
5673   __ st3(v15.S(), v16.S(), v17.S(), 0, bad_memory);
5674   __ st4(v22.V16B(), v23.V16B(), v24.V16B(), v25.V16B(), bad_memory);
5675   __ st4(v24.V16B(), v25.V16B(), v26.V16B(), v27.V16B(), bad_memory);
5676   __ st4(v15.V16B(), v16.V16B(), v17.V16B(), v18.V16B(), bad_memory);
5677   __ st4(v16.V2D(), v17.V2D(), v18.V2D(), v19.V2D(), bad_memory);
5678   __ st4(v17.V2D(), v18.V2D(), v19.V2D(), v20.V2D(), bad_memory);
5679   __ st4(v9.V2D(), v10.V2D(), v11.V2D(), v12.V2D(), bad_memory);
5680   __ st4(v23.V2S(), v24.V2S(), v25.V2S(), v26.V2S(), bad_memory);
5681   __ st4(v15.V2S(), v16.V2S(), v17.V2S(), v18.V2S(), bad_memory);
5682   __ st4(v24.V2S(), v25.V2S(), v26.V2S(), v27.V2S(), bad_memory);
5683   __ st4(v14.V4H(), v15.V4H(), v16.V4H(), v17.V4H(), bad_memory);
5684   __ st4(v18.V4H(), v19.V4H(), v20.V4H(), v21.V4H(), bad_memory);
5685   __ st4(v1.V4H(), v2.V4H(), v3.V4H(), v4.V4H(), bad_memory);
5686   __ st4(v13.V4S(), v14.V4S(), v15.V4S(), v16.V4S(), bad_memory);
5687   __ st4(v6.V4S(), v7.V4S(), v8.V4S(), v9.V4S(), bad_memory);
5688   __ st4(v15.V4S(), v16.V4S(), v17.V4S(), v18.V4S(), bad_memory);
5689   __ st4(v26.V8B(), v27.V8B(), v28.V8B(), v29.V8B(), bad_memory);
5690   __ st4(v25.V8B(), v26.V8B(), v27.V8B(), v28.V8B(), bad_memory);
5691   __ st4(v19.V8B(), v20.V8B(), v21.V8B(), v22.V8B(), bad_memory);
5692   __ st4(v19.V8H(), v20.V8H(), v21.V8H(), v22.V8H(), bad_memory);
5693   __ st4(v15.V8H(), v16.V8H(), v17.V8H(), v18.V8H(), bad_memory);
5694   __ st4(v31.V8H(), v0.V8H(), v1.V8H(), v2.V8H(), bad_memory);
5695   __ st4(v0.B(), v1.B(), v2.B(), v3.B(), 13, bad_memory);
5696   __ st4(v4.B(), v5.B(), v6.B(), v7.B(), 10, bad_memory);
5697   __ st4(v9.B(), v10.B(), v11.B(), v12.B(), 9, bad_memory);
5698   __ st4(v2.D(), v3.D(), v4.D(), v5.D(), 1, bad_memory);
5699   __ st4(v7.D(), v8.D(), v9.D(), v10.D(), 0, bad_memory);
5700   __ st4(v31.D(), v0.D(), v1.D(), v2.D(), 1, bad_memory);
5701   __ st4(v2.H(), v3.H(), v4.H(), v5.H(), 1, bad_memory);
5702   __ st4(v27.H(), v28.H(), v29.H(), v30.H(), 3, bad_memory);
5703   __ st4(v24.H(), v25.H(), v26.H(), v27.H(), 4, bad_memory);
5704   __ st4(v18.S(), v19.S(), v20.S(), v21.S(), 2, bad_memory);
5705   __ st4(v6.S(), v7.S(), v8.S(), v9.S(), 2, bad_memory);
5706   __ st4(v25.S(), v26.S(), v27.S(), v28.S(), 1, bad_memory);
5707 
5708   END_IMPLICIT_CHECK();
5709   TRY_RUN_IMPLICIT_CHECK();
5710 }
5711 
TEST(ImplicitCheckSve)5712 TEST(ImplicitCheckSve) {
5713   SETUP_WITH_FEATURES(CPUFeatures::kSVE,
5714                       CPUFeatures::kSVE2,
5715                       CPUFeatures::kNEON);
5716   START_IMPLICIT_CHECK();
5717 
5718   SVEMemOperand bad_sve_memory = SVEMemOperand(ip0);
5719 
5720   EmissionCheckScope guard(&masm, masm.GetBuffer()->GetRemainingBytes());
5721   // Simple, unpredicated loads and stores.
5722   __ Str(p12.VnD(), bad_sve_memory);
5723   __ Str(p13.VnS(), bad_sve_memory);
5724   __ Str(p14.VnH(), bad_sve_memory);
5725   __ Str(p15.VnB(), bad_sve_memory);
5726   __ Ldr(p8.VnD(), bad_sve_memory);
5727   __ Ldr(p9.VnS(), bad_sve_memory);
5728   __ Ldr(p10.VnH(), bad_sve_memory);
5729   __ Ldr(p11.VnB(), bad_sve_memory);
5730 
5731   __ Str(z0.VnD(), bad_sve_memory);
5732   __ Str(z1.VnS(), bad_sve_memory);
5733   __ Str(z2.VnH(), bad_sve_memory);
5734   __ Str(z3.VnB(), bad_sve_memory);
5735   __ Ldr(z20.VnD(), bad_sve_memory);
5736   __ Ldr(z21.VnS(), bad_sve_memory);
5737   __ Ldr(z22.VnH(), bad_sve_memory);
5738   __ Ldr(z23.VnB(), bad_sve_memory);
5739 
5740   // Structured accesses.
5741   __ St1b(z0.VnB(), p2, bad_sve_memory);
5742   __ St1h(z1.VnH(), p1, bad_sve_memory);
5743   __ St1w(z2.VnS(), p1, bad_sve_memory);
5744   __ St1d(z3.VnD(), p2, bad_sve_memory);
5745   __ Ld1b(z20.VnB(), p1.Zeroing(), bad_sve_memory);
5746   __ Ld1h(z21.VnH(), p2.Zeroing(), bad_sve_memory);
5747   __ Ld1w(z22.VnS(), p1.Zeroing(), bad_sve_memory);
5748   __ Ld1d(z23.VnD(), p1.Zeroing(), bad_sve_memory);
5749 
5750   // Structured, packed accesses.
5751   __ St1b(z2.VnH(), p1, bad_sve_memory);
5752   __ St1b(z3.VnS(), p2, bad_sve_memory);
5753   __ St1b(z4.VnD(), p2, bad_sve_memory);
5754   __ St1h(z0.VnS(), p1, bad_sve_memory);
5755   __ St1h(z1.VnD(), p1, bad_sve_memory);
5756   __ St1w(z2.VnD(), p1, bad_sve_memory);
5757   __ Ld1b(z20.VnH(), p1.Zeroing(), bad_sve_memory);
5758   __ Ld1b(z21.VnS(), p1.Zeroing(), bad_sve_memory);
5759   __ Ld1b(z22.VnD(), p1.Zeroing(), bad_sve_memory);
5760   __ Ld1h(z23.VnS(), p2.Zeroing(), bad_sve_memory);
5761   __ Ld1h(z24.VnD(), p2.Zeroing(), bad_sve_memory);
5762   __ Ld1w(z20.VnD(), p1.Zeroing(), bad_sve_memory);
5763   __ Ld1sb(z21.VnH(), p1.Zeroing(), bad_sve_memory);
5764   __ Ld1sb(z22.VnS(), p1.Zeroing(), bad_sve_memory);
5765   __ Ld1sb(z23.VnD(), p2.Zeroing(), bad_sve_memory);
5766   __ Ld1sh(z24.VnS(), p2.Zeroing(), bad_sve_memory);
5767   __ Ld1sh(z20.VnD(), p1.Zeroing(), bad_sve_memory);
5768   __ Ld1sw(z21.VnD(), p1.Zeroing(), bad_sve_memory);
5769 
5770   // Structured, interleaved accesses.
5771   __ St2b(z0.VnB(), z1.VnB(), p4, bad_sve_memory);
5772   __ St2h(z1.VnH(), z2.VnH(), p4, bad_sve_memory);
5773   __ St2w(z2.VnS(), z3.VnS(), p3, bad_sve_memory);
5774   __ St2d(z3.VnD(), z4.VnD(), p4, bad_sve_memory);
5775   __ Ld2b(z20.VnB(), z21.VnB(), p5.Zeroing(), bad_sve_memory);
5776   __ Ld2h(z21.VnH(), z22.VnH(), p6.Zeroing(), bad_sve_memory);
5777   __ Ld2w(z22.VnS(), z23.VnS(), p6.Zeroing(), bad_sve_memory);
5778   __ Ld2d(z23.VnD(), z24.VnD(), p5.Zeroing(), bad_sve_memory);
5779 
5780   __ St3b(z4.VnB(), z5.VnB(), z6.VnB(), p4, bad_sve_memory);
5781   __ St3h(z5.VnH(), z6.VnH(), z7.VnH(), p4, bad_sve_memory);
5782   __ St3w(z6.VnS(), z7.VnS(), z8.VnS(), p3, bad_sve_memory);
5783   __ St3d(z7.VnD(), z8.VnD(), z9.VnD(), p4, bad_sve_memory);
5784   __ Ld3b(z24.VnB(), z25.VnB(), z26.VnB(), p5.Zeroing(), bad_sve_memory);
5785   __ Ld3h(z25.VnH(), z26.VnH(), z27.VnH(), p6.Zeroing(), bad_sve_memory);
5786   __ Ld3w(z26.VnS(), z27.VnS(), z28.VnS(), p6.Zeroing(), bad_sve_memory);
5787   __ Ld3d(z27.VnD(), z28.VnD(), z29.VnD(), p5.Zeroing(), bad_sve_memory);
5788 
5789   __ St4b(z31.VnB(), z0.VnB(), z1.VnB(), z2.VnB(), p4, bad_sve_memory);
5790   __ St4h(z0.VnH(), z1.VnH(), z2.VnH(), z3.VnH(), p4, bad_sve_memory);
5791   __ St4w(z1.VnS(), z2.VnS(), z3.VnS(), z4.VnS(), p3, bad_sve_memory);
5792   __ St4d(z2.VnD(), z3.VnD(), z4.VnD(), z5.VnD(), p4, bad_sve_memory);
5793   __ Ld4b(z25.VnB(),
5794           z26.VnB(),
5795           z27.VnB(),
5796           z28.VnB(),
5797           p5.Zeroing(),
5798           bad_sve_memory);
5799   __ Ld4h(z26.VnH(),
5800           z27.VnH(),
5801           z28.VnH(),
5802           z29.VnH(),
5803           p6.Zeroing(),
5804           bad_sve_memory);
5805   __ Ld4w(z27.VnS(),
5806           z28.VnS(),
5807           z29.VnS(),
5808           z30.VnS(),
5809           p6.Zeroing(),
5810           bad_sve_memory);
5811   __ Ld4d(z28.VnD(),
5812           z29.VnD(),
5813           z30.VnD(),
5814           z31.VnD(),
5815           p5.Zeroing(),
5816           bad_sve_memory);
5817 
5818   END_IMPLICIT_CHECK();
5819   TRY_RUN_IMPLICIT_CHECK();
5820 }
5821 
TEST(ImplicitCheckAtomics)5822 TEST(ImplicitCheckAtomics) {
5823   SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kAtomics);
5824   START_IMPLICIT_CHECK();
5825 
5826   EmissionCheckScope guard(&masm, masm.GetBuffer()->GetRemainingBytes());
5827 #define INST_LIST(OP)                 \
5828   __ Ld##OP##b(w0, w0, bad_memory);   \
5829   __ Ld##OP##ab(w0, w1, bad_memory);  \
5830   __ Ld##OP##lb(w0, w2, bad_memory);  \
5831   __ Ld##OP##alb(w0, w3, bad_memory); \
5832   __ Ld##OP##h(w0, w0, bad_memory);   \
5833   __ Ld##OP##ah(w0, w1, bad_memory);  \
5834   __ Ld##OP##lh(w0, w2, bad_memory);  \
5835   __ Ld##OP##alh(w0, w3, bad_memory); \
5836   __ Ld##OP(w0, w0, bad_memory);      \
5837   __ Ld##OP##a(w0, w1, bad_memory);   \
5838   __ Ld##OP##l(w0, w2, bad_memory);   \
5839   __ Ld##OP##al(w0, w3, bad_memory);  \
5840   __ Ld##OP(x0, x0, bad_memory);      \
5841   __ Ld##OP##a(x0, x1, bad_memory);   \
5842   __ Ld##OP##l(x0, x2, bad_memory);   \
5843   __ Ld##OP##al(x0, x3, bad_memory);  \
5844   __ St##OP##b(w0, bad_memory);       \
5845   __ St##OP##lb(w0, bad_memory);      \
5846   __ St##OP##h(w0, bad_memory);       \
5847   __ St##OP##lh(w0, bad_memory);      \
5848   __ St##OP(w0, bad_memory);          \
5849   __ St##OP##l(w0, bad_memory);       \
5850   __ St##OP(x0, bad_memory);          \
5851   __ St##OP##l(x0, bad_memory);
5852 
5853   INST_LIST(add);
5854   INST_LIST(set);
5855   INST_LIST(eor);
5856   INST_LIST(smin);
5857   INST_LIST(smax);
5858   INST_LIST(umin);
5859   INST_LIST(umax);
5860   INST_LIST(clr);
5861 
5862 #undef INST_LIST
5863 
5864   END_IMPLICIT_CHECK();
5865   TRY_RUN_IMPLICIT_CHECK();
5866 }
5867 
// Emits the memory-set (Set*) and memory-copy (Cpy*) macros using ip0 and ip1
// as their address registers, then adjusts x1 before END_IMPLICIT_CHECK and
// hands the code to TRY_RUN_IMPLICIT_CHECK.
TEST(ImplicitCheckMops) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kMOPS);
  START_IMPLICIT_CHECK();

  // Scope sized to whatever is left in the code buffer.
  EmissionCheckScope emission_guard(&masm,
                                    masm.GetBuffer()->GetRemainingBytes());

  // Memory-set family.
  __ Set(x15, ip1, ip0);
  __ Setn(x15, ip1, ip0);
  __ Setg(x15, ip1, ip0);
  __ Setgn(x15, ip1, ip0);

  // Memory-copy family (Cpy* and Cpyf* variants).
  __ Cpy(x15, ip0, ip1);
  __ Cpyn(x15, ip0, ip1);
  __ Cpyrn(x15, ip0, ip1);
  __ Cpywn(x15, ip0, ip1);
  __ Cpyf(x15, ip0, ip1);
  __ Cpyfn(x15, ip0, ip1);
  __ Cpyfrn(x15, ip0, ip1);
  __ Cpyfwn(x15, ip0, ip1);

  // Each macro above is expanded by the macro-assembler into a prologue, a
  // main and an epilogue instruction, and only the main one will fail. The
  // counter is therefore rewritten as x1 = 3 * x1 + 3, which accounts for
  // those additional instructions and for the instructions that follow.
  __ Mov(x0, 3);
  __ Mul(x1, x1, x0);
  __ Add(x1, x1, x0);

  END_IMPLICIT_CHECK();
  TRY_RUN_IMPLICIT_CHECK();
}
5898 #endif  // VIXL_ENABLE_IMPLICIT_CHECKS
5899 
// The code generators below receive the assembler as a `MacroAssembler* masm`
// pointer, so `__` is redefined here to expand to `masm->`.
#undef __
#define __ masm->
5902 
5903 #if defined(VIXL_INCLUDE_SIMULATOR_AARCH64) &&                 \
5904     defined(VIXL_HAS_ABI_SUPPORT) && __cplusplus >= 201103L && \
5905     (defined(__clang__) || GCC_VERSION_OR_NEWER(4, 9, 1))
5906 
5907 // Generate a function that stores zero to a hard-coded address.
GenerateStoreZero(MacroAssembler * masm,int32_t * target)5908 Instruction* GenerateStoreZero(MacroAssembler* masm, int32_t* target) {
5909   masm->Reset();
5910 
5911   UseScratchRegisterScope temps(masm);
5912   Register temp = temps.AcquireX();
5913   __ Mov(temp, reinterpret_cast<intptr_t>(target));
5914   __ Str(wzr, MemOperand(temp));
5915   __ Ret();
5916 
5917   masm->FinalizeCode();
5918   return masm->GetBuffer()->GetStartAddress<Instruction*>();
5919 }
5920 
5921 
5922 // Generate a function that stores the `int32_t` argument to a hard-coded
5923 // address.
5924 // In this example and the other below, we use the `abi` object to retrieve
5925 // argument and return locations even though we could easily hard code them.
5926 // This mirrors how more generic code (e.g. templated) user would use these
5927 // mechanisms.
GenerateStoreInput(MacroAssembler * masm,int32_t * target)5928 Instruction* GenerateStoreInput(MacroAssembler* masm, int32_t* target) {
5929   masm->Reset();
5930 
5931   ABI abi;
5932   Register input =
5933       Register(abi.GetNextParameterGenericOperand<int32_t>().GetCPURegister());
5934 
5935   UseScratchRegisterScope temps(masm);
5936   Register temp = temps.AcquireX();
5937   __ Mov(temp, reinterpret_cast<intptr_t>(target));
5938   __ Str(input, MemOperand(temp));
5939   __ Ret();
5940 
5941   masm->FinalizeCode();
5942   return masm->GetBuffer()->GetStartAddress<Instruction*>();
5943 }
5944 
5945 
5946 // A minimal implementation of a `pow` function.
GeneratePow(MacroAssembler * masm,unsigned pow)5947 Instruction* GeneratePow(MacroAssembler* masm, unsigned pow) {
5948   masm->Reset();
5949 
5950   ABI abi;
5951   Register input =
5952       Register(abi.GetNextParameterGenericOperand<int64_t>().GetCPURegister());
5953   Register result =
5954       Register(abi.GetReturnGenericOperand<int64_t>().GetCPURegister());
5955   UseScratchRegisterScope temps(masm);
5956   Register temp = temps.AcquireX();
5957 
5958   __ Mov(temp, 1);
5959   for (unsigned i = 0; i < pow; i++) {
5960     __ Mul(temp, temp, input);
5961   }
5962   __ Mov(result, temp);
5963   __ Ret();
5964 
5965   masm->FinalizeCode();
5966   return masm->GetBuffer()->GetStartAddress<Instruction*>();
5967 }
5968 
5969 
GenerateSum(MacroAssembler * masm)5970 Instruction* GenerateSum(MacroAssembler* masm) {
5971   masm->Reset();
5972 
5973   ABI abi;
5974   VRegister input_1 =
5975       VRegister(abi.GetNextParameterGenericOperand<float>().GetCPURegister());
5976   Register input_2 =
5977       Register(abi.GetNextParameterGenericOperand<int64_t>().GetCPURegister());
5978   VRegister input_3 =
5979       VRegister(abi.GetNextParameterGenericOperand<double>().GetCPURegister());
5980   VRegister result =
5981       VRegister(abi.GetReturnGenericOperand<double>().GetCPURegister());
5982 
5983   UseScratchRegisterScope temps(masm);
5984   VRegister temp = temps.AcquireD();
5985 
5986   __ Fcvt(input_1.D(), input_1);
5987   __ Scvtf(temp, input_2);
5988   __ Fadd(temp, temp, input_1.D());
5989   __ Fadd(result, temp, input_3);
5990   __ Ret();
5991 
5992   masm->FinalizeCode();
5993   return masm->GetBuffer()->GetStartAddress<Instruction*>();
5994 }
5995 
5996 
// Runs each generated function above through `simulator.RunFrom` and checks
// the observable result: a store with no argument, a store of one argument,
// an integer return value, and a mixed float/integer/double argument list.
TEST(RunFrom) {
  SETUP_WITH_FEATURES(CPUFeatures::kFP);

  // void(): the sentinel in `value` must be overwritten with zero.
  int32_t value = 0xbad;
  Instruction* store_zero = GenerateStoreZero(&masm, &value);
  simulator.RunFrom(store_zero);
  VIXL_CHECK(value == 0);

  // void(int32_t): the argument must end up in `value`.
  int32_t stored_input = 0xf00d;
  Instruction* store_input = GenerateStoreInput(&masm, &value);
  simulator.RunFrom<void, int32_t>(store_input, stored_input);
  VIXL_CHECK(value == 0xf00d);

  // int64_t(int64_t): exponents 0, 1 and 10.
  int64_t res_int64_t =
      simulator.RunFrom<int64_t, int64_t>(GeneratePow(&masm, 0), 0xbad);
  VIXL_CHECK(res_int64_t == 1);

  res_int64_t = simulator.RunFrom<int64_t, int64_t>(GeneratePow(&masm, 1), 123);
  VIXL_CHECK(res_int64_t == 123);

  res_int64_t = simulator.RunFrom<int64_t, int64_t>(GeneratePow(&masm, 10), 2);
  VIXL_CHECK(res_int64_t == 1024);

  // double(float, int64_t, double): several arguments passed in registers.
  Instruction* sum = GenerateSum(&masm);
  double res_double =
      simulator.RunFrom<double, float, int64_t, double>(sum, 1.0, 2, 3.0);
  VIXL_CHECK(res_double == 6.0);
}
6028 
6029 #endif
6030 
6031 
6032 }  // namespace aarch64
6033 }  // namespace vixl
6034