• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2015, VIXL authors
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are met:
6 //
7 //   * Redistributions of source code must retain the above copyright notice,
8 //     this list of conditions and the following disclaimer.
9 //   * Redistributions in binary form must reproduce the above copyright notice,
10 //     this list of conditions and the following disclaimer in the documentation
11 //     and/or other materials provided with the distribution.
12 //   * Neither the name of ARM Limited nor the names of its contributors may be
13 //     used to endorse or promote products derived from this software without
14 //     specific prior written permission.
15 //
16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
17 // ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 // WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
20 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 
27 #include <cfloat>
28 #include <cstdio>
29 
30 #include <sstream>
31 
32 #include "test-runner.h"
33 #include "test-utils.h"
34 
35 #include "aarch64/test-simulator-inputs-aarch64.h"
36 #include "aarch64/test-simulator-traces-aarch64.h"
37 #include "aarch64/test-utils-aarch64.h"
38 
39 #include "aarch64/cpu-features-auditor-aarch64.h"
40 #include "aarch64/macro-assembler-aarch64.h"
41 #include "aarch64/simulator-aarch64.h"
42 
43 namespace vixl {
44 namespace aarch64 {
45 
46 // ==== Simulator Tests ====
47 //
48 // These simulator tests check instruction behaviour against a trace taken from
49 // real AArch64 hardware. The same test code is used to generate the trace; the
50 // results are printed to stdout when the test is run with
51 // --generate_test_trace.
52 //
53 // The input lists and expected results are stored in test/traces. The expected
54 // results can be regenerated using tools/generate_simulator_traces.py. Adding a
55 // test for a new instruction is described at the top of
56 // test-simulator-traces-aarch64.h.
57 
// Assembler shorthand: `__ Foo(...)` expands to `masm.Foo(...)`, so a
// MacroAssembler named `masm` must be in scope wherever it is used.
#define __ masm.
// Declare a test whose name is prefixed with AARCH64_SIM_.
#define TEST(test_name) TEST_(AARCH64_SIM_##test_name)

// Set up a test using a default-constructed CPUFeatures.
#define SETUP() SETUP_WITH_FEATURES(CPUFeatures())
62 
63 #ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
64 
// Simulator build: declare the assembler plus the decoder and simulator that
// will execute the generated code. The arguments are forwarded to the
// CPUFeatures constructor.
#define SETUP_WITH_FEATURES(...)                 \
  MacroAssembler masm;                           \
  masm.SetCPUFeatures(CPUFeatures(__VA_ARGS__)); \
  Decoder decoder;                               \
  Simulator simulator(&decoder);                 \
  simulator.SetColouredTrace(Test::coloured_trace());

// Reset the assembler and simulator, save the callee-saved registers, then
// enable whichever trace categories were requested through the Test options.
#define START()                             \
  masm.Reset();                             \
  simulator.ResetState();                   \
  masm.PushCalleeSavedRegisters();          \
  if (Test::trace_reg()) {                  \
    masm.Trace(LOG_STATE, TRACE_ENABLE);    \
  }                                         \
  if (Test::trace_write()) {                \
    masm.Trace(LOG_WRITE, TRACE_ENABLE);    \
  }                                         \
  if (Test::trace_sim()) {                  \
    masm.Trace(LOG_DISASM, TRACE_ENABLE);   \
  }

// Disable all tracing, restore the callee-saved registers, return, and
// finalize the generated code.
#define END()                           \
  masm.Trace(LOG_ALL, TRACE_DISABLE);   \
  masm.PopCalleeSavedRegisters();       \
  masm.Ret();                           \
  masm.FinalizeCode()

// Optionally disassemble, then run the generated code from the start of the
// buffer on the simulator.
#define TRY_RUN(skipped_out)                                            \
  DISASSEMBLE();                                                        \
  simulator.RunFrom(masm.GetBuffer()->GetStartAddress<Instruction*>()); \
  /* The simulator can run every test. */                               \
  *skipped_out = false
97 
98 
99 #else  // VIXL_INCLUDE_SIMULATOR_AARCH64
100 
// Native build: declare the assembler and call CPU::SetUp(). The arguments
// are forwarded to the CPUFeatures constructor.
#define SETUP_WITH_FEATURES(...)                 \
  MacroAssembler masm;                           \
  masm.SetCPUFeatures(CPUFeatures(__VA_ARGS__)); \
  CPU::SetUp()

// Reset the assembler and save the callee-saved registers.
#define START() \
  masm.Reset(); \
  masm.PushCalleeSavedRegisters()

// Restore the callee-saved registers, return, and finalize the generated code.
#define END()                     \
  masm.PopCalleeSavedRegisters(); \
  masm.Ret();                     \
  masm.FinalizeCode()

// Optionally disassemble, audit the CPU features used by the generated code,
// and execute it only if this machine provides all of them. `*skipped` is set
// to true when the code is not executed and to false when it is.
#define TRY_RUN(skipped)                                                      \
  DISASSEMBLE();                                                              \
  /* If the test uses features that the current CPU doesn't support, don't */ \
  /* attempt to run it natively.                                           */ \
  {                                                                           \
    Decoder decoder;                                                          \
    /* TODO: Once available, use runtime feature detection. The use of  */    \
    /* AArch64LegacyBaseline is a stopgap.                              */    \
    const CPUFeatures& this_machine = CPUFeatures::AArch64LegacyBaseline();   \
    CPUFeaturesAuditor auditor(&decoder, this_machine);                       \
    CodeBuffer* buffer = masm.GetBuffer();                                    \
    decoder.Decode(buffer->GetStartAddress<Instruction*>(),                   \
                   buffer->GetEndAddress<Instruction*>());                    \
    const CPUFeatures& requirements = auditor.GetSeenFeatures();              \
    if (!this_machine.Has(requirements)) {                                    \
      /* Note: This message needs to match REGEXP_MISSING_FEATURES from    */ \
      /* tools/threaded_test.py.                                           */ \
      std::stringstream os;                                                   \
      os << "SKIPPED: Missing features: { "                                   \
         << requirements.Without(this_machine) << " }\n";                     \
      printf("%s", os.str().c_str());                                         \
      *skipped = true;                                                        \
    } else {                                                                  \
      buffer->SetExecutable();                                                \
      ExecuteMemory(buffer->GetStartAddress<byte*>(),                         \
                    masm.GetSizeOfCodeGenerated());                           \
      buffer->SetWritable();                                                  \
      *skipped = false;                                                       \
    }                                                                         \
  }
145 
146 
147 #endif  // VIXL_INCLUDE_SIMULATOR_AARCH64
148 
149 
// When disassembly was requested through the Test options, print the whole
// generated code buffer to stdout.
#define DISASSEMBLE()                                                    \
  if (Test::disassemble()) {                                             \
    PrintDisassembler disasm(stdout);                                    \
    CodeBuffer* buffer = masm.GetBuffer();                               \
    disasm.DisassembleBuffer(buffer->GetStartAddress<Instruction*>(),    \
                             buffer->GetEndAddress<Instruction*>());     \
  }
158 
// Upper bound on the number of mismatches printed in full per test; any
// further mismatches are only counted.
static const unsigned kErrorReportLimit = 8u;
161 
162 
163 // Overloaded versions of RawbitsToDouble and RawbitsToFloat for use in the
164 // templated test functions.
rawbits_to_fp(uint32_t bits)165 static float rawbits_to_fp(uint32_t bits) { return RawbitsToFloat(bits); }
166 
rawbits_to_fp(uint64_t bits)167 static double rawbits_to_fp(uint64_t bits) { return RawbitsToDouble(bits); }
168 
169 // The rawbits_to_fp functions are only used for printing decimal values so we
170 // just approximate FP16 as double.
rawbits_to_fp(uint16_t bits)171 static double rawbits_to_fp(uint16_t bits) {
172   return FPToDouble(RawbitsToFloat16(bits), kIgnoreDefaultNaN);
173 }
174 
175 
176 // MacroAssembler member function pointers to pass to the test dispatchers.
177 typedef void (MacroAssembler::*Test1OpFPHelper_t)(const VRegister& fd,
178                                                   const VRegister& fn);
179 typedef void (MacroAssembler::*Test2OpFPHelper_t)(const VRegister& fd,
180                                                   const VRegister& fn,
181                                                   const VRegister& fm);
182 typedef void (MacroAssembler::*Test3OpFPHelper_t)(const VRegister& fd,
183                                                   const VRegister& fn,
184                                                   const VRegister& fm,
185                                                   const VRegister& fa);
186 typedef void (MacroAssembler::*TestFPCmpHelper_t)(const VRegister& fn,
187                                                   const VRegister& fm);
188 typedef void (MacroAssembler::*TestFPCmpZeroHelper_t)(const VRegister& fn,
189                                                       double value);
190 typedef void (MacroAssembler::*TestFPToIntHelper_t)(const Register& rd,
191                                                     const VRegister& fn);
192 typedef void (MacroAssembler::*TestFPToFixedHelper_t)(const Register& rd,
193                                                       const VRegister& fn,
194                                                       int fbits);
195 typedef void (MacroAssembler::*TestFixedToFPHelper_t)(const VRegister& fd,
196                                                       const Register& rn,
197                                                       int fbits);
198 // TODO: 'Test2OpNEONHelper_t' and 'Test2OpFPHelper_t' can be
199 //       consolidated into one routine.
200 typedef void (MacroAssembler::*Test1OpNEONHelper_t)(const VRegister& vd,
201                                                     const VRegister& vn);
202 typedef void (MacroAssembler::*Test2OpNEONHelper_t)(const VRegister& vd,
203                                                     const VRegister& vn,
204                                                     const VRegister& vm);
205 typedef void (MacroAssembler::*TestByElementNEONHelper_t)(const VRegister& vd,
206                                                           const VRegister& vn,
207                                                           const VRegister& vm,
208                                                           int vm_index);
209 typedef void (MacroAssembler::*TestOpImmOpImmVdUpdateNEONHelper_t)(
210     const VRegister& vd, int imm1, const VRegister& vn, int imm2);
211 
212 // This helps using the same typename for both the function pointer
213 // and the array of immediates passed to helper routines.
214 template <typename T>
215 class Test2OpImmediateNEONHelper_t {
216  public:
217   typedef void (MacroAssembler::*mnemonic)(const VRegister& vd,
218                                            const VRegister& vn,
219                                            T imm);
220 };
221 
222 
// Maximum number of hex characters required to represent values of either
// templated type: two hex digits for each byte of the wider of Ta and Tb.
template <typename Ta, typename Tb>
static unsigned MaxHexCharCount() {
  const unsigned size_a = static_cast<unsigned>(sizeof(Ta));
  const unsigned size_b = static_cast<unsigned>(sizeof(Tb));
  // A plain conditional is used instead of std::max so that this function
  // does not rely on <algorithm> being pulled in by some other header.
  const unsigned widest = (size_a > size_b) ? size_a : size_b;
  // 8 bits per byte, 4 bits per hex digit.
  return (widest * 8) / 4;
}
230 
231 
232 // Standard test dispatchers.
233 
234 
Test1Op_Helper(Test1OpFPHelper_t helper,uintptr_t inputs,unsigned inputs_length,uintptr_t results,unsigned d_size,unsigned n_size,bool * skipped)235 static void Test1Op_Helper(Test1OpFPHelper_t helper,
236                            uintptr_t inputs,
237                            unsigned inputs_length,
238                            uintptr_t results,
239                            unsigned d_size,
240                            unsigned n_size,
241                            bool* skipped) {
242   VIXL_ASSERT((d_size == kDRegSize) || (d_size == kSRegSize) ||
243               (d_size == kHRegSize));
244   VIXL_ASSERT((n_size == kDRegSize) || (n_size == kSRegSize) ||
245               (n_size == kHRegSize));
246 
247   CPUFeatures features;
248   features.Combine(CPUFeatures::kFP, CPUFeatures::kFPHalf);
249   // For frint{32,64}{x,y} variants.
250   features.Combine(CPUFeatures::kFrintToFixedSizedInt);
251   SETUP_WITH_FEATURES(features);
252   START();
253 
254   // Roll up the loop to keep the code size down.
255   Label loop_n;
256 
257   Register out = x0;
258   Register inputs_base = x1;
259   Register length = w2;
260   Register index_n = w3;
261 
262   int n_index_shift;
263   VRegister fd;
264   VRegister fn;
265   if (n_size == kDRegSize) {
266     n_index_shift = kDRegSizeInBytesLog2;
267     fn = d1;
268   } else if (n_size == kSRegSize) {
269     n_index_shift = kSRegSizeInBytesLog2;
270     fn = s1;
271   } else {
272     n_index_shift = kHRegSizeInBytesLog2;
273     fn = h1;
274   }
275 
276   if (d_size == kDRegSize) {
277     fd = d0;
278   } else if (d_size == kSRegSize) {
279     fd = s0;
280   } else {
281     fd = h0;
282   }
283 
284 
285   __ Mov(out, results);
286   __ Mov(inputs_base, inputs);
287   __ Mov(length, inputs_length);
288 
289   __ Mov(index_n, 0);
290   __ Bind(&loop_n);
291   __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, n_index_shift));
292 
293   {
294     SingleEmissionCheckScope guard(&masm);
295     (masm.*helper)(fd, fn);
296   }
297   __ Str(fd, MemOperand(out, fd.GetSizeInBytes(), PostIndex));
298 
299   __ Add(index_n, index_n, 1);
300   __ Cmp(index_n, inputs_length);
301   __ B(lo, &loop_n);
302 
303   END();
304   TRY_RUN(skipped);
305 }
306 
307 
308 // Test FP instructions. The inputs[] and expected[] arrays should be arrays of
309 // rawbits representations of doubles or floats. This ensures that exact bit
310 // comparisons can be performed.
311 template <typename Tn, typename Td>
Test1Op(const char * name,Test1OpFPHelper_t helper,const Tn inputs[],unsigned inputs_length,const Td expected[],unsigned expected_length)312 static void Test1Op(const char* name,
313                     Test1OpFPHelper_t helper,
314                     const Tn inputs[],
315                     unsigned inputs_length,
316                     const Td expected[],
317                     unsigned expected_length) {
318   VIXL_ASSERT(inputs_length > 0);
319 
320   const unsigned results_length = inputs_length;
321   Td* results = new Td[results_length];
322 
323   const unsigned d_bits = sizeof(Td) * 8;
324   const unsigned n_bits = sizeof(Tn) * 8;
325   bool skipped;
326 
327   Test1Op_Helper(helper,
328                  reinterpret_cast<uintptr_t>(inputs),
329                  inputs_length,
330                  reinterpret_cast<uintptr_t>(results),
331                  d_bits,
332                  n_bits,
333                  &skipped);
334 
335   if (Test::generate_test_trace()) {
336     // Print the results.
337     printf("const uint%u_t kExpected_%s[] = {\n", d_bits, name);
338     for (unsigned d = 0; d < results_length; d++) {
339       printf("  0x%0*" PRIx64 ",\n",
340              d_bits / 4,
341              static_cast<uint64_t>(results[d]));
342     }
343     printf("};\n");
344     printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
345   } else if (!skipped) {
346     // Check the results.
347     VIXL_CHECK(expected_length == results_length);
348     unsigned error_count = 0;
349     unsigned d = 0;
350     for (unsigned n = 0; n < inputs_length; n++, d++) {
351       if (results[d] != expected[d]) {
352         if (++error_count > kErrorReportLimit) continue;
353 
354         printf("%s 0x%0*" PRIx64 " (%s %g):\n",
355                name,
356                n_bits / 4,
357                static_cast<uint64_t>(inputs[n]),
358                name,
359                rawbits_to_fp(inputs[n]));
360         printf("  Expected: 0x%0*" PRIx64 " (%g)\n",
361                d_bits / 4,
362                static_cast<uint64_t>(expected[d]),
363                rawbits_to_fp(expected[d]));
364         printf("  Found:    0x%0*" PRIx64 " (%g)\n",
365                d_bits / 4,
366                static_cast<uint64_t>(results[d]),
367                rawbits_to_fp(results[d]));
368         printf("\n");
369       }
370     }
371     VIXL_ASSERT(d == expected_length);
372     if (error_count > kErrorReportLimit) {
373       printf("%u other errors follow.\n", error_count - kErrorReportLimit);
374     }
375     VIXL_CHECK(error_count == 0);
376   }
377   delete[] results;
378 }
379 
380 
Test2Op_Helper(Test2OpFPHelper_t helper,uintptr_t inputs,unsigned inputs_length,uintptr_t results,unsigned reg_size,bool * skipped)381 static void Test2Op_Helper(Test2OpFPHelper_t helper,
382                            uintptr_t inputs,
383                            unsigned inputs_length,
384                            uintptr_t results,
385                            unsigned reg_size,
386                            bool* skipped) {
387   VIXL_ASSERT((reg_size == kDRegSize) || (reg_size == kSRegSize) ||
388               (reg_size == kHRegSize));
389 
390   SETUP_WITH_FEATURES(CPUFeatures::kFP, CPUFeatures::kFPHalf);
391   START();
392 
393   // Roll up the loop to keep the code size down.
394   Label loop_n, loop_m;
395 
396   Register out = x0;
397   Register inputs_base = x1;
398   Register length = w2;
399   Register index_n = w3;
400   Register index_m = w4;
401 
402   bool double_op = reg_size == kDRegSize;
403   bool float_op = reg_size == kSRegSize;
404   int index_shift;
405   if (double_op) {
406     index_shift = kDRegSizeInBytesLog2;
407   } else if (float_op) {
408     index_shift = kSRegSizeInBytesLog2;
409   } else {
410     index_shift = kHRegSizeInBytesLog2;
411   }
412 
413   VRegister fd;
414   VRegister fn;
415   VRegister fm;
416 
417   if (double_op) {
418     fd = d0;
419     fn = d1;
420     fm = d2;
421   } else if (float_op) {
422     fd = s0;
423     fn = s1;
424     fm = s2;
425   } else {
426     fd = h0;
427     fn = h1;
428     fm = h2;
429   }
430 
431   __ Mov(out, results);
432   __ Mov(inputs_base, inputs);
433   __ Mov(length, inputs_length);
434 
435   __ Mov(index_n, 0);
436   __ Bind(&loop_n);
437   __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, index_shift));
438 
439   __ Mov(index_m, 0);
440   __ Bind(&loop_m);
441   __ Ldr(fm, MemOperand(inputs_base, index_m, UXTW, index_shift));
442 
443   {
444     SingleEmissionCheckScope guard(&masm);
445     (masm.*helper)(fd, fn, fm);
446   }
447   __ Str(fd, MemOperand(out, fd.GetSizeInBytes(), PostIndex));
448 
449   __ Add(index_m, index_m, 1);
450   __ Cmp(index_m, inputs_length);
451   __ B(lo, &loop_m);
452 
453   __ Add(index_n, index_n, 1);
454   __ Cmp(index_n, inputs_length);
455   __ B(lo, &loop_n);
456 
457   END();
458   TRY_RUN(skipped);
459 }
460 
461 
462 // Test FP instructions. The inputs[] and expected[] arrays should be arrays of
463 // rawbits representations of doubles or floats. This ensures that exact bit
464 // comparisons can be performed.
465 template <typename T>
Test2Op(const char * name,Test2OpFPHelper_t helper,const T inputs[],unsigned inputs_length,const T expected[],unsigned expected_length)466 static void Test2Op(const char* name,
467                     Test2OpFPHelper_t helper,
468                     const T inputs[],
469                     unsigned inputs_length,
470                     const T expected[],
471                     unsigned expected_length) {
472   VIXL_ASSERT(inputs_length > 0);
473 
474   const unsigned results_length = inputs_length * inputs_length;
475   T* results = new T[results_length];
476 
477   const unsigned bits = sizeof(T) * 8;
478   bool skipped;
479 
480   Test2Op_Helper(helper,
481                  reinterpret_cast<uintptr_t>(inputs),
482                  inputs_length,
483                  reinterpret_cast<uintptr_t>(results),
484                  bits,
485                  &skipped);
486 
487   if (Test::generate_test_trace()) {
488     // Print the results.
489     printf("const uint%u_t kExpected_%s[] = {\n", bits, name);
490     for (unsigned d = 0; d < results_length; d++) {
491       printf("  0x%0*" PRIx64 ",\n",
492              bits / 4,
493              static_cast<uint64_t>(results[d]));
494     }
495     printf("};\n");
496     printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
497   } else if (!skipped) {
498     // Check the results.
499     VIXL_CHECK(expected_length == results_length);
500     unsigned error_count = 0;
501     unsigned d = 0;
502     for (unsigned n = 0; n < inputs_length; n++) {
503       for (unsigned m = 0; m < inputs_length; m++, d++) {
504         if (results[d] != expected[d]) {
505           if (++error_count > kErrorReportLimit) continue;
506 
507           printf("%s 0x%0*" PRIx64 ", 0x%0*" PRIx64 " (%s %g %g):\n",
508                  name,
509                  bits / 4,
510                  static_cast<uint64_t>(inputs[n]),
511                  bits / 4,
512                  static_cast<uint64_t>(inputs[m]),
513                  name,
514                  rawbits_to_fp(inputs[n]),
515                  rawbits_to_fp(inputs[m]));
516           printf("  Expected: 0x%0*" PRIx64 " (%g)\n",
517                  bits / 4,
518                  static_cast<uint64_t>(expected[d]),
519                  rawbits_to_fp(expected[d]));
520           printf("  Found:    0x%0*" PRIx64 " (%g)\n",
521                  bits / 4,
522                  static_cast<uint64_t>(results[d]),
523                  rawbits_to_fp(results[d]));
524           printf("\n");
525         }
526       }
527     }
528     VIXL_ASSERT(d == expected_length);
529     if (error_count > kErrorReportLimit) {
530       printf("%u other errors follow.\n", error_count - kErrorReportLimit);
531     }
532     VIXL_CHECK(error_count == 0);
533   }
534   delete[] results;
535 }
536 
537 
Test3Op_Helper(Test3OpFPHelper_t helper,uintptr_t inputs,unsigned inputs_length,uintptr_t results,unsigned reg_size,bool * skipped)538 static void Test3Op_Helper(Test3OpFPHelper_t helper,
539                            uintptr_t inputs,
540                            unsigned inputs_length,
541                            uintptr_t results,
542                            unsigned reg_size,
543                            bool* skipped) {
544   VIXL_ASSERT((reg_size == kDRegSize) || (reg_size == kSRegSize) ||
545               (reg_size == kHRegSize));
546 
547   SETUP_WITH_FEATURES(CPUFeatures::kFP, CPUFeatures::kFPHalf);
548   START();
549 
550   // Roll up the loop to keep the code size down.
551   Label loop_n, loop_m, loop_a;
552 
553   Register out = x0;
554   Register inputs_base = x1;
555   Register length = w2;
556   Register index_n = w3;
557   Register index_m = w4;
558   Register index_a = w5;
559 
560   bool double_op = reg_size == kDRegSize;
561   bool single_op = reg_size == kSRegSize;
562   int index_shift;
563   VRegister fd(0, reg_size);
564   VRegister fn(1, reg_size);
565   VRegister fm(2, reg_size);
566   VRegister fa(3, reg_size);
567   if (double_op) {
568     index_shift = kDRegSizeInBytesLog2;
569   } else if (single_op) {
570     index_shift = kSRegSizeInBytesLog2;
571   } else {
572     index_shift = kHRegSizeInBytesLog2;
573   }
574 
575   __ Mov(out, results);
576   __ Mov(inputs_base, inputs);
577   __ Mov(length, inputs_length);
578 
579   __ Mov(index_n, 0);
580   __ Bind(&loop_n);
581   __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, index_shift));
582 
583   __ Mov(index_m, 0);
584   __ Bind(&loop_m);
585   __ Ldr(fm, MemOperand(inputs_base, index_m, UXTW, index_shift));
586 
587   __ Mov(index_a, 0);
588   __ Bind(&loop_a);
589   __ Ldr(fa, MemOperand(inputs_base, index_a, UXTW, index_shift));
590 
591   {
592     SingleEmissionCheckScope guard(&masm);
593     (masm.*helper)(fd, fn, fm, fa);
594   }
595   __ Str(fd, MemOperand(out, fd.GetSizeInBytes(), PostIndex));
596 
597   __ Add(index_a, index_a, 1);
598   __ Cmp(index_a, inputs_length);
599   __ B(lo, &loop_a);
600 
601   __ Add(index_m, index_m, 1);
602   __ Cmp(index_m, inputs_length);
603   __ B(lo, &loop_m);
604 
605   __ Add(index_n, index_n, 1);
606   __ Cmp(index_n, inputs_length);
607   __ B(lo, &loop_n);
608 
609   END();
610   TRY_RUN(skipped);
611 }
612 
613 
614 // Test FP instructions. The inputs[] and expected[] arrays should be arrays of
615 // rawbits representations of doubles or floats. This ensures that exact bit
616 // comparisons can be performed.
617 template <typename T>
Test3Op(const char * name,Test3OpFPHelper_t helper,const T inputs[],unsigned inputs_length,const T expected[],unsigned expected_length)618 static void Test3Op(const char* name,
619                     Test3OpFPHelper_t helper,
620                     const T inputs[],
621                     unsigned inputs_length,
622                     const T expected[],
623                     unsigned expected_length) {
624   VIXL_ASSERT(inputs_length > 0);
625 
626   const unsigned results_length = inputs_length * inputs_length * inputs_length;
627   T* results = new T[results_length];
628 
629   const unsigned bits = sizeof(T) * 8;
630   bool skipped;
631 
632   Test3Op_Helper(helper,
633                  reinterpret_cast<uintptr_t>(inputs),
634                  inputs_length,
635                  reinterpret_cast<uintptr_t>(results),
636                  bits,
637                  &skipped);
638 
639   if (Test::generate_test_trace()) {
640     // Print the results.
641     printf("const uint%u_t kExpected_%s[] = {\n", bits, name);
642     for (unsigned d = 0; d < results_length; d++) {
643       printf("  0x%0*" PRIx64 ",\n",
644              bits / 4,
645              static_cast<uint64_t>(results[d]));
646     }
647     printf("};\n");
648     printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
649   } else if (!skipped) {
650     // Check the results.
651     VIXL_CHECK(expected_length == results_length);
652     unsigned error_count = 0;
653     unsigned d = 0;
654     for (unsigned n = 0; n < inputs_length; n++) {
655       for (unsigned m = 0; m < inputs_length; m++) {
656         for (unsigned a = 0; a < inputs_length; a++, d++) {
657           if (results[d] != expected[d]) {
658             if (++error_count > kErrorReportLimit) continue;
659 
660             printf("%s 0x%0*" PRIx64 ", 0x%0*" PRIx64 ", 0x%0*" PRIx64
661                    " (%s %g %g %g):\n",
662                    name,
663                    bits / 4,
664                    static_cast<uint64_t>(inputs[n]),
665                    bits / 4,
666                    static_cast<uint64_t>(inputs[m]),
667                    bits / 4,
668                    static_cast<uint64_t>(inputs[a]),
669                    name,
670                    rawbits_to_fp(inputs[n]),
671                    rawbits_to_fp(inputs[m]),
672                    rawbits_to_fp(inputs[a]));
673             printf("  Expected: 0x%0*" PRIx64 " (%g)\n",
674                    bits / 4,
675                    static_cast<uint64_t>(expected[d]),
676                    rawbits_to_fp(expected[d]));
677             printf("  Found:    0x%0*" PRIx64 " (%g)\n",
678                    bits / 4,
679                    static_cast<uint64_t>(results[d]),
680                    rawbits_to_fp(results[d]));
681             printf("\n");
682           }
683         }
684       }
685     }
686     VIXL_ASSERT(d == expected_length);
687     if (error_count > kErrorReportLimit) {
688       printf("%u other errors follow.\n", error_count - kErrorReportLimit);
689     }
690     VIXL_CHECK(error_count == 0);
691   }
692   delete[] results;
693 }
694 
695 
TestCmp_Helper(TestFPCmpHelper_t helper,uintptr_t inputs,unsigned inputs_length,uintptr_t results,unsigned reg_size,bool * skipped)696 static void TestCmp_Helper(TestFPCmpHelper_t helper,
697                            uintptr_t inputs,
698                            unsigned inputs_length,
699                            uintptr_t results,
700                            unsigned reg_size,
701                            bool* skipped) {
702   VIXL_ASSERT((reg_size == kDRegSize) || (reg_size == kSRegSize));
703 
704   SETUP_WITH_FEATURES(CPUFeatures::kFP);
705   START();
706 
707   // Roll up the loop to keep the code size down.
708   Label loop_n, loop_m;
709 
710   Register out = x0;
711   Register inputs_base = x1;
712   Register length = w2;
713   Register index_n = w3;
714   Register index_m = w4;
715   Register flags = x5;
716 
717   bool double_op = reg_size == kDRegSize;
718   const int index_shift =
719       double_op ? kDRegSizeInBytesLog2 : kSRegSizeInBytesLog2;
720 
721   VRegister fn = double_op ? d1 : s1;
722   VRegister fm = double_op ? d2 : s2;
723 
724   __ Mov(out, results);
725   __ Mov(inputs_base, inputs);
726   __ Mov(length, inputs_length);
727 
728   __ Mov(index_n, 0);
729   __ Bind(&loop_n);
730   __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, index_shift));
731 
732   __ Mov(index_m, 0);
733   __ Bind(&loop_m);
734   __ Ldr(fm, MemOperand(inputs_base, index_m, UXTW, index_shift));
735 
736   {
737     SingleEmissionCheckScope guard(&masm);
738     (masm.*helper)(fn, fm);
739   }
740   __ Mrs(flags, NZCV);
741   __ Ubfx(flags, flags, 28, 4);
742   __ Strb(flags, MemOperand(out, 1, PostIndex));
743 
744   __ Add(index_m, index_m, 1);
745   __ Cmp(index_m, inputs_length);
746   __ B(lo, &loop_m);
747 
748   __ Add(index_n, index_n, 1);
749   __ Cmp(index_n, inputs_length);
750   __ B(lo, &loop_n);
751 
752   END();
753   TRY_RUN(skipped);
754 }
755 
756 
757 // Test FP instructions. The inputs[] and expected[] arrays should be arrays of
758 // rawbits representations of doubles or floats. This ensures that exact bit
759 // comparisons can be performed.
760 template <typename T>
TestCmp(const char * name,TestFPCmpHelper_t helper,const T inputs[],unsigned inputs_length,const uint8_t expected[],unsigned expected_length)761 static void TestCmp(const char* name,
762                     TestFPCmpHelper_t helper,
763                     const T inputs[],
764                     unsigned inputs_length,
765                     const uint8_t expected[],
766                     unsigned expected_length) {
767   VIXL_ASSERT(inputs_length > 0);
768 
769   const unsigned results_length = inputs_length * inputs_length;
770   uint8_t* results = new uint8_t[results_length];
771 
772   const unsigned bits = sizeof(T) * 8;
773   bool skipped;
774 
775   TestCmp_Helper(helper,
776                  reinterpret_cast<uintptr_t>(inputs),
777                  inputs_length,
778                  reinterpret_cast<uintptr_t>(results),
779                  bits,
780                  &skipped);
781 
782   if (Test::generate_test_trace()) {
783     // Print the results.
784     printf("const uint8_t kExpected_%s[] = {\n", name);
785     for (unsigned d = 0; d < results_length; d++) {
786       // Each NZCV result only requires 4 bits.
787       VIXL_ASSERT((results[d] & 0xf) == results[d]);
788       printf("  0x%" PRIx8 ",\n", results[d]);
789     }
790     printf("};\n");
791     printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
792   } else if (!skipped) {
793     // Check the results.
794     VIXL_CHECK(expected_length == results_length);
795     unsigned error_count = 0;
796     unsigned d = 0;
797     for (unsigned n = 0; n < inputs_length; n++) {
798       for (unsigned m = 0; m < inputs_length; m++, d++) {
799         if (results[d] != expected[d]) {
800           if (++error_count > kErrorReportLimit) continue;
801 
802           printf("%s 0x%0*" PRIx64 ", 0x%0*" PRIx64 " (%s %g %g):\n",
803                  name,
804                  bits / 4,
805                  static_cast<uint64_t>(inputs[n]),
806                  bits / 4,
807                  static_cast<uint64_t>(inputs[m]),
808                  name,
809                  rawbits_to_fp(inputs[n]),
810                  rawbits_to_fp(inputs[m]));
811           printf("  Expected: %c%c%c%c (0x%" PRIx8 ")\n",
812                  (expected[d] & 0x8) ? 'N' : 'n',
813                  (expected[d] & 0x4) ? 'Z' : 'z',
814                  (expected[d] & 0x2) ? 'C' : 'c',
815                  (expected[d] & 0x1) ? 'V' : 'v',
816                  expected[d]);
817           printf("  Found:    %c%c%c%c (0x%" PRIx8 ")\n",
818                  (results[d] & 0x8) ? 'N' : 'n',
819                  (results[d] & 0x4) ? 'Z' : 'z',
820                  (results[d] & 0x2) ? 'C' : 'c',
821                  (results[d] & 0x1) ? 'V' : 'v',
822                  results[d]);
823           printf("\n");
824         }
825       }
826     }
827     VIXL_ASSERT(d == expected_length);
828     if (error_count > kErrorReportLimit) {
829       printf("%u other errors follow.\n", error_count - kErrorReportLimit);
830     }
831     VIXL_CHECK(error_count == 0);
832   }
833   delete[] results;
834 }
835 
836 
TestCmpZero_Helper(TestFPCmpZeroHelper_t helper,uintptr_t inputs,unsigned inputs_length,uintptr_t results,unsigned reg_size,bool * skipped)837 static void TestCmpZero_Helper(TestFPCmpZeroHelper_t helper,
838                                uintptr_t inputs,
839                                unsigned inputs_length,
840                                uintptr_t results,
841                                unsigned reg_size,
842                                bool* skipped) {
843   VIXL_ASSERT((reg_size == kDRegSize) || (reg_size == kSRegSize));
844 
845   SETUP_WITH_FEATURES(CPUFeatures::kFP);
846   START();
847 
848   // Roll up the loop to keep the code size down.
849   Label loop_n, loop_m;
850 
851   Register out = x0;
852   Register inputs_base = x1;
853   Register length = w2;
854   Register index_n = w3;
855   Register flags = x4;
856 
857   bool double_op = reg_size == kDRegSize;
858   const int index_shift =
859       double_op ? kDRegSizeInBytesLog2 : kSRegSizeInBytesLog2;
860 
861   VRegister fn = double_op ? d1 : s1;
862 
863   __ Mov(out, results);
864   __ Mov(inputs_base, inputs);
865   __ Mov(length, inputs_length);
866 
867   __ Mov(index_n, 0);
868   __ Bind(&loop_n);
869   __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, index_shift));
870 
871   {
872     SingleEmissionCheckScope guard(&masm);
873     (masm.*helper)(fn, 0.0);
874   }
875   __ Mrs(flags, NZCV);
876   __ Ubfx(flags, flags, 28, 4);
877   __ Strb(flags, MemOperand(out, 1, PostIndex));
878 
879   __ Add(index_n, index_n, 1);
880   __ Cmp(index_n, inputs_length);
881   __ B(lo, &loop_n);
882 
883   END();
884   TRY_RUN(skipped);
885 }
886 
887 
888 // Test FP instructions. The inputs[] and expected[] arrays should be arrays of
889 // rawbits representations of doubles or floats. This ensures that exact bit
890 // comparisons can be performed.
891 template <typename T>
TestCmpZero(const char * name,TestFPCmpZeroHelper_t helper,const T inputs[],unsigned inputs_length,const uint8_t expected[],unsigned expected_length)892 static void TestCmpZero(const char* name,
893                         TestFPCmpZeroHelper_t helper,
894                         const T inputs[],
895                         unsigned inputs_length,
896                         const uint8_t expected[],
897                         unsigned expected_length) {
898   VIXL_ASSERT(inputs_length > 0);
899 
900   const unsigned results_length = inputs_length;
901   uint8_t* results = new uint8_t[results_length];
902 
903   const unsigned bits = sizeof(T) * 8;
904   bool skipped;
905 
906   TestCmpZero_Helper(helper,
907                      reinterpret_cast<uintptr_t>(inputs),
908                      inputs_length,
909                      reinterpret_cast<uintptr_t>(results),
910                      bits,
911                      &skipped);
912 
913   if (Test::generate_test_trace()) {
914     // Print the results.
915     printf("const uint8_t kExpected_%s[] = {\n", name);
916     for (unsigned d = 0; d < results_length; d++) {
917       // Each NZCV result only requires 4 bits.
918       VIXL_ASSERT((results[d] & 0xf) == results[d]);
919       printf("  0x%" PRIx8 ",\n", results[d]);
920     }
921     printf("};\n");
922     printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
923   } else if (!skipped) {
924     // Check the results.
925     VIXL_CHECK(expected_length == results_length);
926     unsigned error_count = 0;
927     unsigned d = 0;
928     for (unsigned n = 0; n < inputs_length; n++, d++) {
929       if (results[d] != expected[d]) {
930         if (++error_count > kErrorReportLimit) continue;
931 
932         printf("%s 0x%0*" PRIx64 ", 0x%0*u (%s %g #0.0):\n",
933                name,
934                bits / 4,
935                static_cast<uint64_t>(inputs[n]),
936                bits / 4,
937                0,
938                name,
939                rawbits_to_fp(inputs[n]));
940         printf("  Expected: %c%c%c%c (0x%" PRIx8 ")\n",
941                (expected[d] & 0x8) ? 'N' : 'n',
942                (expected[d] & 0x4) ? 'Z' : 'z',
943                (expected[d] & 0x2) ? 'C' : 'c',
944                (expected[d] & 0x1) ? 'V' : 'v',
945                expected[d]);
946         printf("  Found:    %c%c%c%c (0x%" PRIx8 ")\n",
947                (results[d] & 0x8) ? 'N' : 'n',
948                (results[d] & 0x4) ? 'Z' : 'z',
949                (results[d] & 0x2) ? 'C' : 'c',
950                (results[d] & 0x1) ? 'V' : 'v',
951                results[d]);
952         printf("\n");
953       }
954     }
955     VIXL_ASSERT(d == expected_length);
956     if (error_count > kErrorReportLimit) {
957       printf("%u other errors follow.\n", error_count - kErrorReportLimit);
958     }
959     VIXL_CHECK(error_count == 0);
960   }
961   delete[] results;
962 }
963 
964 
TestFPToFixed_Helper(TestFPToFixedHelper_t helper,uintptr_t inputs,unsigned inputs_length,uintptr_t results,unsigned d_size,unsigned n_size,bool * skipped)965 static void TestFPToFixed_Helper(TestFPToFixedHelper_t helper,
966                                  uintptr_t inputs,
967                                  unsigned inputs_length,
968                                  uintptr_t results,
969                                  unsigned d_size,
970                                  unsigned n_size,
971                                  bool* skipped) {
972   VIXL_ASSERT((d_size == kXRegSize) || (d_size == kWRegSize));
973   VIXL_ASSERT((n_size == kDRegSize) || (n_size == kSRegSize) ||
974               (n_size == kHRegSize));
975 
976   SETUP_WITH_FEATURES(CPUFeatures::kFP, CPUFeatures::kFPHalf);
977   START();
978 
979   // Roll up the loop to keep the code size down.
980   Label loop_n;
981 
982   Register out = x0;
983   Register inputs_base = x1;
984   Register length = w2;
985   Register index_n = w3;
986 
987   int n_index_shift;
988   if (n_size == kDRegSize) {
989     n_index_shift = kDRegSizeInBytesLog2;
990   } else if (n_size == kSRegSize) {
991     n_index_shift = kSRegSizeInBytesLog2;
992   } else {
993     n_index_shift = kHRegSizeInBytesLog2;
994   }
995 
996   Register rd = (d_size == kXRegSize) ? Register(x10) : Register(w10);
997   VRegister fn;
998   if (n_size == kDRegSize) {
999     fn = d1;
1000   } else if (n_size == kSRegSize) {
1001     fn = s1;
1002   } else {
1003     fn = h1;
1004   }
1005 
1006   __ Mov(out, results);
1007   __ Mov(inputs_base, inputs);
1008   __ Mov(length, inputs_length);
1009 
1010   __ Mov(index_n, 0);
1011   __ Bind(&loop_n);
1012   __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, n_index_shift));
1013 
1014   for (unsigned fbits = 0; fbits <= d_size; ++fbits) {
1015     {
1016       SingleEmissionCheckScope guard(&masm);
1017       (masm.*helper)(rd, fn, fbits);
1018     }
1019     __ Str(rd, MemOperand(out, rd.GetSizeInBytes(), PostIndex));
1020   }
1021 
1022   __ Add(index_n, index_n, 1);
1023   __ Cmp(index_n, inputs_length);
1024   __ B(lo, &loop_n);
1025 
1026   END();
1027   TRY_RUN(skipped);
1028 }
1029 
1030 
TestFPToInt_Helper(TestFPToIntHelper_t helper,uintptr_t inputs,unsigned inputs_length,uintptr_t results,unsigned d_size,unsigned n_size,bool * skipped)1031 static void TestFPToInt_Helper(TestFPToIntHelper_t helper,
1032                                uintptr_t inputs,
1033                                unsigned inputs_length,
1034                                uintptr_t results,
1035                                unsigned d_size,
1036                                unsigned n_size,
1037                                bool* skipped) {
1038   VIXL_ASSERT((d_size == kXRegSize) || (d_size == kWRegSize));
1039   VIXL_ASSERT((n_size == kDRegSize) || (n_size == kSRegSize) ||
1040               (n_size == kHRegSize));
1041 
1042   SETUP_WITH_FEATURES(CPUFeatures::kFP,
1043                       CPUFeatures::kFPHalf,
1044                       CPUFeatures::kJSCVT);
1045   START();
1046 
1047   // Roll up the loop to keep the code size down.
1048   Label loop_n;
1049 
1050   Register out = x0;
1051   Register inputs_base = x1;
1052   Register length = w2;
1053   Register index_n = w3;
1054 
1055   int n_index_shift;
1056   if (n_size == kDRegSize) {
1057     n_index_shift = kDRegSizeInBytesLog2;
1058   } else if (n_size == kSRegSize) {
1059     n_index_shift = kSRegSizeInBytesLog2;
1060   } else {
1061     n_index_shift = kHRegSizeInBytesLog2;
1062   }
1063 
1064   Register rd = (d_size == kXRegSize) ? Register(x10) : Register(w10);
1065   VRegister fn;
1066   if (n_size == kDRegSize) {
1067     fn = d1;
1068   } else if (n_size == kSRegSize) {
1069     fn = s1;
1070   } else {
1071     fn = h1;
1072   }
1073 
1074   __ Mov(out, results);
1075   __ Mov(inputs_base, inputs);
1076   __ Mov(length, inputs_length);
1077 
1078   __ Mov(index_n, 0);
1079   __ Bind(&loop_n);
1080   __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, n_index_shift));
1081 
1082   {
1083     SingleEmissionCheckScope guard(&masm);
1084     (masm.*helper)(rd, fn);
1085   }
1086   __ Str(rd, MemOperand(out, rd.GetSizeInBytes(), PostIndex));
1087 
1088   __ Add(index_n, index_n, 1);
1089   __ Cmp(index_n, inputs_length);
1090   __ B(lo, &loop_n);
1091 
1092   END();
1093   TRY_RUN(skipped);
1094 }
1095 
1096 
1097 // Test FP instructions.
1098 //  - The inputs[] array should be an array of rawbits representations of
1099 //    doubles or floats. This ensures that exact bit comparisons can be
1100 //    performed.
1101 //  - The expected[] array should be an array of signed integers.
1102 template <typename Tn, typename Td>
TestFPToS(const char * name,TestFPToIntHelper_t helper,const Tn inputs[],unsigned inputs_length,const Td expected[],unsigned expected_length)1103 static void TestFPToS(const char* name,
1104                       TestFPToIntHelper_t helper,
1105                       const Tn inputs[],
1106                       unsigned inputs_length,
1107                       const Td expected[],
1108                       unsigned expected_length) {
1109   VIXL_ASSERT(inputs_length > 0);
1110 
1111   const unsigned results_length = inputs_length;
1112   Td* results = new Td[results_length];
1113 
1114   const unsigned d_bits = sizeof(Td) * 8;
1115   const unsigned n_bits = sizeof(Tn) * 8;
1116   bool skipped;
1117 
1118   TestFPToInt_Helper(helper,
1119                      reinterpret_cast<uintptr_t>(inputs),
1120                      inputs_length,
1121                      reinterpret_cast<uintptr_t>(results),
1122                      d_bits,
1123                      n_bits,
1124                      &skipped);
1125 
1126   if (Test::generate_test_trace()) {
1127     // Print the results.
1128     printf("const int%u_t kExpected_%s[] = {\n", d_bits, name);
1129     // There is no simple C++ literal for INT*_MIN that doesn't produce
1130     // warnings, so we use an appropriate constant in that case instead.
1131     // Deriving int_d_min in this way (rather than just checking INT64_MIN and
1132     // the like) avoids warnings about comparing values with differing ranges.
1133     const int64_t int_d_max = (UINT64_C(1) << (d_bits - 1)) - 1;
1134     const int64_t int_d_min = -(int_d_max)-1;
1135     for (unsigned d = 0; d < results_length; d++) {
1136       if (results[d] == int_d_min) {
1137         printf("  -INT%u_C(%" PRId64 ") - 1,\n", d_bits, int_d_max);
1138       } else {
1139         // Some constants (such as those between INT32_MAX and UINT32_MAX)
1140         // trigger compiler warnings. To avoid these warnings, use an
1141         // appropriate macro to make the type explicit.
1142         int64_t result_int64 = static_cast<int64_t>(results[d]);
1143         if (result_int64 >= 0) {
1144           printf("  INT%u_C(%" PRId64 "),\n", d_bits, result_int64);
1145         } else {
1146           printf("  -INT%u_C(%" PRId64 "),\n", d_bits, -result_int64);
1147         }
1148       }
1149     }
1150     printf("};\n");
1151     printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
1152   } else if (!skipped) {
1153     // Check the results.
1154     VIXL_CHECK(expected_length == results_length);
1155     unsigned error_count = 0;
1156     unsigned d = 0;
1157     for (unsigned n = 0; n < inputs_length; n++, d++) {
1158       if (results[d] != expected[d]) {
1159         if (++error_count > kErrorReportLimit) continue;
1160 
1161         printf("%s 0x%0*" PRIx64 " (%s %g):\n",
1162                name,
1163                n_bits / 4,
1164                static_cast<uint64_t>(inputs[n]),
1165                name,
1166                rawbits_to_fp(inputs[n]));
1167         printf("  Expected: 0x%0*" PRIx64 " (%" PRId64 ")\n",
1168                d_bits / 4,
1169                static_cast<uint64_t>(expected[d]),
1170                static_cast<int64_t>(expected[d]));
1171         printf("  Found:    0x%0*" PRIx64 " (%" PRId64 ")\n",
1172                d_bits / 4,
1173                static_cast<uint64_t>(results[d]),
1174                static_cast<int64_t>(results[d]));
1175         printf("\n");
1176       }
1177     }
1178     VIXL_ASSERT(d == expected_length);
1179     if (error_count > kErrorReportLimit) {
1180       printf("%u other errors follow.\n", error_count - kErrorReportLimit);
1181     }
1182     VIXL_CHECK(error_count == 0);
1183   }
1184   delete[] results;
1185 }
1186 
1187 
1188 // Test FP instructions.
1189 //  - The inputs[] array should be an array of rawbits representations of
1190 //    doubles or floats. This ensures that exact bit comparisons can be
1191 //    performed.
1192 //  - The expected[] array should be an array of unsigned integers.
1193 template <typename Tn, typename Td>
TestFPToU(const char * name,TestFPToIntHelper_t helper,const Tn inputs[],unsigned inputs_length,const Td expected[],unsigned expected_length)1194 static void TestFPToU(const char* name,
1195                       TestFPToIntHelper_t helper,
1196                       const Tn inputs[],
1197                       unsigned inputs_length,
1198                       const Td expected[],
1199                       unsigned expected_length) {
1200   VIXL_ASSERT(inputs_length > 0);
1201 
1202   const unsigned results_length = inputs_length;
1203   Td* results = new Td[results_length];
1204 
1205   const unsigned d_bits = sizeof(Td) * 8;
1206   const unsigned n_bits = sizeof(Tn) * 8;
1207   bool skipped;
1208 
1209   TestFPToInt_Helper(helper,
1210                      reinterpret_cast<uintptr_t>(inputs),
1211                      inputs_length,
1212                      reinterpret_cast<uintptr_t>(results),
1213                      d_bits,
1214                      n_bits,
1215                      &skipped);
1216 
1217   if (Test::generate_test_trace()) {
1218     // Print the results.
1219     printf("const uint%u_t kExpected_%s[] = {\n", d_bits, name);
1220     for (unsigned d = 0; d < results_length; d++) {
1221       printf("  %" PRIu64 "u,\n", static_cast<uint64_t>(results[d]));
1222     }
1223     printf("};\n");
1224     printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
1225   } else if (!skipped) {
1226     // Check the results.
1227     VIXL_CHECK(expected_length == results_length);
1228     unsigned error_count = 0;
1229     unsigned d = 0;
1230     for (unsigned n = 0; n < inputs_length; n++, d++) {
1231       if (results[d] != expected[d]) {
1232         if (++error_count > kErrorReportLimit) continue;
1233 
1234         printf("%s 0x%0*" PRIx64 " (%s %g):\n",
1235                name,
1236                n_bits / 4,
1237                static_cast<uint64_t>(inputs[n]),
1238                name,
1239                rawbits_to_fp(inputs[n]));
1240         printf("  Expected: 0x%0*" PRIx64 " (%" PRIu64 ")\n",
1241                d_bits / 4,
1242                static_cast<uint64_t>(expected[d]),
1243                static_cast<uint64_t>(expected[d]));
1244         printf("  Found:    0x%0*" PRIx64 " (%" PRIu64 ")\n",
1245                d_bits / 4,
1246                static_cast<uint64_t>(results[d]),
1247                static_cast<uint64_t>(results[d]));
1248         printf("\n");
1249       }
1250     }
1251     VIXL_ASSERT(d == expected_length);
1252     if (error_count > kErrorReportLimit) {
1253       printf("%u other errors follow.\n", error_count - kErrorReportLimit);
1254     }
1255     VIXL_CHECK(error_count == 0);
1256   }
1257   delete[] results;
1258 }
1259 
1260 
1261 // Test FP instructions.
1262 //  - The inputs[] array should be an array of rawbits representations of
1263 //    doubles or floats. This ensures that exact bit comparisons can be
1264 //    performed.
1265 //  - The expected[] array should be an array of signed integers.
1266 template <typename Tn, typename Td>
TestFPToFixedS(const char * name,TestFPToFixedHelper_t helper,const Tn inputs[],unsigned inputs_length,const Td expected[],unsigned expected_length)1267 static void TestFPToFixedS(const char* name,
1268                            TestFPToFixedHelper_t helper,
1269                            const Tn inputs[],
1270                            unsigned inputs_length,
1271                            const Td expected[],
1272                            unsigned expected_length) {
1273   VIXL_ASSERT(inputs_length > 0);
1274 
1275   const unsigned d_bits = sizeof(Td) * 8;
1276   const unsigned n_bits = sizeof(Tn) * 8;
1277 
1278   const unsigned results_length = inputs_length * (d_bits + 1);
1279   Td* results = new Td[results_length];
1280 
1281   bool skipped;
1282 
1283   TestFPToFixed_Helper(helper,
1284                        reinterpret_cast<uintptr_t>(inputs),
1285                        inputs_length,
1286                        reinterpret_cast<uintptr_t>(results),
1287                        d_bits,
1288                        n_bits,
1289                        &skipped);
1290 
1291   if (Test::generate_test_trace()) {
1292     // Print the results.
1293     printf("const int%u_t kExpected_%s[] = {\n", d_bits, name);
1294     // There is no simple C++ literal for INT*_MIN that doesn't produce
1295     // warnings, so we use an appropriate constant in that case instead.
1296     // Deriving int_d_min in this way (rather than just checking INT64_MIN and
1297     // the like) avoids warnings about comparing values with differing ranges.
1298     const int64_t int_d_max = (UINT64_C(1) << (d_bits - 1)) - 1;
1299     const int64_t int_d_min = -(int_d_max)-1;
1300     for (unsigned d = 0; d < results_length; d++) {
1301       if (results[d] == int_d_min) {
1302         printf("  -INT%u_C(%" PRId64 ") - 1,\n", d_bits, int_d_max);
1303       } else {
1304         // Some constants (such as those between INT32_MAX and UINT32_MAX)
1305         // trigger compiler warnings. To avoid these warnings, use an
1306         // appropriate macro to make the type explicit.
1307         int64_t result_int64 = static_cast<int64_t>(results[d]);
1308         if (result_int64 >= 0) {
1309           printf("  INT%u_C(%" PRId64 "),\n", d_bits, result_int64);
1310         } else {
1311           printf("  -INT%u_C(%" PRId64 "),\n", d_bits, -result_int64);
1312         }
1313       }
1314     }
1315     printf("};\n");
1316     printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
1317   } else if (!skipped) {
1318     // Check the results.
1319     VIXL_CHECK(expected_length == results_length);
1320     unsigned error_count = 0;
1321     unsigned d = 0;
1322     for (unsigned n = 0; n < inputs_length; n++) {
1323       for (unsigned fbits = 0; fbits <= d_bits; ++fbits, d++) {
1324         if (results[d] != expected[d]) {
1325           if (++error_count > kErrorReportLimit) continue;
1326 
1327           printf("%s 0x%0*" PRIx64 " #%d (%s %g #%d):\n",
1328                  name,
1329                  n_bits / 4,
1330                  static_cast<uint64_t>(inputs[n]),
1331                  fbits,
1332                  name,
1333                  rawbits_to_fp(inputs[n]),
1334                  fbits);
1335           printf("  Expected: 0x%0*" PRIx64 " (%" PRId64 ")\n",
1336                  d_bits / 4,
1337                  static_cast<uint64_t>(expected[d]),
1338                  static_cast<int64_t>(expected[d]));
1339           printf("  Found:    0x%0*" PRIx64 " (%" PRId64 ")\n",
1340                  d_bits / 4,
1341                  static_cast<uint64_t>(results[d]),
1342                  static_cast<int64_t>(results[d]));
1343           printf("\n");
1344         }
1345       }
1346     }
1347     VIXL_ASSERT(d == expected_length);
1348     if (error_count > kErrorReportLimit) {
1349       printf("%u other errors follow.\n", error_count - kErrorReportLimit);
1350     }
1351     VIXL_CHECK(error_count == 0);
1352   }
1353   delete[] results;
1354 }
1355 
1356 
1357 // Test FP instructions.
1358 //  - The inputs[] array should be an array of rawbits representations of
1359 //    doubles or floats. This ensures that exact bit comparisons can be
1360 //    performed.
1361 //  - The expected[] array should be an array of unsigned integers.
1362 template <typename Tn, typename Td>
TestFPToFixedU(const char * name,TestFPToFixedHelper_t helper,const Tn inputs[],unsigned inputs_length,const Td expected[],unsigned expected_length)1363 static void TestFPToFixedU(const char* name,
1364                            TestFPToFixedHelper_t helper,
1365                            const Tn inputs[],
1366                            unsigned inputs_length,
1367                            const Td expected[],
1368                            unsigned expected_length) {
1369   VIXL_ASSERT(inputs_length > 0);
1370 
1371   const unsigned d_bits = sizeof(Td) * 8;
1372   const unsigned n_bits = sizeof(Tn) * 8;
1373 
1374   const unsigned results_length = inputs_length * (d_bits + 1);
1375   Td* results = new Td[results_length];
1376 
1377   bool skipped;
1378 
1379   TestFPToFixed_Helper(helper,
1380                        reinterpret_cast<uintptr_t>(inputs),
1381                        inputs_length,
1382                        reinterpret_cast<uintptr_t>(results),
1383                        d_bits,
1384                        n_bits,
1385                        &skipped);
1386 
1387   if (Test::generate_test_trace()) {
1388     // Print the results.
1389     printf("const uint%u_t kExpected_%s[] = {\n", d_bits, name);
1390     for (unsigned d = 0; d < results_length; d++) {
1391       printf("  %" PRIu64 "u,\n", static_cast<uint64_t>(results[d]));
1392     }
1393     printf("};\n");
1394     printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
1395   } else if (!skipped) {
1396     // Check the results.
1397     VIXL_CHECK(expected_length == results_length);
1398     unsigned error_count = 0;
1399     unsigned d = 0;
1400     for (unsigned n = 0; n < inputs_length; n++) {
1401       for (unsigned fbits = 0; fbits <= d_bits; ++fbits, d++) {
1402         if (results[d] != expected[d]) {
1403           if (++error_count > kErrorReportLimit) continue;
1404 
1405           printf("%s 0x%0*" PRIx64 " #%d (%s %g #%d):\n",
1406                  name,
1407                  n_bits / 4,
1408                  static_cast<uint64_t>(inputs[n]),
1409                  fbits,
1410                  name,
1411                  rawbits_to_fp(inputs[n]),
1412                  fbits);
1413           printf("  Expected: 0x%0*" PRIx64 " (%" PRIu64 ")\n",
1414                  d_bits / 4,
1415                  static_cast<uint64_t>(expected[d]),
1416                  static_cast<uint64_t>(expected[d]));
1417           printf("  Found:    0x%0*" PRIx64 " (%" PRIu64 ")\n",
1418                  d_bits / 4,
1419                  static_cast<uint64_t>(results[d]),
1420                  static_cast<uint64_t>(results[d]));
1421           printf("\n");
1422         }
1423       }
1424     }
1425     VIXL_ASSERT(d == expected_length);
1426     if (error_count > kErrorReportLimit) {
1427       printf("%u other errors follow.\n", error_count - kErrorReportLimit);
1428     }
1429     VIXL_CHECK(error_count == 0);
1430   }
1431   delete[] results;
1432 }
1433 
1434 
1435 // ==== Tests for instructions of the form <INST> VReg, VReg. ====
1436 
1437 
Test1OpNEON_Helper(Test1OpNEONHelper_t helper,uintptr_t inputs_n,unsigned inputs_n_length,uintptr_t results,VectorFormat vd_form,VectorFormat vn_form,bool * skipped)1438 static void Test1OpNEON_Helper(Test1OpNEONHelper_t helper,
1439                                uintptr_t inputs_n,
1440                                unsigned inputs_n_length,
1441                                uintptr_t results,
1442                                VectorFormat vd_form,
1443                                VectorFormat vn_form,
1444                                bool* skipped) {
1445   VIXL_ASSERT(vd_form != kFormatUndefined);
1446   VIXL_ASSERT(vn_form != kFormatUndefined);
1447 
1448   CPUFeatures features;
1449   features.Combine(CPUFeatures::kNEON,
1450                    CPUFeatures::kFP,
1451                    CPUFeatures::kRDM,
1452                    CPUFeatures::kNEONHalf);
1453   // For frint{32,64}{x,y} variants.
1454   features.Combine(CPUFeatures::kFrintToFixedSizedInt);
1455   SETUP_WITH_FEATURES(features);
1456   START();
1457 
1458   // Roll up the loop to keep the code size down.
1459   Label loop_n;
1460 
1461   Register out = x0;
1462   Register inputs_n_base = x1;
1463   Register inputs_n_last_16bytes = x3;
1464   Register index_n = x5;
1465 
1466   // TODO: Refactor duplicate definitions below with a VRegister::As() routine.
1467   const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form);
1468   const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
1469 
1470   const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form);
1471   const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
1472   const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
1473   const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form);
1474   const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form);
1475 
1476 
1477   // These will be either a D- or a Q-register form, with a single lane
1478   // (for use in scalar load and store operations).
1479   VRegister vd = VRegister(0, vd_bits);
1480   VRegister vn = v1.V16B();
1481   VRegister vntmp = v3.V16B();
1482 
1483   // These will have the correct format for use when calling 'helper'.
1484   VRegister vd_helper = VRegister(0, vd_bits, vd_lane_count);
1485   VRegister vn_helper = VRegister(1, vn_bits, vn_lane_count);
1486 
1487   // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'.
1488   VRegister vntmp_single = VRegister(3, vn_lane_bits);
1489 
1490   __ Mov(out, results);
1491 
1492   __ Mov(inputs_n_base, inputs_n);
1493   __ Mov(inputs_n_last_16bytes,
1494          inputs_n + (vn_lane_bytes * inputs_n_length) - 16);
1495 
1496   __ Ldr(vn, MemOperand(inputs_n_last_16bytes));
1497 
1498   __ Mov(index_n, 0);
1499   __ Bind(&loop_n);
1500 
1501   __ Ldr(vntmp_single,
1502          MemOperand(inputs_n_base, index_n, LSL, vn_lane_bytes_log2));
1503   __ Ext(vn, vn, vntmp, vn_lane_bytes);
1504 
1505   // Set the destination to zero.
1506   // TODO: Setting the destination to values other than zero
1507   //       might be a better test for instructions such as sqxtn2
1508   //       which may leave parts of V registers unchanged.
1509   __ Movi(vd.V16B(), 0);
1510 
1511   {
1512     SingleEmissionCheckScope guard(&masm);
1513     (masm.*helper)(vd_helper, vn_helper);
1514   }
1515   __ Str(vd, MemOperand(out, vd.GetSizeInBytes(), PostIndex));
1516 
1517   __ Add(index_n, index_n, 1);
1518   __ Cmp(index_n, inputs_n_length);
1519   __ B(lo, &loop_n);
1520 
1521   END();
1522   TRY_RUN(skipped);
1523 }
1524 
1525 
1526 // Test NEON instructions. The inputs_*[] and expected[] arrays should be
1527 // arrays of rawbit representation of input values. This ensures that
1528 // exact bit comparisons can be performed.
1529 template <typename Td, typename Tn>
Test1OpNEON(const char * name,Test1OpNEONHelper_t helper,const Tn inputs_n[],unsigned inputs_n_length,const Td expected[],unsigned expected_length,VectorFormat vd_form,VectorFormat vn_form)1530 static void Test1OpNEON(const char* name,
1531                         Test1OpNEONHelper_t helper,
1532                         const Tn inputs_n[],
1533                         unsigned inputs_n_length,
1534                         const Td expected[],
1535                         unsigned expected_length,
1536                         VectorFormat vd_form,
1537                         VectorFormat vn_form) {
1538   VIXL_ASSERT(inputs_n_length > 0);
1539 
1540   const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
1541   const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
1542   const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
1543 
1544   const unsigned results_length = inputs_n_length;
1545   Td* results = new Td[results_length * vd_lane_count];
1546   const unsigned lane_bit = sizeof(Td) * 8;
1547   const unsigned lane_len_in_hex = MaxHexCharCount<Td, Tn>();
1548 
1549   bool skipped;
1550 
1551   Test1OpNEON_Helper(helper,
1552                      reinterpret_cast<uintptr_t>(inputs_n),
1553                      inputs_n_length,
1554                      reinterpret_cast<uintptr_t>(results),
1555                      vd_form,
1556                      vn_form,
1557                      &skipped);
1558 
1559   if (Test::generate_test_trace()) {
1560     // Print the results.
1561     printf("const uint%u_t kExpected_NEON_%s[] = {\n", lane_bit, name);
1562     for (unsigned iteration = 0; iteration < results_length; iteration++) {
1563       printf(" ");
1564       // Output a separate result for each element of the result vector.
1565       for (unsigned lane = 0; lane < vd_lane_count; lane++) {
1566         unsigned index = lane + (iteration * vd_lane_count);
1567         printf(" 0x%0*" PRIx64 ",",
1568                lane_len_in_hex,
1569                static_cast<uint64_t>(results[index]));
1570       }
1571       printf("\n");
1572     }
1573 
1574     printf("};\n");
1575     printf("const unsigned kExpectedCount_NEON_%s = %u;\n",
1576            name,
1577            results_length);
1578   } else if (!skipped) {
1579     // Check the results.
1580     VIXL_CHECK(expected_length == results_length);
1581     unsigned error_count = 0;
1582     unsigned d = 0;
1583     const char* padding = "                    ";
1584     VIXL_ASSERT(strlen(padding) >= (lane_len_in_hex + 1));
1585     for (unsigned n = 0; n < inputs_n_length; n++, d++) {
1586       bool error_in_vector = false;
1587 
1588       for (unsigned lane = 0; lane < vd_lane_count; lane++) {
1589         unsigned output_index = (n * vd_lane_count) + lane;
1590 
1591         if (results[output_index] != expected[output_index]) {
1592           error_in_vector = true;
1593           break;
1594         }
1595       }
1596 
1597       if (error_in_vector && (++error_count <= kErrorReportLimit)) {
1598         printf("%s\n", name);
1599         printf(" Vn%.*s| Vd%.*s| Expected\n",
1600                lane_len_in_hex + 1,
1601                padding,
1602                lane_len_in_hex + 1,
1603                padding);
1604 
1605         const unsigned first_index_n =
1606             inputs_n_length - (16 / vn_lane_bytes) + n + 1;
1607 
1608         for (unsigned lane = 0; lane < std::max(vd_lane_count, vn_lane_count);
1609              lane++) {
1610           unsigned output_index = (n * vd_lane_count) + lane;
1611           unsigned input_index_n = (first_index_n + lane) % inputs_n_length;
1612 
1613           printf("%c0x%0*" PRIx64 " | 0x%0*" PRIx64
1614                  " "
1615                  "| 0x%0*" PRIx64 "\n",
1616                  results[output_index] != expected[output_index] ? '*' : ' ',
1617                  lane_len_in_hex,
1618                  static_cast<uint64_t>(inputs_n[input_index_n]),
1619                  lane_len_in_hex,
1620                  static_cast<uint64_t>(results[output_index]),
1621                  lane_len_in_hex,
1622                  static_cast<uint64_t>(expected[output_index]));
1623         }
1624       }
1625     }
1626     VIXL_ASSERT(d == expected_length);
1627     if (error_count > kErrorReportLimit) {
1628       printf("%u other errors follow.\n", error_count - kErrorReportLimit);
1629     }
1630     VIXL_CHECK(error_count == 0);
1631   }
1632   delete[] results;
1633 }
1634 
1635 
1636 // ==== Tests for instructions of the form <mnemonic> <V><d>, <Vn>.<T> ====
1637 //      where <V> is one of B, H, S or D registers.
1638 //      e.g. saddlv H1, v0.8B
1639 
1640 // TODO: Change tests to store all lanes of the resulting V register.
1641 //       Some tests store all 128 bits of the resulting V register to
1642 //       check the simulator's behaviour on the rest of the register.
1643 //       This is better than storing the affected lanes only.
1644 //       Change any tests such as the 'Across' template to do the same.
1645 
Test1OpAcrossNEON_Helper(Test1OpNEONHelper_t helper,uintptr_t inputs_n,unsigned inputs_n_length,uintptr_t results,VectorFormat vd_form,VectorFormat vn_form,bool * skipped)1646 static void Test1OpAcrossNEON_Helper(Test1OpNEONHelper_t helper,
1647                                      uintptr_t inputs_n,
1648                                      unsigned inputs_n_length,
1649                                      uintptr_t results,
1650                                      VectorFormat vd_form,
1651                                      VectorFormat vn_form,
1652                                      bool* skipped) {
1653   VIXL_ASSERT(vd_form != kFormatUndefined);
1654   VIXL_ASSERT(vn_form != kFormatUndefined);
1655 
1656   SETUP_WITH_FEATURES(CPUFeatures::kNEON,
1657                       CPUFeatures::kFP,
1658                       CPUFeatures::kNEONHalf);
1659   START();
1660 
1661   // Roll up the loop to keep the code size down.
1662   Label loop_n;
1663 
1664   Register out = x0;
1665   Register inputs_n_base = x1;
1666   Register inputs_n_last_vector = x3;
1667   Register index_n = x5;
1668 
1669   // TODO: Refactor duplicate definitions below with a VRegister::As() routine.
1670   const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form);
1671   const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form);
1672   const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
1673   const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
1674   const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form);
1675   const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form);
1676 
1677   // Test destructive operations by (arbitrarily) using the same register for
1678   // B and S lane sizes.
1679   bool destructive = (vd_bits == kBRegSize) || (vd_bits == kSRegSize);
1680 
1681   // Create two aliases for v0; the first is the destination for the tested
1682   // instruction, the second, the whole Q register to check the results.
1683   VRegister vd = VRegister(0, vd_bits);
1684   VRegister vdstr = VRegister(0, kQRegSize);
1685 
1686   VRegister vn = VRegister(1, vn_bits);
1687   VRegister vntmp = VRegister(3, vn_bits);
1688 
1689   // These will have the correct format for use when calling 'helper'.
1690   VRegister vd_helper = VRegister(0, vn_bits, vn_lane_count);
1691   VRegister vn_helper = VRegister(1, vn_bits, vn_lane_count);
1692 
1693   // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'.
1694   VRegister vntmp_single = VRegister(3, vn_lane_bits);
1695 
1696   // Same registers for use in the 'ext' instructions.
1697   VRegister vn_ext = (kDRegSize == vn_bits) ? vn.V8B() : vn.V16B();
1698   VRegister vntmp_ext = (kDRegSize == vn_bits) ? vntmp.V8B() : vntmp.V16B();
1699 
1700   __ Mov(out, results);
1701 
1702   __ Mov(inputs_n_base, inputs_n);
1703   __ Mov(inputs_n_last_vector,
1704          inputs_n + vn_lane_bytes * (inputs_n_length - vn_lane_count));
1705 
1706   __ Ldr(vn, MemOperand(inputs_n_last_vector));
1707 
1708   __ Mov(index_n, 0);
1709   __ Bind(&loop_n);
1710 
1711   __ Ldr(vntmp_single,
1712          MemOperand(inputs_n_base, index_n, LSL, vn_lane_bytes_log2));
1713   __ Ext(vn_ext, vn_ext, vntmp_ext, vn_lane_bytes);
1714 
1715   if (destructive) {
1716     __ Mov(vd_helper, vn_helper);
1717     SingleEmissionCheckScope guard(&masm);
1718     (masm.*helper)(vd, vd_helper);
1719   } else {
1720     SingleEmissionCheckScope guard(&masm);
1721     (masm.*helper)(vd, vn_helper);
1722   }
1723 
1724   __ Str(vdstr, MemOperand(out, kQRegSizeInBytes, PostIndex));
1725 
1726   __ Add(index_n, index_n, 1);
1727   __ Cmp(index_n, inputs_n_length);
1728   __ B(lo, &loop_n);
1729 
1730   END();
1731   TRY_RUN(skipped);
1732 }
1733 
1734 // Test NEON instructions. The inputs_*[] and expected[] arrays should be
1735 // arrays of rawbit representation of input values. This ensures that
1736 // exact bit comparisons can be performed.
1737 template <typename Td, typename Tn>
Test1OpAcrossNEON(const char * name,Test1OpNEONHelper_t helper,const Tn inputs_n[],unsigned inputs_n_length,const Td expected[],unsigned expected_length,VectorFormat vd_form,VectorFormat vn_form)1738 static void Test1OpAcrossNEON(const char* name,
1739                               Test1OpNEONHelper_t helper,
1740                               const Tn inputs_n[],
1741                               unsigned inputs_n_length,
1742                               const Td expected[],
1743                               unsigned expected_length,
1744                               VectorFormat vd_form,
1745                               VectorFormat vn_form) {
1746   VIXL_ASSERT(inputs_n_length > 0);
1747 
1748   const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
1749   const unsigned vd_lanes_per_q = MaxLaneCountFromFormat(vd_form);
1750 
1751   const unsigned results_length = inputs_n_length;
1752   Td* results = new Td[results_length * vd_lanes_per_q];
1753   const unsigned lane_bit = sizeof(Td) * 8;
1754   const unsigned lane_len_in_hex = MaxHexCharCount<Td, Tn>();
1755 
1756   bool skipped;
1757 
1758   Test1OpAcrossNEON_Helper(helper,
1759                            reinterpret_cast<uintptr_t>(inputs_n),
1760                            inputs_n_length,
1761                            reinterpret_cast<uintptr_t>(results),
1762                            vd_form,
1763                            vn_form,
1764                            &skipped);
1765 
1766   if (Test::generate_test_trace()) {
1767     // Print the results.
1768     printf("const uint%u_t kExpected_NEON_%s[] = {\n", lane_bit, name);
1769     for (unsigned iteration = 0; iteration < results_length; iteration++) {
1770       printf(" ");
1771       // Output a separate result for each element of the result vector.
1772       for (unsigned lane = 0; lane < vd_lane_count; lane++) {
1773         unsigned index = lane + (iteration * vd_lanes_per_q);
1774         printf(" 0x%0*" PRIx64 ",",
1775                lane_len_in_hex,
1776                static_cast<uint64_t>(results[index]));
1777       }
1778       printf("\n");
1779     }
1780 
1781     printf("};\n");
1782     printf("const unsigned kExpectedCount_NEON_%s = %u;\n",
1783            name,
1784            results_length);
1785   } else if (!skipped) {
1786     // Check the results.
1787     VIXL_CHECK(expected_length == results_length);
1788     unsigned error_count = 0;
1789     unsigned d = 0;
1790     const char* padding = "                    ";
1791     VIXL_ASSERT(strlen(padding) >= (lane_len_in_hex + 1));
1792     for (unsigned n = 0; n < inputs_n_length; n++, d++) {
1793       bool error_in_vector = false;
1794 
1795       for (unsigned lane = 0; lane < vd_lane_count; lane++) {
1796         unsigned expected_index = (n * vd_lane_count) + lane;
1797         unsigned results_index = (n * vd_lanes_per_q) + lane;
1798 
1799         if (results[results_index] != expected[expected_index]) {
1800           error_in_vector = true;
1801           break;
1802         }
1803       }
1804 
1805       // For across operations, the remaining lanes should be zero.
1806       for (unsigned lane = vd_lane_count; lane < vd_lanes_per_q; lane++) {
1807         unsigned results_index = (n * vd_lanes_per_q) + lane;
1808         if (results[results_index] != 0) {
1809           error_in_vector = true;
1810           break;
1811         }
1812       }
1813 
1814       if (error_in_vector && (++error_count <= kErrorReportLimit)) {
1815         const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
1816 
1817         printf("%s\n", name);
1818         printf(" Vn%.*s| Vd%.*s| Expected\n",
1819                lane_len_in_hex + 1,
1820                padding,
1821                lane_len_in_hex + 1,
1822                padding);
1823 
1824         // TODO: In case of an error, all tests print out as many elements as
1825         //       there are lanes in the output or input vectors. This way
1826         //       the viewer can read all the values that were needed for the
1827         //       operation but the output contains also unnecessary values.
1828         //       These prints can be improved according to the arguments
1829         //       passed to test functions.
1830         //       This output for the 'Across' category has the required
1831         //       modifications.
1832         for (unsigned lane = 0; lane < vn_lane_count; lane++) {
1833           unsigned results_index =
1834               (n * vd_lanes_per_q) + ((vn_lane_count - 1) - lane);
1835           unsigned input_index_n =
1836               (inputs_n_length - vn_lane_count + n + 1 + lane) %
1837               inputs_n_length;
1838 
1839           Td expect = 0;
1840           if ((vn_lane_count - 1) == lane) {
1841             // This is the last lane to be printed, ie. the least-significant
1842             // lane, so use the expected value; any other lane should be zero.
1843             unsigned expected_index = n * vd_lane_count;
1844             expect = expected[expected_index];
1845           }
1846           printf("%c0x%0*" PRIx64 " | 0x%0*" PRIx64 " | 0x%0*" PRIx64 "\n",
1847                  results[results_index] != expect ? '*' : ' ',
1848                  lane_len_in_hex,
1849                  static_cast<uint64_t>(inputs_n[input_index_n]),
1850                  lane_len_in_hex,
1851                  static_cast<uint64_t>(results[results_index]),
1852                  lane_len_in_hex,
1853                  static_cast<uint64_t>(expect));
1854         }
1855       }
1856     }
1857     VIXL_ASSERT(d == expected_length);
1858     if (error_count > kErrorReportLimit) {
1859       printf("%u other errors follow.\n", error_count - kErrorReportLimit);
1860     }
1861     VIXL_CHECK(error_count == 0);
1862   }
1863   delete[] results;
1864 }
1865 
1866 
1867 // ==== Tests for instructions of the form <INST> VReg, VReg, VReg. ====
1868 
1869 // TODO: Iterate over inputs_d once the traces file is split.
1870 
Test2OpNEON_Helper(Test2OpNEONHelper_t helper,uintptr_t inputs_d,uintptr_t inputs_n,unsigned inputs_n_length,uintptr_t inputs_m,unsigned inputs_m_length,uintptr_t results,VectorFormat vd_form,VectorFormat vn_form,VectorFormat vm_form,bool * skipped)1871 static void Test2OpNEON_Helper(Test2OpNEONHelper_t helper,
1872                                uintptr_t inputs_d,
1873                                uintptr_t inputs_n,
1874                                unsigned inputs_n_length,
1875                                uintptr_t inputs_m,
1876                                unsigned inputs_m_length,
1877                                uintptr_t results,
1878                                VectorFormat vd_form,
1879                                VectorFormat vn_form,
1880                                VectorFormat vm_form,
1881                                bool* skipped) {
1882   VIXL_ASSERT(vd_form != kFormatUndefined);
1883   VIXL_ASSERT(vn_form != kFormatUndefined);
1884   VIXL_ASSERT(vm_form != kFormatUndefined);
1885 
1886   CPUFeatures features;
1887   features.Combine(CPUFeatures::kNEON, CPUFeatures::kNEONHalf);
1888   features.Combine(CPUFeatures::kFP);
1889   features.Combine(CPUFeatures::kRDM);
1890   features.Combine(CPUFeatures::kDotProduct);
1891   features.Combine(CPUFeatures::kFHM);
1892   SETUP_WITH_FEATURES(features);
1893   START();
1894 
1895   // Roll up the loop to keep the code size down.
1896   Label loop_n, loop_m;
1897 
1898   Register out = x0;
1899   Register inputs_n_base = x1;
1900   Register inputs_m_base = x2;
1901   Register inputs_d_base = x3;
1902   Register inputs_n_last_16bytes = x4;
1903   Register inputs_m_last_16bytes = x5;
1904   Register index_n = x6;
1905   Register index_m = x7;
1906 
1907   // TODO: Refactor duplicate definitions below with a VRegister::As() routine.
1908   const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form);
1909   const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
1910 
1911   const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form);
1912   const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
1913   const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
1914   const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form);
1915   const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form);
1916 
1917   const unsigned vm_bits = RegisterSizeInBitsFromFormat(vm_form);
1918   const unsigned vm_lane_count = LaneCountFromFormat(vm_form);
1919   const unsigned vm_lane_bytes = LaneSizeInBytesFromFormat(vm_form);
1920   const unsigned vm_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vm_form);
1921   const unsigned vm_lane_bits = LaneSizeInBitsFromFormat(vm_form);
1922 
1923 
1924   // Always load and store 128 bits regardless of the format.
1925   VRegister vd = v0.V16B();
1926   VRegister vn = v1.V16B();
1927   VRegister vm = v2.V16B();
1928   VRegister vntmp = v3.V16B();
1929   VRegister vmtmp = v4.V16B();
1930   VRegister vres = v5.V16B();
1931 
1932   // These will have the correct format for calling the 'helper'.
1933   VRegister vn_helper = VRegister(1, vn_bits, vn_lane_count);
1934   VRegister vm_helper = VRegister(2, vm_bits, vm_lane_count);
1935   VRegister vres_helper = VRegister(5, vd_bits, vd_lane_count);
1936 
1937   // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'.
1938   VRegister vntmp_single = VRegister(3, vn_lane_bits);
1939   VRegister vmtmp_single = VRegister(4, vm_lane_bits);
1940 
1941   __ Mov(out, results);
1942 
1943   __ Mov(inputs_d_base, inputs_d);
1944 
1945   __ Mov(inputs_n_base, inputs_n);
1946   __ Mov(inputs_n_last_16bytes, inputs_n + (inputs_n_length - 16));
1947   __ Mov(inputs_m_base, inputs_m);
1948   __ Mov(inputs_m_last_16bytes, inputs_m + (inputs_m_length - 16));
1949 
1950   __ Ldr(vd, MemOperand(inputs_d_base));
1951   __ Ldr(vn, MemOperand(inputs_n_last_16bytes));
1952   __ Ldr(vm, MemOperand(inputs_m_last_16bytes));
1953 
1954   __ Mov(index_n, 0);
1955   __ Bind(&loop_n);
1956 
1957   __ Ldr(vntmp_single,
1958          MemOperand(inputs_n_base, index_n, LSL, vn_lane_bytes_log2));
1959   __ Ext(vn, vn, vntmp, vn_lane_bytes);
1960 
1961   __ Mov(index_m, 0);
1962   __ Bind(&loop_m);
1963 
1964   __ Ldr(vmtmp_single,
1965          MemOperand(inputs_m_base, index_m, LSL, vm_lane_bytes_log2));
1966   __ Ext(vm, vm, vmtmp, vm_lane_bytes);
1967 
1968   __ Mov(vres, vd);
1969   {
1970     SingleEmissionCheckScope guard(&masm);
1971     (masm.*helper)(vres_helper, vn_helper, vm_helper);
1972   }
1973   __ Str(vres, MemOperand(out, vd.GetSizeInBytes(), PostIndex));
1974 
1975   __ Add(index_m, index_m, 1);
1976   __ Cmp(index_m, inputs_m_length);
1977   __ B(lo, &loop_m);
1978 
1979   __ Add(index_n, index_n, 1);
1980   __ Cmp(index_n, inputs_n_length);
1981   __ B(lo, &loop_n);
1982 
1983   END();
1984   TRY_RUN(skipped);
1985 }
1986 
1987 
1988 // Test NEON instructions. The inputs_*[] and expected[] arrays should be
1989 // arrays of rawbit representation of input values. This ensures that
1990 // exact bit comparisons can be performed.
1991 template <typename Td, typename Tn, typename Tm>
Test2OpNEON(const char * name,Test2OpNEONHelper_t helper,const Td inputs_d[],const Tn inputs_n[],unsigned inputs_n_length,const Tm inputs_m[],unsigned inputs_m_length,const Td expected[],unsigned expected_length,VectorFormat vd_form,VectorFormat vn_form,VectorFormat vm_form)1992 static void Test2OpNEON(const char* name,
1993                         Test2OpNEONHelper_t helper,
1994                         const Td inputs_d[],
1995                         const Tn inputs_n[],
1996                         unsigned inputs_n_length,
1997                         const Tm inputs_m[],
1998                         unsigned inputs_m_length,
1999                         const Td expected[],
2000                         unsigned expected_length,
2001                         VectorFormat vd_form,
2002                         VectorFormat vn_form,
2003                         VectorFormat vm_form) {
2004   VIXL_ASSERT(inputs_n_length > 0 && inputs_m_length > 0);
2005 
2006   const unsigned vd_lane_count = MaxLaneCountFromFormat(vd_form);
2007 
2008   const unsigned results_length = inputs_n_length * inputs_m_length;
2009   Td* results = new Td[results_length * vd_lane_count];
2010   const unsigned lane_bit = sizeof(Td) * 8;
2011   const unsigned lane_len_in_hex = MaxHexCharCount<Td, Tm>();
2012 
2013   bool skipped;
2014 
2015   Test2OpNEON_Helper(helper,
2016                      reinterpret_cast<uintptr_t>(inputs_d),
2017                      reinterpret_cast<uintptr_t>(inputs_n),
2018                      inputs_n_length,
2019                      reinterpret_cast<uintptr_t>(inputs_m),
2020                      inputs_m_length,
2021                      reinterpret_cast<uintptr_t>(results),
2022                      vd_form,
2023                      vn_form,
2024                      vm_form,
2025                      &skipped);
2026 
2027   if (Test::generate_test_trace()) {
2028     // Print the results.
2029     printf("const uint%u_t kExpected_NEON_%s[] = {\n", lane_bit, name);
2030     for (unsigned iteration = 0; iteration < results_length; iteration++) {
2031       printf(" ");
2032       // Output a separate result for each element of the result vector.
2033       for (unsigned lane = 0; lane < vd_lane_count; lane++) {
2034         unsigned index = lane + (iteration * vd_lane_count);
2035         printf(" 0x%0*" PRIx64 ",",
2036                lane_len_in_hex,
2037                static_cast<uint64_t>(results[index]));
2038       }
2039       printf("\n");
2040     }
2041 
2042     printf("};\n");
2043     printf("const unsigned kExpectedCount_NEON_%s = %u;\n",
2044            name,
2045            results_length);
2046   } else if (!skipped) {
2047     // Check the results.
2048     VIXL_CHECK(expected_length == results_length);
2049     unsigned error_count = 0;
2050     unsigned d = 0;
2051     const char* padding = "                    ";
2052     VIXL_ASSERT(strlen(padding) >= (lane_len_in_hex + 1));
2053     for (unsigned n = 0; n < inputs_n_length; n++) {
2054       for (unsigned m = 0; m < inputs_m_length; m++, d++) {
2055         bool error_in_vector = false;
2056 
2057         for (unsigned lane = 0; lane < vd_lane_count; lane++) {
2058           unsigned output_index = (n * inputs_m_length * vd_lane_count) +
2059                                   (m * vd_lane_count) + lane;
2060 
2061           if (results[output_index] != expected[output_index]) {
2062             error_in_vector = true;
2063             break;
2064           }
2065         }
2066 
2067         if (error_in_vector && (++error_count <= kErrorReportLimit)) {
2068           printf("%s\n", name);
2069           printf(" Vd%.*s| Vn%.*s| Vm%.*s| Vd%.*s| Expected\n",
2070                  lane_len_in_hex + 1,
2071                  padding,
2072                  lane_len_in_hex + 1,
2073                  padding,
2074                  lane_len_in_hex + 1,
2075                  padding,
2076                  lane_len_in_hex + 1,
2077                  padding);
2078 
2079           for (unsigned lane = 0; lane < vd_lane_count; lane++) {
2080             unsigned output_index = (n * inputs_m_length * vd_lane_count) +
2081                                     (m * vd_lane_count) + lane;
2082             unsigned input_index_n =
2083                 (inputs_n_length - vd_lane_count + n + 1 + lane) %
2084                 inputs_n_length;
2085             unsigned input_index_m =
2086                 (inputs_m_length - vd_lane_count + m + 1 + lane) %
2087                 inputs_m_length;
2088 
2089             printf("%c0x%0*" PRIx64 " | 0x%0*" PRIx64 " | 0x%0*" PRIx64
2090                    " "
2091                    "| 0x%0*" PRIx64 " | 0x%0*" PRIx64 "\n",
2092                    results[output_index] != expected[output_index] ? '*' : ' ',
2093                    lane_len_in_hex,
2094                    static_cast<uint64_t>(inputs_d[lane]),
2095                    lane_len_in_hex,
2096                    static_cast<uint64_t>(inputs_n[input_index_n]),
2097                    lane_len_in_hex,
2098                    static_cast<uint64_t>(inputs_m[input_index_m]),
2099                    lane_len_in_hex,
2100                    static_cast<uint64_t>(results[output_index]),
2101                    lane_len_in_hex,
2102                    static_cast<uint64_t>(expected[output_index]));
2103           }
2104         }
2105       }
2106     }
2107     VIXL_ASSERT(d == expected_length);
2108     if (error_count > kErrorReportLimit) {
2109       printf("%u other errors follow.\n", error_count - kErrorReportLimit);
2110     }
2111     VIXL_CHECK(error_count == 0);
2112   }
2113   delete[] results;
2114 }
2115 
2116 
2117 // ==== Tests for instructions of the form <INST> Vd, Vn, Vm[<#index>]. ====
2118 
TestByElementNEON_Helper(TestByElementNEONHelper_t helper,uintptr_t inputs_d,uintptr_t inputs_n,unsigned inputs_n_length,uintptr_t inputs_m,unsigned inputs_m_length,const int indices[],unsigned indices_length,uintptr_t results,VectorFormat vd_form,VectorFormat vn_form,VectorFormat vm_form,unsigned vm_subvector_count,bool * skipped)2119 static void TestByElementNEON_Helper(TestByElementNEONHelper_t helper,
2120                                      uintptr_t inputs_d,
2121                                      uintptr_t inputs_n,
2122                                      unsigned inputs_n_length,
2123                                      uintptr_t inputs_m,
2124                                      unsigned inputs_m_length,
2125                                      const int indices[],
2126                                      unsigned indices_length,
2127                                      uintptr_t results,
2128                                      VectorFormat vd_form,
2129                                      VectorFormat vn_form,
2130                                      VectorFormat vm_form,
2131                                      unsigned vm_subvector_count,
2132                                      bool* skipped) {
2133   VIXL_ASSERT(vd_form != kFormatUndefined);
2134   VIXL_ASSERT(vn_form != kFormatUndefined);
2135   VIXL_ASSERT(vm_form != kFormatUndefined);
2136   VIXL_ASSERT((vm_subvector_count != 0) && IsPowerOf2(vm_subvector_count));
2137 
2138   CPUFeatures features;
2139   features.Combine(CPUFeatures::kNEON, CPUFeatures::kNEONHalf);
2140   features.Combine(CPUFeatures::kFP);
2141   features.Combine(CPUFeatures::kRDM);
2142   features.Combine(CPUFeatures::kDotProduct);
2143   features.Combine(CPUFeatures::kFHM);
2144   SETUP_WITH_FEATURES(features);
2145 
2146   START();
2147 
2148   // Roll up the loop to keep the code size down.
2149   Label loop_n, loop_m;
2150 
2151   Register out = x0;
2152   Register inputs_n_base = x1;
2153   Register inputs_m_base = x2;
2154   Register inputs_d_base = x3;
2155   Register inputs_n_last_16bytes = x4;
2156   Register inputs_m_last_16bytes = x5;
2157   Register index_n = x6;
2158   Register index_m = x7;
2159 
2160   // TODO: Refactor duplicate definitions below with a VRegister::As() routine.
2161   const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form);
2162   const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
2163 
2164   const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form);
2165   const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
2166   const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
2167   const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form);
2168   const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form);
2169 
2170   const unsigned vm_bits = RegisterSizeInBitsFromFormat(vm_form);
2171   const unsigned vm_lane_count = LaneCountFromFormat(vm_form);
2172   const unsigned vm_lane_bytes = LaneSizeInBytesFromFormat(vm_form);
2173   const unsigned vm_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vm_form);
2174   const unsigned vm_lane_bits = LaneSizeInBitsFromFormat(vm_form);
2175 
2176   VIXL_ASSERT((vm_bits * vm_subvector_count) <= kQRegSize);
2177 
2178   // Always load and store 128 bits regardless of the format.
2179   VRegister vd = v0.V16B();
2180   VRegister vn = v1.V16B();
2181   VRegister vm = v2.V16B();
2182   VRegister vntmp = v3.V16B();
2183   VRegister vmtmp = v4.V16B();
2184   VRegister vres = v5.V16B();
2185 
2186   // These will have the correct format for calling the 'helper'.
2187   VRegister vn_helper = VRegister(1, vn_bits, vn_lane_count);
2188   VRegister vm_helper =
2189       VRegister(2, vm_bits * vm_subvector_count, vm_lane_count);
2190   VRegister vres_helper = VRegister(5, vd_bits, vd_lane_count);
2191 
2192   // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'.
2193   VRegister vntmp_single = VRegister(3, vn_lane_bits);
2194   VRegister vmtmp_single = VRegister(4, vm_lane_bits);
2195 
2196   __ Mov(out, results);
2197 
2198   __ Mov(inputs_d_base, inputs_d);
2199 
2200   __ Mov(inputs_n_base, inputs_n);
2201   __ Mov(inputs_n_last_16bytes, inputs_n + (inputs_n_length - 16));
2202   __ Mov(inputs_m_base, inputs_m);
2203   __ Mov(inputs_m_last_16bytes, inputs_m + (inputs_m_length - 16));
2204 
2205   __ Ldr(vd, MemOperand(inputs_d_base));
2206   __ Ldr(vn, MemOperand(inputs_n_last_16bytes));
2207   __ Ldr(vm, MemOperand(inputs_m_last_16bytes));
2208 
2209   __ Mov(index_n, 0);
2210   __ Bind(&loop_n);
2211 
2212   __ Ldr(vntmp_single,
2213          MemOperand(inputs_n_base, index_n, LSL, vn_lane_bytes_log2));
2214   __ Ext(vn, vn, vntmp, vn_lane_bytes);
2215 
2216   __ Mov(index_m, 0);
2217   __ Bind(&loop_m);
2218 
2219   __ Ldr(vmtmp_single,
2220          MemOperand(inputs_m_base, index_m, LSL, vm_lane_bytes_log2));
2221   __ Ext(vm, vm, vmtmp, vm_lane_bytes);
2222 
2223   __ Mov(vres, vd);
2224   {
2225     for (unsigned i = 0; i < indices_length; i++) {
2226       {
2227         SingleEmissionCheckScope guard(&masm);
2228         (masm.*helper)(vres_helper, vn_helper, vm_helper, indices[i]);
2229       }
2230       __ Str(vres, MemOperand(out, vd.GetSizeInBytes(), PostIndex));
2231     }
2232   }
2233 
2234   __ Add(index_m, index_m, 1);
2235   __ Cmp(index_m, inputs_m_length);
2236   __ B(lo, &loop_m);
2237 
2238   __ Add(index_n, index_n, 1);
2239   __ Cmp(index_n, inputs_n_length);
2240   __ B(lo, &loop_n);
2241 
2242   END();
2243   TRY_RUN(skipped);
2244 }
2245 
2246 
2247 // Test NEON instructions. The inputs_*[] and expected[] arrays should be
2248 // arrays of rawbit representation of input values. This ensures that
2249 // exact bit comparisons can be performed.
2250 template <typename Td, typename Tn, typename Tm>
TestByElementNEON(const char * name,TestByElementNEONHelper_t helper,const Td inputs_d[],const Tn inputs_n[],unsigned inputs_n_length,const Tm inputs_m[],unsigned inputs_m_length,const int indices[],unsigned indices_length,const Td expected[],unsigned expected_length,VectorFormat vd_form,VectorFormat vn_form,VectorFormat vm_form,unsigned vm_subvector_count=1)2251 static void TestByElementNEON(const char* name,
2252                               TestByElementNEONHelper_t helper,
2253                               const Td inputs_d[],
2254                               const Tn inputs_n[],
2255                               unsigned inputs_n_length,
2256                               const Tm inputs_m[],
2257                               unsigned inputs_m_length,
2258                               const int indices[],
2259                               unsigned indices_length,
2260                               const Td expected[],
2261                               unsigned expected_length,
2262                               VectorFormat vd_form,
2263                               VectorFormat vn_form,
2264                               VectorFormat vm_form,
2265                               unsigned vm_subvector_count = 1) {
2266   VIXL_ASSERT(inputs_n_length > 0);
2267   VIXL_ASSERT(inputs_m_length > 0);
2268   VIXL_ASSERT(indices_length > 0);
2269 
2270   const unsigned vd_lane_count = MaxLaneCountFromFormat(vd_form);
2271 
2272   const unsigned results_length =
2273       inputs_n_length * inputs_m_length * indices_length;
2274   Td* results = new Td[results_length * vd_lane_count];
2275   const unsigned lane_bit = sizeof(Td) * 8;
2276   const unsigned lane_len_in_hex = MaxHexCharCount<Td, Tm>();
2277 
2278   bool skipped;
2279 
2280   TestByElementNEON_Helper(helper,
2281                            reinterpret_cast<uintptr_t>(inputs_d),
2282                            reinterpret_cast<uintptr_t>(inputs_n),
2283                            inputs_n_length,
2284                            reinterpret_cast<uintptr_t>(inputs_m),
2285                            inputs_m_length,
2286                            indices,
2287                            indices_length,
2288                            reinterpret_cast<uintptr_t>(results),
2289                            vd_form,
2290                            vn_form,
2291                            vm_form,
2292                            vm_subvector_count,
2293                            &skipped);
2294 
2295   if (Test::generate_test_trace()) {
2296     // Print the results.
2297     printf("const uint%u_t kExpected_NEON_%s[] = {\n", lane_bit, name);
2298     for (unsigned iteration = 0; iteration < results_length; iteration++) {
2299       printf(" ");
2300       // Output a separate result for each element of the result vector.
2301       for (unsigned lane = 0; lane < vd_lane_count; lane++) {
2302         unsigned index = lane + (iteration * vd_lane_count);
2303         printf(" 0x%0*" PRIx64 ",",
2304                lane_len_in_hex,
2305                static_cast<uint64_t>(results[index]));
2306       }
2307       printf("\n");
2308     }
2309 
2310     printf("};\n");
2311     printf("const unsigned kExpectedCount_NEON_%s = %u;\n",
2312            name,
2313            results_length);
2314   } else if (!skipped) {
2315     // Check the results.
2316     VIXL_CHECK(expected_length == results_length);
2317     unsigned error_count = 0;
2318     unsigned d = 0;
2319     const char* padding = "                    ";
2320     VIXL_ASSERT(strlen(padding) >= (lane_len_in_hex + 1));
2321     for (unsigned n = 0; n < inputs_n_length; n++) {
2322       for (unsigned m = 0; m < inputs_m_length; m++) {
2323         for (unsigned index = 0; index < indices_length; index++, d++) {
2324           bool error_in_vector = false;
2325 
2326           for (unsigned lane = 0; lane < vd_lane_count; lane++) {
2327             unsigned output_index =
2328                 (n * inputs_m_length * indices_length * vd_lane_count) +
2329                 (m * indices_length * vd_lane_count) + (index * vd_lane_count) +
2330                 lane;
2331 
2332             if (results[output_index] != expected[output_index]) {
2333               error_in_vector = true;
2334               break;
2335             }
2336           }
2337 
2338           if (error_in_vector && (++error_count <= kErrorReportLimit)) {
2339             printf("%s\n", name);
2340             printf(" Vd%.*s| Vn%.*s| Vm%.*s| Index | Vd%.*s| Expected\n",
2341                    lane_len_in_hex + 1,
2342                    padding,
2343                    lane_len_in_hex + 1,
2344                    padding,
2345                    lane_len_in_hex + 1,
2346                    padding,
2347                    lane_len_in_hex + 1,
2348                    padding);
2349 
2350             for (unsigned lane = 0; lane < vd_lane_count; lane++) {
2351               unsigned output_index =
2352                   (n * inputs_m_length * indices_length * vd_lane_count) +
2353                   (m * indices_length * vd_lane_count) +
2354                   (index * vd_lane_count) + lane;
2355               unsigned input_index_n =
2356                   (inputs_n_length - vd_lane_count + n + 1 + lane) %
2357                   inputs_n_length;
2358               unsigned input_index_m =
2359                   (inputs_m_length - vd_lane_count + m + 1 + lane) %
2360                   inputs_m_length;
2361 
2362               printf("%c0x%0*" PRIx64 " | 0x%0*" PRIx64 " | 0x%0*" PRIx64
2363                      " "
2364                      "| [%3d] | 0x%0*" PRIx64 " | 0x%0*" PRIx64 "\n",
2365                      results[output_index] != expected[output_index] ? '*'
2366                                                                      : ' ',
2367                      lane_len_in_hex,
2368                      static_cast<uint64_t>(inputs_d[lane]),
2369                      lane_len_in_hex,
2370                      static_cast<uint64_t>(inputs_n[input_index_n]),
2371                      lane_len_in_hex,
2372                      static_cast<uint64_t>(inputs_m[input_index_m]),
2373                      indices[index],
2374                      lane_len_in_hex,
2375                      static_cast<uint64_t>(results[output_index]),
2376                      lane_len_in_hex,
2377                      static_cast<uint64_t>(expected[output_index]));
2378             }
2379           }
2380         }
2381       }
2382     }
2383     VIXL_ASSERT(d == expected_length);
2384     if (error_count > kErrorReportLimit) {
2385       printf("%u other errors follow.\n", error_count - kErrorReportLimit);
2386     }
2387     VIXL_CHECK(error_count == 0);
2388   }
2389   delete[] results;
2390 }
2391 
2392 
2393 // ==== Tests for instructions of the form <INST> VReg, VReg, #Immediate. ====
2394 
2395 
2396 template <typename Tm>
Test2OpImmNEON_Helper(typename Test2OpImmediateNEONHelper_t<Tm>::mnemonic helper,uintptr_t inputs_n,unsigned inputs_n_length,const Tm inputs_m[],unsigned inputs_m_length,uintptr_t results,VectorFormat vd_form,VectorFormat vn_form,bool * skipped)2397 void Test2OpImmNEON_Helper(
2398     typename Test2OpImmediateNEONHelper_t<Tm>::mnemonic helper,
2399     uintptr_t inputs_n,
2400     unsigned inputs_n_length,
2401     const Tm inputs_m[],
2402     unsigned inputs_m_length,
2403     uintptr_t results,
2404     VectorFormat vd_form,
2405     VectorFormat vn_form,
2406     bool* skipped) {
2407   VIXL_ASSERT(vd_form != kFormatUndefined && vn_form != kFormatUndefined);
2408 
2409   SETUP_WITH_FEATURES(CPUFeatures::kNEON,
2410                       CPUFeatures::kFP,
2411                       CPUFeatures::kNEONHalf);
2412   START();
2413 
2414   // Roll up the loop to keep the code size down.
2415   Label loop_n;
2416 
2417   Register out = x0;
2418   Register inputs_n_base = x1;
2419   Register inputs_n_last_16bytes = x3;
2420   Register index_n = x5;
2421 
2422   // TODO: Refactor duplicate definitions below with a VRegister::As() routine.
2423   const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form);
2424   const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
2425 
2426   const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form);
2427   const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
2428   const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
2429   const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form);
2430   const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form);
2431 
2432 
2433   // These will be either a D- or a Q-register form, with a single lane
2434   // (for use in scalar load and store operations).
2435   VRegister vd = VRegister(0, vd_bits);
2436   VRegister vn = v1.V16B();
2437   VRegister vntmp = v3.V16B();
2438 
2439   // These will have the correct format for use when calling 'helper'.
2440   VRegister vd_helper = VRegister(0, vd_bits, vd_lane_count);
2441   VRegister vn_helper = VRegister(1, vn_bits, vn_lane_count);
2442 
2443   // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'.
2444   VRegister vntmp_single = VRegister(3, vn_lane_bits);
2445 
2446   __ Mov(out, results);
2447 
2448   __ Mov(inputs_n_base, inputs_n);
2449   __ Mov(inputs_n_last_16bytes,
2450          inputs_n + (vn_lane_bytes * inputs_n_length) - 16);
2451 
2452   __ Ldr(vn, MemOperand(inputs_n_last_16bytes));
2453 
2454   __ Mov(index_n, 0);
2455   __ Bind(&loop_n);
2456 
2457   __ Ldr(vntmp_single,
2458          MemOperand(inputs_n_base, index_n, LSL, vn_lane_bytes_log2));
2459   __ Ext(vn, vn, vntmp, vn_lane_bytes);
2460 
2461   // Set the destination to zero for tests such as '[r]shrn2'.
2462   // TODO: Setting the destination to values other than zero might be a better
2463   //       test for shift and accumulate instructions (srsra/ssra/usra/ursra).
2464   __ Movi(vd.V16B(), 0);
2465 
2466   {
2467     for (unsigned i = 0; i < inputs_m_length; i++) {
2468       {
2469         SingleEmissionCheckScope guard(&masm);
2470         (masm.*helper)(vd_helper, vn_helper, inputs_m[i]);
2471       }
2472       __ Str(vd, MemOperand(out, vd.GetSizeInBytes(), PostIndex));
2473     }
2474   }
2475 
2476   __ Add(index_n, index_n, 1);
2477   __ Cmp(index_n, inputs_n_length);
2478   __ B(lo, &loop_n);
2479 
2480   END();
2481   TRY_RUN(skipped);
2482 }
2483 
2484 
2485 // Test NEON instructions. The inputs_*[] and expected[] arrays should be
2486 // arrays of rawbit representation of input values. This ensures that
2487 // exact bit comparisons can be performed.
2488 template <typename Td, typename Tn, typename Tm>
Test2OpImmNEON(const char * name,typename Test2OpImmediateNEONHelper_t<Tm>::mnemonic helper,const Tn inputs_n[],unsigned inputs_n_length,const Tm inputs_m[],unsigned inputs_m_length,const Td expected[],unsigned expected_length,VectorFormat vd_form,VectorFormat vn_form)2489 static void Test2OpImmNEON(
2490     const char* name,
2491     typename Test2OpImmediateNEONHelper_t<Tm>::mnemonic helper,
2492     const Tn inputs_n[],
2493     unsigned inputs_n_length,
2494     const Tm inputs_m[],
2495     unsigned inputs_m_length,
2496     const Td expected[],
2497     unsigned expected_length,
2498     VectorFormat vd_form,
2499     VectorFormat vn_form) {
2500   VIXL_ASSERT(inputs_n_length > 0 && inputs_m_length > 0);
2501 
2502   const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
2503   const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
2504   const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
2505 
2506   const unsigned results_length = inputs_n_length * inputs_m_length;
2507   Td* results = new Td[results_length * vd_lane_count];
2508   const unsigned lane_bit = sizeof(Td) * 8;
2509   const unsigned lane_len_in_hex = MaxHexCharCount<Td, Tn>();
2510 
2511   bool skipped;
2512 
2513   Test2OpImmNEON_Helper(helper,
2514                         reinterpret_cast<uintptr_t>(inputs_n),
2515                         inputs_n_length,
2516                         inputs_m,
2517                         inputs_m_length,
2518                         reinterpret_cast<uintptr_t>(results),
2519                         vd_form,
2520                         vn_form,
2521                         &skipped);
2522 
2523   if (Test::generate_test_trace()) {
2524     // Print the results.
2525     printf("const uint%u_t kExpected_NEON_%s[] = {\n", lane_bit, name);
2526     for (unsigned iteration = 0; iteration < results_length; iteration++) {
2527       printf(" ");
2528       // Output a separate result for each element of the result vector.
2529       for (unsigned lane = 0; lane < vd_lane_count; lane++) {
2530         unsigned index = lane + (iteration * vd_lane_count);
2531         printf(" 0x%0*" PRIx64 ",",
2532                lane_len_in_hex,
2533                static_cast<uint64_t>(results[index]));
2534       }
2535       printf("\n");
2536     }
2537 
2538     printf("};\n");
2539     printf("const unsigned kExpectedCount_NEON_%s = %u;\n",
2540            name,
2541            results_length);
2542   } else if (!skipped) {
2543     // Check the results.
2544     VIXL_CHECK(expected_length == results_length);
2545     unsigned error_count = 0;
2546     unsigned d = 0;
2547     const char* padding = "                    ";
2548     VIXL_ASSERT(strlen(padding) >= (lane_len_in_hex + 1));
2549     for (unsigned n = 0; n < inputs_n_length; n++) {
2550       for (unsigned m = 0; m < inputs_m_length; m++, d++) {
2551         bool error_in_vector = false;
2552 
2553         for (unsigned lane = 0; lane < vd_lane_count; lane++) {
2554           unsigned output_index = (n * inputs_m_length * vd_lane_count) +
2555                                   (m * vd_lane_count) + lane;
2556 
2557           if (results[output_index] != expected[output_index]) {
2558             error_in_vector = true;
2559             break;
2560           }
2561         }
2562 
2563         if (error_in_vector && (++error_count <= kErrorReportLimit)) {
2564           printf("%s\n", name);
2565           printf(" Vn%.*s| Imm%.*s| Vd%.*s| Expected\n",
2566                  lane_len_in_hex + 1,
2567                  padding,
2568                  lane_len_in_hex,
2569                  padding,
2570                  lane_len_in_hex + 1,
2571                  padding);
2572 
2573           const unsigned first_index_n =
2574               inputs_n_length - (16 / vn_lane_bytes) + n + 1;
2575 
2576           for (unsigned lane = 0; lane < std::max(vd_lane_count, vn_lane_count);
2577                lane++) {
2578             unsigned output_index = (n * inputs_m_length * vd_lane_count) +
2579                                     (m * vd_lane_count) + lane;
2580             unsigned input_index_n = (first_index_n + lane) % inputs_n_length;
2581             unsigned input_index_m = m;
2582 
2583             printf("%c0x%0*" PRIx64 " | 0x%0*" PRIx64
2584                    " "
2585                    "| 0x%0*" PRIx64 " | 0x%0*" PRIx64 "\n",
2586                    results[output_index] != expected[output_index] ? '*' : ' ',
2587                    lane_len_in_hex,
2588                    static_cast<uint64_t>(inputs_n[input_index_n]),
2589                    lane_len_in_hex,
2590                    static_cast<uint64_t>(inputs_m[input_index_m]),
2591                    lane_len_in_hex,
2592                    static_cast<uint64_t>(results[output_index]),
2593                    lane_len_in_hex,
2594                    static_cast<uint64_t>(expected[output_index]));
2595           }
2596         }
2597       }
2598     }
2599     VIXL_ASSERT(d == expected_length);
2600     if (error_count > kErrorReportLimit) {
2601       printf("%u other errors follow.\n", error_count - kErrorReportLimit);
2602     }
2603     VIXL_CHECK(error_count == 0);
2604   }
2605   delete[] results;
2606 }
2607 
2608 
2609 // ==== Tests for instructions of the form <INST> VReg, #Imm, VReg, #Imm. ====
2610 
2611 
TestOpImmOpImmNEON_Helper(TestOpImmOpImmVdUpdateNEONHelper_t helper,uintptr_t inputs_d,const int inputs_imm1[],unsigned inputs_imm1_length,uintptr_t inputs_n,unsigned inputs_n_length,const int inputs_imm2[],unsigned inputs_imm2_length,uintptr_t results,VectorFormat vd_form,VectorFormat vn_form,bool * skipped)2612 static void TestOpImmOpImmNEON_Helper(TestOpImmOpImmVdUpdateNEONHelper_t helper,
2613                                       uintptr_t inputs_d,
2614                                       const int inputs_imm1[],
2615                                       unsigned inputs_imm1_length,
2616                                       uintptr_t inputs_n,
2617                                       unsigned inputs_n_length,
2618                                       const int inputs_imm2[],
2619                                       unsigned inputs_imm2_length,
2620                                       uintptr_t results,
2621                                       VectorFormat vd_form,
2622                                       VectorFormat vn_form,
2623                                       bool* skipped) {
2624   VIXL_ASSERT(vd_form != kFormatUndefined);
2625   VIXL_ASSERT(vn_form != kFormatUndefined);
2626 
2627   SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);
2628   START();
2629 
2630   // Roll up the loop to keep the code size down.
2631   Label loop_n;
2632 
2633   Register out = x0;
2634   Register inputs_d_base = x1;
2635   Register inputs_n_base = x2;
2636   Register inputs_n_last_vector = x4;
2637   Register index_n = x6;
2638 
2639   // TODO: Refactor duplicate definitions below with a VRegister::As() routine.
2640   const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form);
2641   const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
2642 
2643   const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form);
2644   const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
2645   const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
2646   const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form);
2647   const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form);
2648 
2649 
2650   // These will be either a D- or a Q-register form, with a single lane
2651   // (for use in scalar load and store operations).
2652   VRegister vd = VRegister(0, vd_bits);
2653   VRegister vn = VRegister(1, vn_bits);
2654   VRegister vntmp = VRegister(4, vn_bits);
2655   VRegister vres = VRegister(5, vn_bits);
2656 
2657   VRegister vn_helper = VRegister(1, vn_bits, vn_lane_count);
2658   VRegister vres_helper = VRegister(5, vd_bits, vd_lane_count);
2659 
2660   // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'.
2661   VRegister vntmp_single = VRegister(4, vn_lane_bits);
2662 
2663   // Same registers for use in the 'ext' instructions.
2664   VRegister vn_ext = (kDRegSize == vn_bits) ? vn.V8B() : vn.V16B();
2665   VRegister vntmp_ext = (kDRegSize == vn_bits) ? vntmp.V8B() : vntmp.V16B();
2666 
2667   __ Mov(out, results);
2668 
2669   __ Mov(inputs_d_base, inputs_d);
2670 
2671   __ Mov(inputs_n_base, inputs_n);
2672   __ Mov(inputs_n_last_vector,
2673          inputs_n + vn_lane_bytes * (inputs_n_length - vn_lane_count));
2674 
2675   __ Ldr(vd, MemOperand(inputs_d_base));
2676 
2677   __ Ldr(vn, MemOperand(inputs_n_last_vector));
2678 
2679   __ Mov(index_n, 0);
2680   __ Bind(&loop_n);
2681 
2682   __ Ldr(vntmp_single,
2683          MemOperand(inputs_n_base, index_n, LSL, vn_lane_bytes_log2));
2684   __ Ext(vn_ext, vn_ext, vntmp_ext, vn_lane_bytes);
2685 
2686   {
2687     EmissionCheckScope guard(&masm,
2688                              kInstructionSize * inputs_imm1_length *
2689                                  inputs_imm2_length * 3);
2690     for (unsigned i = 0; i < inputs_imm1_length; i++) {
2691       for (unsigned j = 0; j < inputs_imm2_length; j++) {
2692         __ Mov(vres, vd);
2693         (masm.*helper)(vres_helper, inputs_imm1[i], vn_helper, inputs_imm2[j]);
2694         __ Str(vres, MemOperand(out, vd.GetSizeInBytes(), PostIndex));
2695       }
2696     }
2697   }
2698 
2699   __ Add(index_n, index_n, 1);
2700   __ Cmp(index_n, inputs_n_length);
2701   __ B(lo, &loop_n);
2702 
2703   END();
2704   TRY_RUN(skipped);
2705 }
2706 
2707 
2708 // Test NEON instructions. The inputs_*[] and expected[] arrays should be
2709 // arrays of rawbit representation of input values. This ensures that
2710 // exact bit comparisons can be performed.
2711 template <typename Td, typename Tn>
TestOpImmOpImmNEON(const char * name,TestOpImmOpImmVdUpdateNEONHelper_t helper,const Td inputs_d[],const int inputs_imm1[],unsigned inputs_imm1_length,const Tn inputs_n[],unsigned inputs_n_length,const int inputs_imm2[],unsigned inputs_imm2_length,const Td expected[],unsigned expected_length,VectorFormat vd_form,VectorFormat vn_form)2712 static void TestOpImmOpImmNEON(const char* name,
2713                                TestOpImmOpImmVdUpdateNEONHelper_t helper,
2714                                const Td inputs_d[],
2715                                const int inputs_imm1[],
2716                                unsigned inputs_imm1_length,
2717                                const Tn inputs_n[],
2718                                unsigned inputs_n_length,
2719                                const int inputs_imm2[],
2720                                unsigned inputs_imm2_length,
2721                                const Td expected[],
2722                                unsigned expected_length,
2723                                VectorFormat vd_form,
2724                                VectorFormat vn_form) {
2725   VIXL_ASSERT(inputs_n_length > 0);
2726   VIXL_ASSERT(inputs_imm1_length > 0);
2727   VIXL_ASSERT(inputs_imm2_length > 0);
2728 
2729   const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
2730 
2731   const unsigned results_length =
2732       inputs_n_length * inputs_imm1_length * inputs_imm2_length;
2733 
2734   Td* results = new Td[results_length * vd_lane_count];
2735   const unsigned lane_bit = sizeof(Td) * 8;
2736   const unsigned lane_len_in_hex = MaxHexCharCount<Td, Tn>();
2737 
2738   bool skipped;
2739 
2740   TestOpImmOpImmNEON_Helper(helper,
2741                             reinterpret_cast<uintptr_t>(inputs_d),
2742                             inputs_imm1,
2743                             inputs_imm1_length,
2744                             reinterpret_cast<uintptr_t>(inputs_n),
2745                             inputs_n_length,
2746                             inputs_imm2,
2747                             inputs_imm2_length,
2748                             reinterpret_cast<uintptr_t>(results),
2749                             vd_form,
2750                             vn_form,
2751                             &skipped);
2752 
2753   if (Test::generate_test_trace()) {
2754     // Print the results.
2755     printf("const uint%u_t kExpected_NEON_%s[] = {\n", lane_bit, name);
2756     for (unsigned iteration = 0; iteration < results_length; iteration++) {
2757       printf(" ");
2758       // Output a separate result for each element of the result vector.
2759       for (unsigned lane = 0; lane < vd_lane_count; lane++) {
2760         unsigned index = lane + (iteration * vd_lane_count);
2761         printf(" 0x%0*" PRIx64 ",",
2762                lane_len_in_hex,
2763                static_cast<uint64_t>(results[index]));
2764       }
2765       printf("\n");
2766     }
2767 
2768     printf("};\n");
2769     printf("const unsigned kExpectedCount_NEON_%s = %u;\n",
2770            name,
2771            results_length);
2772   } else if (!skipped) {
2773     // Check the results.
2774     VIXL_CHECK(expected_length == results_length);
2775     unsigned error_count = 0;
2776     unsigned counted_length = 0;
2777     const char* padding = "                    ";
2778     VIXL_ASSERT(strlen(padding) >= (lane_len_in_hex + 1));
2779     for (unsigned n = 0; n < inputs_n_length; n++) {
2780       for (unsigned imm1 = 0; imm1 < inputs_imm1_length; imm1++) {
2781         for (unsigned imm2 = 0; imm2 < inputs_imm2_length; imm2++) {
2782           bool error_in_vector = false;
2783 
2784           counted_length++;
2785 
2786           for (unsigned lane = 0; lane < vd_lane_count; lane++) {
2787             unsigned output_index =
2788                 (n * inputs_imm1_length * inputs_imm2_length * vd_lane_count) +
2789                 (imm1 * inputs_imm2_length * vd_lane_count) +
2790                 (imm2 * vd_lane_count) + lane;
2791 
2792             if (results[output_index] != expected[output_index]) {
2793               error_in_vector = true;
2794               break;
2795             }
2796           }
2797 
2798           if (error_in_vector && (++error_count <= kErrorReportLimit)) {
2799             printf("%s\n", name);
2800             printf(" Vd%.*s| Imm%.*s| Vn%.*s| Imm%.*s| Vd%.*s| Expected\n",
2801                    lane_len_in_hex + 1,
2802                    padding,
2803                    lane_len_in_hex,
2804                    padding,
2805                    lane_len_in_hex + 1,
2806                    padding,
2807                    lane_len_in_hex,
2808                    padding,
2809                    lane_len_in_hex + 1,
2810                    padding);
2811 
2812             for (unsigned lane = 0; lane < vd_lane_count; lane++) {
2813               unsigned output_index =
2814                   (n * inputs_imm1_length * inputs_imm2_length *
2815                    vd_lane_count) +
2816                   (imm1 * inputs_imm2_length * vd_lane_count) +
2817                   (imm2 * vd_lane_count) + lane;
2818               unsigned input_index_n =
2819                   (inputs_n_length - vd_lane_count + n + 1 + lane) %
2820                   inputs_n_length;
2821               unsigned input_index_imm1 = imm1;
2822               unsigned input_index_imm2 = imm2;
2823 
2824               printf("%c0x%0*" PRIx64 " | 0x%0*" PRIx64 " | 0x%0*" PRIx64
2825                      " "
2826                      "| 0x%0*" PRIx64 " | 0x%0*" PRIx64 " | 0x%0*" PRIx64 "\n",
2827                      results[output_index] != expected[output_index] ? '*'
2828                                                                      : ' ',
2829                      lane_len_in_hex,
2830                      static_cast<uint64_t>(inputs_d[lane]),
2831                      lane_len_in_hex,
2832                      static_cast<uint64_t>(inputs_imm1[input_index_imm1]),
2833                      lane_len_in_hex,
2834                      static_cast<uint64_t>(inputs_n[input_index_n]),
2835                      lane_len_in_hex,
2836                      static_cast<uint64_t>(inputs_imm2[input_index_imm2]),
2837                      lane_len_in_hex,
2838                      static_cast<uint64_t>(results[output_index]),
2839                      lane_len_in_hex,
2840                      static_cast<uint64_t>(expected[output_index]));
2841             }
2842           }
2843         }
2844       }
2845     }
2846     VIXL_ASSERT(counted_length == expected_length);
2847     if (error_count > kErrorReportLimit) {
2848       printf("%u other errors follow.\n", error_count - kErrorReportLimit);
2849     }
2850     VIXL_CHECK(error_count == 0);
2851   }
2852   delete[] results;
2853 }
2854 
2855 
2856 // ==== Floating-point tests. ====
2857 
2858 
// Standard floating-point test expansion for both double- and single-precision
// operations.

// Turn the argument into a string literal.
#define STRINGIFY(s) #s

// Expand to a call to the Test<type>() function for one instruction variant.
//
//   mnemonic: MacroAssembler member under test; also the first half of the
//             test name string "<mnemonic>_<variant>".
//   variant:  suffix selecting the kExpected_<mnemonic>_<variant> array and
//             its kExpectedCount_<mnemonic>_<variant> length.
//   type:     suffix of the Test function to call (e.g. 1Op -> Test1Op).
//   input:    input array; it must be a real array (not a pointer), because
//             its element count is computed with sizeof.
#define CALL_TEST_FP_HELPER(mnemonic, variant, type, input) \
  Test##type(STRINGIFY(mnemonic) "_" STRINGIFY(variant),    \
             &MacroAssembler::mnemonic,                     \
             input,                                         \
             sizeof(input) / sizeof(input[0]),              \
             kExpected_##mnemonic##_##variant,              \
             kExpectedCount_##mnemonic##_##variant)
2870 
// Define two tests for 'mnemonic': <mnemonic>_d, which uses the
// kInputDouble<input> array, and <mnemonic>_s, which uses kInputFloat<input>.
// 'type' selects the Test<type>() function, as in CALL_TEST_FP_HELPER.
#define DEFINE_TEST_FP(mnemonic, type, input)                    \
  TEST(mnemonic##_d) {                                           \
    CALL_TEST_FP_HELPER(mnemonic, d, type, kInputDouble##input); \
  }                                                              \
  TEST(mnemonic##_s) {                                           \
    CALL_TEST_FP_HELPER(mnemonic, s, type, kInputFloat##input);  \
  }
2878 
// Define three tests for 'mnemonic': the same <mnemonic>_d and <mnemonic>_s
// tests as DEFINE_TEST_FP, plus <mnemonic>_h, which uses the
// kInputFloat16<input> array.
#define DEFINE_TEST_FP_FP16(mnemonic, type, input)                \
  TEST(mnemonic##_d) {                                            \
    CALL_TEST_FP_HELPER(mnemonic, d, type, kInputDouble##input);  \
  }                                                               \
  TEST(mnemonic##_s) {                                            \
    CALL_TEST_FP_HELPER(mnemonic, s, type, kInputFloat##input);   \
  }                                                               \
  TEST(mnemonic##_h) {                                            \
    CALL_TEST_FP_HELPER(mnemonic, h, type, kInputFloat16##input); \
  }
2889 
2890 
2891 // TODO: Test with a newer version of valgrind.
2892 //
// Note: valgrind-3.10.0 does not properly interpret libm's fma() on x86_64.
// Therefore this test will exit through an ASSERT and thus leak memory.
// Three-operand tests (Test3Op), with _d, _s and _h variants.
DEFINE_TEST_FP_FP16(fmadd, 3Op, Basic)
DEFINE_TEST_FP_FP16(fmsub, 3Op, Basic)
DEFINE_TEST_FP_FP16(fnmadd, 3Op, Basic)
DEFINE_TEST_FP_FP16(fnmsub, 3Op, Basic)

// Two-operand tests (Test2Op), with _d, _s and _h variants.
DEFINE_TEST_FP_FP16(fadd, 2Op, Basic)
DEFINE_TEST_FP_FP16(fdiv, 2Op, Basic)
DEFINE_TEST_FP_FP16(fmax, 2Op, Basic)
DEFINE_TEST_FP_FP16(fmaxnm, 2Op, Basic)
DEFINE_TEST_FP_FP16(fmin, 2Op, Basic)
DEFINE_TEST_FP_FP16(fminnm, 2Op, Basic)
DEFINE_TEST_FP_FP16(fmul, 2Op, Basic)
DEFINE_TEST_FP_FP16(fsub, 2Op, Basic)
DEFINE_TEST_FP_FP16(fnmul, 2Op, Basic)

// One-operand tests (Test1Op). The frint32x/frint64x/frint32z/frint64z tests
// use DEFINE_TEST_FP, so they only get _d and _s variants (no _h test).
DEFINE_TEST_FP_FP16(fabs, 1Op, Basic)
DEFINE_TEST_FP_FP16(fmov, 1Op, Basic)
DEFINE_TEST_FP_FP16(fneg, 1Op, Basic)
DEFINE_TEST_FP_FP16(fsqrt, 1Op, Basic)
DEFINE_TEST_FP(frint32x, 1Op, Conversions)
DEFINE_TEST_FP(frint64x, 1Op, Conversions)
DEFINE_TEST_FP(frint32z, 1Op, Conversions)
DEFINE_TEST_FP(frint64z, 1Op, Conversions)
DEFINE_TEST_FP_FP16(frinta, 1Op, Conversions)
DEFINE_TEST_FP_FP16(frinti, 1Op, Conversions)
DEFINE_TEST_FP_FP16(frintm, 1Op, Conversions)
DEFINE_TEST_FP_FP16(frintn, 1Op, Conversions)
DEFINE_TEST_FP_FP16(frintp, 1Op, Conversions)
DEFINE_TEST_FP_FP16(frintx, 1Op, Conversions)
DEFINE_TEST_FP_FP16(frintz, 1Op, Conversions)
2926 
// fcmp tests are written out by hand because their variants do not follow the
// _d/_s pattern of DEFINE_TEST_FP: the 'd' and 's' variants call TestCmp, and
// the 'dz' and 'sz' variants call TestCmpZero.
TEST(fcmp_d) { CALL_TEST_FP_HELPER(fcmp, d, Cmp, kInputDoubleBasic); }
TEST(fcmp_s) { CALL_TEST_FP_HELPER(fcmp, s, Cmp, kInputFloatBasic); }
TEST(fcmp_dz) { CALL_TEST_FP_HELPER(fcmp, dz, CmpZero, kInputDoubleBasic); }
TEST(fcmp_sz) { CALL_TEST_FP_HELPER(fcmp, sz, CmpZero, kInputFloatBasic); }

// fcvt tests: the 'sd' variant is fed the double-precision conversion inputs
// and the 'ds' variant the single-precision ones.
TEST(fcvt_sd) { CALL_TEST_FP_HELPER(fcvt, sd, 1Op, kInputDoubleConversions); }
TEST(fcvt_ds) { CALL_TEST_FP_HELPER(fcvt, ds, 1Op, kInputFloatConversions); }
2934 
// Define six tests for an FP-to-integer 'mnemonic'. The variant suffix is
// made of two letters: 'x' or 'w' followed by 'd', 's' or 'h'. The second
// letter selects the input array (kInputDouble<input>, kInputFloat<input> or
// kInputFloat16<input>); both letters select the kExpected_* array.
// 'type' selects the Test<type>() function, as in CALL_TEST_FP_HELPER.
#define DEFINE_TEST_FP_TO_INT(mnemonic, type, input)               \
  TEST(mnemonic##_xd) {                                            \
    CALL_TEST_FP_HELPER(mnemonic, xd, type, kInputDouble##input);  \
  }                                                                \
  TEST(mnemonic##_xs) {                                            \
    CALL_TEST_FP_HELPER(mnemonic, xs, type, kInputFloat##input);   \
  }                                                                \
  TEST(mnemonic##_xh) {                                            \
    CALL_TEST_FP_HELPER(mnemonic, xh, type, kInputFloat16##input); \
  }                                                                \
  TEST(mnemonic##_wd) {                                            \
    CALL_TEST_FP_HELPER(mnemonic, wd, type, kInputDouble##input);  \
  }                                                                \
  TEST(mnemonic##_ws) {                                            \
    CALL_TEST_FP_HELPER(mnemonic, ws, type, kInputFloat##input);   \
  }                                                                \
  TEST(mnemonic##_wh) {                                            \
    CALL_TEST_FP_HELPER(mnemonic, wh, type, kInputFloat16##input); \
  }
2954 
// FP-to-integer conversion tests. The 's' mnemonics use the TestFPToS helper
// and the 'u' mnemonics TestFPToU, except for fcvtzs/fcvtzu which go through
// TestFPToFixedS/TestFPToFixedU.
DEFINE_TEST_FP_TO_INT(fcvtas, FPToS, Conversions)
DEFINE_TEST_FP_TO_INT(fcvtau, FPToU, Conversions)
DEFINE_TEST_FP_TO_INT(fcvtms, FPToS, Conversions)
DEFINE_TEST_FP_TO_INT(fcvtmu, FPToU, Conversions)
DEFINE_TEST_FP_TO_INT(fcvtns, FPToS, Conversions)
DEFINE_TEST_FP_TO_INT(fcvtnu, FPToU, Conversions)
DEFINE_TEST_FP_TO_INT(fcvtzs, FPToFixedS, Conversions)
DEFINE_TEST_FP_TO_INT(fcvtzu, FPToFixedU, Conversions)
2963 
// Define a single <mnemonic>_wd test, fed with the kInputDouble<input> array.
// Unlike DEFINE_TEST_FP_TO_INT, no other variant is generated.
#define DEFINE_TEST_FP_TO_JS_INT(mnemonic, type, input)           \
  TEST(mnemonic##_wd) {                                           \
    CALL_TEST_FP_HELPER(mnemonic, wd, type, kInputDouble##input); \
  }

DEFINE_TEST_FP_TO_JS_INT(fjcvtzs, FPToS, Conversions)
2970 
2971 // TODO: Scvtf-fixed-point
2972 // TODO: Scvtf-integer
2973 // TODO: Ucvtf-fixed-point
2974 // TODO: Ucvtf-integer
2975 
2976 // TODO: Fccmp
2977 // TODO: Fcsel
2978 
2979 
2980 // ==== NEON Tests. ====
2981 
// Expand to a call to Test1OpNEON() for one instruction and format.
//
//   mnemonic: MacroAssembler member under test.
//   vdform:   destination format suffix; it names the test
//             ("<mnemonic>_<vdform>"), selects the
//             kExpected_NEON_<mnemonic>_<vdform> array and its
//             kExpectedCount_* length, and is passed as kFormat<vdform>.
//   vnform:   source format suffix, passed as kFormat<vnform>.
//   input_n:  input array; it must be a real array (not a pointer), because
//             its element count is computed with sizeof.
#define CALL_TEST_NEON_HELPER_1Op(mnemonic, vdform, vnform, input_n) \
  Test1OpNEON(STRINGIFY(mnemonic) "_" STRINGIFY(vdform),             \
              &MacroAssembler::mnemonic,                             \
              input_n,                                               \
              (sizeof(input_n) / sizeof(input_n[0])),                \
              kExpected_NEON_##mnemonic##_##vdform,                  \
              kExpectedCount_NEON_##mnemonic##_##vdform,             \
              kFormat##vdform,                                       \
              kFormat##vnform)
2991 
// Expand to a call to Test1OpAcrossNEON() for one instruction and pair of
// formats. Unlike CALL_TEST_NEON_HELPER_1Op, both format suffixes appear in
// the test name ("<mnemonic>_<vdform>_<vnform>") and in the names of the
// kExpected_NEON_* / kExpectedCount_NEON_* symbols.
// 'input_n' must be a real array, because its length is computed with sizeof.
#define CALL_TEST_NEON_HELPER_1OpAcross(mnemonic, vdform, vnform, input_n)   \
  Test1OpAcrossNEON(STRINGIFY(mnemonic) "_" STRINGIFY(vdform) "_" STRINGIFY( \
                        vnform),                                             \
                    &MacroAssembler::mnemonic,                               \
                    input_n,                                                 \
                    (sizeof(input_n) / sizeof(input_n[0])),                  \
                    kExpected_NEON_##mnemonic##_##vdform##_##vnform,         \
                    kExpectedCount_NEON_##mnemonic##_##vdform##_##vnform,    \
                    kFormat##vdform,                                         \
                    kFormat##vnform)
3002 
// Expands to a Test2OpNEON(...) call for a two-operand NEON instruction.
//   mnemonic                MacroAssembler member under test; also used for
//                           the name string and the expected symbols.
//   vdform, vnform, vmform  Format suffixes, pasted onto `kFormat`.
//   input_d                 Passed through as-is (no length is supplied).
//   input_n, input_m        Input arrays; each is followed by its element
//                           count computed with sizeof, so both must be
//                           actual arrays at the expansion site.
// The name string is "<mnemonic>_<vdform>"; expected results are taken from
// kExpected_NEON_<mnemonic>_<vdform> and kExpectedCount_NEON_<mnemonic>_<vdform>.
3003 #define CALL_TEST_NEON_HELPER_2Op(                               \
3004     mnemonic, vdform, vnform, vmform, input_d, input_n, input_m) \
3005   Test2OpNEON(STRINGIFY(mnemonic) "_" STRINGIFY(vdform),         \
3006               &MacroAssembler::mnemonic,                         \
3007               input_d,                                           \
3008               input_n,                                           \
3009               (sizeof(input_n) / sizeof(input_n[0])),            \
3010               input_m,                                           \
3011               (sizeof(input_m) / sizeof(input_m[0])),            \
3012               kExpected_NEON_##mnemonic##_##vdform,              \
3013               kExpectedCount_NEON_##mnemonic##_##vdform,         \
3014               kFormat##vdform,                                   \
3015               kFormat##vnform,                                   \
3016               kFormat##vmform)
3017 
// Expands to a Test2OpImmNEON(...) call.
// The name string and the expected symbols carry a `_2OPIMM` suffix:
// "<mnemonic>_<vdform>_2OPIMM", kExpected_NEON_<mnemonic>_<vdform>_2OPIMM and
// kExpectedCount_NEON_<mnemonic>_<vdform>_2OPIMM.
// `input_n` and `input_m` are each followed by their sizeof-derived element
// count, so both must be actual arrays. Only the vd and vn formats are passed.
3018 #define CALL_TEST_NEON_HELPER_2OpImm(                                 \
3019     mnemonic, vdform, vnform, input_n, input_m)                       \
3020   Test2OpImmNEON(STRINGIFY(mnemonic) "_" STRINGIFY(vdform) "_2OPIMM", \
3021                  &MacroAssembler::mnemonic,                           \
3022                  input_n,                                             \
3023                  (sizeof(input_n) / sizeof(input_n[0])),              \
3024                  input_m,                                             \
3025                  (sizeof(input_m) / sizeof(input_m[0])),              \
3026                  kExpected_NEON_##mnemonic##_##vdform##_2OPIMM,       \
3027                  kExpectedCount_NEON_##mnemonic##_##vdform##_2OPIMM,  \
3028                  kFormat##vdform,                                     \
3029                  kFormat##vnform)
3030 
// Expands to a TestByElementNEON(...) call.
// All three forms take part in the naming: the name string is
// "<mnemonic>_<vdform>_<vnform>_<vmform>" and the expected results come from
// kExpected_NEON_<mnemonic>_<vdform>_<vnform>_<vmform> and the matching
// kExpectedCount_NEON_... symbol.
// `input_d` is passed without a length; `input_n`, `input_m` and `indices` are
// each followed by their sizeof-derived element count, so they must be actual
// arrays at the expansion site.
3031 #define CALL_TEST_NEON_HELPER_ByElement(                                  \
3032     mnemonic, vdform, vnform, vmform, input_d, input_n, input_m, indices) \
3033   TestByElementNEON(                                                      \
3034       STRINGIFY(mnemonic) "_" STRINGIFY(vdform) "_" STRINGIFY(            \
3035           vnform) "_" STRINGIFY(vmform),                                  \
3036       &MacroAssembler::mnemonic,                                          \
3037       input_d,                                                            \
3038       input_n,                                                            \
3039       (sizeof(input_n) / sizeof(input_n[0])),                             \
3040       input_m,                                                            \
3041       (sizeof(input_m) / sizeof(input_m[0])),                             \
3042       indices,                                                            \
3043       (sizeof(indices) / sizeof(indices[0])),                             \
3044       kExpected_NEON_##mnemonic##_##vdform##_##vnform##_##vmform,         \
3045       kExpectedCount_NEON_##mnemonic##_##vdform##_##vnform##_##vmform,    \
3046       kFormat##vdform,                                                    \
3047       kFormat##vnform,                                                    \
3048       kFormat##vmform)
3049 
// Variant of CALL_TEST_NEON_HELPER_ByElement: the TestByElementNEON(...) call
// it expands to is identical except for one extra trailing argument,
// `vm_subvector_count`, which is forwarded unchanged.
// Naming and expected-symbol construction match the ByElement helper
// ("<mnemonic>_<vdform>_<vnform>_<vmform>").
3050 #define CALL_TEST_NEON_HELPER_ByElement_Dot_Product(mnemonic,           \
3051                                                     vdform,             \
3052                                                     vnform,             \
3053                                                     vmform,             \
3054                                                     input_d,            \
3055                                                     input_n,            \
3056                                                     input_m,            \
3057                                                     indices,            \
3058                                                     vm_subvector_count) \
3059   TestByElementNEON(                                                    \
3060       STRINGIFY(mnemonic) "_" STRINGIFY(vdform) "_" STRINGIFY(          \
3061           vnform) "_" STRINGIFY(vmform),                                \
3062       &MacroAssembler::mnemonic,                                        \
3063       input_d,                                                          \
3064       input_n,                                                          \
3065       (sizeof(input_n) / sizeof(input_n[0])),                           \
3066       input_m,                                                          \
3067       (sizeof(input_m) / sizeof(input_m[0])),                           \
3068       indices,                                                          \
3069       (sizeof(indices) / sizeof(indices[0])),                           \
3070       kExpected_NEON_##mnemonic##_##vdform##_##vnform##_##vmform,       \
3071       kExpectedCount_NEON_##mnemonic##_##vdform##_##vnform##_##vmform,  \
3072       kFormat##vdform,                                                  \
3073       kFormat##vnform,                                                  \
3074       kFormat##vmform,                                                  \
3075       vm_subvector_count)
3076 
// Expands to a TestOpImmOpImmNEON(...) call.
// Unlike the other CALL_TEST_NEON_HELPER_* macros, the second argument of the
// call is the caller-supplied `helper`, not &MacroAssembler::<mnemonic>;
// `mnemonic` is only used for the name string ("<mnemonic>_<vdform>") and the
// kExpected_NEON_<mnemonic>_<vdform> / kExpectedCount_NEON_... symbols.
// `input_d` is passed without a length; `input_imm1`, `input_n` and
// `input_imm2` are each followed by their sizeof-derived element count, so
// they must be actual arrays at the expansion site.
3077 #define CALL_TEST_NEON_HELPER_OpImmOpImm(helper,                   \
3078                                          mnemonic,                 \
3079                                          vdform,                   \
3080                                          vnform,                   \
3081                                          input_d,                  \
3082                                          input_imm1,               \
3083                                          input_n,                  \
3084                                          input_imm2)               \
3085   TestOpImmOpImmNEON(STRINGIFY(mnemonic) "_" STRINGIFY(vdform),    \
3086                      helper,                                       \
3087                      input_d,                                      \
3088                      input_imm1,                                   \
3089                      (sizeof(input_imm1) / sizeof(input_imm1[0])), \
3090                      input_n,                                      \
3091                      (sizeof(input_n) / sizeof(input_n[0])),       \
3092                      input_imm2,                                   \
3093                      (sizeof(input_imm2) / sizeof(input_imm2[0])), \
3094                      kExpected_NEON_##mnemonic##_##vdform,         \
3095                      kExpectedCount_NEON_##mnemonic##_##vdform,    \
3096                      kFormat##vdform,                              \
3097                      kFormat##vnform)
3098 
// "Two registers, same format": forwards to CALL_TEST_NEON_HELPER_1Op using
// `variant` for both the destination and the source form.
3099 #define CALL_TEST_NEON_HELPER_2SAME(mnemonic, variant, input) \
3100   CALL_TEST_NEON_HELPER_1Op(mnemonic, variant, variant, input)
3101 
// Defines TEST(<mnemonic>_8B) and TEST(<mnemonic>_16B). Both run the 2SAME
// helper with the `kInput8bits<input>` inputs.
3102 #define DEFINE_TEST_NEON_2SAME_8B_16B(mnemonic, input)              \
3103   TEST(mnemonic##_8B) {                                             \
3104     CALL_TEST_NEON_HELPER_2SAME(mnemonic, 8B, kInput8bits##input);  \
3105   }                                                                 \
3106   TEST(mnemonic##_16B) {                                            \
3107     CALL_TEST_NEON_HELPER_2SAME(mnemonic, 16B, kInput8bits##input); \
3108   }
3109 
// Defines TEST(<mnemonic>_4H) and TEST(<mnemonic>_8H). Both run the 2SAME
// helper with the `kInput16bits<input>` inputs.
3110 #define DEFINE_TEST_NEON_2SAME_4H_8H(mnemonic, input)               \
3111   TEST(mnemonic##_4H) {                                             \
3112     CALL_TEST_NEON_HELPER_2SAME(mnemonic, 4H, kInput16bits##input); \
3113   }                                                                 \
3114   TEST(mnemonic##_8H) {                                             \
3115     CALL_TEST_NEON_HELPER_2SAME(mnemonic, 8H, kInput16bits##input); \
3116   }
3117 
// Defines TEST(<mnemonic>_2S) and TEST(<mnemonic>_4S). Both run the 2SAME
// helper with the `kInput32bits<input>` inputs.
3118 #define DEFINE_TEST_NEON_2SAME_2S_4S(mnemonic, input)               \
3119   TEST(mnemonic##_2S) {                                             \
3120     CALL_TEST_NEON_HELPER_2SAME(mnemonic, 2S, kInput32bits##input); \
3121   }                                                                 \
3122   TEST(mnemonic##_4S) {                                             \
3123     CALL_TEST_NEON_HELPER_2SAME(mnemonic, 4S, kInput32bits##input); \
3124   }
3125 
// Byte and halfword forms only: combines the 8B/16B and 4H/8H test groups.
3126 #define DEFINE_TEST_NEON_2SAME_BH(mnemonic, input) \
3127   DEFINE_TEST_NEON_2SAME_8B_16B(mnemonic, input)   \
3128   DEFINE_TEST_NEON_2SAME_4H_8H(mnemonic, input)
3129 
// Every 2SAME vector form except 2D: the BH group plus the 2S/4S group.
3130 #define DEFINE_TEST_NEON_2SAME_NO2D(mnemonic, input) \
3131   DEFINE_TEST_NEON_2SAME_BH(mnemonic, input)         \
3132   DEFINE_TEST_NEON_2SAME_2S_4S(mnemonic, input)
3133 
// Full set of 2SAME vector tests: everything from DEFINE_TEST_NEON_2SAME_NO2D
// plus TEST(<mnemonic>_2D), which uses the `kInput64bits<input>` inputs.
3134 #define DEFINE_TEST_NEON_2SAME(mnemonic, input)                     \
3135   DEFINE_TEST_NEON_2SAME_NO2D(mnemonic, input)                      \
3136   TEST(mnemonic##_2D) {                                             \
3137     CALL_TEST_NEON_HELPER_2SAME(mnemonic, 2D, kInput64bits##input); \
3138   }
// Word and doubleword forms only: the 2S/4S group plus TEST(<mnemonic>_2D),
// which uses the `kInput64bits<input>` inputs.
3139 #define DEFINE_TEST_NEON_2SAME_SD(mnemonic, input)                  \
3140   DEFINE_TEST_NEON_2SAME_2S_4S(mnemonic, input)                     \
3141   TEST(mnemonic##_2D) {                                             \
3142     CALL_TEST_NEON_HELPER_2SAME(mnemonic, 2D, kInput64bits##input); \
3143   }
3144 
// Floating-point 2SAME vector tests: TEST(<mnemonic>_2S) and
// TEST(<mnemonic>_4S) use `kInputFloat<input>`; TEST(<mnemonic>_2D) uses
// `kInputDouble<input>`.
3145 #define DEFINE_TEST_NEON_2SAME_FP(mnemonic, input)                  \
3146   TEST(mnemonic##_2S) {                                             \
3147     CALL_TEST_NEON_HELPER_2SAME(mnemonic, 2S, kInputFloat##input);  \
3148   }                                                                 \
3149   TEST(mnemonic##_4S) {                                             \
3150     CALL_TEST_NEON_HELPER_2SAME(mnemonic, 4S, kInputFloat##input);  \
3151   }                                                                 \
3152   TEST(mnemonic##_2D) {                                             \
3153     CALL_TEST_NEON_HELPER_2SAME(mnemonic, 2D, kInputDouble##input); \
3154   }
3155 
// Everything from DEFINE_TEST_NEON_2SAME_FP plus the half-precision vector
// forms: TEST(<mnemonic>_4H) and TEST(<mnemonic>_8H), both using
// `kInputFloat16<input>`.
3156 #define DEFINE_TEST_NEON_2SAME_FP_FP16(mnemonic, input)              \
3157   DEFINE_TEST_NEON_2SAME_FP(mnemonic, input)                         \
3158   TEST(mnemonic##_4H) {                                              \
3159     CALL_TEST_NEON_HELPER_2SAME(mnemonic, 4H, kInputFloat16##input); \
3160   }                                                                  \
3161   TEST(mnemonic##_8H) {                                              \
3162     CALL_TEST_NEON_HELPER_2SAME(mnemonic, 8H, kInputFloat16##input); \
3163   }
3164 
// Scalar floating-point 2SAME tests: TEST(<mnemonic>_H) uses
// `kInputFloat16<input>`, TEST(<mnemonic>_S) uses `kInputFloat<input>` and
// TEST(<mnemonic>_D) uses `kInputDouble<input>`.
3165 #define DEFINE_TEST_NEON_2SAME_FP_FP16_SCALAR(mnemonic, input)      \
3166   TEST(mnemonic##_H) {                                              \
3167     CALL_TEST_NEON_HELPER_2SAME(mnemonic, H, kInputFloat16##input); \
3168   }                                                                 \
3169   TEST(mnemonic##_S) {                                              \
3170     CALL_TEST_NEON_HELPER_2SAME(mnemonic, S, kInputFloat##input);   \
3171   }                                                                 \
3172   TEST(mnemonic##_D) {                                              \
3173     CALL_TEST_NEON_HELPER_2SAME(mnemonic, D, kInputDouble##input);  \
3174   }
3175 
// Defines TEST(<mnemonic>_B): scalar B form, `kInput8bits<input>` inputs.
3176 #define DEFINE_TEST_NEON_2SAME_SCALAR_B(mnemonic, input)          \
3177   TEST(mnemonic##_B) {                                            \
3178     CALL_TEST_NEON_HELPER_2SAME(mnemonic, B, kInput8bits##input); \
3179   }
// Defines TEST(<mnemonic>_H): scalar H form, `kInput16bits<input>` inputs.
3180 #define DEFINE_TEST_NEON_2SAME_SCALAR_H(mnemonic, input)           \
3181   TEST(mnemonic##_H) {                                             \
3182     CALL_TEST_NEON_HELPER_2SAME(mnemonic, H, kInput16bits##input); \
3183   }
// Defines TEST(<mnemonic>_S): scalar S form, `kInput32bits<input>` inputs.
3184 #define DEFINE_TEST_NEON_2SAME_SCALAR_S(mnemonic, input)           \
3185   TEST(mnemonic##_S) {                                             \
3186     CALL_TEST_NEON_HELPER_2SAME(mnemonic, S, kInput32bits##input); \
3187   }
// Defines TEST(<mnemonic>_D): scalar D form, `kInput64bits<input>` inputs.
3188 #define DEFINE_TEST_NEON_2SAME_SCALAR_D(mnemonic, input)           \
3189   TEST(mnemonic##_D) {                                             \
3190     CALL_TEST_NEON_HELPER_2SAME(mnemonic, D, kInput64bits##input); \
3191   }
3192 
// All four scalar 2SAME tests (B, H, S and D), built from the per-size macros.
3193 #define DEFINE_TEST_NEON_2SAME_SCALAR(mnemonic, input) \
3194   DEFINE_TEST_NEON_2SAME_SCALAR_B(mnemonic, input)     \
3195   DEFINE_TEST_NEON_2SAME_SCALAR_H(mnemonic, input)     \
3196   DEFINE_TEST_NEON_2SAME_SCALAR_S(mnemonic, input)     \
3197   DEFINE_TEST_NEON_2SAME_SCALAR_D(mnemonic, input)
3198 
// Scalar 2SAME tests for the S and D forms only.
3199 #define DEFINE_TEST_NEON_2SAME_SCALAR_SD(mnemonic, input) \
3200   DEFINE_TEST_NEON_2SAME_SCALAR_S(mnemonic, input)        \
3201   DEFINE_TEST_NEON_2SAME_SCALAR_D(mnemonic, input)
3202 
3203 
// Thin alias: forwards all arguments unchanged to
// CALL_TEST_NEON_HELPER_1OpAcross.
3204 #define CALL_TEST_NEON_HELPER_ACROSS(mnemonic, vd_form, vn_form, input_n) \
3205   CALL_TEST_NEON_HELPER_1OpAcross(mnemonic, vd_form, vn_form, input_n)
3206 
// Defines the "across" tests where the scalar destination has the same
// element size as the source vector. Tests are named
// <mnemonic>_<vd>_<vn>:
//   B_8B, B_16B  -> `kInput8bits<input>`
//   H_4H, H_8H   -> `kInput16bits<input>`
//   S_4S         -> `kInput32bits<input>`
3207 #define DEFINE_TEST_NEON_ACROSS(mnemonic, input)                        \
3208   TEST(mnemonic##_B_8B) {                                               \
3209     CALL_TEST_NEON_HELPER_ACROSS(mnemonic, B, 8B, kInput8bits##input);  \
3210   }                                                                     \
3211   TEST(mnemonic##_B_16B) {                                              \
3212     CALL_TEST_NEON_HELPER_ACROSS(mnemonic, B, 16B, kInput8bits##input); \
3213   }                                                                     \
3214   TEST(mnemonic##_H_4H) {                                               \
3215     CALL_TEST_NEON_HELPER_ACROSS(mnemonic, H, 4H, kInput16bits##input); \
3216   }                                                                     \
3217   TEST(mnemonic##_H_8H) {                                               \
3218     CALL_TEST_NEON_HELPER_ACROSS(mnemonic, H, 8H, kInput16bits##input); \
3219   }                                                                     \
3220   TEST(mnemonic##_S_4S) {                                               \
3221     CALL_TEST_NEON_HELPER_ACROSS(mnemonic, S, 4S, kInput32bits##input); \
3222   }
3223 
// Defines the "across, long" tests, where the scalar destination form is one
// size up from the source elements. Tests are named <mnemonic>_<vd>_<vn>:
//   H_8B, H_16B  -> `kInput8bits<input>`
//   S_4H, S_8H   -> `kInput16bits<input>`
//   D_4S         -> `kInput32bits<input>`
// The input width follows the source (vn) form, not the destination.
3224 #define DEFINE_TEST_NEON_ACROSS_LONG(mnemonic, input)                   \
3225   TEST(mnemonic##_H_8B) {                                               \
3226     CALL_TEST_NEON_HELPER_ACROSS(mnemonic, H, 8B, kInput8bits##input);  \
3227   }                                                                     \
3228   TEST(mnemonic##_H_16B) {                                              \
3229     CALL_TEST_NEON_HELPER_ACROSS(mnemonic, H, 16B, kInput8bits##input); \
3230   }                                                                     \
3231   TEST(mnemonic##_S_4H) {                                               \
3232     CALL_TEST_NEON_HELPER_ACROSS(mnemonic, S, 4H, kInput16bits##input); \
3233   }                                                                     \
3234   TEST(mnemonic##_S_8H) {                                               \
3235     CALL_TEST_NEON_HELPER_ACROSS(mnemonic, S, 8H, kInput16bits##input); \
3236   }                                                                     \
3237   TEST(mnemonic##_D_4S) {                                               \
3238     CALL_TEST_NEON_HELPER_ACROSS(mnemonic, D, 4S, kInput32bits##input); \
3239   }
3240 
// Floating-point "across" tests, named <mnemonic>_<vd>_<vn>:
//   H_4H, H_8H  -> `kInputFloat16<input>`
//   S_4S        -> `kInputFloat<input>`
3241 #define DEFINE_TEST_NEON_ACROSS_FP(mnemonic, input)                      \
3242   TEST(mnemonic##_H_4H) {                                                \
3243     CALL_TEST_NEON_HELPER_ACROSS(mnemonic, H, 4H, kInputFloat16##input); \
3244   }                                                                      \
3245   TEST(mnemonic##_H_8H) {                                                \
3246     CALL_TEST_NEON_HELPER_ACROSS(mnemonic, H, 8H, kInputFloat16##input); \
3247   }                                                                      \
3248   TEST(mnemonic##_S_4S) {                                                \
3249     CALL_TEST_NEON_HELPER_ACROSS(mnemonic, S, 4S, kInputFloat##input);   \
3250   }
3251 
// "Two registers, different formats": forwards all arguments unchanged to
// CALL_TEST_NEON_HELPER_1Op, so the test name and expected symbols are keyed
// on the destination form (`vdform`) only.
3252 #define CALL_TEST_NEON_HELPER_2DIFF(mnemonic, vdform, vnform, input_n) \
3253   CALL_TEST_NEON_HELPER_1Op(mnemonic, vdform, vnform, input_n)
3254 
// Defines the lengthening 2DIFF tests. Each test is named after its
// destination form; inputs are sized for the (narrower) source form:
//   _4H <- 8B,  _8H <- 16B  : `kInput8bits<input>`
//   _2S <- 4H,  _4S <- 8H   : `kInput16bits<input>`
//   _1D <- 2S,  _2D <- 4S   : `kInput32bits<input>`
3255 #define DEFINE_TEST_NEON_2DIFF_LONG(mnemonic, input)                    \
3256   TEST(mnemonic##_4H) {                                                 \
3257     CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 4H, 8B, kInput8bits##input);  \
3258   }                                                                     \
3259   TEST(mnemonic##_8H) {                                                 \
3260     CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 8H, 16B, kInput8bits##input); \
3261   }                                                                     \
3262   TEST(mnemonic##_2S) {                                                 \
3263     CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 2S, 4H, kInput16bits##input); \
3264   }                                                                     \
3265   TEST(mnemonic##_4S) {                                                 \
3266     CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 4S, 8H, kInput16bits##input); \
3267   }                                                                     \
3268   TEST(mnemonic##_1D) {                                                 \
3269     CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 1D, 2S, kInput32bits##input); \
3270   }                                                                     \
3271   TEST(mnemonic##_2D) {                                                 \
3272     CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 2D, 4S, kInput32bits##input); \
3273   }
3274 
// Defines the narrowing 2DIFF tests for both `<mnemonic>` and its
// `<mnemonic>2` counterpart (the name is formed by pasting a literal `2`).
// Each test is named after its destination form; inputs are sized for the
// (wider) source form:
//   <mnemonic>_8B  <- 8H,  <mnemonic>2_16B <- 8H : `kInput16bits<input>`
//   <mnemonic>_4H  <- 4S,  <mnemonic>2_8H  <- 4S : `kInput32bits<input>`
//   <mnemonic>_2S  <- 2D,  <mnemonic>2_4S  <- 2D : `kInput64bits<input>`
3275 #define DEFINE_TEST_NEON_2DIFF_NARROW(mnemonic, input)                      \
3276   TEST(mnemonic##_8B) {                                                     \
3277     CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 8B, 8H, kInput16bits##input);     \
3278   }                                                                         \
3279   TEST(mnemonic##_4H) {                                                     \
3280     CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 4H, 4S, kInput32bits##input);     \
3281   }                                                                         \
3282   TEST(mnemonic##_2S) {                                                     \
3283     CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 2S, 2D, kInput64bits##input);     \
3284   }                                                                         \
3285   TEST(mnemonic##2_16B) {                                                   \
3286     CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 16B, 8H, kInput16bits##input); \
3287   }                                                                         \
3288   TEST(mnemonic##2_8H) {                                                    \
3289     CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 8H, 4S, kInput32bits##input);  \
3290   }                                                                         \
3291   TEST(mnemonic##2_4S) {                                                    \
3292     CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 4S, 2D, kInput64bits##input);  \
3293   }
3294 
// Floating-point lengthening 2DIFF tests for `<mnemonic>` and `<mnemonic>2`.
// Each test is named after its destination form:
//   <mnemonic>_4S  <- 4H,  <mnemonic>2_4S <- 8H : `kInputFloat16<input>`
//   <mnemonic>_2D  <- 2S,  <mnemonic>2_2D <- 4S : `kInputFloat<input>`
3295 #define DEFINE_TEST_NEON_2DIFF_FP_LONG(mnemonic, input)                     \
3296   TEST(mnemonic##_4S) {                                                     \
3297     CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 4S, 4H, kInputFloat16##input);    \
3298   }                                                                         \
3299   TEST(mnemonic##_2D) {                                                     \
3300     CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 2D, 2S, kInputFloat##input);      \
3301   }                                                                         \
3302   TEST(mnemonic##2_4S) {                                                    \
3303     CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 4S, 8H, kInputFloat16##input); \
3304   }                                                                         \
3305   TEST(mnemonic##2_2D) {                                                    \
3306     CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 2D, 4S, kInputFloat##input);   \
3307   }
3308 
// Floating-point narrowing 2DIFF tests for `<mnemonic>` and `<mnemonic>2`.
// Each test is named after its destination form:
//   <mnemonic>_4H  <- 4S,  <mnemonic>2_8H <- 4S : `kInputFloat<input>`
//   <mnemonic>_2S  <- 2D,  <mnemonic>2_4S <- 2D : `kInputDouble<input>`
3309 #define DEFINE_TEST_NEON_2DIFF_FP_NARROW(mnemonic, input)                  \
3310   TEST(mnemonic##_4H) {                                                    \
3311     CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 4H, 4S, kInputFloat##input);     \
3312   }                                                                        \
3313   TEST(mnemonic##_2S) {                                                    \
3314     CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 2S, 2D, kInputDouble##input);    \
3315   }                                                                        \
3316   TEST(mnemonic##2_8H) {                                                   \
3317     CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 8H, 4S, kInputFloat##input);  \
3318   }                                                                        \
3319   TEST(mnemonic##2_4S) {                                                   \
3320     CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 4S, 2D, kInputDouble##input); \
3321   }
3322 
// Subset of DEFINE_TEST_NEON_2DIFF_FP_NARROW covering only the double-to-single
// forms: <mnemonic>_2S <- 2D and <mnemonic>2_4S <- 2D, both using
// `kInputDouble<input>`.
3323 #define DEFINE_TEST_NEON_2DIFF_FP_NARROW_2S(mnemonic, input)               \
3324   TEST(mnemonic##_2S) {                                                    \
3325     CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 2S, 2D, kInputDouble##input);    \
3326   }                                                                        \
3327   TEST(mnemonic##2_4S) {                                                   \
3328     CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 4S, 2D, kInputDouble##input); \
3329   }
3330 
// Scalar narrowing 2DIFF tests, named after the destination form:
//   _B <- H : `kInput16bits<input>`
//   _H <- S : `kInput32bits<input>`
//   _S <- D : `kInput64bits<input>`
3331 #define DEFINE_TEST_NEON_2DIFF_SCALAR_NARROW(mnemonic, input)         \
3332   TEST(mnemonic##_B) {                                                \
3333     CALL_TEST_NEON_HELPER_2DIFF(mnemonic, B, H, kInput16bits##input); \
3334   }                                                                   \
3335   TEST(mnemonic##_H) {                                                \
3336     CALL_TEST_NEON_HELPER_2DIFF(mnemonic, H, S, kInput32bits##input); \
3337   }                                                                   \
3338   TEST(mnemonic##_S) {                                                \
3339     CALL_TEST_NEON_HELPER_2DIFF(mnemonic, S, D, kInput64bits##input); \
3340   }
3341 
// Floating-point 2DIFF tests with a scalar destination and a two-element
// vector source, named after the destination form:
//   _S <- 2S : `kInputFloat<input>`
//   _D <- 2D : `kInputDouble<input>`
//   _H <- 2H : `kInputFloat16<input>`
// Note: despite the `_SD` suffix in the macro name, an `_H` test is defined
// as well.
3342 #define DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(mnemonic, input)            \
3343   TEST(mnemonic##_S) {                                                  \
3344     CALL_TEST_NEON_HELPER_2DIFF(mnemonic, S, 2S, kInputFloat##input);   \
3345   }                                                                     \
3346   TEST(mnemonic##_D) {                                                  \
3347     CALL_TEST_NEON_HELPER_2DIFF(mnemonic, D, 2D, kInputDouble##input);  \
3348   }                                                                     \
3349   TEST(mnemonic##_H) {                                                  \
3350     CALL_TEST_NEON_HELPER_2DIFF(mnemonic, H, 2H, kInputFloat16##input); \
3351   }
3352 
// "Three registers, same format": forwards to CALL_TEST_NEON_HELPER_2Op using
// `variant` for all three forms and `input_nm` for both source operands.
// `input_d` is passed as the destination input.
// The expansion is a braced block that already contains the terminating `;`,
// so the `;` written after an invocation is just an empty statement.
3353 #define CALL_TEST_NEON_HELPER_3SAME(mnemonic, variant, input_d, input_nm) \
3354   {                                                                       \
3355     CALL_TEST_NEON_HELPER_2Op(mnemonic,                                   \
3356                               variant,                                    \
3357                               variant,                                    \
3358                               variant,                                    \
3359                               input_d,                                    \
3360                               input_nm,                                   \
3361                               input_nm);                                  \
3362   }
3363 
// Defines TEST(<mnemonic>_8B) and TEST(<mnemonic>_16B). Both run the 3SAME
// helper with kInput8bitsAccDestination as the destination input and
// `kInput8bits<input>` as the source inputs.
3364 #define DEFINE_TEST_NEON_3SAME_8B_16B(mnemonic, input)     \
3365   TEST(mnemonic##_8B) {                                    \
3366     CALL_TEST_NEON_HELPER_3SAME(mnemonic,                  \
3367                                 8B,                        \
3368                                 kInput8bitsAccDestination, \
3369                                 kInput8bits##input);       \
3370   }                                                        \
3371   TEST(mnemonic##_16B) {                                   \
3372     CALL_TEST_NEON_HELPER_3SAME(mnemonic,                  \
3373                                 16B,                       \
3374                                 kInput8bitsAccDestination, \
3375                                 kInput8bits##input);       \
3376   }
3377 
// Defines the halfword and word 3SAME vector tests:
//   _4H, _8H : kInput16bitsAccDestination + `kInput16bits<input>`
//   _2S, _4S : kInput32bitsAccDestination + `kInput32bits<input>`
3378 #define DEFINE_TEST_NEON_3SAME_HS(mnemonic, input)          \
3379   TEST(mnemonic##_4H) {                                     \
3380     CALL_TEST_NEON_HELPER_3SAME(mnemonic,                   \
3381                                 4H,                         \
3382                                 kInput16bitsAccDestination, \
3383                                 kInput16bits##input);       \
3384   }                                                         \
3385   TEST(mnemonic##_8H) {                                     \
3386     CALL_TEST_NEON_HELPER_3SAME(mnemonic,                   \
3387                                 8H,                         \
3388                                 kInput16bitsAccDestination, \
3389                                 kInput16bits##input);       \
3390   }                                                         \
3391   TEST(mnemonic##_2S) {                                     \
3392     CALL_TEST_NEON_HELPER_3SAME(mnemonic,                   \
3393                                 2S,                         \
3394                                 kInput32bitsAccDestination, \
3395                                 kInput32bits##input);       \
3396   }                                                         \
3397   TEST(mnemonic##_4S) {                                     \
3398     CALL_TEST_NEON_HELPER_3SAME(mnemonic,                   \
3399                                 4S,                         \
3400                                 kInput32bitsAccDestination, \
3401                                 kInput32bits##input);       \
3402   }
3403 
// Every 3SAME vector form except 2D: the 8B/16B group plus the HS group.
3404 #define DEFINE_TEST_NEON_3SAME_NO2D(mnemonic, input) \
3405   DEFINE_TEST_NEON_3SAME_8B_16B(mnemonic, input)     \
3406   DEFINE_TEST_NEON_3SAME_HS(mnemonic, input)
3407 
// Full set of 3SAME vector tests: everything from DEFINE_TEST_NEON_3SAME_NO2D
// plus TEST(<mnemonic>_2D), which uses kInput64bitsAccDestination and
// `kInput64bits<input>`.
3408 #define DEFINE_TEST_NEON_3SAME(mnemonic, input)             \
3409   DEFINE_TEST_NEON_3SAME_NO2D(mnemonic, input)              \
3410   TEST(mnemonic##_2D) {                                     \
3411     CALL_TEST_NEON_HELPER_3SAME(mnemonic,                   \
3412                                 2D,                         \
3413                                 kInput64bitsAccDestination, \
3414                                 kInput64bits##input);       \
3415   }
3416 
// Floating-point 3SAME vector tests:
//   _4H, _8H : kInputFloat16AccDestination + `kInputFloat16<input>`
//   _2S, _4S : kInputFloatAccDestination   + `kInputFloat<input>`
//   _2D      : kInputDoubleAccDestination  + `kInputDouble<input>`
3417 #define DEFINE_TEST_NEON_3SAME_FP(mnemonic, input)           \
3418   TEST(mnemonic##_4H) {                                      \
3419     CALL_TEST_NEON_HELPER_3SAME(mnemonic,                    \
3420                                 4H,                          \
3421                                 kInputFloat16AccDestination, \
3422                                 kInputFloat16##input);       \
3423   }                                                          \
3424   TEST(mnemonic##_8H) {                                      \
3425     CALL_TEST_NEON_HELPER_3SAME(mnemonic,                    \
3426                                 8H,                          \
3427                                 kInputFloat16AccDestination, \
3428                                 kInputFloat16##input);       \
3429   }                                                          \
3430   TEST(mnemonic##_2S) {                                      \
3431     CALL_TEST_NEON_HELPER_3SAME(mnemonic,                    \
3432                                 2S,                          \
3433                                 kInputFloatAccDestination,   \
3434                                 kInputFloat##input);         \
3435   }                                                          \
3436   TEST(mnemonic##_4S) {                                      \
3437     CALL_TEST_NEON_HELPER_3SAME(mnemonic,                    \
3438                                 4S,                          \
3439                                 kInputFloatAccDestination,   \
3440                                 kInputFloat##input);         \
3441   }                                                          \
3442   TEST(mnemonic##_2D) {                                      \
3443     CALL_TEST_NEON_HELPER_3SAME(mnemonic,                    \
3444                                 2D,                          \
3445                                 kInputDoubleAccDestination,  \
3446                                 kInputDouble##input);        \
3447   }
3448 
// Defines TEST(<mnemonic>_D): scalar D form of the 3SAME helper, using
// kInput64bitsAccDestination and `kInput64bits<input>`.
3449 #define DEFINE_TEST_NEON_3SAME_SCALAR_D(mnemonic, input)    \
3450   TEST(mnemonic##_D) {                                      \
3451     CALL_TEST_NEON_HELPER_3SAME(mnemonic,                   \
3452                                 D,                          \
3453                                 kInput64bitsAccDestination, \
3454                                 kInput64bits##input);       \
3455   }
3456 
// Defines the scalar H and S 3SAME tests:
//   _H : kInput16bitsAccDestination + `kInput16bits<input>`
//   _S : kInput32bitsAccDestination + `kInput32bits<input>`
3457 #define DEFINE_TEST_NEON_3SAME_SCALAR_HS(mnemonic, input)   \
3458   TEST(mnemonic##_H) {                                      \
3459     CALL_TEST_NEON_HELPER_3SAME(mnemonic,                   \
3460                                 H,                          \
3461                                 kInput16bitsAccDestination, \
3462                                 kInput16bits##input);       \
3463   }                                                         \
3464   TEST(mnemonic##_S) {                                      \
3465     CALL_TEST_NEON_HELPER_3SAME(mnemonic,                   \
3466                                 S,                          \
3467                                 kInput32bitsAccDestination, \
3468                                 kInput32bits##input);       \
3469   }
3470 
// Defines all four scalar 3SAME tests:
//   _B : kInput8bitsAccDestination  + `kInput8bits<input>`
//   _H : kInput16bitsAccDestination + `kInput16bits<input>`
//   _S : kInput32bitsAccDestination + `kInput32bits<input>`
//   _D : kInput64bitsAccDestination + `kInput64bits<input>`
// The _H/_S and _D bodies repeat those of DEFINE_TEST_NEON_3SAME_SCALAR_HS and
// DEFINE_TEST_NEON_3SAME_SCALAR_D, so this macro must not be combined with
// either of them for the same mnemonic.
3471 #define DEFINE_TEST_NEON_3SAME_SCALAR(mnemonic, input)      \
3472   TEST(mnemonic##_B) {                                      \
3473     CALL_TEST_NEON_HELPER_3SAME(mnemonic,                   \
3474                                 B,                          \
3475                                 kInput8bitsAccDestination,  \
3476                                 kInput8bits##input);        \
3477   }                                                         \
3478   TEST(mnemonic##_H) {                                      \
3479     CALL_TEST_NEON_HELPER_3SAME(mnemonic,                   \
3480                                 H,                          \
3481                                 kInput16bitsAccDestination, \
3482                                 kInput16bits##input);       \
3483   }                                                         \
3484   TEST(mnemonic##_S) {                                      \
3485     CALL_TEST_NEON_HELPER_3SAME(mnemonic,                   \
3486                                 S,                          \
3487                                 kInput32bitsAccDestination, \
3488                                 kInput32bits##input);       \
3489   }                                                         \
3490   TEST(mnemonic##_D) {                                      \
3491     CALL_TEST_NEON_HELPER_3SAME(mnemonic,                   \
3492                                 D,                          \
3493                                 kInput64bitsAccDestination, \
3494                                 kInput64bits##input);       \
3495   }
3496 
// Defines the floating-point scalar "three same" tests for `mnemonic`:
// TEST(<mnemonic>_H), TEST(<mnemonic>_S) and TEST(<mnemonic>_D).
//
// The H test passes kInputFloat16AccDestination / kInputFloat16<input>, the S
// test passes kInputFloatAccDestination / kInputFloat<input>, and the D test
// passes kInputDoubleAccDestination / kInputDouble<input> to
// CALL_TEST_NEON_HELPER_3SAME.
#define DEFINE_TEST_NEON_3SAME_FP_SCALAR(mnemonic, input) \
  TEST(mnemonic##_H) { \
    CALL_TEST_NEON_HELPER_3SAME( \
        mnemonic, H, kInputFloat16AccDestination, kInputFloat16##input); \
  } \
  TEST(mnemonic##_S) { \
    CALL_TEST_NEON_HELPER_3SAME( \
        mnemonic, S, kInputFloatAccDestination, kInputFloat##input); \
  } \
  TEST(mnemonic##_D) { \
    CALL_TEST_NEON_HELPER_3SAME( \
        mnemonic, D, kInputDoubleAccDestination, kInputDouble##input); \
  }
3516 
// Defines TEST(<mnemonic>_2S) and TEST(<mnemonic>_4S), which run
// CALL_TEST_NEON_HELPER_3DIFF with forms (2S, 2H, 2H) and (4S, 4H, 4H).
//
// The source lists are kInputFloat16<input_n> and kInputFloat16<input_m>.
// NOTE: `input_d` is accepted but not referenced; the destination list is
// always kInputFloatAccDestination.
#define DEFINE_TEST_NEON_FHM(mnemonic, input_d, input_n, input_m) \
  TEST(mnemonic##_2S) { \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic, 2S, 2H, 2H, \
                                kInputFloatAccDestination, \
                                kInputFloat16##input_n, \
                                kInputFloat16##input_m); \
  } \
  TEST(mnemonic##_4S) { \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic, 4S, 4H, 4H, \
                                kInputFloatAccDestination, \
                                kInputFloat16##input_n, \
                                kInputFloat16##input_m); \
  }
3536 
// Braced wrapper that forwards its seven arguments, unchanged and in order
// (mnemonic, destination form, two source forms, destination input list, two
// source input lists), to CALL_TEST_NEON_HELPER_2Op.
#define CALL_TEST_NEON_HELPER_3DIFF( \
    mnemonic, vdform, vnform, vmform, input_d, input_n, input_m) \
  { \
    CALL_TEST_NEON_HELPER_2Op( \
        mnemonic, vdform, vnform, vmform, input_d, input_n, input_m); \
  }
3548 
// Long "three different" tests with an 8H destination.
//   TEST(<mnemonic>_8H)  : <mnemonic>   with forms (8H, 8B, 8B)
//   TEST(<mnemonic>2_8H) : <mnemonic>2  with forms (8H, 16B, 16B)
// Both pass kInput16bitsAccDestination as the destination list and
// kInput8bits<input> for both source lists.
#define DEFINE_TEST_NEON_3DIFF_LONG_8H(mnemonic, input) \
  TEST(mnemonic##_8H) { \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic, 8H, 8B, 8B, \
                                kInput16bitsAccDestination, \
                                kInput8bits##input, \
                                kInput8bits##input); \
  } \
  TEST(mnemonic##2_8H) { \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, 8H, 16B, 16B, \
                                kInput16bitsAccDestination, \
                                kInput8bits##input, \
                                kInput8bits##input); \
  }
3568 
// Long "three different" tests with a 4S destination.
//   TEST(<mnemonic>_4S)  : <mnemonic>   with forms (4S, 4H, 4H)
//   TEST(<mnemonic>2_4S) : <mnemonic>2  with forms (4S, 8H, 8H)
// Both pass kInput32bitsAccDestination as the destination list and
// kInput16bits<input> for both source lists.
#define DEFINE_TEST_NEON_3DIFF_LONG_4S(mnemonic, input) \
  TEST(mnemonic##_4S) { \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic, 4S, 4H, 4H, \
                                kInput32bitsAccDestination, \
                                kInput16bits##input, \
                                kInput16bits##input); \
  } \
  TEST(mnemonic##2_4S) { \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, 4S, 8H, 8H, \
                                kInput32bitsAccDestination, \
                                kInput16bits##input, \
                                kInput16bits##input); \
  }
3588 
// Long "three different" tests with a 2D destination.
//   TEST(<mnemonic>_2D)  : <mnemonic>   with forms (2D, 2S, 2S)
//   TEST(<mnemonic>2_2D) : <mnemonic>2  with forms (2D, 4S, 4S)
// Both pass kInput64bitsAccDestination as the destination list and
// kInput32bits<input> for both source lists.
#define DEFINE_TEST_NEON_3DIFF_LONG_2D(mnemonic, input) \
  TEST(mnemonic##_2D) { \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic, 2D, 2S, 2S, \
                                kInput64bitsAccDestination, \
                                kInput32bits##input, \
                                kInput32bits##input); \
  } \
  TEST(mnemonic##2_2D) { \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, 2D, 4S, 4S, \
                                kInput64bitsAccDestination, \
                                kInput32bits##input, \
                                kInput32bits##input); \
  }
3608 
// Long "three different" tests for the 4S and 2D destinations only: expands
// to DEFINE_TEST_NEON_3DIFF_LONG_4S followed by
// DEFINE_TEST_NEON_3DIFF_LONG_2D, both with the same (mnemonic, input).
#define DEFINE_TEST_NEON_3DIFF_LONG_SD(mnemonic, input) \
  DEFINE_TEST_NEON_3DIFF_LONG_4S(mnemonic, input) \
  DEFINE_TEST_NEON_3DIFF_LONG_2D(mnemonic, input)
3612 
// Full set of long "three different" tests: expands, in order, to the 8H, 4S
// and 2D variants (DEFINE_TEST_NEON_3DIFF_LONG_8H/_4S/_2D), each with the
// same (mnemonic, input).
#define DEFINE_TEST_NEON_3DIFF_LONG(mnemonic, input) \
  DEFINE_TEST_NEON_3DIFF_LONG_8H(mnemonic, input) \
  DEFINE_TEST_NEON_3DIFF_LONG_4S(mnemonic, input) \
  DEFINE_TEST_NEON_3DIFF_LONG_2D(mnemonic, input)
3617 
// Scalar long test with an S destination: defines TEST(<mnemonic>_S), which
// runs CALL_TEST_NEON_HELPER_3DIFF with forms (S, H, H),
// kInput32bitsAccDestination as the destination list and
// kInput16bits<input> for both source lists.
#define DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_S(mnemonic, input) \
  TEST(mnemonic##_S) { \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic, S, H, H, \
                                kInput32bitsAccDestination, \
                                kInput16bits##input, \
                                kInput16bits##input); \
  }
3628 
// Scalar long test with a D destination: defines TEST(<mnemonic>_D), which
// runs CALL_TEST_NEON_HELPER_3DIFF with forms (D, S, S),
// kInput64bitsAccDestination as the destination list and
// kInput32bits<input> for both source lists.
#define DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_D(mnemonic, input) \
  TEST(mnemonic##_D) { \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic, D, S, S, \
                                kInput64bitsAccDestination, \
                                kInput32bits##input, \
                                kInput32bits##input); \
  }
3639 
// Both scalar long tests: expands to DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_S
// followed by DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_D with the same
// (mnemonic, input).
#define DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_SD(mnemonic, input) \
  DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_S(mnemonic, input) \
  DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_D(mnemonic, input)
3643 
// Wide "three different" tests: the first source has the same form as the
// destination, the second source has narrower lanes.
//
//   test name          mnemonic      vd  vn  vm   dest list / vn list / vm list
//   <mnemonic>_8H      <mnemonic>    8H  8H  8B   16-bit Acc / 16-bit / 8-bit
//   <mnemonic>_4S      <mnemonic>    4S  4S  4H   32-bit Acc / 32-bit / 16-bit
//   <mnemonic>_2D      <mnemonic>    2D  2D  2S   64-bit Acc / 64-bit / 32-bit
//   <mnemonic>2_8H     <mnemonic>2   8H  8H  16B  16-bit Acc / 16-bit / 8-bit
//   <mnemonic>2_4S     <mnemonic>2   4S  4S  8H   32-bit Acc / 32-bit / 16-bit
//   <mnemonic>2_2D     <mnemonic>2   2D  2D  4S   64-bit Acc / 64-bit / 32-bit
//
// "N-bit Acc" is kInput<N>bitsAccDestination; the source lists are
// kInput<N>bits<input>.
#define DEFINE_TEST_NEON_3DIFF_WIDE(mnemonic, input) \
  TEST(mnemonic##_8H) { \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic, 8H, 8H, 8B, \
                                kInput16bitsAccDestination, \
                                kInput16bits##input, \
                                kInput8bits##input); \
  } \
  TEST(mnemonic##_4S) { \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic, 4S, 4S, 4H, \
                                kInput32bitsAccDestination, \
                                kInput32bits##input, \
                                kInput16bits##input); \
  } \
  TEST(mnemonic##_2D) { \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic, 2D, 2D, 2S, \
                                kInput64bitsAccDestination, \
                                kInput64bits##input, \
                                kInput32bits##input); \
  } \
  TEST(mnemonic##2_8H) { \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, 8H, 8H, 16B, \
                                kInput16bitsAccDestination, \
                                kInput16bits##input, \
                                kInput8bits##input); \
  } \
  TEST(mnemonic##2_4S) { \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, 4S, 4S, 8H, \
                                kInput32bitsAccDestination, \
                                kInput32bits##input, \
                                kInput16bits##input); \
  } \
  TEST(mnemonic##2_2D) { \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, 2D, 2D, 4S, \
                                kInput64bitsAccDestination, \
                                kInput64bits##input, \
                                kInput32bits##input); \
  }
3699 
// Narrowing "three different" tests: both sources share a form whose lanes
// are wider than the destination's.
//
//   test name          mnemonic      vd   vn  vm   dest list / source lists
//   <mnemonic>_8B      <mnemonic>    8B   8H  8H   8-bit Acc  / 16-bit
//   <mnemonic>_4H      <mnemonic>    4H   4S  4S   16-bit Acc / 32-bit
//   <mnemonic>_2S      <mnemonic>    2S   2D  2D   32-bit Acc / 64-bit
//   <mnemonic>2_16B    <mnemonic>2   16B  8H  8H   8-bit Acc  / 16-bit
//   <mnemonic>2_8H     <mnemonic>2   8H   4S  4S   16-bit Acc / 32-bit
//   <mnemonic>2_4S     <mnemonic>2   4S   2D  2D   32-bit Acc / 64-bit
//
// "N-bit Acc" is kInput<N>bitsAccDestination; the source lists are
// kInput<N>bits<input>, passed twice.
#define DEFINE_TEST_NEON_3DIFF_NARROW(mnemonic, input) \
  TEST(mnemonic##_8B) { \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic, 8B, 8H, 8H, \
                                kInput8bitsAccDestination, \
                                kInput16bits##input, \
                                kInput16bits##input); \
  } \
  TEST(mnemonic##_4H) { \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic, 4H, 4S, 4S, \
                                kInput16bitsAccDestination, \
                                kInput32bits##input, \
                                kInput32bits##input); \
  } \
  TEST(mnemonic##_2S) { \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic, 2S, 2D, 2D, \
                                kInput32bitsAccDestination, \
                                kInput64bits##input, \
                                kInput64bits##input); \
  } \
  TEST(mnemonic##2_16B) { \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, 16B, 8H, 8H, \
                                kInput8bitsAccDestination, \
                                kInput16bits##input, \
                                kInput16bits##input); \
  } \
  TEST(mnemonic##2_8H) { \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, 8H, 4S, 4S, \
                                kInput16bitsAccDestination, \
                                kInput32bits##input, \
                                kInput32bits##input); \
  } \
  TEST(mnemonic##2_4S) { \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, 4S, 2D, 2D, \
                                kInput32bitsAccDestination, \
                                kInput64bits##input, \
                                kInput64bits##input); \
  }
3755 
// "Double wide" tests: 32-bit destination lanes fed from 8-bit source lanes.
//   TEST(<mnemonic>_2S) : forms (2S, 8B, 8B)
//   TEST(<mnemonic>_4S) : forms (4S, 16B, 16B)
// Both pass kInput32bitsAccDestination as the destination list and
// kInput8bits<input> for both source lists to CALL_TEST_NEON_HELPER_3DIFF.
#define DEFINE_TEST_NEON_3DIFF_DOUBLE_WIDE(mnemonic, input) \
  TEST(mnemonic##_2S) { \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic, 2S, 8B, 8B, \
                                kInput32bitsAccDestination, \
                                kInput8bits##input, \
                                kInput8bits##input); \
  } \
  TEST(mnemonic##_4S) { \
    CALL_TEST_NEON_HELPER_3DIFF(mnemonic, 4S, 16B, 16B, \
                                kInput32bitsAccDestination, \
                                kInput8bits##input, \
                                kInput8bits##input); \
  }
3775 
3776 
// Braced wrapper that forwards its five arguments, unchanged and in order
// (mnemonic, destination form, source form, source input list, immediate
// list), to CALL_TEST_NEON_HELPER_2OpImm.
#define CALL_TEST_NEON_HELPER_2OPIMM( \
    mnemonic, vdform, vnform, input_n, input_imm) \
  { \
    CALL_TEST_NEON_HELPER_2OpImm( \
        mnemonic, vdform, vnform, input_n, input_imm); \
  }
3786 
// Two-operand-plus-immediate tests where source and destination share the
// same vector form. Defines TEST(<mnemonic>_<form>_2OPIMM) for each form in
// 8B, 16B, 4H, 8H, 2S, 4S, 2D.
//
// Each test passes kInput<N>bits<input> as the source list and
// kInput<N>bitsImm<input_imm> as the immediate list, where N is the lane
// width of the form (8 for B, 16 for H, 32 for S, 64 for D).
#define DEFINE_TEST_NEON_2OPIMM(mnemonic, input, input_imm) \
  TEST(mnemonic##_8B_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 8B, 8B, \
                                 kInput8bits##input, \
                                 kInput8bitsImm##input_imm); \
  } \
  TEST(mnemonic##_16B_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 16B, 16B, \
                                 kInput8bits##input, \
                                 kInput8bitsImm##input_imm); \
  } \
  TEST(mnemonic##_4H_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 4H, 4H, \
                                 kInput16bits##input, \
                                 kInput16bitsImm##input_imm); \
  } \
  TEST(mnemonic##_8H_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 8H, 8H, \
                                 kInput16bits##input, \
                                 kInput16bitsImm##input_imm); \
  } \
  TEST(mnemonic##_2S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 2S, 2S, \
                                 kInput32bits##input, \
                                 kInput32bitsImm##input_imm); \
  } \
  TEST(mnemonic##_4S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 4S, 4S, \
                                 kInput32bits##input, \
                                 kInput32bitsImm##input_imm); \
  } \
  TEST(mnemonic##_2D_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 2D, 2D, \
                                 kInput64bits##input, \
                                 kInput64bitsImm##input_imm); \
  }
3837 
// Two-operand-plus-immediate tests with a vector destination form and a
// scalar-lane source form. Defines TEST(<mnemonic>_<vd>_2OPIMM) for the
// (vd, vn) pairs (8B, B), (16B, B), (4H, H), (8H, H), (2S, S), (4S, S) and
// (2D, D).
//
// Each test passes kInput<N>bits<input> as the source list and
// kInput<N>bitsImm<input_imm> as the immediate list, where N is the lane
// width of the pair.
#define DEFINE_TEST_NEON_2OPIMM_COPY(mnemonic, input, input_imm) \
  TEST(mnemonic##_8B_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 8B, B, \
                                 kInput8bits##input, \
                                 kInput8bitsImm##input_imm); \
  } \
  TEST(mnemonic##_16B_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 16B, B, \
                                 kInput8bits##input, \
                                 kInput8bitsImm##input_imm); \
  } \
  TEST(mnemonic##_4H_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 4H, H, \
                                 kInput16bits##input, \
                                 kInput16bitsImm##input_imm); \
  } \
  TEST(mnemonic##_8H_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 8H, H, \
                                 kInput16bits##input, \
                                 kInput16bitsImm##input_imm); \
  } \
  TEST(mnemonic##_2S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 2S, S, \
                                 kInput32bits##input, \
                                 kInput32bitsImm##input_imm); \
  } \
  TEST(mnemonic##_4S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 4S, S, \
                                 kInput32bits##input, \
                                 kInput32bitsImm##input_imm); \
  } \
  TEST(mnemonic##_2D_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 2D, D, \
                                 kInput64bits##input, \
                                 kInput64bitsImm##input_imm); \
  }
3888 
// Narrowing two-operand-plus-immediate tests: the source form has lanes
// twice as wide as the destination's.
//
//   test name                 mnemonic      vd   vn   source list / imm list
//   <mnemonic>_8B_2OPIMM      <mnemonic>    8B   8H   16-bit / 8-bit Imm
//   <mnemonic>_4H_2OPIMM      <mnemonic>    4H   4S   32-bit / 16-bit Imm
//   <mnemonic>_2S_2OPIMM      <mnemonic>    2S   2D   64-bit / 32-bit Imm
//   <mnemonic>2_16B_2OPIMM    <mnemonic>2   16B  8H   16-bit / 8-bit Imm
//   <mnemonic>2_8H_2OPIMM     <mnemonic>2   8H   4S   32-bit / 16-bit Imm
//   <mnemonic>2_4S_2OPIMM     <mnemonic>2   4S   2D   64-bit / 32-bit Imm
//
// The source list is kInput<N>bits<input>; the immediate list is
// kInput<N>bitsImm<input_imm>.
#define DEFINE_TEST_NEON_2OPIMM_NARROW(mnemonic, input, input_imm) \
  TEST(mnemonic##_8B_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 8B, 8H, \
                                 kInput16bits##input, \
                                 kInput8bitsImm##input_imm); \
  } \
  TEST(mnemonic##_4H_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 4H, 4S, \
                                 kInput32bits##input, \
                                 kInput16bitsImm##input_imm); \
  } \
  TEST(mnemonic##_2S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 2S, 2D, \
                                 kInput64bits##input, \
                                 kInput32bitsImm##input_imm); \
  } \
  TEST(mnemonic##2_16B_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic##2, 16B, 8H, \
                                 kInput16bits##input, \
                                 kInput8bitsImm##input_imm); \
  } \
  TEST(mnemonic##2_8H_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic##2, 8H, 4S, \
                                 kInput32bits##input, \
                                 kInput16bitsImm##input_imm); \
  } \
  TEST(mnemonic##2_4S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic##2, 4S, 2D, \
                                 kInput64bits##input, \
                                 kInput32bitsImm##input_imm); \
  }
3932 
// Scalar narrowing two-operand-plus-immediate tests. Defines
// TEST(<mnemonic>_B_2OPIMM), TEST(<mnemonic>_H_2OPIMM) and
// TEST(<mnemonic>_S_2OPIMM) with (vd, vn) = (B, H), (H, S) and (S, D).
//
// The source list matches the source lane width (kInput16bits<input>,
// kInput32bits<input>, kInput64bits<input>); the immediate list matches the
// destination lane width (kInput8bitsImm<input_imm>,
// kInput16bitsImm<input_imm>, kInput32bitsImm<input_imm>).
#define DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(mnemonic, input, input_imm) \
  TEST(mnemonic##_B_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, B, H, \
                                 kInput16bits##input, \
                                 kInput8bitsImm##input_imm); \
  } \
  TEST(mnemonic##_H_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, H, S, \
                                 kInput32bits##input, \
                                 kInput16bitsImm##input_imm); \
  } \
  TEST(mnemonic##_S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, S, D, \
                                 kInput64bits##input, \
                                 kInput32bitsImm##input_imm); \
  }
3955 
// Floating-point two-operand-plus-immediate tests where every form shares
// the same immediate list, kInputDoubleImm<input_imm>. Defines
// TEST(<mnemonic>_<form>_2OPIMM) for the forms 4H, 8H, 2S, 4S and 2D, with
// source and destination using the same form.
//
// Source lists, selected by pasting `input`:
//   4H, 8H : kInputFloat16<input>
//   2S, 4S : kInputFloat<input>
//   2D     : kInputDouble<input>
//
// The 2S test previously hard-coded kInputFloatBasic and ignored `input`; it
// now honours `input` like the other forms. Invocations that pass `Basic`
// expand exactly as before.
#define DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(mnemonic, input, input_imm) \
  TEST(mnemonic##_4H_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 4H, 4H, \
                                 kInputFloat16##input, \
                                 kInputDoubleImm##input_imm); \
  } \
  TEST(mnemonic##_8H_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 8H, 8H, \
                                 kInputFloat16##input, \
                                 kInputDoubleImm##input_imm); \
  } \
  TEST(mnemonic##_2S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 2S, 2S, \
                                 kInputFloat##input, \
                                 kInputDoubleImm##input_imm); \
  } \
  TEST(mnemonic##_4S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 4S, 4S, \
                                 kInputFloat##input, \
                                 kInputDoubleImm##input_imm); \
  } \
  TEST(mnemonic##_2D_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 2D, 2D, \
                                 kInputDouble##input, \
                                 kInputDoubleImm##input_imm); \
  }
3992 
// Defines five TEST cases for a floating-point vector `mnemonic` that takes an
// immediate: <mnemonic>_{4H,8H,2S,4S,2D}_2OPIMM. Each body forwards to
// CALL_TEST_NEON_HELPER_2OPIMM, passing the same arrangement twice.
//   input:     suffix pasted onto kInputFloat16 (4H/8H), kInputFloat (4S) and
//              kInputDouble (2D) to name the operand array.
//   input_imm: suffix pasted onto kInput16bitsImm / kInput32bitsImm /
//              kInput64bitsImm to name the immediate array.
// NOTE(review): the 2S case pastes the fixed suffix `Basic` and therefore
// ignores `input` -- presumably deliberate; confirm before changing.
3993 #define DEFINE_TEST_NEON_2OPIMM_FP(mnemonic, input, input_imm) \
3994   TEST(mnemonic##_4H_2OPIMM) {                                 \
3995     CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                     \
3996                                  4H,                           \
3997                                  4H,                           \
3998                                  kInputFloat16##input,         \
3999                                  kInput16bitsImm##input_imm);  \
4000   }                                                            \
4001   TEST(mnemonic##_8H_2OPIMM) {                                 \
4002     CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                     \
4003                                  8H,                           \
4004                                  8H,                           \
4005                                  kInputFloat16##input,         \
4006                                  kInput16bitsImm##input_imm);  \
4007   }                                                            \
4008   TEST(mnemonic##_2S_2OPIMM) {                                 \
4009     CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                     \
4010                                  2S,                           \
4011                                  2S,                           \
4012                                  kInputFloat##Basic,           \
4013                                  kInput32bitsImm##input_imm);  \
4014   }                                                            \
4015   TEST(mnemonic##_4S_2OPIMM) {                                 \
4016     CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                     \
4017                                  4S,                           \
4018                                  4S,                           \
4019                                  kInputFloat##input,           \
4020                                  kInput32bitsImm##input_imm);  \
4021   }                                                            \
4022   TEST(mnemonic##_2D_2OPIMM) {                                 \
4023     CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                     \
4024                                  2D,                           \
4025                                  2D,                           \
4026                                  kInputDouble##input,          \
4027                                  kInput64bitsImm##input_imm);  \
4028   }
4029 
// Scalar counterpart of the floating-point 2OPIMM generator: defines
// <mnemonic>_{H,S,D}_2OPIMM, each forwarding to CALL_TEST_NEON_HELPER_2OPIMM
// with the same scalar form passed twice.
//   input:     suffix pasted onto kInputDouble for the D case only.
//   input_imm: suffix pasted onto kInput16bitsImm / kInput32bitsImm /
//              kInput64bitsImm to name the immediate array.
// NOTE(review): the H and S cases paste the fixed suffix `Basic`
// (kInputFloat16Basic / kInputFloatBasic) and ignore `input` -- presumably
// deliberate; confirm before changing.
4030 #define DEFINE_TEST_NEON_2OPIMM_FP_SCALAR(mnemonic, input, input_imm) \
4031   TEST(mnemonic##_H_2OPIMM) {                                         \
4032     CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                            \
4033                                  H,                                   \
4034                                  H,                                   \
4035                                  kInputFloat16##Basic,                \
4036                                  kInput16bitsImm##input_imm);         \
4037   }                                                                   \
4038   TEST(mnemonic##_S_2OPIMM) {                                         \
4039     CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                            \
4040                                  S,                                   \
4041                                  S,                                   \
4042                                  kInputFloat##Basic,                  \
4043                                  kInput32bitsImm##input_imm);         \
4044   }                                                                   \
4045   TEST(mnemonic##_D_2OPIMM) {                                         \
4046     CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                            \
4047                                  D,                                   \
4048                                  D,                                   \
4049                                  kInputDouble##input,                 \
4050                                  kInput64bitsImm##input_imm);         \
4051   }
4052 
// Defines five TEST cases for a vector `mnemonic` that takes an immediate,
// covering the H, S and D lane sizes: <mnemonic>_{4H,8H,2S,4S,2D}_2OPIMM.
// Each body forwards to CALL_TEST_NEON_HELPER_2OPIMM with the same
// arrangement passed twice.
//   input:     suffix pasted onto kInput16bits / kInput32bits / kInput64bits
//              (chosen by lane size) to name the operand array.
//   input_imm: suffix pasted onto kInput16bitsImm / kInput32bitsImm /
//              kInput64bitsImm to name the immediate array.
4053 #define DEFINE_TEST_NEON_2OPIMM_HSD(mnemonic, input, input_imm) \
4054   TEST(mnemonic##_4H_2OPIMM) {                                  \
4055     CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                      \
4056                                  4H,                            \
4057                                  4H,                            \
4058                                  kInput16bits##input,           \
4059                                  kInput16bitsImm##input_imm);   \
4060   }                                                             \
4061   TEST(mnemonic##_8H_2OPIMM) {                                  \
4062     CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                      \
4063                                  8H,                            \
4064                                  8H,                            \
4065                                  kInput16bits##input,           \
4066                                  kInput16bitsImm##input_imm);   \
4067   }                                                             \
4068   TEST(mnemonic##_2S_2OPIMM) {                                  \
4069     CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                      \
4070                                  2S,                            \
4071                                  2S,                            \
4072                                  kInput32bits##input,           \
4073                                  kInput32bitsImm##input_imm);   \
4074   }                                                             \
4075   TEST(mnemonic##_4S_2OPIMM) {                                  \
4076     CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                      \
4077                                  4S,                            \
4078                                  4S,                            \
4079                                  kInput32bits##input,           \
4080                                  kInput32bitsImm##input_imm);   \
4081   }                                                             \
4082   TEST(mnemonic##_2D_2OPIMM) {                                  \
4083     CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                      \
4084                                  2D,                            \
4085                                  2D,                            \
4086                                  kInput64bits##input,           \
4087                                  kInput64bitsImm##input_imm);   \
4088   }
4089 
// Defines the single TEST case <mnemonic>_D_2OPIMM, which forwards to
// CALL_TEST_NEON_HELPER_2OPIMM with the scalar D form passed twice.
//   input:     suffix pasted onto kInput64bits to name the operand array.
//   input_imm: suffix pasted onto kInput64bitsImm to name the immediate array.
// Also used as the final piece of DEFINE_TEST_NEON_2OPIMM_SCALAR_HSD.
4090 #define DEFINE_TEST_NEON_2OPIMM_SCALAR_D(mnemonic, input, input_imm) \
4091   TEST(mnemonic##_D_2OPIMM) {                                        \
4092     CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                           \
4093                                  D,                                  \
4094                                  D,                                  \
4095                                  kInput64bits##input,                \
4096                                  kInput64bitsImm##input_imm);        \
4097   }
4098 
// Defines the scalar TEST cases <mnemonic>_{H,S}_2OPIMM directly, then expands
// DEFINE_TEST_NEON_2OPIMM_SCALAR_D with the same arguments to add the D case.
//   input:     suffix pasted onto kInput16bits / kInput32bits (and, via the
//              delegated macro, kInput64bits) to name the operand array.
//   input_imm: suffix pasted onto the matching kInput<N>bitsImm array name.
4099 #define DEFINE_TEST_NEON_2OPIMM_SCALAR_HSD(mnemonic, input, input_imm) \
4100   TEST(mnemonic##_H_2OPIMM) {                                          \
4101     CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                             \
4102                                  H,                                    \
4103                                  H,                                    \
4104                                  kInput16bits##input,                  \
4105                                  kInput16bitsImm##input_imm);          \
4106   }                                                                    \
4107   TEST(mnemonic##_S_2OPIMM) {                                          \
4108     CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                             \
4109                                  S,                                    \
4110                                  S,                                    \
4111                                  kInput32bits##input,                  \
4112                                  kInput32bitsImm##input_imm);          \
4113   }                                                                    \
4114   DEFINE_TEST_NEON_2OPIMM_SCALAR_D(mnemonic, input, input_imm)
4115 
// Defines the single TEST case <mnemonic>_D_2OPIMM for a floating-point scalar
// operation, forwarding to CALL_TEST_NEON_HELPER_2OPIMM with the D form passed
// twice.
//   input:     suffix pasted onto kInputDouble to name the operand array.
//   input_imm: suffix pasted onto kInputDoubleImm to name the immediate array.
// Also used as the final piece of DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_HSD.
4116 #define DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_D(mnemonic, input, input_imm) \
4117   TEST(mnemonic##_D_2OPIMM) {                                           \
4118     CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                              \
4119                                  D,                                     \
4120                                  D,                                     \
4121                                  kInputDouble##input,                   \
4122                                  kInputDoubleImm##input_imm);           \
4123   }
4124 
// Defines the floating-point scalar TEST cases <mnemonic>_{H,S}_2OPIMM
// directly, then expands DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_D with the same
// arguments to add the D case.
//   input:     suffix pasted onto kInputFloat16 (H), kInputFloat (S) and, via
//              the delegated macro, kInputDouble (D).
//   input_imm: suffix pasted onto kInputDoubleImm; note that every lane size,
//              including H and S, draws its immediate from that one array.
4125 #define DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_HSD(mnemonic, input, input_imm) \
4126   TEST(mnemonic##_H_2OPIMM) {                                             \
4127     CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                                \
4128                                  H,                                       \
4129                                  H,                                       \
4130                                  kInputFloat16##input,                    \
4131                                  kInputDoubleImm##input_imm);             \
4132   }                                                                       \
4133   TEST(mnemonic##_S_2OPIMM) {                                             \
4134     CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                                \
4135                                  S,                                       \
4136                                  S,                                       \
4137                                  kInputFloat##input,                      \
4138                                  kInputDoubleImm##input_imm);             \
4139   }                                                                       \
4140   DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_D(mnemonic, input, input_imm)
4141 
// Defines the scalar TEST case <mnemonic>_B_2OPIMM directly, then expands
// DEFINE_TEST_NEON_2OPIMM_SCALAR_HSD with the same arguments, so that all of
// the B, H, S and D scalar forms are covered.
//   input:     suffix pasted onto kInput8bits here (wider sizes are handled by
//              the delegated macro).
//   input_imm: suffix pasted onto kInput8bitsImm here.
4142 #define DEFINE_TEST_NEON_2OPIMM_SCALAR(mnemonic, input, input_imm) \
4143   TEST(mnemonic##_B_2OPIMM) {                                      \
4144     CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                         \
4145                                  B,                                \
4146                                  B,                                \
4147                                  kInput8bits##input,               \
4148                                  kInput8bitsImm##input_imm);       \
4149   }                                                                \
4150   DEFINE_TEST_NEON_2OPIMM_SCALAR_HSD(mnemonic, input, input_imm)
4151 
// Defines six TEST cases for a widening ("long") operation with an immediate.
// Three use `mnemonic` itself with form pairs (8H, 8B), (4S, 4H), (2D, 2S);
// three use the token `mnemonic` with `2` appended, with form pairs
// (8H, 16B), (4S, 8H), (2D, 4S). Test names carry only the first form of each
// pair. The operand and immediate arrays are sized by the second (narrower)
// form of each pair.
//   input:     suffix pasted onto kInput8bits / kInput16bits / kInput32bits.
//   input_imm: suffix pasted onto the matching kInput<N>bitsImm array name.
// NOTE(review): the first form is presumably the destination and the second
// the source arrangement -- the helper's definition is outside this chunk.
4152 #define DEFINE_TEST_NEON_2OPIMM_LONG(mnemonic, input, input_imm) \
4153   TEST(mnemonic##_8H_2OPIMM) {                                   \
4154     CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                       \
4155                                  8H,                             \
4156                                  8B,                             \
4157                                  kInput8bits##input,             \
4158                                  kInput8bitsImm##input_imm);     \
4159   }                                                              \
4160   TEST(mnemonic##_4S_2OPIMM) {                                   \
4161     CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                       \
4162                                  4S,                             \
4163                                  4H,                             \
4164                                  kInput16bits##input,            \
4165                                  kInput16bitsImm##input_imm);    \
4166   }                                                              \
4167   TEST(mnemonic##_2D_2OPIMM) {                                   \
4168     CALL_TEST_NEON_HELPER_2OPIMM(mnemonic,                       \
4169                                  2D,                             \
4170                                  2S,                             \
4171                                  kInput32bits##input,            \
4172                                  kInput32bitsImm##input_imm);    \
4173   }                                                              \
4174   TEST(mnemonic##2_8H_2OPIMM) {                                  \
4175     CALL_TEST_NEON_HELPER_2OPIMM(mnemonic##2,                    \
4176                                  8H,                             \
4177                                  16B,                            \
4178                                  kInput8bits##input,             \
4179                                  kInput8bitsImm##input_imm);     \
4180   }                                                              \
4181   TEST(mnemonic##2_4S_2OPIMM) {                                  \
4182     CALL_TEST_NEON_HELPER_2OPIMM(mnemonic##2,                    \
4183                                  4S,                             \
4184                                  8H,                             \
4185                                  kInput16bits##input,            \
4186                                  kInput16bitsImm##input_imm);    \
4187   }                                                              \
4188   TEST(mnemonic##2_2D_2OPIMM) {                                  \
4189     CALL_TEST_NEON_HELPER_2OPIMM(mnemonic##2,                    \
4190                                  2D,                             \
4191                                  4S,                             \
4192                                  kInput32bits##input,            \
4193                                  kInput32bitsImm##input_imm);    \
4194   }
4195 
// Thin wrapper: expands to a brace-enclosed invocation of
// CALL_TEST_NEON_HELPER_ByElement_Dot_Product, forwarding all nine arguments
// unchanged and in the same order.
//   mnemonic:                  instruction under test.
//   vdform / vnform / vmform:  forms for the d, n and m operands.
//   input_d / input_n / input_m: input arrays for the d, n and m operands.
//   indices:                   array of element indices to exercise.
//   vm_subvector_count:        forwarded as-is; its exact meaning is defined
//                              by the wrapped helper, outside this chunk.
4196 #define CALL_TEST_NEON_HELPER_BYELEMENT_DOT_PRODUCT(mnemonic,           \
4197                                                     vdform,             \
4198                                                     vnform,             \
4199                                                     vmform,             \
4200                                                     input_d,            \
4201                                                     input_n,            \
4202                                                     input_m,            \
4203                                                     indices,            \
4204                                                     vm_subvector_count) \
4205   {                                                                     \
4206     CALL_TEST_NEON_HELPER_ByElement_Dot_Product(mnemonic,               \
4207                                                 vdform,                 \
4208                                                 vnform,                 \
4209                                                 vmform,                 \
4210                                                 input_d,                \
4211                                                 input_n,                \
4212                                                 input_m,                \
4213                                                 indices,                \
4214                                                 vm_subvector_count);    \
4215   }
4216 
// Defines two TEST cases for a by-element dot-product `mnemonic`:
// <mnemonic>_2S_8B_B and <mnemonic>_4S_16B_B. Both forward to
// CALL_TEST_NEON_HELPER_BYELEMENT_DOT_PRODUCT using kInputSIndices as the
// index array and the literal 4 as the vm_subvector_count argument.
//   input_d: suffix pasted onto kInput32bits (d operand array).
//   input_n: suffix pasted onto kInput8bits  (n operand array).
//   input_m: suffix pasted onto kInput8bits  (m operand array).
4217 #define DEFINE_TEST_NEON_BYELEMENT_DOT_PRODUCT(                        \
4218     mnemonic, input_d, input_n, input_m)                               \
4219   TEST(mnemonic##_2S_8B_B) {                                           \
4220     CALL_TEST_NEON_HELPER_BYELEMENT_DOT_PRODUCT(mnemonic,              \
4221                                                 2S,                    \
4222                                                 8B,                    \
4223                                                 B,                     \
4224                                                 kInput32bits##input_d, \
4225                                                 kInput8bits##input_n,  \
4226                                                 kInput8bits##input_m,  \
4227                                                 kInputSIndices,        \
4228                                                 4);                    \
4229   }                                                                    \
4230   TEST(mnemonic##_4S_16B_B) {                                          \
4231     CALL_TEST_NEON_HELPER_BYELEMENT_DOT_PRODUCT(mnemonic,              \
4232                                                 4S,                    \
4233                                                 16B,                   \
4234                                                 B,                     \
4235                                                 kInput32bits##input_d, \
4236                                                 kInput8bits##input_n,  \
4237                                                 kInput8bits##input_m,  \
4238                                                 kInputSIndices,        \
4239                                                 4);                    \
4240   }
4241 
// Thin wrapper: expands to a brace-enclosed invocation of
// CALL_TEST_NEON_HELPER_ByElement, forwarding all eight arguments unchanged
// and in the same order.
//   mnemonic:                  instruction under test.
//   vdform / vnform / vmform:  forms for the d, n and m operands.
//   input_d / input_n / input_m: input arrays for the d, n and m operands.
//   indices:                   array of element indices to exercise.
4242 #define CALL_TEST_NEON_HELPER_BYELEMENT(                                  \
4243     mnemonic, vdform, vnform, vmform, input_d, input_n, input_m, indices) \
4244   {                                                                       \
4245     CALL_TEST_NEON_HELPER_ByElement(mnemonic,                             \
4246                                     vdform,                               \
4247                                     vnform,                               \
4248                                     vmform,                               \
4249                                     input_d,                              \
4250                                     input_n,                              \
4251                                     input_m,                              \
4252                                     indices);                             \
4253   }
4254 
// Defines four TEST cases for an integer by-element vector `mnemonic`:
// <mnemonic>_4H_4H_H, _8H_8H_H, _2S_2S_S and _4S_4S_S. Each forwards to
// CALL_TEST_NEON_HELPER_BYELEMENT. The H cases use kInputHIndices and the S
// cases use kInputSIndices as the index array.
//   input_d / input_n / input_m: suffixes pasted onto kInput16bits (H cases)
//   or kInput32bits (S cases) to name the d, n and m operand arrays.
4255 #define DEFINE_TEST_NEON_BYELEMENT(mnemonic, input_d, input_n, input_m) \
4256   TEST(mnemonic##_4H_4H_H) {                                            \
4257     CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic,                           \
4258                                     4H,                                 \
4259                                     4H,                                 \
4260                                     H,                                  \
4261                                     kInput16bits##input_d,              \
4262                                     kInput16bits##input_n,              \
4263                                     kInput16bits##input_m,              \
4264                                     kInputHIndices);                    \
4265   }                                                                     \
4266   TEST(mnemonic##_8H_8H_H) {                                            \
4267     CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic,                           \
4268                                     8H,                                 \
4269                                     8H,                                 \
4270                                     H,                                  \
4271                                     kInput16bits##input_d,              \
4272                                     kInput16bits##input_n,              \
4273                                     kInput16bits##input_m,              \
4274                                     kInputHIndices);                    \
4275   }                                                                     \
4276   TEST(mnemonic##_2S_2S_S) {                                            \
4277     CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic,                           \
4278                                     2S,                                 \
4279                                     2S,                                 \
4280                                     S,                                  \
4281                                     kInput32bits##input_d,              \
4282                                     kInput32bits##input_n,              \
4283                                     kInput32bits##input_m,              \
4284                                     kInputSIndices);                    \
4285   }                                                                     \
4286   TEST(mnemonic##_4S_4S_S) {                                            \
4287     CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic,                           \
4288                                     4S,                                 \
4289                                     4S,                                 \
4290                                     S,                                  \
4291                                     kInput32bits##input_d,              \
4292                                     kInput32bits##input_n,              \
4293                                     kInput32bits##input_m,              \
4294                                     kInputSIndices);                    \
4295   }
4296 
// Defines two TEST cases for an integer by-element scalar `mnemonic`:
// <mnemonic>_H_H_H and <mnemonic>_S_S_S. Each forwards to
// CALL_TEST_NEON_HELPER_BYELEMENT with the same scalar form for all three
// operands; the H case uses kInputHIndices and the S case kInputSIndices.
//   input_d / input_n / input_m: suffixes pasted onto kInput16bits (H case)
//   or kInput32bits (S case) to name the d, n and m operand arrays.
4297 #define DEFINE_TEST_NEON_BYELEMENT_SCALAR(mnemonic, input_d, input_n, input_m) \
4298   TEST(mnemonic##_H_H_H) {                                                     \
4299     CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic,                                  \
4300                                     H,                                         \
4301                                     H,                                         \
4302                                     H,                                         \
4303                                     kInput16bits##input_d,                     \
4304                                     kInput16bits##input_n,                     \
4305                                     kInput16bits##input_m,                     \
4306                                     kInputHIndices);                           \
4307   }                                                                            \
4308   TEST(mnemonic##_S_S_S) {                                                     \
4309     CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic,                                  \
4310                                     S,                                         \
4311                                     S,                                         \
4312                                     S,                                         \
4313                                     kInput32bits##input_d,                     \
4314                                     kInput32bits##input_n,                     \
4315                                     kInput32bits##input_m,                     \
4316                                     kInputSIndices);                           \
4317   }
4318 
// Defines five TEST cases for a floating-point by-element vector `mnemonic`:
// <mnemonic>_4H_4H_H, _8H_8H_H, _2S_2S_S, _4S_4S_S and _2D_2D_D. Each forwards
// to CALL_TEST_NEON_HELPER_BYELEMENT. Index arrays are kInputHIndices,
// kInputSIndices and kInputDIndices for the H, S and D cases respectively.
//   input_d / input_n / input_m: suffixes pasted onto kInputFloat16 (H),
//   kInputFloat (S) or kInputDouble (D) to name the d, n and m operand arrays.
4319 #define DEFINE_TEST_NEON_FP_BYELEMENT(mnemonic, input_d, input_n, input_m) \
4320   TEST(mnemonic##_4H_4H_H) {                                               \
4321     CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic,                              \
4322                                     4H,                                    \
4323                                     4H,                                    \
4324                                     H,                                     \
4325                                     kInputFloat16##input_d,                \
4326                                     kInputFloat16##input_n,                \
4327                                     kInputFloat16##input_m,                \
4328                                     kInputHIndices);                       \
4329   }                                                                        \
4330   TEST(mnemonic##_8H_8H_H) {                                               \
4331     CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic,                              \
4332                                     8H,                                    \
4333                                     8H,                                    \
4334                                     H,                                     \
4335                                     kInputFloat16##input_d,                \
4336                                     kInputFloat16##input_n,                \
4337                                     kInputFloat16##input_m,                \
4338                                     kInputHIndices);                       \
4339   }                                                                        \
4340   TEST(mnemonic##_2S_2S_S) {                                               \
4341     CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic,                              \
4342                                     2S,                                    \
4343                                     2S,                                    \
4344                                     S,                                     \
4345                                     kInputFloat##input_d,                  \
4346                                     kInputFloat##input_n,                  \
4347                                     kInputFloat##input_m,                  \
4348                                     kInputSIndices);                       \
4349   }                                                                        \
4350   TEST(mnemonic##_4S_4S_S) {                                               \
4351     CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic,                              \
4352                                     4S,                                    \
4353                                     4S,                                    \
4354                                     S,                                     \
4355                                     kInputFloat##input_d,                  \
4356                                     kInputFloat##input_n,                  \
4357                                     kInputFloat##input_m,                  \
4358                                     kInputSIndices);                       \
4359   }                                                                        \
4360   TEST(mnemonic##_2D_2D_D) {                                               \
4361     CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic,                              \
4362                                     2D,                                    \
4363                                     2D,                                    \
4364                                     D,                                     \
4365                                     kInputDouble##input_d,                 \
4366                                     kInputDouble##input_n,                 \
4367                                     kInputDouble##input_m,                 \
4368                                     kInputDIndices);                       \
4369   }
4370 
// Defines two TEST cases for a by-element `mnemonic` with forms (2S, 2H, H)
// and (4S, 4H, H): <mnemonic>_2S_2H_H and <mnemonic>_4S_4H_H. Each forwards to
// CALL_TEST_NEON_HELPER_BYELEMENT with kInputHIndices as the index array.
//   input_n / input_m: suffixes pasted onto kInputFloat16 to name the n and m
//                      operand arrays.
//   input_d:           accepted but not used by the expansion; the d operand
//                      array is always kInputFloatAccDestination.
// NOTE(review): ignoring `input_d` is presumably intentional (fixed
// accumulator input) -- confirm before wiring it through.
4371 #define DEFINE_TEST_NEON_FHM_BYELEMENT(mnemonic, input_d, input_n, input_m) \
4372   TEST(mnemonic##_2S_2H_H) {                                                \
4373     CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic,                               \
4374                                     2S,                                     \
4375                                     2H,                                     \
4376                                     H,                                      \
4377                                     kInputFloatAccDestination,              \
4378                                     kInputFloat16##input_n,                 \
4379                                     kInputFloat16##input_m,                 \
4380                                     kInputHIndices);                        \
4381   }                                                                         \
4382   TEST(mnemonic##_4S_4H_H) {                                                \
4383     CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic,                               \
4384                                     4S,                                     \
4385                                     4H,                                     \
4386                                     H,                                      \
4387                                     kInputFloatAccDestination,              \
4388                                     kInputFloat16##input_n,                 \
4389                                     kInputFloat16##input_m,                 \
4390                                     kInputHIndices);                        \
4391   }
4392 
// Defines three TEST cases for a floating-point by-element scalar `mnemonic`:
// <mnemonic>_H_H_H, <mnemonic>_S_S_S and <mnemonic>_D_D_D. Each forwards to
// CALL_TEST_NEON_HELPER_BYELEMENT with the same scalar form for all three
// operands. Index arrays are kInputHIndices, kInputSIndices and
// kInputDIndices for the H, S and D cases respectively.
//   inp_d / inp_n / inp_m: suffixes pasted onto kInputFloat16 (H),
//   kInputFloat (S) or kInputDouble (D) to name the d, n and m operand arrays.
4393 #define DEFINE_TEST_NEON_FP_BYELEMENT_SCALAR(mnemonic, inp_d, inp_n, inp_m) \
4394   TEST(mnemonic##_H_H_H) {                                                  \
4395     CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic,                               \
4396                                     H,                                      \
4397                                     H,                                      \
4398                                     H,                                      \
4399                                     kInputFloat16##inp_d,                   \
4400                                     kInputFloat16##inp_n,                   \
4401                                     kInputFloat16##inp_m,                   \
4402                                     kInputHIndices);                        \
4403   }                                                                         \
4404   TEST(mnemonic##_S_S_S) {                                                  \
4405     CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic,                               \
4406                                     S,                                      \
4407                                     S,                                      \
4408                                     S,                                      \
4409                                     kInputFloat##inp_d,                     \
4410                                     kInputFloat##inp_n,                     \
4411                                     kInputFloat##inp_m,                     \
4412                                     kInputSIndices);                        \
4413   }                                                                         \
4414   TEST(mnemonic##_D_D_D) {                                                  \
4415     CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic,                               \
4416                                     D,                                      \
4417                                     D,                                      \
4418                                     D,                                      \
4419                                     kInputDouble##inp_d,                    \
4420                                     kInputDouble##inp_n,                    \
4421                                     kInputDouble##inp_m,                    \
4422                                     kInputDIndices);                        \
4423   }
4424 
4425 
// Defines four by-element TEST()s for `mnemonic`, in which the destination
// elements (4S / 2D) are twice as wide as the source elements (H / S):
//   <mnemonic>_4S_4H_H, <mnemonic>2_4S_8H_H,
//   <mnemonic>_2D_2S_S, <mnemonic>2_2D_4S_S.
// The input_* arguments are pasted onto kInput<N>bits to select the input
// tables used for the d, n and m operands.
#define DEFINE_TEST_NEON_BYELEMENT_DIFF(mnemonic, input_d, input_n, input_m) \
  TEST(mnemonic##_4S_4H_H) { \
    CALL_TEST_NEON_HELPER_BYELEMENT( \
        mnemonic, 4S, 4H, H, \
        kInput32bits##input_d, kInput16bits##input_n, kInput16bits##input_m, \
        kInputHIndices); \
  } \
  TEST(mnemonic##2_4S_8H_H) { \
    CALL_TEST_NEON_HELPER_BYELEMENT( \
        mnemonic##2, 4S, 8H, H, \
        kInput32bits##input_d, kInput16bits##input_n, kInput16bits##input_m, \
        kInputHIndices); \
  } \
  TEST(mnemonic##_2D_2S_S) { \
    CALL_TEST_NEON_HELPER_BYELEMENT( \
        mnemonic, 2D, 2S, S, \
        kInput64bits##input_d, kInput32bits##input_n, kInput32bits##input_m, \
        kInputSIndices); \
  } \
  TEST(mnemonic##2_2D_4S_S) { \
    CALL_TEST_NEON_HELPER_BYELEMENT( \
        mnemonic##2, 2D, 4S, S, \
        kInput64bits##input_d, kInput32bits##input_n, kInput32bits##input_m, \
        kInputSIndices); \
  }
4467 
// Scalar flavour of the widening by-element tests: defines <mnemonic>_S_H_H
// (32-bit destination, 16-bit sources) and <mnemonic>_D_S_S (64-bit
// destination, 32-bit sources). The input_* arguments are pasted onto
// kInput<N>bits to select the input tables for the d, n and m operands.
#define DEFINE_TEST_NEON_BYELEMENT_DIFF_SCALAR( \
    mnemonic, input_d, input_n, input_m) \
  TEST(mnemonic##_S_H_H) { \
    CALL_TEST_NEON_HELPER_BYELEMENT( \
        mnemonic, S, H, H, \
        kInput32bits##input_d, kInput16bits##input_n, kInput16bits##input_m, \
        kInputHIndices); \
  } \
  TEST(mnemonic##_D_S_S) { \
    CALL_TEST_NEON_HELPER_BYELEMENT( \
        mnemonic, D, S, S, \
        kInput64bits##input_d, kInput32bits##input_n, kInput32bits##input_m, \
        kInputSIndices); \
  }
4490 
4491 
// Thin adapter over CALL_TEST_NEON_HELPER_OpImmOpImm: it supplies the
// MacroAssembler member function for `mnemonic` and passes `variant` twice, so
// both register operands use the same arrangement.
#define CALL_TEST_NEON_HELPER_2OP2IMM( \
    mnemonic, variant, input_d, input_imm1, input_n, input_imm2) \
  { \
    CALL_TEST_NEON_HELPER_OpImmOpImm( \
        &MacroAssembler::mnemonic, mnemonic, \
        variant, variant, \
        input_d, input_imm1, \
        input_n, input_imm2); \
  }
4504 
// Defines <mnemonic>_B, _H, _S and _D tests, run on the 16B, 8H, 4S and 2D
// arrangements respectively. For each element size, input_d / input_n are
// pasted onto kInput<N>bits and input_imm1 / input_imm2 onto kInput<N>bitsImm
// to select the register and immediate input tables.
#define DEFINE_TEST_NEON_2OP2IMM( \
    mnemonic, input_d, input_imm1, input_n, input_imm2) \
  TEST(mnemonic##_B) { \
    CALL_TEST_NEON_HELPER_2OP2IMM( \
        mnemonic, 16B, \
        kInput8bits##input_d, kInput8bitsImm##input_imm1, \
        kInput8bits##input_n, kInput8bitsImm##input_imm2); \
  } \
  TEST(mnemonic##_H) { \
    CALL_TEST_NEON_HELPER_2OP2IMM( \
        mnemonic, 8H, \
        kInput16bits##input_d, kInput16bitsImm##input_imm1, \
        kInput16bits##input_n, kInput16bitsImm##input_imm2); \
  } \
  TEST(mnemonic##_S) { \
    CALL_TEST_NEON_HELPER_2OP2IMM( \
        mnemonic, 4S, \
        kInput32bits##input_d, kInput32bitsImm##input_imm1, \
        kInput32bits##input_n, kInput32bitsImm##input_imm2); \
  } \
  TEST(mnemonic##_D) { \
    CALL_TEST_NEON_HELPER_2OP2IMM( \
        mnemonic, 2D, \
        kInput64bits##input_d, kInput64bitsImm##input_imm1, \
        kInput64bits##input_n, kInput64bitsImm##input_imm2); \
  }
4539 
4540 
4541 // Advanced SIMD copy.
// `ins` expands to ins_B/_H/_S/_D via DEFINE_TEST_NEON_2OP2IMM, using the
// `Basic` register tables and the `LaneCountFromZero` immediate tables for
// both operands.
4542 DEFINE_TEST_NEON_2OP2IMM(
4543     ins, Basic, LaneCountFromZero, Basic, LaneCountFromZero)
4544 DEFINE_TEST_NEON_2OPIMM_COPY(dup, Basic, LaneCountFromZero)
4545 
4546 
4547 // Advanced SIMD scalar copy.
// NOTE(review): DEFINE_TEST_NEON_2OPIMM_SCALAR is defined outside this
// section; presumably this is the scalar form of the `dup` test - confirm.
4548 DEFINE_TEST_NEON_2OPIMM_SCALAR(dup, Basic, LaneCountFromZero)
4549 
4550 
4551 // Advanced SIMD three same.
// One macro invocation per instruction; every test here uses the `Basic`
// input set.
// NOTE(review): the macro suffix (NO2D, HS, 8B_16B, FP, ...) presumably names
// the arrangements each instruction is tested with - confirm against the
// DEFINE_TEST_NEON_3SAME* definitions outside this section.
4552 DEFINE_TEST_NEON_3SAME_NO2D(shadd, Basic)
4553 DEFINE_TEST_NEON_3SAME(sqadd, Basic)
4554 DEFINE_TEST_NEON_3SAME_NO2D(srhadd, Basic)
4555 DEFINE_TEST_NEON_3SAME_NO2D(shsub, Basic)
4556 DEFINE_TEST_NEON_3SAME(sqsub, Basic)
4557 DEFINE_TEST_NEON_3SAME(cmgt, Basic)
4558 DEFINE_TEST_NEON_3SAME(cmge, Basic)
4559 DEFINE_TEST_NEON_3SAME(sshl, Basic)
4560 DEFINE_TEST_NEON_3SAME(sqshl, Basic)
4561 DEFINE_TEST_NEON_3SAME(srshl, Basic)
4562 DEFINE_TEST_NEON_3SAME(sqrshl, Basic)
4563 DEFINE_TEST_NEON_3SAME_NO2D(smax, Basic)
4564 DEFINE_TEST_NEON_3SAME_NO2D(smin, Basic)
4565 DEFINE_TEST_NEON_3SAME_NO2D(sabd, Basic)
4566 DEFINE_TEST_NEON_3SAME_NO2D(saba, Basic)
4567 DEFINE_TEST_NEON_3SAME(add, Basic)
4568 DEFINE_TEST_NEON_3SAME(cmtst, Basic)
4569 DEFINE_TEST_NEON_3SAME_NO2D(mla, Basic)
4570 DEFINE_TEST_NEON_3SAME_NO2D(mul, Basic)
4571 DEFINE_TEST_NEON_3SAME_NO2D(smaxp, Basic)
4572 DEFINE_TEST_NEON_3SAME_NO2D(sminp, Basic)
4573 DEFINE_TEST_NEON_3SAME_HS(sqdmulh, Basic)
4574 DEFINE_TEST_NEON_3SAME(addp, Basic)
4575 DEFINE_TEST_NEON_3SAME_FP(fmaxnm, Basic)
4576 DEFINE_TEST_NEON_3SAME_FP(fmla, Basic)
4577 DEFINE_TEST_NEON_3SAME_FP(fadd, Basic)
4578 DEFINE_TEST_NEON_3SAME_FP(fmulx, Basic)
4579 DEFINE_TEST_NEON_3SAME_FP(fcmeq, Basic)
4580 DEFINE_TEST_NEON_3SAME_FP(fmax, Basic)
4581 DEFINE_TEST_NEON_3SAME_FP(frecps, Basic)
4582 DEFINE_TEST_NEON_3SAME_8B_16B(and_, Basic)
4583 DEFINE_TEST_NEON_3SAME_8B_16B(bic, Basic)
4584 DEFINE_TEST_NEON_3SAME_FP(fminnm, Basic)
4585 DEFINE_TEST_NEON_3SAME_FP(fmls, Basic)
4586 DEFINE_TEST_NEON_3SAME_FP(fsub, Basic)
4587 DEFINE_TEST_NEON_3SAME_FP(fmin, Basic)
4588 DEFINE_TEST_NEON_3SAME_FP(frsqrts, Basic)
4589 DEFINE_TEST_NEON_3SAME_8B_16B(orr, Basic)
4590 DEFINE_TEST_NEON_3SAME_8B_16B(orn, Basic)
4591 DEFINE_TEST_NEON_3SAME_NO2D(uhadd, Basic)
4592 DEFINE_TEST_NEON_3SAME(uqadd, Basic)
4593 DEFINE_TEST_NEON_3SAME_NO2D(urhadd, Basic)
4594 DEFINE_TEST_NEON_3SAME_NO2D(uhsub, Basic)
4595 DEFINE_TEST_NEON_3SAME(uqsub, Basic)
4596 DEFINE_TEST_NEON_3SAME(cmhi, Basic)
4597 DEFINE_TEST_NEON_3SAME(cmhs, Basic)
4598 DEFINE_TEST_NEON_3SAME(ushl, Basic)
4599 DEFINE_TEST_NEON_3SAME(uqshl, Basic)
4600 DEFINE_TEST_NEON_3SAME(urshl, Basic)
4601 DEFINE_TEST_NEON_3SAME(uqrshl, Basic)
4602 DEFINE_TEST_NEON_3SAME_NO2D(umax, Basic)
4603 DEFINE_TEST_NEON_3SAME_NO2D(umin, Basic)
4604 DEFINE_TEST_NEON_3SAME_NO2D(uabd, Basic)
4605 DEFINE_TEST_NEON_3SAME_NO2D(uaba, Basic)
4606 DEFINE_TEST_NEON_3SAME(sub, Basic)
4607 DEFINE_TEST_NEON_3SAME(cmeq, Basic)
4608 DEFINE_TEST_NEON_3SAME_NO2D(mls, Basic)
4609 DEFINE_TEST_NEON_3SAME_8B_16B(pmul, Basic)
4610 DEFINE_TEST_NEON_3SAME_NO2D(uminp, Basic)
4611 DEFINE_TEST_NEON_3SAME_NO2D(umaxp, Basic)
4612 DEFINE_TEST_NEON_3SAME_HS(sqrdmulh, Basic)
4613 DEFINE_TEST_NEON_3SAME_HS(sqrdmlah, Basic)
4614 DEFINE_TEST_NEON_3SAME_HS(sqrdmlsh, Basic)
4615 DEFINE_TEST_NEON_3DIFF_DOUBLE_WIDE(udot, Basic)
4616 DEFINE_TEST_NEON_3DIFF_DOUBLE_WIDE(sdot, Basic)
4617 DEFINE_TEST_NEON_3SAME_FP(fmaxnmp, Basic)
4618 DEFINE_TEST_NEON_3SAME_FP(faddp, Basic)
4619 DEFINE_TEST_NEON_3SAME_FP(fmul, Basic)
4620 DEFINE_TEST_NEON_3SAME_FP(fcmge, Basic)
4621 DEFINE_TEST_NEON_3SAME_FP(facge, Basic)
4622 DEFINE_TEST_NEON_3SAME_FP(fmaxp, Basic)
4623 DEFINE_TEST_NEON_3SAME_FP(fdiv, Basic)
4624 DEFINE_TEST_NEON_3SAME_8B_16B(eor, Basic)
4625 DEFINE_TEST_NEON_3SAME_8B_16B(bsl, Basic)
4626 DEFINE_TEST_NEON_3SAME_FP(fminnmp, Basic)
4627 DEFINE_TEST_NEON_3SAME_FP(fabd, Basic)
4628 DEFINE_TEST_NEON_3SAME_FP(fcmgt, Basic)
4629 DEFINE_TEST_NEON_3SAME_FP(facgt, Basic)
4630 DEFINE_TEST_NEON_3SAME_FP(fminp, Basic)
4631 DEFINE_TEST_NEON_3SAME_8B_16B(bit, Basic)
4632 DEFINE_TEST_NEON_3SAME_8B_16B(bif, Basic)
4633 
4634 
4635 // Advanced SIMD scalar three same.
// Scalar forms of the three-same instructions, all on the `Basic` input set.
// NOTE(review): the _D / _HS suffixes presumably restrict the test to D-sized
// or H/S-sized scalars - confirm against the macro definitions.
4636 DEFINE_TEST_NEON_3SAME_SCALAR(sqadd, Basic)
4637 DEFINE_TEST_NEON_3SAME_SCALAR(sqsub, Basic)
4638 DEFINE_TEST_NEON_3SAME_SCALAR_D(cmgt, Basic)
4639 DEFINE_TEST_NEON_3SAME_SCALAR_D(cmge, Basic)
4640 DEFINE_TEST_NEON_3SAME_SCALAR_D(sshl, Basic)
4641 DEFINE_TEST_NEON_3SAME_SCALAR(sqshl, Basic)
4642 DEFINE_TEST_NEON_3SAME_SCALAR_D(srshl, Basic)
4643 DEFINE_TEST_NEON_3SAME_SCALAR(sqrshl, Basic)
4644 DEFINE_TEST_NEON_3SAME_SCALAR_D(add, Basic)
4645 DEFINE_TEST_NEON_3SAME_SCALAR_D(cmtst, Basic)
4646 DEFINE_TEST_NEON_3SAME_SCALAR_HS(sqdmulh, Basic)
4647 DEFINE_TEST_NEON_3SAME_FP_SCALAR(fmulx, Basic)
4648 DEFINE_TEST_NEON_3SAME_FP_SCALAR(fcmeq, Basic)
4649 DEFINE_TEST_NEON_3SAME_FP_SCALAR(frecps, Basic)
4650 DEFINE_TEST_NEON_3SAME_FP_SCALAR(frsqrts, Basic)
4651 DEFINE_TEST_NEON_3SAME_SCALAR_D(uqadd, Basic)
4652 DEFINE_TEST_NEON_3SAME_SCALAR_D(uqsub, Basic)
4653 DEFINE_TEST_NEON_3SAME_SCALAR_D(cmhi, Basic)
4654 DEFINE_TEST_NEON_3SAME_SCALAR_D(cmhs, Basic)
4655 DEFINE_TEST_NEON_3SAME_SCALAR_D(ushl, Basic)
4656 DEFINE_TEST_NEON_3SAME_SCALAR(uqshl, Basic)
4657 DEFINE_TEST_NEON_3SAME_SCALAR_D(urshl, Basic)
4658 DEFINE_TEST_NEON_3SAME_SCALAR(uqrshl, Basic)
4659 DEFINE_TEST_NEON_3SAME_SCALAR_D(sub, Basic)
4660 DEFINE_TEST_NEON_3SAME_SCALAR_D(cmeq, Basic)
4661 DEFINE_TEST_NEON_3SAME_SCALAR_HS(sqrdmulh, Basic)
4662 DEFINE_TEST_NEON_3SAME_SCALAR_HS(sqrdmlah, Basic)
4663 DEFINE_TEST_NEON_3SAME_SCALAR_HS(sqrdmlsh, Basic)
4664 DEFINE_TEST_NEON_3SAME_FP_SCALAR(fcmge, Basic)
4665 DEFINE_TEST_NEON_3SAME_FP_SCALAR(facge, Basic)
4666 DEFINE_TEST_NEON_3SAME_FP_SCALAR(fabd, Basic)
4667 DEFINE_TEST_NEON_3SAME_FP_SCALAR(fcmgt, Basic)
4668 DEFINE_TEST_NEON_3SAME_FP_SCALAR(facgt, Basic)
4669 
4670 
4671 // Advanced SIMD FHM instructions (FMLAL, FMLSL).
4672 // These are oddballs: they are encoded under the 3SAME group but behave
4673 // quite differently.
// The "2" variants are passed as separate mnemonics (fmlal2, fmlsl2); the
// three `Basic` arguments are forwarded to DEFINE_TEST_NEON_FHM, which is
// defined outside this section.
4674 DEFINE_TEST_NEON_FHM(fmlal, Basic, Basic, Basic)
4675 DEFINE_TEST_NEON_FHM(fmlal2, Basic, Basic, Basic)
4676 DEFINE_TEST_NEON_FHM(fmlsl, Basic, Basic, Basic)
4677 DEFINE_TEST_NEON_FHM(fmlsl2, Basic, Basic, Basic)
4678 
4679 
4680 // Advanced SIMD three different.
// NOTE(review): LONG / WIDE / NARROW in the macro names presumably describe
// how the destination element size relates to the sources, and _SD / _8H
// restrict the arrangements tested - confirm against the macro definitions.
4681 DEFINE_TEST_NEON_3DIFF_LONG(saddl, Basic)
4682 DEFINE_TEST_NEON_3DIFF_WIDE(saddw, Basic)
4683 DEFINE_TEST_NEON_3DIFF_LONG(ssubl, Basic)
4684 DEFINE_TEST_NEON_3DIFF_WIDE(ssubw, Basic)
4685 DEFINE_TEST_NEON_3DIFF_NARROW(addhn, Basic)
4686 DEFINE_TEST_NEON_3DIFF_LONG(sabal, Basic)
4687 DEFINE_TEST_NEON_3DIFF_NARROW(subhn, Basic)
4688 DEFINE_TEST_NEON_3DIFF_LONG(sabdl, Basic)
4689 DEFINE_TEST_NEON_3DIFF_LONG(smlal, Basic)
4690 DEFINE_TEST_NEON_3DIFF_LONG_SD(sqdmlal, Basic)
4691 DEFINE_TEST_NEON_3DIFF_LONG(smlsl, Basic)
4692 DEFINE_TEST_NEON_3DIFF_LONG_SD(sqdmlsl, Basic)
4693 DEFINE_TEST_NEON_3DIFF_LONG(smull, Basic)
4694 DEFINE_TEST_NEON_3DIFF_LONG_SD(sqdmull, Basic)
4695 DEFINE_TEST_NEON_3DIFF_LONG_8H(pmull, Basic)
4696 DEFINE_TEST_NEON_3DIFF_LONG(uaddl, Basic)
4697 DEFINE_TEST_NEON_3DIFF_WIDE(uaddw, Basic)
4698 DEFINE_TEST_NEON_3DIFF_LONG(usubl, Basic)
4699 DEFINE_TEST_NEON_3DIFF_WIDE(usubw, Basic)
4700 DEFINE_TEST_NEON_3DIFF_NARROW(raddhn, Basic)
4701 DEFINE_TEST_NEON_3DIFF_LONG(uabal, Basic)
4702 DEFINE_TEST_NEON_3DIFF_NARROW(rsubhn, Basic)
4703 DEFINE_TEST_NEON_3DIFF_LONG(uabdl, Basic)
4704 DEFINE_TEST_NEON_3DIFF_LONG(umlal, Basic)
4705 DEFINE_TEST_NEON_3DIFF_LONG(umlsl, Basic)
4706 DEFINE_TEST_NEON_3DIFF_LONG(umull, Basic)
4707 
4708 
4709 // Advanced SIMD scalar three different.
// Only the three sqdml* instructions are instantiated in this group.
4710 DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_SD(sqdmlal, Basic)
4711 DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_SD(sqdmlsl, Basic)
4712 DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_SD(sqdmull, Basic)
4713 
4714 
// Advanced SIMD scalar pairwise.
// `addp` is tested only in its D <- 2D form, so it is written out by hand
// instead of going through a DEFINE_TEST_* macro. Each floating-point pairwise
// instruction below is instantiated exactly once.
TEST(addp_SCALAR) {
  CALL_TEST_NEON_HELPER_2DIFF(addp, D, 2D, kInput64bitsBasic);
}
DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(fmaxnmp, Basic)
DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(faddp, Basic)
DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(fmaxp, Basic)
DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(fminnmp, Basic)
DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(fminp, Basic)
4724 
4725 
4726 // Advanced SIMD shift by immediate.
// The third argument names the immediate table: `TypeWidth` for the right
// shifts and `TypeWidthFromZero` for the left shifts listed here.
// NOTE(review): presumably these cover 1..width and 0..width-1 respectively,
// with `TypeWidthFromZeroToWidth` covering 0..width - confirm in the inputs
// header.
4727 DEFINE_TEST_NEON_2OPIMM(sshr, Basic, TypeWidth)
4728 DEFINE_TEST_NEON_2OPIMM(ssra, Basic, TypeWidth)
4729 DEFINE_TEST_NEON_2OPIMM(srshr, Basic, TypeWidth)
4730 DEFINE_TEST_NEON_2OPIMM(srsra, Basic, TypeWidth)
4731 DEFINE_TEST_NEON_2OPIMM(shl, Basic, TypeWidthFromZero)
4732 DEFINE_TEST_NEON_2OPIMM(sqshl, Basic, TypeWidthFromZero)
4733 DEFINE_TEST_NEON_2OPIMM_NARROW(shrn, Basic, TypeWidth)
4734 DEFINE_TEST_NEON_2OPIMM_NARROW(rshrn, Basic, TypeWidth)
4735 DEFINE_TEST_NEON_2OPIMM_NARROW(sqshrn, Basic, TypeWidth)
4736 DEFINE_TEST_NEON_2OPIMM_NARROW(sqrshrn, Basic, TypeWidth)
4737 DEFINE_TEST_NEON_2OPIMM_LONG(sshll, Basic, TypeWidthFromZero)
4738 DEFINE_TEST_NEON_2OPIMM_HSD(scvtf,
4739                             FixedPointConversions,
4740                             TypeWidthFromZeroToWidth)
4741 DEFINE_TEST_NEON_2OPIMM_FP(fcvtzs, Conversions, TypeWidthFromZeroToWidth)
4742 DEFINE_TEST_NEON_2OPIMM(ushr, Basic, TypeWidth)
4743 DEFINE_TEST_NEON_2OPIMM(usra, Basic, TypeWidth)
4744 DEFINE_TEST_NEON_2OPIMM(urshr, Basic, TypeWidth)
4745 DEFINE_TEST_NEON_2OPIMM(ursra, Basic, TypeWidth)
4746 DEFINE_TEST_NEON_2OPIMM(sri, Basic, TypeWidth)
4747 DEFINE_TEST_NEON_2OPIMM(sli, Basic, TypeWidthFromZero)
4748 DEFINE_TEST_NEON_2OPIMM(sqshlu, Basic, TypeWidthFromZero)
4749 DEFINE_TEST_NEON_2OPIMM(uqshl, Basic, TypeWidthFromZero)
4750 DEFINE_TEST_NEON_2OPIMM_NARROW(sqshrun, Basic, TypeWidth)
4751 DEFINE_TEST_NEON_2OPIMM_NARROW(sqrshrun, Basic, TypeWidth)
4752 DEFINE_TEST_NEON_2OPIMM_NARROW(uqshrn, Basic, TypeWidth)
4753 DEFINE_TEST_NEON_2OPIMM_NARROW(uqrshrn, Basic, TypeWidth)
4754 DEFINE_TEST_NEON_2OPIMM_LONG(ushll, Basic, TypeWidthFromZero)
4755 DEFINE_TEST_NEON_2OPIMM_HSD(ucvtf,
4756                             FixedPointConversions,
4757                             TypeWidthFromZeroToWidth)
4758 DEFINE_TEST_NEON_2OPIMM_FP(fcvtzu, Conversions, TypeWidthFromZeroToWidth)
4759 
4760 
4761 // Advanced SIMD scalar shift by immediate.
// Scalar forms of the shift-by-immediate tests; the immediate table names
// follow the same pattern as in the vector group.
4762 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(sshr, Basic, TypeWidth)
4763 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(ssra, Basic, TypeWidth)
4764 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(srshr, Basic, TypeWidth)
4765 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(srsra, Basic, TypeWidth)
4766 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(shl, Basic, TypeWidthFromZero)
4767 DEFINE_TEST_NEON_2OPIMM_SCALAR(sqshl, Basic, TypeWidthFromZero)
4768 DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(sqshrn, Basic, TypeWidth)
4769 DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(sqrshrn, Basic, TypeWidth)
4770 DEFINE_TEST_NEON_2OPIMM_SCALAR_HSD(scvtf,
4771                                    FixedPointConversions,
4772                                    TypeWidthFromZeroToWidth)
4773 DEFINE_TEST_NEON_2OPIMM_FP_SCALAR(fcvtzs, Conversions, TypeWidthFromZeroToWidth)
4774 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(ushr, Basic, TypeWidth)
4775 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(usra, Basic, TypeWidth)
4776 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(urshr, Basic, TypeWidth)
4777 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(ursra, Basic, TypeWidth)
4778 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(sri, Basic, TypeWidth)
4779 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(sli, Basic, TypeWidthFromZero)
4780 DEFINE_TEST_NEON_2OPIMM_SCALAR(sqshlu, Basic, TypeWidthFromZero)
4781 DEFINE_TEST_NEON_2OPIMM_SCALAR(uqshl, Basic, TypeWidthFromZero)
4782 DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(sqshrun, Basic, TypeWidth)
4783 DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(sqrshrun, Basic, TypeWidth)
4784 DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(uqshrn, Basic, TypeWidth)
4785 DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(uqrshrn, Basic, TypeWidth)
4786 DEFINE_TEST_NEON_2OPIMM_SCALAR_HSD(ucvtf,
4787                                    FixedPointConversions,
4788                                    TypeWidthFromZeroToWidth)
4789 DEFINE_TEST_NEON_2OPIMM_FP_SCALAR(fcvtzu, Conversions, TypeWidthFromZeroToWidth)
4790 
4791 
4792 // Advanced SIMD two-register miscellaneous.
// Compare-against-zero instructions go through the 2OPIMM macros with the
// `Zero` immediate table. The integer forms of SCVTF, UCVTF, FCVTZS and FCVTZU
// are deliberately absent: as noted inline, the fixed-point tests with
// fbits == 0 already cover them.
4793 DEFINE_TEST_NEON_2SAME_NO2D(rev64, Basic)
4794 DEFINE_TEST_NEON_2SAME_8B_16B(rev16, Basic)
4795 DEFINE_TEST_NEON_2DIFF_LONG(saddlp, Basic)
4796 DEFINE_TEST_NEON_2SAME(suqadd, Basic)
4797 DEFINE_TEST_NEON_2SAME_NO2D(cls, Basic)
4798 DEFINE_TEST_NEON_2SAME_8B_16B(cnt, Basic)
4799 DEFINE_TEST_NEON_2DIFF_LONG(sadalp, Basic)
4800 DEFINE_TEST_NEON_2SAME(sqabs, Basic)
4801 DEFINE_TEST_NEON_2OPIMM(cmgt, Basic, Zero)
4802 DEFINE_TEST_NEON_2OPIMM(cmeq, Basic, Zero)
4803 DEFINE_TEST_NEON_2OPIMM(cmlt, Basic, Zero)
4804 DEFINE_TEST_NEON_2SAME(abs, Basic)
4805 DEFINE_TEST_NEON_2DIFF_NARROW(xtn, Basic)
4806 DEFINE_TEST_NEON_2DIFF_NARROW(sqxtn, Basic)
4807 DEFINE_TEST_NEON_2DIFF_FP_NARROW(fcvtn, Conversions)
4808 DEFINE_TEST_NEON_2DIFF_FP_LONG(fcvtl, Conversions)
4809 DEFINE_TEST_NEON_2SAME_FP_FP16(frintn, Conversions)
4810 DEFINE_TEST_NEON_2SAME_FP_FP16(frintm, Conversions)
4811 DEFINE_TEST_NEON_2SAME_FP_FP16(fcvtns, Conversions)
4812 DEFINE_TEST_NEON_2SAME_FP_FP16(fcvtms, Conversions)
4813 DEFINE_TEST_NEON_2SAME_FP_FP16(fcvtas, Conversions)
4814 // SCVTF (vector, integer) covered by SCVTF(vector, fixed point) with fbits 0.
4815 DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(fcmgt, Basic, Zero)
4816 DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(fcmeq, Basic, Zero)
4817 DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(fcmlt, Basic, Zero)
4818 DEFINE_TEST_NEON_2SAME_FP_FP16(fabs, Basic)
4819 DEFINE_TEST_NEON_2SAME_FP_FP16(frintp, Conversions)
4820 DEFINE_TEST_NEON_2SAME_FP_FP16(frintz, Conversions)
4821 DEFINE_TEST_NEON_2SAME_FP_FP16(fcvtps, Conversions)
4822 // FCVTZS(vector, integer) covered by FCVTZS(vector, fixed point) with fbits 0.
4823 DEFINE_TEST_NEON_2SAME_2S_4S(urecpe, Basic)
4824 DEFINE_TEST_NEON_2SAME_FP_FP16(frecpe, Basic)
4825 DEFINE_TEST_NEON_2SAME_BH(rev32, Basic)
4826 DEFINE_TEST_NEON_2DIFF_LONG(uaddlp, Basic)
4827 DEFINE_TEST_NEON_2SAME(usqadd, Basic)
4828 DEFINE_TEST_NEON_2SAME_NO2D(clz, Basic)
4829 DEFINE_TEST_NEON_2DIFF_LONG(uadalp, Basic)
4830 DEFINE_TEST_NEON_2SAME(sqneg, Basic)
4831 DEFINE_TEST_NEON_2OPIMM(cmge, Basic, Zero)
4832 DEFINE_TEST_NEON_2OPIMM(cmle, Basic, Zero)
4833 DEFINE_TEST_NEON_2SAME(neg, Basic)
4834 DEFINE_TEST_NEON_2DIFF_NARROW(sqxtun, Basic)
4835 DEFINE_TEST_NEON_2OPIMM_LONG(shll, Basic, SHLL)
4836 DEFINE_TEST_NEON_2DIFF_NARROW(uqxtn, Basic)
4837 DEFINE_TEST_NEON_2DIFF_FP_NARROW_2S(fcvtxn, Conversions)
4838 DEFINE_TEST_NEON_2SAME_FP(frint32x, Conversions)
4839 DEFINE_TEST_NEON_2SAME_FP(frint64x, Conversions)
4840 DEFINE_TEST_NEON_2SAME_FP(frint32z, Conversions)
4841 DEFINE_TEST_NEON_2SAME_FP(frint64z, Conversions)
4842 DEFINE_TEST_NEON_2SAME_FP_FP16(frinta, Conversions)
4843 DEFINE_TEST_NEON_2SAME_FP_FP16(frintx, Conversions)
4844 DEFINE_TEST_NEON_2SAME_FP_FP16(fcvtnu, Conversions)
4845 DEFINE_TEST_NEON_2SAME_FP_FP16(fcvtmu, Conversions)
4846 DEFINE_TEST_NEON_2SAME_FP_FP16(fcvtau, Conversions)
4847 // UCVTF (vector, integer) covered by UCVTF(vector, fixed point) with fbits 0.
4848 DEFINE_TEST_NEON_2SAME_8B_16B(not_, Basic)
4849 DEFINE_TEST_NEON_2SAME_8B_16B(rbit, Basic)
4850 DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(fcmge, Basic, Zero)
4851 DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(fcmle, Basic, Zero)
4852 DEFINE_TEST_NEON_2SAME_FP_FP16(fneg, Basic)
4853 DEFINE_TEST_NEON_2SAME_FP_FP16(frinti, Conversions)
4854 DEFINE_TEST_NEON_2SAME_FP_FP16(fcvtpu, Conversions)
4855 // FCVTZU(vector, integer) covered by FCVTZU(vector, fixed point) with fbits 0.
4856 DEFINE_TEST_NEON_2SAME_2S_4S(ursqrte, Basic)
4857 DEFINE_TEST_NEON_2SAME_FP_FP16(frsqrte, Basic)
4858 DEFINE_TEST_NEON_2SAME_FP_FP16(fsqrt, Basic)
4859 
4860 
// Advanced SIMD scalar two-register miscellaneous.
// Each instruction is instantiated exactly once. `fcvtxn` is tested only in
// its S <- D form, so it is written out by hand instead of going through a
// DEFINE_TEST_* macro.
DEFINE_TEST_NEON_2SAME_SCALAR(suqadd, Basic)
DEFINE_TEST_NEON_2SAME_SCALAR(sqabs, Basic)
DEFINE_TEST_NEON_2OPIMM_SCALAR_D(cmgt, Basic, Zero)
DEFINE_TEST_NEON_2OPIMM_SCALAR_D(cmeq, Basic, Zero)
DEFINE_TEST_NEON_2OPIMM_SCALAR_D(cmlt, Basic, Zero)
DEFINE_TEST_NEON_2SAME_SCALAR_D(abs, Basic)
DEFINE_TEST_NEON_2DIFF_SCALAR_NARROW(sqxtn, Basic)
DEFINE_TEST_NEON_2SAME_FP_FP16_SCALAR(fcvtns, Conversions)
DEFINE_TEST_NEON_2SAME_FP_FP16_SCALAR(fcvtms, Conversions)
DEFINE_TEST_NEON_2SAME_FP_FP16_SCALAR(fcvtas, Conversions)
// SCVTF (vector, integer) covered by SCVTF(vector, fixed point) with fbits 0.
DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_HSD(fcmgt, Basic, Zero)
DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_HSD(fcmeq, Basic, Zero)
DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_HSD(fcmlt, Basic, Zero)
DEFINE_TEST_NEON_2SAME_FP_FP16_SCALAR(fcvtps, Conversions)
// FCVTZS(vector, integer) covered by FCVTZS(vector, fixed point) with fbits 0.
DEFINE_TEST_NEON_2SAME_FP_FP16_SCALAR(frecpe, Basic)
DEFINE_TEST_NEON_2SAME_FP_FP16_SCALAR(frecpx, Basic)
DEFINE_TEST_NEON_2SAME_SCALAR(usqadd, Basic)
DEFINE_TEST_NEON_2SAME_SCALAR(sqneg, Basic)
DEFINE_TEST_NEON_2OPIMM_SCALAR_D(cmge, Basic, Zero)
DEFINE_TEST_NEON_2OPIMM_SCALAR_D(cmle, Basic, Zero)
DEFINE_TEST_NEON_2SAME_SCALAR_D(neg, Basic)
DEFINE_TEST_NEON_2DIFF_SCALAR_NARROW(sqxtun, Basic)
DEFINE_TEST_NEON_2DIFF_SCALAR_NARROW(uqxtn, Basic)
TEST(fcvtxn_SCALAR) {
  CALL_TEST_NEON_HELPER_2DIFF(fcvtxn, S, D, kInputDoubleConversions);
}
DEFINE_TEST_NEON_2SAME_FP_FP16_SCALAR(fcvtnu, Conversions)
DEFINE_TEST_NEON_2SAME_FP_FP16_SCALAR(fcvtmu, Conversions)
DEFINE_TEST_NEON_2SAME_FP_FP16_SCALAR(fcvtau, Conversions)
// UCVTF (vector, integer) covered by UCVTF(vector, fixed point) with fbits 0.
DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_HSD(fcmge, Basic, Zero)
DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_HSD(fcmle, Basic, Zero)
DEFINE_TEST_NEON_2SAME_FP_FP16_SCALAR(fcvtpu, Conversions)
// FCVTZU(vector, integer) covered by FCVTZU(vector, fixed point) with fbits 0.
DEFINE_TEST_NEON_2SAME_FP_FP16_SCALAR(frsqrte, Basic)
4899 
4900 
4901 // Advanced SIMD across lanes.
// Integer reductions first (the _LONG macro is used for saddlv / uaddlv),
// followed by the floating-point min/max reductions.
4902 DEFINE_TEST_NEON_ACROSS_LONG(saddlv, Basic)
4903 DEFINE_TEST_NEON_ACROSS(smaxv, Basic)
4904 DEFINE_TEST_NEON_ACROSS(sminv, Basic)
4905 DEFINE_TEST_NEON_ACROSS(addv, Basic)
4906 DEFINE_TEST_NEON_ACROSS_LONG(uaddlv, Basic)
4907 DEFINE_TEST_NEON_ACROSS(umaxv, Basic)
4908 DEFINE_TEST_NEON_ACROSS(uminv, Basic)
4909 DEFINE_TEST_NEON_ACROSS_FP(fmaxnmv, Basic)
4910 DEFINE_TEST_NEON_ACROSS_FP(fmaxv, Basic)
4911 DEFINE_TEST_NEON_ACROSS_FP(fminnmv, Basic)
4912 DEFINE_TEST_NEON_ACROSS_FP(fminv, Basic)
4913 
4914 
4915 // Advanced SIMD permute.
// The permutes reuse the generic three-same test macro.
4916 DEFINE_TEST_NEON_3SAME(uzp1, Basic)
4917 DEFINE_TEST_NEON_3SAME(trn1, Basic)
4918 DEFINE_TEST_NEON_3SAME(zip1, Basic)
4919 DEFINE_TEST_NEON_3SAME(uzp2, Basic)
4920 DEFINE_TEST_NEON_3SAME(trn2, Basic)
4921 DEFINE_TEST_NEON_3SAME(zip2, Basic)
4922 
4923 
4924 // Advanced SIMD vector x indexed element.
// The three `Basic` arguments select the input tables for the d, n and m
// operands. The _DIFF macro additionally generates the "2" variant of each
// mnemonic (e.g. smlal2), so those are not listed separately.
4925 DEFINE_TEST_NEON_BYELEMENT_DIFF(smlal, Basic, Basic, Basic)
4926 DEFINE_TEST_NEON_BYELEMENT_DIFF(sqdmlal, Basic, Basic, Basic)
4927 DEFINE_TEST_NEON_BYELEMENT_DIFF(smlsl, Basic, Basic, Basic)
4928 DEFINE_TEST_NEON_BYELEMENT_DIFF(sqdmlsl, Basic, Basic, Basic)
4929 DEFINE_TEST_NEON_BYELEMENT(mul, Basic, Basic, Basic)
4930 DEFINE_TEST_NEON_BYELEMENT_DIFF(smull, Basic, Basic, Basic)
4931 DEFINE_TEST_NEON_BYELEMENT_DIFF(sqdmull, Basic, Basic, Basic)
4932 DEFINE_TEST_NEON_BYELEMENT(sqdmulh, Basic, Basic, Basic)
4933 DEFINE_TEST_NEON_BYELEMENT(sqrdmulh, Basic, Basic, Basic)
4934 DEFINE_TEST_NEON_BYELEMENT(sqrdmlah, Basic, Basic, Basic)
4935 DEFINE_TEST_NEON_BYELEMENT(sqrdmlsh, Basic, Basic, Basic)
4936 DEFINE_TEST_NEON_BYELEMENT_DOT_PRODUCT(udot, Basic, Basic, Basic)
4937 DEFINE_TEST_NEON_BYELEMENT_DOT_PRODUCT(sdot, Basic, Basic, Basic)
4938 DEFINE_TEST_NEON_FP_BYELEMENT(fmla, Basic, Basic, Basic)
4939 DEFINE_TEST_NEON_FP_BYELEMENT(fmls, Basic, Basic, Basic)
4940 DEFINE_TEST_NEON_FP_BYELEMENT(fmul, Basic, Basic, Basic)
4941 DEFINE_TEST_NEON_BYELEMENT(mla, Basic, Basic, Basic)
4942 DEFINE_TEST_NEON_BYELEMENT_DIFF(umlal, Basic, Basic, Basic)
4943 DEFINE_TEST_NEON_BYELEMENT(mls, Basic, Basic, Basic)
4944 DEFINE_TEST_NEON_BYELEMENT_DIFF(umlsl, Basic, Basic, Basic)
4945 DEFINE_TEST_NEON_BYELEMENT_DIFF(umull, Basic, Basic, Basic)
4946 DEFINE_TEST_NEON_FP_BYELEMENT(fmulx, Basic, Basic, Basic)
4947 
4948 
4949 // Advanced SIMD scalar x indexed element.
// The _DIFF_SCALAR macro produces the widening S <- H and D <- S tests; the
// remaining macros are defined outside this section.
4950 DEFINE_TEST_NEON_BYELEMENT_DIFF_SCALAR(sqdmlal, Basic, Basic, Basic)
4951 DEFINE_TEST_NEON_BYELEMENT_DIFF_SCALAR(sqdmlsl, Basic, Basic, Basic)
4952 DEFINE_TEST_NEON_BYELEMENT_DIFF_SCALAR(sqdmull, Basic, Basic, Basic)
4953 DEFINE_TEST_NEON_BYELEMENT_SCALAR(sqdmulh, Basic, Basic, Basic)
4954 DEFINE_TEST_NEON_BYELEMENT_SCALAR(sqrdmulh, Basic, Basic, Basic)
4955 DEFINE_TEST_NEON_BYELEMENT_SCALAR(sqrdmlah, Basic, Basic, Basic)
4956 DEFINE_TEST_NEON_BYELEMENT_SCALAR(sqrdmlsh, Basic, Basic, Basic)
4957 DEFINE_TEST_NEON_FP_BYELEMENT_SCALAR(fmla, Basic, Basic, Basic)
4958 DEFINE_TEST_NEON_FP_BYELEMENT_SCALAR(fmls, Basic, Basic, Basic)
4959 DEFINE_TEST_NEON_FP_BYELEMENT_SCALAR(fmul, Basic, Basic, Basic)
4960 DEFINE_TEST_NEON_FP_BYELEMENT_SCALAR(fmulx, Basic, Basic, Basic)
4961 
4962 
// Advanced SIMD FHM instructions (FMLAL, FMLSL), by indexed element.
4963 DEFINE_TEST_NEON_FHM_BYELEMENT(fmlal, Basic, Basic, Basic)
4964 DEFINE_TEST_NEON_FHM_BYELEMENT(fmlal2, Basic, Basic, Basic)
4965 DEFINE_TEST_NEON_FHM_BYELEMENT(fmlsl, Basic, Basic, Basic)
4966 DEFINE_TEST_NEON_FHM_BYELEMENT(fmlsl2, Basic, Basic, Basic)
4967 
4968 
// Rebind the `__` shorthand for the code generators below: they receive the
// assembler as a `MacroAssembler* masm` parameter, so `__ Foo(...)` must
// expand to `masm->Foo(...)`.
4969 #undef __
4970 #define __ masm->
4971 
4972 #if defined(VIXL_INCLUDE_SIMULATOR_AARCH64) &&                 \
4973     defined(VIXL_HAS_ABI_SUPPORT) && __cplusplus >= 201103L && \
4974     (defined(__clang__) || GCC_VERSION_OR_NEWER(4, 9, 1))
4975 
4976 // Generate a function that stores zero to a hard-coded address.
4977 Instruction* GenerateStoreZero(MacroAssembler* masm, int32_t* target) {
4978   masm->Reset();
4979 
4980   UseScratchRegisterScope temps(masm);
4981   Register temp = temps.AcquireX();
4982   __ Mov(temp, reinterpret_cast<intptr_t>(target));
4983   __ Str(wzr, MemOperand(temp));
4984   __ Ret();
4985 
4986   masm->FinalizeCode();
4987   return masm->GetBuffer()->GetStartAddress<Instruction*>();
4988 }
4989 
4990 
4991 // Generate a function that stores the `int32_t` argument to a hard-coded
4992 // address.
4993 // In this example and the other below, we use the `abi` object to retrieve
4994 // argument and return locations even though we could easily hard code them.
4995 // This mirrors how more generic code (e.g. templated) user would use these
4996 // mechanisms.
GenerateStoreInput(MacroAssembler * masm,int32_t * target)4997 Instruction* GenerateStoreInput(MacroAssembler* masm, int32_t* target) {
4998   masm->Reset();
4999 
5000   ABI abi;
5001   Register input =
5002       Register(abi.GetNextParameterGenericOperand<int32_t>().GetCPURegister());
5003 
5004   UseScratchRegisterScope temps(masm);
5005   Register temp = temps.AcquireX();
5006   __ Mov(temp, reinterpret_cast<intptr_t>(target));
5007   __ Str(input, MemOperand(temp));
5008   __ Ret();
5009 
5010   masm->FinalizeCode();
5011   return masm->GetBuffer()->GetStartAddress<Instruction*>();
5012 }
5013 
5014 
5015 // A minimal implementation of a `pow` function.
GeneratePow(MacroAssembler * masm,unsigned pow)5016 Instruction* GeneratePow(MacroAssembler* masm, unsigned pow) {
5017   masm->Reset();
5018 
5019   ABI abi;
5020   Register input =
5021       Register(abi.GetNextParameterGenericOperand<int64_t>().GetCPURegister());
5022   Register result =
5023       Register(abi.GetReturnGenericOperand<int64_t>().GetCPURegister());
5024   UseScratchRegisterScope temps(masm);
5025   Register temp = temps.AcquireX();
5026 
5027   __ Mov(temp, 1);
5028   for (unsigned i = 0; i < pow; i++) {
5029     __ Mul(temp, temp, input);
5030   }
5031   __ Mov(result, temp);
5032   __ Ret();
5033 
5034   masm->FinalizeCode();
5035   return masm->GetBuffer()->GetStartAddress<Instruction*>();
5036 }
5037 
5038 
GenerateSum(MacroAssembler * masm)5039 Instruction* GenerateSum(MacroAssembler* masm) {
5040   masm->Reset();
5041 
5042   ABI abi;
5043   VRegister input_1 =
5044       VRegister(abi.GetNextParameterGenericOperand<float>().GetCPURegister());
5045   Register input_2 =
5046       Register(abi.GetNextParameterGenericOperand<int64_t>().GetCPURegister());
5047   VRegister input_3 =
5048       VRegister(abi.GetNextParameterGenericOperand<double>().GetCPURegister());
5049   VRegister result =
5050       VRegister(abi.GetReturnGenericOperand<double>().GetCPURegister());
5051 
5052   UseScratchRegisterScope temps(masm);
5053   VRegister temp = temps.AcquireD();
5054 
5055   __ Fcvt(input_1.D(), input_1);
5056   __ Scvtf(temp, input_2);
5057   __ Fadd(temp, temp, input_1.D());
5058   __ Fadd(result, temp, input_3);
5059   __ Ret();
5060 
5061   masm->FinalizeCode();
5062   return masm->GetBuffer()->GetStartAddress<Instruction*>();
5063 }
5064 
5065 
// Exercise `Simulator::RunFrom` with generated functions of increasing
// signature complexity: no arguments, one argument, one argument plus a return
// value, and several arguments of mixed integer / floating-point types.
TEST(RunFrom) {
  SETUP_WITH_FEATURES(CPUFeatures::kFP);

  // Run a function returning `void` and taking no argument.
  int32_t value = 0xbad;
  simulator.RunFrom(GenerateStoreZero(&masm, &value));
  VIXL_CHECK(value == 0);

  // Run a function returning `void` and taking one argument.
  int32_t argument = 0xf00d;
  simulator.RunFrom<void, int32_t>(GenerateStoreInput(&masm, &value), argument);
  VIXL_CHECK(value == 0xf00d);

  // Run a function taking one argument and returning a value.
  int64_t res_int64_t;
  res_int64_t =
      simulator.RunFrom<int64_t, int64_t>(GeneratePow(&masm, 0), 0xbad);
  VIXL_CHECK(res_int64_t == 1);
  res_int64_t = simulator.RunFrom<int64_t, int64_t>(GeneratePow(&masm, 1), 123);
  VIXL_CHECK(res_int64_t == 123);
  res_int64_t = simulator.RunFrom<int64_t, int64_t>(GeneratePow(&masm, 10), 2);
  VIXL_CHECK(res_int64_t == 1024);

  // Run a function taking multiple arguments in registers.
  // 1.0 + 2 + 3.0 is exactly representable, so comparing with `==` is safe.
  double res_double =
      simulator.RunFrom<double, float, int64_t, double>(GenerateSum(&masm),
                                                        1.0,
                                                        2,
                                                        3.0);
  VIXL_CHECK(res_double == 6.0);
}
5097 #endif
5098 
5099 
5100 }  // namespace aarch64
5101 }  // namespace vixl
5102