• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2015, VIXL authors
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are met:
6 //
7 //   * Redistributions of source code must retain the above copyright notice,
8 //     this list of conditions and the following disclaimer.
9 //   * Redistributions in binary form must reproduce the above copyright notice,
10 //     this list of conditions and the following disclaimer in the documentation
11 //     and/or other materials provided with the distribution.
12 //   * Neither the name of ARM Limited nor the names of its contributors may be
13 //     used to endorse or promote products derived from this software without
14 //     specific prior written permission.
15 //
16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
17 // ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 // WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
20 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 
27 #include <cfloat>
28 #include <cstdio>
29 
30 #include "test-runner.h"
31 #include "test-utils.h"
32 
33 #include "aarch64/test-simulator-inputs-aarch64.h"
34 #include "aarch64/test-simulator-traces-aarch64.h"
35 #include "aarch64/test-utils-aarch64.h"
36 
37 #include "aarch64/macro-assembler-aarch64.h"
38 #include "aarch64/simulator-aarch64.h"
39 
40 namespace vixl {
41 namespace aarch64 {
42 
43 // ==== Simulator Tests ====
44 //
45 // These simulator tests check instruction behaviour against a trace taken from
46 // real AArch64 hardware. The same test code is used to generate the trace; the
47 // results are printed to stdout when the test is run with
48 // --generate_test_trace.
49 //
50 // The input lists and expected results are stored in test/traces. The expected
51 // results can be regenerated using tools/generate_simulator_traces.py. Adding a
52 // test for a new instruction is described at the top of
53 // test-simulator-traces-aarch64.h.
54 
// Shorthand used by the code-generation statements in this file:
// `__ Foo(...)` expands to `masm.Foo(...)`, so a MacroAssembler named `masm`
// must be in scope wherever it is used.
55 #define __ masm.
// Declares a test through TEST_, prefixing the given name with AARCH64_SIM_.
56 #define TEST(name) TEST_(AARCH64_SIM_##name)
57 
58 #ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
59 
// SETUP (simulator build): declares `masm` and `decoder`, heap-allocates
// `simulator` (a Debugger when Test::run_debugger() is set, otherwise a
// Simulator), and applies the coloured-trace and instruction-stats options
// taken from Test. The simulator is released by TEARDOWN().
60 #define SETUP()                                                                \
61   MacroAssembler masm;                                                         \
62   Decoder decoder;                                                             \
63   Simulator* simulator =                                                       \
64       Test::run_debugger() ? new Debugger(&decoder) : new Simulator(&decoder); \
65   simulator->SetColouredTrace(Test::coloured_trace());                         \
66   simulator->SetInstructionStats(Test::instruction_stats());
67 
// START: resets `masm` and the simulator state, calls
// PushCalleeSavedRegisters(), then calls Trace(..., TRACE_ENABLE) for each of
// the Test::trace_reg(), Test::trace_write() and Test::trace_sim() options
// that is set, and calls EnableInstrumentation() when
// Test::instruction_stats() is set.
68 #define START()                         \
69   masm.Reset();                         \
70   simulator->ResetState();              \
71   __ PushCalleeSavedRegisters();        \
72   if (Test::trace_reg()) {              \
73     __ Trace(LOG_STATE, TRACE_ENABLE);  \
74   }                                     \
75   if (Test::trace_write()) {            \
76     __ Trace(LOG_WRITE, TRACE_ENABLE);  \
77   }                                     \
78   if (Test::trace_sim()) {              \
79     __ Trace(LOG_DISASM, TRACE_ENABLE); \
80   }                                     \
81   if (Test::instruction_stats()) {      \
82     __ EnableInstrumentation();         \
83   }
84 
// END: undoes START in reverse (instrumentation, tracing, callee-saved
// registers), then calls Ret() and finalizes the code. The expansion has no
// trailing semicolon, so the caller supplies it.
85 #define END()                       \
86   if (Test::instruction_stats()) {  \
87     __ DisableInstrumentation();    \
88   }                                 \
89   __ Trace(LOG_ALL, TRACE_DISABLE); \
90   __ PopCalleeSavedRegisters();     \
91   __ Ret();                         \
92   masm.FinalizeCode()
93 
// RUN: runs the simulator from the start address of masm's buffer.
94 #define RUN() \
95   simulator->RunFrom(masm.GetBuffer()->GetStartAddress<Instruction*>())
96 
// TEARDOWN: deletes the simulator allocated by SETUP().
97 #define TEARDOWN() delete simulator;
98 
99 #else  // VIXL_INCLUDE_SIMULATOR_AARCH64
100 
// SETUP (build without the simulator): declares `masm` and calls
// CPU::SetUp(). The expansion has no trailing semicolon, so the caller
// supplies it.
101 #define SETUP()        \
102   MacroAssembler masm; \
103   CPU::SetUp()
104 
// START: resets `masm` and calls PushCalleeSavedRegisters().
105 #define START() \
106   masm.Reset(); \
107   __ PushCalleeSavedRegisters()
108 
// END: calls PopCalleeSavedRegisters() and Ret(), then finalizes the code.
109 #define END()                   \
110   __ PopCalleeSavedRegisters(); \
111   __ Ret();                     \
112   masm.FinalizeCode()
113 
// RUN: marks masm's buffer executable, passes the generated code (start
// address and size) to ExecuteMemory(), then marks the buffer writable again.
114 #define RUN()                                                 \
115   {                                                           \
116     masm.GetBuffer()->SetExecutable();                        \
117     ExecuteMemory(masm.GetBuffer()->GetStartAddress<byte*>(), \
118                   masm.GetSizeOfCodeGenerated());             \
119     masm.GetBuffer()->SetWritable();                          \
120   }
121 
// TEARDOWN: expands to nothing in this configuration; SETUP() allocates
// nothing on the heap here.
122 #define TEARDOWN()
123 
124 #endif  // VIXL_INCLUDE_SIMULATOR_AARCH64
125 
126 
127 // The maximum number of errors to report in detail for each test.
// The test dispatchers keep counting mismatches past this limit, but print
// only the number of additional errors instead of their details.
128 static const unsigned kErrorReportLimit = 8;
129 
130 
131 // Overloaded versions of RawbitsToDouble and RawbitsToFloat for use in the
132 // templated test functions.
rawbits_to_fp(uint32_t bits)133 static float rawbits_to_fp(uint32_t bits) { return RawbitsToFloat(bits); }
134 
rawbits_to_fp(uint64_t bits)135 static double rawbits_to_fp(uint64_t bits) { return RawbitsToDouble(bits); }
136 
137 
138 // MacroAssembler member function pointers to pass to the test dispatchers.
// FP helpers with one, two or three source registers. The dispatchers in this
// file call them with the destination first and store `fd` after each call.
139 typedef void (MacroAssembler::*Test1OpFPHelper_t)(const FPRegister& fd,
140                                                   const FPRegister& fn);
141 typedef void (MacroAssembler::*Test2OpFPHelper_t)(const FPRegister& fd,
142                                                   const FPRegister& fn,
143                                                   const FPRegister& fm);
144 typedef void (MacroAssembler::*Test3OpFPHelper_t)(const FPRegister& fd,
145                                                   const FPRegister& fn,
146                                                   const FPRegister& fm,
147                                                   const FPRegister& fa);
// FP compare helpers. They have no destination register; the dispatchers read
// the outcome back from NZCV. The compare-with-immediate form is always called
// with 0.0 by the dispatcher in this file.
148 typedef void (MacroAssembler::*TestFPCmpHelper_t)(const FPRegister& fn,
149                                                   const FPRegister& fm);
150 typedef void (MacroAssembler::*TestFPCmpZeroHelper_t)(const FPRegister& fn,
151                                                       double value);
// Helpers that pair a `Register` operand with an FP register.
// NOTE(review): `fbits` is presumably the number of fractional bits of the
// fixed-point format; confirm against the MacroAssembler declarations.
152 typedef void (MacroAssembler::*TestFPToIntHelper_t)(const Register& rd,
153                                                     const FPRegister& fn);
154 typedef void (MacroAssembler::*TestFPToFixedHelper_t)(const Register& rd,
155                                                       const FPRegister& fn,
156                                                       int fbits);
157 typedef void (MacroAssembler::*TestFixedToFPHelper_t)(const FPRegister& fd,
158                                                       const Register& rn,
159                                                       int fbits);
160 // TODO: 'Test2OpNEONHelper_t' and 'Test2OpFPHelper_t' can be
161 //       consolidated into one routine.
// NEON helpers; these mirror the FP forms above but take VRegister operands.
162 typedef void (MacroAssembler::*Test1OpNEONHelper_t)(const VRegister& vd,
163                                                     const VRegister& vn);
164 typedef void (MacroAssembler::*Test2OpNEONHelper_t)(const VRegister& vd,
165                                                     const VRegister& vn,
166                                                     const VRegister& vm);
// By-element form: `vm_index` is passed alongside `vm`.
167 typedef void (MacroAssembler::*TestByElementNEONHelper_t)(const VRegister& vd,
168                                                           const VRegister& vn,
169                                                           const VRegister& vm,
170                                                           int vm_index);
// Form with an immediate after each of the two register operands.
171 typedef void (MacroAssembler::*TestOpImmOpImmVdUpdateNEONHelper_t)(
172     const VRegister& vd, int imm1, const VRegister& vn, int imm2);
173 
174 // This helps using the same typename for both the function pointer
175 // and the array of immediates passed to helper routines.
176 template <typename T>
177 class Test2OpImmediateNEONHelper_t {
178  public:
179   typedef void (MacroAssembler::*mnemonic)(const VRegister& vd,
180                                            const VRegister& vn,
181                                            T imm);
182 };
183 
184 
// Returns how many hexadecimal characters are needed to print a full-width
// value of the wider of the two template types (two characters per byte).
template <typename Ta, typename Tb>
static unsigned MaxHexCharCount() {
  const size_t widest_in_bytes =
      (sizeof(Ta) < sizeof(Tb)) ? sizeof(Tb) : sizeof(Ta);
  const unsigned widest_in_bits = static_cast<unsigned>(widest_in_bytes) * 8;
  // Each hex character encodes four bits.
  return widest_in_bits / 4;
}
192 
193 
194 // Standard test dispatchers.
195 
196 
Test1Op_Helper(Test1OpFPHelper_t helper,uintptr_t inputs,unsigned inputs_length,uintptr_t results,unsigned d_size,unsigned n_size)197 static void Test1Op_Helper(Test1OpFPHelper_t helper,
198                            uintptr_t inputs,
199                            unsigned inputs_length,
200                            uintptr_t results,
201                            unsigned d_size,
202                            unsigned n_size) {
203   VIXL_ASSERT((d_size == kDRegSize) || (d_size == kSRegSize));
204   VIXL_ASSERT((n_size == kDRegSize) || (n_size == kSRegSize));
205 
206   SETUP();
207   START();
208 
209   // Roll up the loop to keep the code size down.
210   Label loop_n;
211 
212   Register out = x0;
213   Register inputs_base = x1;
214   Register length = w2;
215   Register index_n = w3;
216 
217   const int n_index_shift =
218       (n_size == kDRegSize) ? kDRegSizeInBytesLog2 : kSRegSizeInBytesLog2;
219 
220   FPRegister fd = (d_size == kDRegSize) ? d0 : s0;
221   FPRegister fn = (n_size == kDRegSize) ? d1 : s1;
222 
223   __ Mov(out, results);
224   __ Mov(inputs_base, inputs);
225   __ Mov(length, inputs_length);
226 
227   __ Mov(index_n, 0);
228   __ Bind(&loop_n);
229   __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, n_index_shift));
230 
231   {
232     SingleEmissionCheckScope guard(&masm);
233     (masm.*helper)(fd, fn);
234   }
235   __ Str(fd, MemOperand(out, fd.GetSizeInBytes(), PostIndex));
236 
237   __ Add(index_n, index_n, 1);
238   __ Cmp(index_n, inputs_length);
239   __ B(lo, &loop_n);
240 
241   END();
242   RUN();
243   TEARDOWN();
244 }
245 
246 
247 // Test FP instructions. The inputs[] and expected[] arrays should be arrays of
248 // rawbits representations of doubles or floats. This ensures that exact bit
249 // comparisons can be performed.
250 template <typename Tn, typename Td>
Test1Op(const char * name,Test1OpFPHelper_t helper,const Tn inputs[],unsigned inputs_length,const Td expected[],unsigned expected_length)251 static void Test1Op(const char* name,
252                     Test1OpFPHelper_t helper,
253                     const Tn inputs[],
254                     unsigned inputs_length,
255                     const Td expected[],
256                     unsigned expected_length) {
257   VIXL_ASSERT(inputs_length > 0);
258 
259   const unsigned results_length = inputs_length;
260   Td* results = new Td[results_length];
261 
262   const unsigned d_bits = sizeof(Td) * 8;
263   const unsigned n_bits = sizeof(Tn) * 8;
264 
265   Test1Op_Helper(helper,
266                  reinterpret_cast<uintptr_t>(inputs),
267                  inputs_length,
268                  reinterpret_cast<uintptr_t>(results),
269                  d_bits,
270                  n_bits);
271 
272   if (Test::generate_test_trace()) {
273     // Print the results.
274     printf("const uint%u_t kExpected_%s[] = {\n", d_bits, name);
275     for (unsigned d = 0; d < results_length; d++) {
276       printf("  0x%0*" PRIx64 ",\n",
277              d_bits / 4,
278              static_cast<uint64_t>(results[d]));
279     }
280     printf("};\n");
281     printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
282   } else {
283     // Check the results.
284     VIXL_CHECK(expected_length == results_length);
285     unsigned error_count = 0;
286     unsigned d = 0;
287     for (unsigned n = 0; n < inputs_length; n++, d++) {
288       if (results[d] != expected[d]) {
289         if (++error_count > kErrorReportLimit) continue;
290 
291         printf("%s 0x%0*" PRIx64 " (%s %g):\n",
292                name,
293                n_bits / 4,
294                static_cast<uint64_t>(inputs[n]),
295                name,
296                rawbits_to_fp(inputs[n]));
297         printf("  Expected: 0x%0*" PRIx64 " (%g)\n",
298                d_bits / 4,
299                static_cast<uint64_t>(expected[d]),
300                rawbits_to_fp(expected[d]));
301         printf("  Found:    0x%0*" PRIx64 " (%g)\n",
302                d_bits / 4,
303                static_cast<uint64_t>(results[d]),
304                rawbits_to_fp(results[d]));
305         printf("\n");
306       }
307     }
308     VIXL_ASSERT(d == expected_length);
309     if (error_count > kErrorReportLimit) {
310       printf("%u other errors follow.\n", error_count - kErrorReportLimit);
311     }
312     VIXL_CHECK(error_count == 0);
313   }
314   delete[] results;
315 }
316 
317 
Test2Op_Helper(Test2OpFPHelper_t helper,uintptr_t inputs,unsigned inputs_length,uintptr_t results,unsigned reg_size)318 static void Test2Op_Helper(Test2OpFPHelper_t helper,
319                            uintptr_t inputs,
320                            unsigned inputs_length,
321                            uintptr_t results,
322                            unsigned reg_size) {
323   VIXL_ASSERT((reg_size == kDRegSize) || (reg_size == kSRegSize));
324 
325   SETUP();
326   START();
327 
328   // Roll up the loop to keep the code size down.
329   Label loop_n, loop_m;
330 
331   Register out = x0;
332   Register inputs_base = x1;
333   Register length = w2;
334   Register index_n = w3;
335   Register index_m = w4;
336 
337   bool double_op = reg_size == kDRegSize;
338   const int index_shift =
339       double_op ? kDRegSizeInBytesLog2 : kSRegSizeInBytesLog2;
340 
341   FPRegister fd = double_op ? d0 : s0;
342   FPRegister fn = double_op ? d1 : s1;
343   FPRegister fm = double_op ? d2 : s2;
344 
345   __ Mov(out, results);
346   __ Mov(inputs_base, inputs);
347   __ Mov(length, inputs_length);
348 
349   __ Mov(index_n, 0);
350   __ Bind(&loop_n);
351   __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, index_shift));
352 
353   __ Mov(index_m, 0);
354   __ Bind(&loop_m);
355   __ Ldr(fm, MemOperand(inputs_base, index_m, UXTW, index_shift));
356 
357   {
358     SingleEmissionCheckScope guard(&masm);
359     (masm.*helper)(fd, fn, fm);
360   }
361   __ Str(fd, MemOperand(out, fd.GetSizeInBytes(), PostIndex));
362 
363   __ Add(index_m, index_m, 1);
364   __ Cmp(index_m, inputs_length);
365   __ B(lo, &loop_m);
366 
367   __ Add(index_n, index_n, 1);
368   __ Cmp(index_n, inputs_length);
369   __ B(lo, &loop_n);
370 
371   END();
372   RUN();
373   TEARDOWN();
374 }
375 
376 
377 // Test FP instructions. The inputs[] and expected[] arrays should be arrays of
378 // rawbits representations of doubles or floats. This ensures that exact bit
379 // comparisons can be performed.
380 template <typename T>
Test2Op(const char * name,Test2OpFPHelper_t helper,const T inputs[],unsigned inputs_length,const T expected[],unsigned expected_length)381 static void Test2Op(const char* name,
382                     Test2OpFPHelper_t helper,
383                     const T inputs[],
384                     unsigned inputs_length,
385                     const T expected[],
386                     unsigned expected_length) {
387   VIXL_ASSERT(inputs_length > 0);
388 
389   const unsigned results_length = inputs_length * inputs_length;
390   T* results = new T[results_length];
391 
392   const unsigned bits = sizeof(T) * 8;
393 
394   Test2Op_Helper(helper,
395                  reinterpret_cast<uintptr_t>(inputs),
396                  inputs_length,
397                  reinterpret_cast<uintptr_t>(results),
398                  bits);
399 
400   if (Test::generate_test_trace()) {
401     // Print the results.
402     printf("const uint%u_t kExpected_%s[] = {\n", bits, name);
403     for (unsigned d = 0; d < results_length; d++) {
404       printf("  0x%0*" PRIx64 ",\n",
405              bits / 4,
406              static_cast<uint64_t>(results[d]));
407     }
408     printf("};\n");
409     printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
410   } else {
411     // Check the results.
412     VIXL_CHECK(expected_length == results_length);
413     unsigned error_count = 0;
414     unsigned d = 0;
415     for (unsigned n = 0; n < inputs_length; n++) {
416       for (unsigned m = 0; m < inputs_length; m++, d++) {
417         if (results[d] != expected[d]) {
418           if (++error_count > kErrorReportLimit) continue;
419 
420           printf("%s 0x%0*" PRIx64 ", 0x%0*" PRIx64 " (%s %g %g):\n",
421                  name,
422                  bits / 4,
423                  static_cast<uint64_t>(inputs[n]),
424                  bits / 4,
425                  static_cast<uint64_t>(inputs[m]),
426                  name,
427                  rawbits_to_fp(inputs[n]),
428                  rawbits_to_fp(inputs[m]));
429           printf("  Expected: 0x%0*" PRIx64 " (%g)\n",
430                  bits / 4,
431                  static_cast<uint64_t>(expected[d]),
432                  rawbits_to_fp(expected[d]));
433           printf("  Found:    0x%0*" PRIx64 " (%g)\n",
434                  bits / 4,
435                  static_cast<uint64_t>(results[d]),
436                  rawbits_to_fp(results[d]));
437           printf("\n");
438         }
439       }
440     }
441     VIXL_ASSERT(d == expected_length);
442     if (error_count > kErrorReportLimit) {
443       printf("%u other errors follow.\n", error_count - kErrorReportLimit);
444     }
445     VIXL_CHECK(error_count == 0);
446   }
447   delete[] results;
448 }
449 
450 
Test3Op_Helper(Test3OpFPHelper_t helper,uintptr_t inputs,unsigned inputs_length,uintptr_t results,unsigned reg_size)451 static void Test3Op_Helper(Test3OpFPHelper_t helper,
452                            uintptr_t inputs,
453                            unsigned inputs_length,
454                            uintptr_t results,
455                            unsigned reg_size) {
456   VIXL_ASSERT((reg_size == kDRegSize) || (reg_size == kSRegSize));
457 
458   SETUP();
459   START();
460 
461   // Roll up the loop to keep the code size down.
462   Label loop_n, loop_m, loop_a;
463 
464   Register out = x0;
465   Register inputs_base = x1;
466   Register length = w2;
467   Register index_n = w3;
468   Register index_m = w4;
469   Register index_a = w5;
470 
471   bool double_op = reg_size == kDRegSize;
472   const int index_shift =
473       double_op ? kDRegSizeInBytesLog2 : kSRegSizeInBytesLog2;
474 
475   FPRegister fd = double_op ? d0 : s0;
476   FPRegister fn = double_op ? d1 : s1;
477   FPRegister fm = double_op ? d2 : s2;
478   FPRegister fa = double_op ? d3 : s3;
479 
480   __ Mov(out, results);
481   __ Mov(inputs_base, inputs);
482   __ Mov(length, inputs_length);
483 
484   __ Mov(index_n, 0);
485   __ Bind(&loop_n);
486   __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, index_shift));
487 
488   __ Mov(index_m, 0);
489   __ Bind(&loop_m);
490   __ Ldr(fm, MemOperand(inputs_base, index_m, UXTW, index_shift));
491 
492   __ Mov(index_a, 0);
493   __ Bind(&loop_a);
494   __ Ldr(fa, MemOperand(inputs_base, index_a, UXTW, index_shift));
495 
496   {
497     SingleEmissionCheckScope guard(&masm);
498     (masm.*helper)(fd, fn, fm, fa);
499   }
500   __ Str(fd, MemOperand(out, fd.GetSizeInBytes(), PostIndex));
501 
502   __ Add(index_a, index_a, 1);
503   __ Cmp(index_a, inputs_length);
504   __ B(lo, &loop_a);
505 
506   __ Add(index_m, index_m, 1);
507   __ Cmp(index_m, inputs_length);
508   __ B(lo, &loop_m);
509 
510   __ Add(index_n, index_n, 1);
511   __ Cmp(index_n, inputs_length);
512   __ B(lo, &loop_n);
513 
514   END();
515   RUN();
516   TEARDOWN();
517 }
518 
519 
520 // Test FP instructions. The inputs[] and expected[] arrays should be arrays of
521 // rawbits representations of doubles or floats. This ensures that exact bit
522 // comparisons can be performed.
523 template <typename T>
Test3Op(const char * name,Test3OpFPHelper_t helper,const T inputs[],unsigned inputs_length,const T expected[],unsigned expected_length)524 static void Test3Op(const char* name,
525                     Test3OpFPHelper_t helper,
526                     const T inputs[],
527                     unsigned inputs_length,
528                     const T expected[],
529                     unsigned expected_length) {
530   VIXL_ASSERT(inputs_length > 0);
531 
532   const unsigned results_length = inputs_length * inputs_length * inputs_length;
533   T* results = new T[results_length];
534 
535   const unsigned bits = sizeof(T) * 8;
536 
537   Test3Op_Helper(helper,
538                  reinterpret_cast<uintptr_t>(inputs),
539                  inputs_length,
540                  reinterpret_cast<uintptr_t>(results),
541                  bits);
542 
543   if (Test::generate_test_trace()) {
544     // Print the results.
545     printf("const uint%u_t kExpected_%s[] = {\n", bits, name);
546     for (unsigned d = 0; d < results_length; d++) {
547       printf("  0x%0*" PRIx64 ",\n",
548              bits / 4,
549              static_cast<uint64_t>(results[d]));
550     }
551     printf("};\n");
552     printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
553   } else {
554     // Check the results.
555     VIXL_CHECK(expected_length == results_length);
556     unsigned error_count = 0;
557     unsigned d = 0;
558     for (unsigned n = 0; n < inputs_length; n++) {
559       for (unsigned m = 0; m < inputs_length; m++) {
560         for (unsigned a = 0; a < inputs_length; a++, d++) {
561           if (results[d] != expected[d]) {
562             if (++error_count > kErrorReportLimit) continue;
563 
564             printf("%s 0x%0*" PRIx64 ", 0x%0*" PRIx64 ", 0x%0*" PRIx64
565                    " (%s %g %g %g):\n",
566                    name,
567                    bits / 4,
568                    static_cast<uint64_t>(inputs[n]),
569                    bits / 4,
570                    static_cast<uint64_t>(inputs[m]),
571                    bits / 4,
572                    static_cast<uint64_t>(inputs[a]),
573                    name,
574                    rawbits_to_fp(inputs[n]),
575                    rawbits_to_fp(inputs[m]),
576                    rawbits_to_fp(inputs[a]));
577             printf("  Expected: 0x%0*" PRIx64 " (%g)\n",
578                    bits / 4,
579                    static_cast<uint64_t>(expected[d]),
580                    rawbits_to_fp(expected[d]));
581             printf("  Found:    0x%0*" PRIx64 " (%g)\n",
582                    bits / 4,
583                    static_cast<uint64_t>(results[d]),
584                    rawbits_to_fp(results[d]));
585             printf("\n");
586           }
587         }
588       }
589     }
590     VIXL_ASSERT(d == expected_length);
591     if (error_count > kErrorReportLimit) {
592       printf("%u other errors follow.\n", error_count - kErrorReportLimit);
593     }
594     VIXL_CHECK(error_count == 0);
595   }
596   delete[] results;
597 }
598 
599 
TestCmp_Helper(TestFPCmpHelper_t helper,uintptr_t inputs,unsigned inputs_length,uintptr_t results,unsigned reg_size)600 static void TestCmp_Helper(TestFPCmpHelper_t helper,
601                            uintptr_t inputs,
602                            unsigned inputs_length,
603                            uintptr_t results,
604                            unsigned reg_size) {
605   VIXL_ASSERT((reg_size == kDRegSize) || (reg_size == kSRegSize));
606 
607   SETUP();
608   START();
609 
610   // Roll up the loop to keep the code size down.
611   Label loop_n, loop_m;
612 
613   Register out = x0;
614   Register inputs_base = x1;
615   Register length = w2;
616   Register index_n = w3;
617   Register index_m = w4;
618   Register flags = x5;
619 
620   bool double_op = reg_size == kDRegSize;
621   const int index_shift =
622       double_op ? kDRegSizeInBytesLog2 : kSRegSizeInBytesLog2;
623 
624   FPRegister fn = double_op ? d1 : s1;
625   FPRegister fm = double_op ? d2 : s2;
626 
627   __ Mov(out, results);
628   __ Mov(inputs_base, inputs);
629   __ Mov(length, inputs_length);
630 
631   __ Mov(index_n, 0);
632   __ Bind(&loop_n);
633   __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, index_shift));
634 
635   __ Mov(index_m, 0);
636   __ Bind(&loop_m);
637   __ Ldr(fm, MemOperand(inputs_base, index_m, UXTW, index_shift));
638 
639   {
640     SingleEmissionCheckScope guard(&masm);
641     (masm.*helper)(fn, fm);
642   }
643   __ Mrs(flags, NZCV);
644   __ Ubfx(flags, flags, 28, 4);
645   __ Strb(flags, MemOperand(out, 1, PostIndex));
646 
647   __ Add(index_m, index_m, 1);
648   __ Cmp(index_m, inputs_length);
649   __ B(lo, &loop_m);
650 
651   __ Add(index_n, index_n, 1);
652   __ Cmp(index_n, inputs_length);
653   __ B(lo, &loop_n);
654 
655   END();
656   RUN();
657   TEARDOWN();
658 }
659 
660 
661 // Test FP instructions. The inputs[] and expected[] arrays should be arrays of
662 // rawbits representations of doubles or floats. This ensures that exact bit
663 // comparisons can be performed.
664 template <typename T>
TestCmp(const char * name,TestFPCmpHelper_t helper,const T inputs[],unsigned inputs_length,const uint8_t expected[],unsigned expected_length)665 static void TestCmp(const char* name,
666                     TestFPCmpHelper_t helper,
667                     const T inputs[],
668                     unsigned inputs_length,
669                     const uint8_t expected[],
670                     unsigned expected_length) {
671   VIXL_ASSERT(inputs_length > 0);
672 
673   const unsigned results_length = inputs_length * inputs_length;
674   uint8_t* results = new uint8_t[results_length];
675 
676   const unsigned bits = sizeof(T) * 8;
677 
678   TestCmp_Helper(helper,
679                  reinterpret_cast<uintptr_t>(inputs),
680                  inputs_length,
681                  reinterpret_cast<uintptr_t>(results),
682                  bits);
683 
684   if (Test::generate_test_trace()) {
685     // Print the results.
686     printf("const uint8_t kExpected_%s[] = {\n", name);
687     for (unsigned d = 0; d < results_length; d++) {
688       // Each NZCV result only requires 4 bits.
689       VIXL_ASSERT((results[d] & 0xf) == results[d]);
690       printf("  0x%" PRIx8 ",\n", results[d]);
691     }
692     printf("};\n");
693     printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
694   } else {
695     // Check the results.
696     VIXL_CHECK(expected_length == results_length);
697     unsigned error_count = 0;
698     unsigned d = 0;
699     for (unsigned n = 0; n < inputs_length; n++) {
700       for (unsigned m = 0; m < inputs_length; m++, d++) {
701         if (results[d] != expected[d]) {
702           if (++error_count > kErrorReportLimit) continue;
703 
704           printf("%s 0x%0*" PRIx64 ", 0x%0*" PRIx64 " (%s %g %g):\n",
705                  name,
706                  bits / 4,
707                  static_cast<uint64_t>(inputs[n]),
708                  bits / 4,
709                  static_cast<uint64_t>(inputs[m]),
710                  name,
711                  rawbits_to_fp(inputs[n]),
712                  rawbits_to_fp(inputs[m]));
713           printf("  Expected: %c%c%c%c (0x%" PRIx8 ")\n",
714                  (expected[d] & 0x8) ? 'N' : 'n',
715                  (expected[d] & 0x4) ? 'Z' : 'z',
716                  (expected[d] & 0x2) ? 'C' : 'c',
717                  (expected[d] & 0x1) ? 'V' : 'v',
718                  expected[d]);
719           printf("  Found:    %c%c%c%c (0x%" PRIx8 ")\n",
720                  (results[d] & 0x8) ? 'N' : 'n',
721                  (results[d] & 0x4) ? 'Z' : 'z',
722                  (results[d] & 0x2) ? 'C' : 'c',
723                  (results[d] & 0x1) ? 'V' : 'v',
724                  results[d]);
725           printf("\n");
726         }
727       }
728     }
729     VIXL_ASSERT(d == expected_length);
730     if (error_count > kErrorReportLimit) {
731       printf("%u other errors follow.\n", error_count - kErrorReportLimit);
732     }
733     VIXL_CHECK(error_count == 0);
734   }
735   delete[] results;
736 }
737 
738 
TestCmpZero_Helper(TestFPCmpZeroHelper_t helper,uintptr_t inputs,unsigned inputs_length,uintptr_t results,unsigned reg_size)739 static void TestCmpZero_Helper(TestFPCmpZeroHelper_t helper,
740                                uintptr_t inputs,
741                                unsigned inputs_length,
742                                uintptr_t results,
743                                unsigned reg_size) {
744   VIXL_ASSERT((reg_size == kDRegSize) || (reg_size == kSRegSize));
745 
746   SETUP();
747   START();
748 
749   // Roll up the loop to keep the code size down.
750   Label loop_n, loop_m;
751 
752   Register out = x0;
753   Register inputs_base = x1;
754   Register length = w2;
755   Register index_n = w3;
756   Register flags = x4;
757 
758   bool double_op = reg_size == kDRegSize;
759   const int index_shift =
760       double_op ? kDRegSizeInBytesLog2 : kSRegSizeInBytesLog2;
761 
762   FPRegister fn = double_op ? d1 : s1;
763 
764   __ Mov(out, results);
765   __ Mov(inputs_base, inputs);
766   __ Mov(length, inputs_length);
767 
768   __ Mov(index_n, 0);
769   __ Bind(&loop_n);
770   __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, index_shift));
771 
772   {
773     SingleEmissionCheckScope guard(&masm);
774     (masm.*helper)(fn, 0.0);
775   }
776   __ Mrs(flags, NZCV);
777   __ Ubfx(flags, flags, 28, 4);
778   __ Strb(flags, MemOperand(out, 1, PostIndex));
779 
780   __ Add(index_n, index_n, 1);
781   __ Cmp(index_n, inputs_length);
782   __ B(lo, &loop_n);
783 
784   END();
785   RUN();
786   TEARDOWN();
787 }
788 
789 
790 // Test FP instructions. The inputs[] and expected[] arrays should be arrays of
791 // rawbits representations of doubles or floats. This ensures that exact bit
792 // comparisons can be performed.
793 template <typename T>
TestCmpZero(const char * name,TestFPCmpZeroHelper_t helper,const T inputs[],unsigned inputs_length,const uint8_t expected[],unsigned expected_length)794 static void TestCmpZero(const char* name,
795                         TestFPCmpZeroHelper_t helper,
796                         const T inputs[],
797                         unsigned inputs_length,
798                         const uint8_t expected[],
799                         unsigned expected_length) {
800   VIXL_ASSERT(inputs_length > 0);
801 
802   const unsigned results_length = inputs_length;
803   uint8_t* results = new uint8_t[results_length];
804 
805   const unsigned bits = sizeof(T) * 8;
806 
807   TestCmpZero_Helper(helper,
808                      reinterpret_cast<uintptr_t>(inputs),
809                      inputs_length,
810                      reinterpret_cast<uintptr_t>(results),
811                      bits);
812 
813   if (Test::generate_test_trace()) {
814     // Print the results.
815     printf("const uint8_t kExpected_%s[] = {\n", name);
816     for (unsigned d = 0; d < results_length; d++) {
817       // Each NZCV result only requires 4 bits.
818       VIXL_ASSERT((results[d] & 0xf) == results[d]);
819       printf("  0x%" PRIx8 ",\n", results[d]);
820     }
821     printf("};\n");
822     printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
823   } else {
824     // Check the results.
825     VIXL_CHECK(expected_length == results_length);
826     unsigned error_count = 0;
827     unsigned d = 0;
828     for (unsigned n = 0; n < inputs_length; n++, d++) {
829       if (results[d] != expected[d]) {
830         if (++error_count > kErrorReportLimit) continue;
831 
832         printf("%s 0x%0*" PRIx64 ", 0x%0*u (%s %g #0.0):\n",
833                name,
834                bits / 4,
835                static_cast<uint64_t>(inputs[n]),
836                bits / 4,
837                0,
838                name,
839                rawbits_to_fp(inputs[n]));
840         printf("  Expected: %c%c%c%c (0x%" PRIx8 ")\n",
841                (expected[d] & 0x8) ? 'N' : 'n',
842                (expected[d] & 0x4) ? 'Z' : 'z',
843                (expected[d] & 0x2) ? 'C' : 'c',
844                (expected[d] & 0x1) ? 'V' : 'v',
845                expected[d]);
846         printf("  Found:    %c%c%c%c (0x%" PRIx8 ")\n",
847                (results[d] & 0x8) ? 'N' : 'n',
848                (results[d] & 0x4) ? 'Z' : 'z',
849                (results[d] & 0x2) ? 'C' : 'c',
850                (results[d] & 0x1) ? 'V' : 'v',
851                results[d]);
852         printf("\n");
853       }
854     }
855     VIXL_ASSERT(d == expected_length);
856     if (error_count > kErrorReportLimit) {
857       printf("%u other errors follow.\n", error_count - kErrorReportLimit);
858     }
859     VIXL_CHECK(error_count == 0);
860   }
861   delete[] results;
862 }
863 
864 
TestFPToFixed_Helper(TestFPToFixedHelper_t helper,uintptr_t inputs,unsigned inputs_length,uintptr_t results,unsigned d_size,unsigned n_size)865 static void TestFPToFixed_Helper(TestFPToFixedHelper_t helper,
866                                  uintptr_t inputs,
867                                  unsigned inputs_length,
868                                  uintptr_t results,
869                                  unsigned d_size,
870                                  unsigned n_size) {
871   VIXL_ASSERT((d_size == kXRegSize) || (d_size == kWRegSize));
872   VIXL_ASSERT((n_size == kDRegSize) || (n_size == kSRegSize));
873 
874   SETUP();
875   START();
876 
877   // Roll up the loop to keep the code size down.
878   Label loop_n;
879 
880   Register out = x0;
881   Register inputs_base = x1;
882   Register length = w2;
883   Register index_n = w3;
884 
885   const int n_index_shift =
886       (n_size == kDRegSize) ? kDRegSizeInBytesLog2 : kSRegSizeInBytesLog2;
887 
888   Register rd = (d_size == kXRegSize) ? Register(x10) : Register(w10);
889   FPRegister fn = (n_size == kDRegSize) ? d1 : s1;
890 
891   __ Mov(out, results);
892   __ Mov(inputs_base, inputs);
893   __ Mov(length, inputs_length);
894 
895   __ Mov(index_n, 0);
896   __ Bind(&loop_n);
897   __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, n_index_shift));
898 
899   for (unsigned fbits = 0; fbits <= d_size; ++fbits) {
900     {
901       SingleEmissionCheckScope guard(&masm);
902       (masm.*helper)(rd, fn, fbits);
903     }
904     __ Str(rd, MemOperand(out, rd.GetSizeInBytes(), PostIndex));
905   }
906 
907   __ Add(index_n, index_n, 1);
908   __ Cmp(index_n, inputs_length);
909   __ B(lo, &loop_n);
910 
911   END();
912   RUN();
913   TEARDOWN();
914 }
915 
916 
TestFPToInt_Helper(TestFPToIntHelper_t helper,uintptr_t inputs,unsigned inputs_length,uintptr_t results,unsigned d_size,unsigned n_size)917 static void TestFPToInt_Helper(TestFPToIntHelper_t helper,
918                                uintptr_t inputs,
919                                unsigned inputs_length,
920                                uintptr_t results,
921                                unsigned d_size,
922                                unsigned n_size) {
923   VIXL_ASSERT((d_size == kXRegSize) || (d_size == kWRegSize));
924   VIXL_ASSERT((n_size == kDRegSize) || (n_size == kSRegSize));
925 
926   SETUP();
927   START();
928 
929   // Roll up the loop to keep the code size down.
930   Label loop_n;
931 
932   Register out = x0;
933   Register inputs_base = x1;
934   Register length = w2;
935   Register index_n = w3;
936 
937   const int n_index_shift =
938       (n_size == kDRegSize) ? kDRegSizeInBytesLog2 : kSRegSizeInBytesLog2;
939 
940   Register rd = (d_size == kXRegSize) ? Register(x10) : Register(w10);
941   FPRegister fn = (n_size == kDRegSize) ? d1 : s1;
942 
943   __ Mov(out, results);
944   __ Mov(inputs_base, inputs);
945   __ Mov(length, inputs_length);
946 
947   __ Mov(index_n, 0);
948   __ Bind(&loop_n);
949   __ Ldr(fn, MemOperand(inputs_base, index_n, UXTW, n_index_shift));
950 
951   {
952     SingleEmissionCheckScope guard(&masm);
953     (masm.*helper)(rd, fn);
954   }
955   __ Str(rd, MemOperand(out, rd.GetSizeInBytes(), PostIndex));
956 
957   __ Add(index_n, index_n, 1);
958   __ Cmp(index_n, inputs_length);
959   __ B(lo, &loop_n);
960 
961   END();
962   RUN();
963   TEARDOWN();
964 }
965 
966 
967 // Test FP instructions.
968 //  - The inputs[] array should be an array of rawbits representations of
969 //    doubles or floats. This ensures that exact bit comparisons can be
970 //    performed.
971 //  - The expected[] array should be an array of signed integers.
972 template <typename Tn, typename Td>
TestFPToS(const char * name,TestFPToIntHelper_t helper,const Tn inputs[],unsigned inputs_length,const Td expected[],unsigned expected_length)973 static void TestFPToS(const char* name,
974                       TestFPToIntHelper_t helper,
975                       const Tn inputs[],
976                       unsigned inputs_length,
977                       const Td expected[],
978                       unsigned expected_length) {
979   VIXL_ASSERT(inputs_length > 0);
980 
981   const unsigned results_length = inputs_length;
982   Td* results = new Td[results_length];
983 
984   const unsigned d_bits = sizeof(Td) * 8;
985   const unsigned n_bits = sizeof(Tn) * 8;
986 
987   TestFPToInt_Helper(helper,
988                      reinterpret_cast<uintptr_t>(inputs),
989                      inputs_length,
990                      reinterpret_cast<uintptr_t>(results),
991                      d_bits,
992                      n_bits);
993 
994   if (Test::generate_test_trace()) {
995     // Print the results.
996     printf("const int%u_t kExpected_%s[] = {\n", d_bits, name);
997     // There is no simple C++ literal for INT*_MIN that doesn't produce
998     // warnings, so we use an appropriate constant in that case instead.
999     // Deriving int_d_min in this way (rather than just checking INT64_MIN and
1000     // the like) avoids warnings about comparing values with differing ranges.
1001     const int64_t int_d_max = (UINT64_C(1) << (d_bits - 1)) - 1;
1002     const int64_t int_d_min = -(int_d_max)-1;
1003     for (unsigned d = 0; d < results_length; d++) {
1004       if (results[d] == int_d_min) {
1005         printf("  -INT%u_C(%" PRId64 ") - 1,\n", d_bits, int_d_max);
1006       } else {
1007         // Some constants (such as those between INT32_MAX and UINT32_MAX)
1008         // trigger compiler warnings. To avoid these warnings, use an
1009         // appropriate macro to make the type explicit.
1010         int64_t result_int64 = static_cast<int64_t>(results[d]);
1011         if (result_int64 >= 0) {
1012           printf("  INT%u_C(%" PRId64 "),\n", d_bits, result_int64);
1013         } else {
1014           printf("  -INT%u_C(%" PRId64 "),\n", d_bits, -result_int64);
1015         }
1016       }
1017     }
1018     printf("};\n");
1019     printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
1020   } else {
1021     // Check the results.
1022     VIXL_CHECK(expected_length == results_length);
1023     unsigned error_count = 0;
1024     unsigned d = 0;
1025     for (unsigned n = 0; n < inputs_length; n++, d++) {
1026       if (results[d] != expected[d]) {
1027         if (++error_count > kErrorReportLimit) continue;
1028 
1029         printf("%s 0x%0*" PRIx64 " (%s %g):\n",
1030                name,
1031                n_bits / 4,
1032                static_cast<uint64_t>(inputs[n]),
1033                name,
1034                rawbits_to_fp(inputs[n]));
1035         printf("  Expected: 0x%0*" PRIx64 " (%" PRId64 ")\n",
1036                d_bits / 4,
1037                static_cast<uint64_t>(expected[d]),
1038                static_cast<int64_t>(expected[d]));
1039         printf("  Found:    0x%0*" PRIx64 " (%" PRId64 ")\n",
1040                d_bits / 4,
1041                static_cast<uint64_t>(results[d]),
1042                static_cast<int64_t>(results[d]));
1043         printf("\n");
1044       }
1045     }
1046     VIXL_ASSERT(d == expected_length);
1047     if (error_count > kErrorReportLimit) {
1048       printf("%u other errors follow.\n", error_count - kErrorReportLimit);
1049     }
1050     VIXL_CHECK(error_count == 0);
1051   }
1052   delete[] results;
1053 }
1054 
1055 
1056 // Test FP instructions.
1057 //  - The inputs[] array should be an array of rawbits representations of
1058 //    doubles or floats. This ensures that exact bit comparisons can be
1059 //    performed.
1060 //  - The expected[] array should be an array of unsigned integers.
1061 template <typename Tn, typename Td>
TestFPToU(const char * name,TestFPToIntHelper_t helper,const Tn inputs[],unsigned inputs_length,const Td expected[],unsigned expected_length)1062 static void TestFPToU(const char* name,
1063                       TestFPToIntHelper_t helper,
1064                       const Tn inputs[],
1065                       unsigned inputs_length,
1066                       const Td expected[],
1067                       unsigned expected_length) {
1068   VIXL_ASSERT(inputs_length > 0);
1069 
1070   const unsigned results_length = inputs_length;
1071   Td* results = new Td[results_length];
1072 
1073   const unsigned d_bits = sizeof(Td) * 8;
1074   const unsigned n_bits = sizeof(Tn) * 8;
1075 
1076   TestFPToInt_Helper(helper,
1077                      reinterpret_cast<uintptr_t>(inputs),
1078                      inputs_length,
1079                      reinterpret_cast<uintptr_t>(results),
1080                      d_bits,
1081                      n_bits);
1082 
1083   if (Test::generate_test_trace()) {
1084     // Print the results.
1085     printf("const uint%u_t kExpected_%s[] = {\n", d_bits, name);
1086     for (unsigned d = 0; d < results_length; d++) {
1087       printf("  %" PRIu64 "u,\n", static_cast<uint64_t>(results[d]));
1088     }
1089     printf("};\n");
1090     printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
1091   } else {
1092     // Check the results.
1093     VIXL_CHECK(expected_length == results_length);
1094     unsigned error_count = 0;
1095     unsigned d = 0;
1096     for (unsigned n = 0; n < inputs_length; n++, d++) {
1097       if (results[d] != expected[d]) {
1098         if (++error_count > kErrorReportLimit) continue;
1099 
1100         printf("%s 0x%0*" PRIx64 " (%s %g):\n",
1101                name,
1102                n_bits / 4,
1103                static_cast<uint64_t>(inputs[n]),
1104                name,
1105                rawbits_to_fp(inputs[n]));
1106         printf("  Expected: 0x%0*" PRIx64 " (%" PRIu64 ")\n",
1107                d_bits / 4,
1108                static_cast<uint64_t>(expected[d]),
1109                static_cast<uint64_t>(expected[d]));
1110         printf("  Found:    0x%0*" PRIx64 " (%" PRIu64 ")\n",
1111                d_bits / 4,
1112                static_cast<uint64_t>(results[d]),
1113                static_cast<uint64_t>(results[d]));
1114         printf("\n");
1115       }
1116     }
1117     VIXL_ASSERT(d == expected_length);
1118     if (error_count > kErrorReportLimit) {
1119       printf("%u other errors follow.\n", error_count - kErrorReportLimit);
1120     }
1121     VIXL_CHECK(error_count == 0);
1122   }
1123   delete[] results;
1124 }
1125 
1126 
1127 // Test FP instructions.
1128 //  - The inputs[] array should be an array of rawbits representations of
1129 //    doubles or floats. This ensures that exact bit comparisons can be
1130 //    performed.
1131 //  - The expected[] array should be an array of signed integers.
1132 template <typename Tn, typename Td>
TestFPToFixedS(const char * name,TestFPToFixedHelper_t helper,const Tn inputs[],unsigned inputs_length,const Td expected[],unsigned expected_length)1133 static void TestFPToFixedS(const char* name,
1134                            TestFPToFixedHelper_t helper,
1135                            const Tn inputs[],
1136                            unsigned inputs_length,
1137                            const Td expected[],
1138                            unsigned expected_length) {
1139   VIXL_ASSERT(inputs_length > 0);
1140 
1141   const unsigned d_bits = sizeof(Td) * 8;
1142   const unsigned n_bits = sizeof(Tn) * 8;
1143 
1144   const unsigned results_length = inputs_length * (d_bits + 1);
1145   Td* results = new Td[results_length];
1146 
1147   TestFPToFixed_Helper(helper,
1148                        reinterpret_cast<uintptr_t>(inputs),
1149                        inputs_length,
1150                        reinterpret_cast<uintptr_t>(results),
1151                        d_bits,
1152                        n_bits);
1153 
1154   if (Test::generate_test_trace()) {
1155     // Print the results.
1156     printf("const int%u_t kExpected_%s[] = {\n", d_bits, name);
1157     // There is no simple C++ literal for INT*_MIN that doesn't produce
1158     // warnings, so we use an appropriate constant in that case instead.
1159     // Deriving int_d_min in this way (rather than just checking INT64_MIN and
1160     // the like) avoids warnings about comparing values with differing ranges.
1161     const int64_t int_d_max = (UINT64_C(1) << (d_bits - 1)) - 1;
1162     const int64_t int_d_min = -(int_d_max)-1;
1163     for (unsigned d = 0; d < results_length; d++) {
1164       if (results[d] == int_d_min) {
1165         printf("  -INT%u_C(%" PRId64 ") - 1,\n", d_bits, int_d_max);
1166       } else {
1167         // Some constants (such as those between INT32_MAX and UINT32_MAX)
1168         // trigger compiler warnings. To avoid these warnings, use an
1169         // appropriate macro to make the type explicit.
1170         int64_t result_int64 = static_cast<int64_t>(results[d]);
1171         if (result_int64 >= 0) {
1172           printf("  INT%u_C(%" PRId64 "),\n", d_bits, result_int64);
1173         } else {
1174           printf("  -INT%u_C(%" PRId64 "),\n", d_bits, -result_int64);
1175         }
1176       }
1177     }
1178     printf("};\n");
1179     printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
1180   } else {
1181     // Check the results.
1182     VIXL_CHECK(expected_length == results_length);
1183     unsigned error_count = 0;
1184     unsigned d = 0;
1185     for (unsigned n = 0; n < inputs_length; n++) {
1186       for (unsigned fbits = 0; fbits <= d_bits; ++fbits, d++) {
1187         if (results[d] != expected[d]) {
1188           if (++error_count > kErrorReportLimit) continue;
1189 
1190           printf("%s 0x%0*" PRIx64 " #%d (%s %g #%d):\n",
1191                  name,
1192                  n_bits / 4,
1193                  static_cast<uint64_t>(inputs[n]),
1194                  fbits,
1195                  name,
1196                  rawbits_to_fp(inputs[n]),
1197                  fbits);
1198           printf("  Expected: 0x%0*" PRIx64 " (%" PRId64 ")\n",
1199                  d_bits / 4,
1200                  static_cast<uint64_t>(expected[d]),
1201                  static_cast<int64_t>(expected[d]));
1202           printf("  Found:    0x%0*" PRIx64 " (%" PRId64 ")\n",
1203                  d_bits / 4,
1204                  static_cast<uint64_t>(results[d]),
1205                  static_cast<int64_t>(results[d]));
1206           printf("\n");
1207         }
1208       }
1209     }
1210     VIXL_ASSERT(d == expected_length);
1211     if (error_count > kErrorReportLimit) {
1212       printf("%u other errors follow.\n", error_count - kErrorReportLimit);
1213     }
1214     VIXL_CHECK(error_count == 0);
1215   }
1216   delete[] results;
1217 }
1218 
1219 
1220 // Test FP instructions.
1221 //  - The inputs[] array should be an array of rawbits representations of
1222 //    doubles or floats. This ensures that exact bit comparisons can be
1223 //    performed.
1224 //  - The expected[] array should be an array of unsigned integers.
1225 template <typename Tn, typename Td>
TestFPToFixedU(const char * name,TestFPToFixedHelper_t helper,const Tn inputs[],unsigned inputs_length,const Td expected[],unsigned expected_length)1226 static void TestFPToFixedU(const char* name,
1227                            TestFPToFixedHelper_t helper,
1228                            const Tn inputs[],
1229                            unsigned inputs_length,
1230                            const Td expected[],
1231                            unsigned expected_length) {
1232   VIXL_ASSERT(inputs_length > 0);
1233 
1234   const unsigned d_bits = sizeof(Td) * 8;
1235   const unsigned n_bits = sizeof(Tn) * 8;
1236 
1237   const unsigned results_length = inputs_length * (d_bits + 1);
1238   Td* results = new Td[results_length];
1239 
1240   TestFPToFixed_Helper(helper,
1241                        reinterpret_cast<uintptr_t>(inputs),
1242                        inputs_length,
1243                        reinterpret_cast<uintptr_t>(results),
1244                        d_bits,
1245                        n_bits);
1246 
1247   if (Test::generate_test_trace()) {
1248     // Print the results.
1249     printf("const uint%u_t kExpected_%s[] = {\n", d_bits, name);
1250     for (unsigned d = 0; d < results_length; d++) {
1251       printf("  %" PRIu64 "u,\n", static_cast<uint64_t>(results[d]));
1252     }
1253     printf("};\n");
1254     printf("const unsigned kExpectedCount_%s = %u;\n", name, results_length);
1255   } else {
1256     // Check the results.
1257     VIXL_CHECK(expected_length == results_length);
1258     unsigned error_count = 0;
1259     unsigned d = 0;
1260     for (unsigned n = 0; n < inputs_length; n++) {
1261       for (unsigned fbits = 0; fbits <= d_bits; ++fbits, d++) {
1262         if (results[d] != expected[d]) {
1263           if (++error_count > kErrorReportLimit) continue;
1264 
1265           printf("%s 0x%0*" PRIx64 " #%d (%s %g #%d):\n",
1266                  name,
1267                  n_bits / 4,
1268                  static_cast<uint64_t>(inputs[n]),
1269                  fbits,
1270                  name,
1271                  rawbits_to_fp(inputs[n]),
1272                  fbits);
1273           printf("  Expected: 0x%0*" PRIx64 " (%" PRIu64 ")\n",
1274                  d_bits / 4,
1275                  static_cast<uint64_t>(expected[d]),
1276                  static_cast<uint64_t>(expected[d]));
1277           printf("  Found:    0x%0*" PRIx64 " (%" PRIu64 ")\n",
1278                  d_bits / 4,
1279                  static_cast<uint64_t>(results[d]),
1280                  static_cast<uint64_t>(results[d]));
1281           printf("\n");
1282         }
1283       }
1284     }
1285     VIXL_ASSERT(d == expected_length);
1286     if (error_count > kErrorReportLimit) {
1287       printf("%u other errors follow.\n", error_count - kErrorReportLimit);
1288     }
1289     VIXL_CHECK(error_count == 0);
1290   }
1291   delete[] results;
1292 }
1293 
1294 
1295 // ==== Tests for instructions of the form <INST> VReg, VReg. ====
1296 
1297 
Test1OpNEON_Helper(Test1OpNEONHelper_t helper,uintptr_t inputs_n,unsigned inputs_n_length,uintptr_t results,VectorFormat vd_form,VectorFormat vn_form)1298 static void Test1OpNEON_Helper(Test1OpNEONHelper_t helper,
1299                                uintptr_t inputs_n,
1300                                unsigned inputs_n_length,
1301                                uintptr_t results,
1302                                VectorFormat vd_form,
1303                                VectorFormat vn_form) {
1304   VIXL_ASSERT(vd_form != kFormatUndefined);
1305   VIXL_ASSERT(vn_form != kFormatUndefined);
1306 
1307   SETUP();
1308   START();
1309 
1310   // Roll up the loop to keep the code size down.
1311   Label loop_n;
1312 
1313   Register out = x0;
1314   Register inputs_n_base = x1;
1315   Register inputs_n_last_16bytes = x3;
1316   Register index_n = x5;
1317 
1318   // TODO: Refactor duplicate definitions below with a VRegister::As() routine.
1319   const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form);
1320   const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
1321 
1322   const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form);
1323   const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
1324   const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
1325   const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form);
1326   const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form);
1327 
1328 
1329   // These will be either a D- or a Q-register form, with a single lane
1330   // (for use in scalar load and store operations).
1331   VRegister vd = VRegister(0, vd_bits);
1332   VRegister vn = v1.V16B();
1333   VRegister vntmp = v3.V16B();
1334 
1335   // These will have the correct format for use when calling 'helper'.
1336   VRegister vd_helper = VRegister(0, vd_bits, vd_lane_count);
1337   VRegister vn_helper = VRegister(1, vn_bits, vn_lane_count);
1338 
1339   // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'.
1340   VRegister vntmp_single = VRegister(3, vn_lane_bits);
1341 
1342   __ Mov(out, results);
1343 
1344   __ Mov(inputs_n_base, inputs_n);
1345   __ Mov(inputs_n_last_16bytes,
1346          inputs_n + (vn_lane_bytes * inputs_n_length) - 16);
1347 
1348   __ Ldr(vn, MemOperand(inputs_n_last_16bytes));
1349 
1350   __ Mov(index_n, 0);
1351   __ Bind(&loop_n);
1352 
1353   __ Ldr(vntmp_single,
1354          MemOperand(inputs_n_base, index_n, LSL, vn_lane_bytes_log2));
1355   __ Ext(vn, vn, vntmp, vn_lane_bytes);
1356 
1357   // Set the destination to zero.
1358   // TODO: Setting the destination to values other than zero
1359   //       might be a better test for instructions such as sqxtn2
1360   //       which may leave parts of V registers unchanged.
1361   __ Movi(vd.V16B(), 0);
1362 
1363   {
1364     SingleEmissionCheckScope guard(&masm);
1365     (masm.*helper)(vd_helper, vn_helper);
1366   }
1367   __ Str(vd, MemOperand(out, vd.GetSizeInBytes(), PostIndex));
1368 
1369   __ Add(index_n, index_n, 1);
1370   __ Cmp(index_n, inputs_n_length);
1371   __ B(lo, &loop_n);
1372 
1373   END();
1374   RUN();
1375   TEARDOWN();
1376 }
1377 
1378 
1379 // Test NEON instructions. The inputs_*[] and expected[] arrays should be
1380 // arrays of rawbit representation of input values. This ensures that
1381 // exact bit comparisons can be performed.
1382 template <typename Td, typename Tn>
Test1OpNEON(const char * name,Test1OpNEONHelper_t helper,const Tn inputs_n[],unsigned inputs_n_length,const Td expected[],unsigned expected_length,VectorFormat vd_form,VectorFormat vn_form)1383 static void Test1OpNEON(const char* name,
1384                         Test1OpNEONHelper_t helper,
1385                         const Tn inputs_n[],
1386                         unsigned inputs_n_length,
1387                         const Td expected[],
1388                         unsigned expected_length,
1389                         VectorFormat vd_form,
1390                         VectorFormat vn_form) {
1391   VIXL_ASSERT(inputs_n_length > 0);
1392 
1393   const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
1394   const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
1395   const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
1396 
1397   const unsigned results_length = inputs_n_length;
1398   Td* results = new Td[results_length * vd_lane_count];
1399   const unsigned lane_bit = sizeof(Td) * 8;
1400   const unsigned lane_len_in_hex = MaxHexCharCount<Td, Tn>();
1401 
1402   Test1OpNEON_Helper(helper,
1403                      reinterpret_cast<uintptr_t>(inputs_n),
1404                      inputs_n_length,
1405                      reinterpret_cast<uintptr_t>(results),
1406                      vd_form,
1407                      vn_form);
1408 
1409   if (Test::generate_test_trace()) {
1410     // Print the results.
1411     printf("const uint%u_t kExpected_NEON_%s[] = {\n", lane_bit, name);
1412     for (unsigned iteration = 0; iteration < results_length; iteration++) {
1413       printf(" ");
1414       // Output a separate result for each element of the result vector.
1415       for (unsigned lane = 0; lane < vd_lane_count; lane++) {
1416         unsigned index = lane + (iteration * vd_lane_count);
1417         printf(" 0x%0*" PRIx64 ",",
1418                lane_len_in_hex,
1419                static_cast<uint64_t>(results[index]));
1420       }
1421       printf("\n");
1422     }
1423 
1424     printf("};\n");
1425     printf("const unsigned kExpectedCount_NEON_%s = %u;\n",
1426            name,
1427            results_length);
1428   } else {
1429     // Check the results.
1430     VIXL_CHECK(expected_length == results_length);
1431     unsigned error_count = 0;
1432     unsigned d = 0;
1433     const char* padding = "                    ";
1434     VIXL_ASSERT(strlen(padding) >= (lane_len_in_hex + 1));
1435     for (unsigned n = 0; n < inputs_n_length; n++, d++) {
1436       bool error_in_vector = false;
1437 
1438       for (unsigned lane = 0; lane < vd_lane_count; lane++) {
1439         unsigned output_index = (n * vd_lane_count) + lane;
1440 
1441         if (results[output_index] != expected[output_index]) {
1442           error_in_vector = true;
1443           break;
1444         }
1445       }
1446 
1447       if (error_in_vector && (++error_count <= kErrorReportLimit)) {
1448         printf("%s\n", name);
1449         printf(" Vn%.*s| Vd%.*s| Expected\n",
1450                lane_len_in_hex + 1,
1451                padding,
1452                lane_len_in_hex + 1,
1453                padding);
1454 
1455         const unsigned first_index_n =
1456             inputs_n_length - (16 / vn_lane_bytes) + n + 1;
1457 
1458         for (unsigned lane = 0; lane < std::max(vd_lane_count, vn_lane_count);
1459              lane++) {
1460           unsigned output_index = (n * vd_lane_count) + lane;
1461           unsigned input_index_n = (first_index_n + lane) % inputs_n_length;
1462 
1463           printf("%c0x%0*" PRIx64 " | 0x%0*" PRIx64
1464                  " "
1465                  "| 0x%0*" PRIx64 "\n",
1466                  results[output_index] != expected[output_index] ? '*' : ' ',
1467                  lane_len_in_hex,
1468                  static_cast<uint64_t>(inputs_n[input_index_n]),
1469                  lane_len_in_hex,
1470                  static_cast<uint64_t>(results[output_index]),
1471                  lane_len_in_hex,
1472                  static_cast<uint64_t>(expected[output_index]));
1473         }
1474       }
1475     }
1476     VIXL_ASSERT(d == expected_length);
1477     if (error_count > kErrorReportLimit) {
1478       printf("%u other errors follow.\n", error_count - kErrorReportLimit);
1479     }
1480     VIXL_CHECK(error_count == 0);
1481   }
1482   delete[] results;
1483 }
1484 
1485 
1486 // ==== Tests for instructions of the form <mnemonic> <V><d>, <Vn>.<T> ====
1487 //      where <V> is one of B, H, S or D registers.
1488 //      e.g. saddlv H1, v0.8B
1489 
1490 // TODO: Change tests to store all lanes of the resulting V register.
1491 //       Some tests store all 128 bits of the resulting V register to
1492 //       check the simulator's behaviour on the rest of the register.
1493 //       This is better than storing the affected lanes only.
1494 //       Change any tests such as the 'Across' template to do the same.
1495 
Test1OpAcrossNEON_Helper(Test1OpNEONHelper_t helper,uintptr_t inputs_n,unsigned inputs_n_length,uintptr_t results,VectorFormat vd_form,VectorFormat vn_form)1496 static void Test1OpAcrossNEON_Helper(Test1OpNEONHelper_t helper,
1497                                      uintptr_t inputs_n,
1498                                      unsigned inputs_n_length,
1499                                      uintptr_t results,
1500                                      VectorFormat vd_form,
1501                                      VectorFormat vn_form) {
1502   VIXL_ASSERT(vd_form != kFormatUndefined);
1503   VIXL_ASSERT(vn_form != kFormatUndefined);
1504 
1505   SETUP();
1506   START();
1507 
1508   // Roll up the loop to keep the code size down.
1509   Label loop_n;
1510 
1511   Register out = x0;
1512   Register inputs_n_base = x1;
1513   Register inputs_n_last_vector = x3;
1514   Register index_n = x5;
1515 
1516   // TODO: Refactor duplicate definitions below with a VRegister::As() routine.
1517   const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form);
1518   const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form);
1519   const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
1520   const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
1521   const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form);
1522   const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form);
1523 
1524   // Test destructive operations by (arbitrarily) using the same register for
1525   // B and S lane sizes.
1526   bool destructive = (vd_bits == kBRegSize) || (vd_bits == kSRegSize);
1527 
1528   // Create two aliases for v0; the first is the destination for the tested
1529   // instruction, the second, the whole Q register to check the results.
1530   VRegister vd = VRegister(0, vd_bits);
1531   VRegister vdstr = VRegister(0, kQRegSize);
1532 
1533   VRegister vn = VRegister(1, vn_bits);
1534   VRegister vntmp = VRegister(3, vn_bits);
1535 
1536   // These will have the correct format for use when calling 'helper'.
1537   VRegister vd_helper = VRegister(0, vn_bits, vn_lane_count);
1538   VRegister vn_helper = VRegister(1, vn_bits, vn_lane_count);
1539 
1540   // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'.
1541   VRegister vntmp_single = VRegister(3, vn_lane_bits);
1542 
1543   // Same registers for use in the 'ext' instructions.
1544   VRegister vn_ext = (kDRegSize == vn_bits) ? vn.V8B() : vn.V16B();
1545   VRegister vntmp_ext = (kDRegSize == vn_bits) ? vntmp.V8B() : vntmp.V16B();
1546 
1547   __ Mov(out, results);
1548 
1549   __ Mov(inputs_n_base, inputs_n);
1550   __ Mov(inputs_n_last_vector,
1551          inputs_n + vn_lane_bytes * (inputs_n_length - vn_lane_count));
1552 
1553   __ Ldr(vn, MemOperand(inputs_n_last_vector));
1554 
1555   __ Mov(index_n, 0);
1556   __ Bind(&loop_n);
1557 
1558   __ Ldr(vntmp_single,
1559          MemOperand(inputs_n_base, index_n, LSL, vn_lane_bytes_log2));
1560   __ Ext(vn_ext, vn_ext, vntmp_ext, vn_lane_bytes);
1561 
1562   if (destructive) {
1563     __ Mov(vd_helper, vn_helper);
1564     SingleEmissionCheckScope guard(&masm);
1565     (masm.*helper)(vd, vd_helper);
1566   } else {
1567     SingleEmissionCheckScope guard(&masm);
1568     (masm.*helper)(vd, vn_helper);
1569   }
1570 
1571   __ Str(vdstr, MemOperand(out, kQRegSizeInBytes, PostIndex));
1572 
1573   __ Add(index_n, index_n, 1);
1574   __ Cmp(index_n, inputs_n_length);
1575   __ B(lo, &loop_n);
1576 
1577   END();
1578   RUN();
1579   TEARDOWN();
1580 }
1581 
1582 // Test NEON instructions. The inputs_*[] and expected[] arrays should be
1583 // arrays of rawbit representation of input values. This ensures that
1584 // exact bit comparisons can be performed.
1585 template <typename Td, typename Tn>
Test1OpAcrossNEON(const char * name,Test1OpNEONHelper_t helper,const Tn inputs_n[],unsigned inputs_n_length,const Td expected[],unsigned expected_length,VectorFormat vd_form,VectorFormat vn_form)1586 static void Test1OpAcrossNEON(const char* name,
1587                               Test1OpNEONHelper_t helper,
1588                               const Tn inputs_n[],
1589                               unsigned inputs_n_length,
1590                               const Td expected[],
1591                               unsigned expected_length,
1592                               VectorFormat vd_form,
1593                               VectorFormat vn_form) {
1594   VIXL_ASSERT(inputs_n_length > 0);
1595 
1596   const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
1597   const unsigned vd_lanes_per_q = MaxLaneCountFromFormat(vd_form);
1598 
1599   const unsigned results_length = inputs_n_length;
1600   Td* results = new Td[results_length * vd_lanes_per_q];
1601   const unsigned lane_bit = sizeof(Td) * 8;
1602   const unsigned lane_len_in_hex = MaxHexCharCount<Td, Tn>();
1603 
1604   Test1OpAcrossNEON_Helper(helper,
1605                            reinterpret_cast<uintptr_t>(inputs_n),
1606                            inputs_n_length,
1607                            reinterpret_cast<uintptr_t>(results),
1608                            vd_form,
1609                            vn_form);
1610 
1611   if (Test::generate_test_trace()) {
1612     // Print the results.
1613     printf("const uint%u_t kExpected_NEON_%s[] = {\n", lane_bit, name);
1614     for (unsigned iteration = 0; iteration < results_length; iteration++) {
1615       printf(" ");
1616       // Output a separate result for each element of the result vector.
1617       for (unsigned lane = 0; lane < vd_lane_count; lane++) {
1618         unsigned index = lane + (iteration * vd_lane_count);
1619         printf(" 0x%0*" PRIx64 ",",
1620                lane_len_in_hex,
1621                static_cast<uint64_t>(results[index]));
1622       }
1623       printf("\n");
1624     }
1625 
1626     printf("};\n");
1627     printf("const unsigned kExpectedCount_NEON_%s = %u;\n",
1628            name,
1629            results_length);
1630   } else {
1631     // Check the results.
1632     VIXL_CHECK(expected_length == results_length);
1633     unsigned error_count = 0;
1634     unsigned d = 0;
1635     const char* padding = "                    ";
1636     VIXL_ASSERT(strlen(padding) >= (lane_len_in_hex + 1));
1637     for (unsigned n = 0; n < inputs_n_length; n++, d++) {
1638       bool error_in_vector = false;
1639 
1640       for (unsigned lane = 0; lane < vd_lane_count; lane++) {
1641         unsigned expected_index = (n * vd_lane_count) + lane;
1642         unsigned results_index = (n * vd_lanes_per_q) + lane;
1643 
1644         if (results[results_index] != expected[expected_index]) {
1645           error_in_vector = true;
1646           break;
1647         }
1648       }
1649 
1650       // For across operations, the remaining lanes should be zero.
1651       for (unsigned lane = vd_lane_count; lane < vd_lanes_per_q; lane++) {
1652         unsigned results_index = (n * vd_lanes_per_q) + lane;
1653         if (results[results_index] != 0) {
1654           error_in_vector = true;
1655           break;
1656         }
1657       }
1658 
1659       if (error_in_vector && (++error_count <= kErrorReportLimit)) {
1660         const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
1661 
1662         printf("%s\n", name);
1663         printf(" Vn%.*s| Vd%.*s| Expected\n",
1664                lane_len_in_hex + 1,
1665                padding,
1666                lane_len_in_hex + 1,
1667                padding);
1668 
1669         // TODO: In case of an error, all tests print out as many elements as
1670         //       there are lanes in the output or input vectors. This way
1671         //       the viewer can read all the values that were needed for the
1672         //       operation but the output contains also unnecessary values.
1673         //       These prints can be improved according to the arguments
1674         //       passed to test functions.
1675         //       This output for the 'Across' category has the required
1676         //       modifications.
1677         for (unsigned lane = 0; lane < vn_lane_count; lane++) {
1678           unsigned results_index =
1679               (n * vd_lanes_per_q) + ((vn_lane_count - 1) - lane);
1680           unsigned input_index_n =
1681               (inputs_n_length - vn_lane_count + n + 1 + lane) %
1682               inputs_n_length;
1683 
1684           Td expect = 0;
1685           if ((vn_lane_count - 1) == lane) {
1686             // This is the last lane to be printed, ie. the least-significant
1687             // lane, so use the expected value; any other lane should be zero.
1688             unsigned expected_index = n * vd_lane_count;
1689             expect = expected[expected_index];
1690           }
1691           printf("%c0x%0*" PRIx64 " | 0x%0*" PRIx64 " | 0x%0*" PRIx64 "\n",
1692                  results[results_index] != expect ? '*' : ' ',
1693                  lane_len_in_hex,
1694                  static_cast<uint64_t>(inputs_n[input_index_n]),
1695                  lane_len_in_hex,
1696                  static_cast<uint64_t>(results[results_index]),
1697                  lane_len_in_hex,
1698                  static_cast<uint64_t>(expect));
1699         }
1700       }
1701     }
1702     VIXL_ASSERT(d == expected_length);
1703     if (error_count > kErrorReportLimit) {
1704       printf("%u other errors follow.\n", error_count - kErrorReportLimit);
1705     }
1706     VIXL_CHECK(error_count == 0);
1707   }
1708   delete[] results;
1709 }
1710 
1711 
1712 // ==== Tests for instructions of the form <INST> VReg, VReg, VReg. ====
1713 
1714 // TODO: Iterate over inputs_d once the traces file is split.
1715 
Test2OpNEON_Helper(Test2OpNEONHelper_t helper,uintptr_t inputs_d,uintptr_t inputs_n,unsigned inputs_n_length,uintptr_t inputs_m,unsigned inputs_m_length,uintptr_t results,VectorFormat vd_form,VectorFormat vn_form,VectorFormat vm_form)1716 static void Test2OpNEON_Helper(Test2OpNEONHelper_t helper,
1717                                uintptr_t inputs_d,
1718                                uintptr_t inputs_n,
1719                                unsigned inputs_n_length,
1720                                uintptr_t inputs_m,
1721                                unsigned inputs_m_length,
1722                                uintptr_t results,
1723                                VectorFormat vd_form,
1724                                VectorFormat vn_form,
1725                                VectorFormat vm_form) {
1726   VIXL_ASSERT(vd_form != kFormatUndefined);
1727   VIXL_ASSERT(vn_form != kFormatUndefined);
1728   VIXL_ASSERT(vm_form != kFormatUndefined);
1729 
1730   SETUP();
1731   START();
1732 
1733   // Roll up the loop to keep the code size down.
1734   Label loop_n, loop_m;
1735 
1736   Register out = x0;
1737   Register inputs_n_base = x1;
1738   Register inputs_m_base = x2;
1739   Register inputs_d_base = x3;
1740   Register inputs_n_last_16bytes = x4;
1741   Register inputs_m_last_16bytes = x5;
1742   Register index_n = x6;
1743   Register index_m = x7;
1744 
1745   // TODO: Refactor duplicate definitions below with a VRegister::As() routine.
1746   const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form);
1747   const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
1748 
1749   const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form);
1750   const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
1751   const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
1752   const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form);
1753   const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form);
1754 
1755   const unsigned vm_bits = RegisterSizeInBitsFromFormat(vm_form);
1756   const unsigned vm_lane_count = LaneCountFromFormat(vm_form);
1757   const unsigned vm_lane_bytes = LaneSizeInBytesFromFormat(vm_form);
1758   const unsigned vm_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vm_form);
1759   const unsigned vm_lane_bits = LaneSizeInBitsFromFormat(vm_form);
1760 
1761 
1762   // Always load and store 128 bits regardless of the format.
1763   VRegister vd = v0.V16B();
1764   VRegister vn = v1.V16B();
1765   VRegister vm = v2.V16B();
1766   VRegister vntmp = v3.V16B();
1767   VRegister vmtmp = v4.V16B();
1768   VRegister vres = v5.V16B();
1769 
1770   // These will have the correct format for calling the 'helper'.
1771   VRegister vn_helper = VRegister(1, vn_bits, vn_lane_count);
1772   VRegister vm_helper = VRegister(2, vm_bits, vm_lane_count);
1773   VRegister vres_helper = VRegister(5, vd_bits, vd_lane_count);
1774 
1775   // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'.
1776   VRegister vntmp_single = VRegister(3, vn_lane_bits);
1777   VRegister vmtmp_single = VRegister(4, vm_lane_bits);
1778 
1779   __ Mov(out, results);
1780 
1781   __ Mov(inputs_d_base, inputs_d);
1782 
1783   __ Mov(inputs_n_base, inputs_n);
1784   __ Mov(inputs_n_last_16bytes, inputs_n + (inputs_n_length - 16));
1785   __ Mov(inputs_m_base, inputs_m);
1786   __ Mov(inputs_m_last_16bytes, inputs_m + (inputs_m_length - 16));
1787 
1788   __ Ldr(vd, MemOperand(inputs_d_base));
1789   __ Ldr(vn, MemOperand(inputs_n_last_16bytes));
1790   __ Ldr(vm, MemOperand(inputs_m_last_16bytes));
1791 
1792   __ Mov(index_n, 0);
1793   __ Bind(&loop_n);
1794 
1795   __ Ldr(vntmp_single,
1796          MemOperand(inputs_n_base, index_n, LSL, vn_lane_bytes_log2));
1797   __ Ext(vn, vn, vntmp, vn_lane_bytes);
1798 
1799   __ Mov(index_m, 0);
1800   __ Bind(&loop_m);
1801 
1802   __ Ldr(vmtmp_single,
1803          MemOperand(inputs_m_base, index_m, LSL, vm_lane_bytes_log2));
1804   __ Ext(vm, vm, vmtmp, vm_lane_bytes);
1805 
1806   __ Mov(vres, vd);
1807   {
1808     SingleEmissionCheckScope guard(&masm);
1809     (masm.*helper)(vres_helper, vn_helper, vm_helper);
1810   }
1811   __ Str(vres, MemOperand(out, vd.GetSizeInBytes(), PostIndex));
1812 
1813   __ Add(index_m, index_m, 1);
1814   __ Cmp(index_m, inputs_m_length);
1815   __ B(lo, &loop_m);
1816 
1817   __ Add(index_n, index_n, 1);
1818   __ Cmp(index_n, inputs_n_length);
1819   __ B(lo, &loop_n);
1820 
1821   END();
1822   RUN();
1823   TEARDOWN();
1824 }
1825 
1826 
1827 // Test NEON instructions. The inputs_*[] and expected[] arrays should be
1828 // arrays of rawbit representation of input values. This ensures that
1829 // exact bit comparisons can be performed.
1830 template <typename Td, typename Tn, typename Tm>
Test2OpNEON(const char * name,Test2OpNEONHelper_t helper,const Td inputs_d[],const Tn inputs_n[],unsigned inputs_n_length,const Tm inputs_m[],unsigned inputs_m_length,const Td expected[],unsigned expected_length,VectorFormat vd_form,VectorFormat vn_form,VectorFormat vm_form)1831 static void Test2OpNEON(const char* name,
1832                         Test2OpNEONHelper_t helper,
1833                         const Td inputs_d[],
1834                         const Tn inputs_n[],
1835                         unsigned inputs_n_length,
1836                         const Tm inputs_m[],
1837                         unsigned inputs_m_length,
1838                         const Td expected[],
1839                         unsigned expected_length,
1840                         VectorFormat vd_form,
1841                         VectorFormat vn_form,
1842                         VectorFormat vm_form) {
1843   VIXL_ASSERT(inputs_n_length > 0 && inputs_m_length > 0);
1844 
1845   const unsigned vd_lane_count = MaxLaneCountFromFormat(vd_form);
1846 
1847   const unsigned results_length = inputs_n_length * inputs_m_length;
1848   Td* results = new Td[results_length * vd_lane_count];
1849   const unsigned lane_bit = sizeof(Td) * 8;
1850   const unsigned lane_len_in_hex = MaxHexCharCount<Td, Tm>();
1851 
1852   Test2OpNEON_Helper(helper,
1853                      reinterpret_cast<uintptr_t>(inputs_d),
1854                      reinterpret_cast<uintptr_t>(inputs_n),
1855                      inputs_n_length,
1856                      reinterpret_cast<uintptr_t>(inputs_m),
1857                      inputs_m_length,
1858                      reinterpret_cast<uintptr_t>(results),
1859                      vd_form,
1860                      vn_form,
1861                      vm_form);
1862 
1863   if (Test::generate_test_trace()) {
1864     // Print the results.
1865     printf("const uint%u_t kExpected_NEON_%s[] = {\n", lane_bit, name);
1866     for (unsigned iteration = 0; iteration < results_length; iteration++) {
1867       printf(" ");
1868       // Output a separate result for each element of the result vector.
1869       for (unsigned lane = 0; lane < vd_lane_count; lane++) {
1870         unsigned index = lane + (iteration * vd_lane_count);
1871         printf(" 0x%0*" PRIx64 ",",
1872                lane_len_in_hex,
1873                static_cast<uint64_t>(results[index]));
1874       }
1875       printf("\n");
1876     }
1877 
1878     printf("};\n");
1879     printf("const unsigned kExpectedCount_NEON_%s = %u;\n",
1880            name,
1881            results_length);
1882   } else {
1883     // Check the results.
1884     VIXL_CHECK(expected_length == results_length);
1885     unsigned error_count = 0;
1886     unsigned d = 0;
1887     const char* padding = "                    ";
1888     VIXL_ASSERT(strlen(padding) >= (lane_len_in_hex + 1));
1889     for (unsigned n = 0; n < inputs_n_length; n++) {
1890       for (unsigned m = 0; m < inputs_m_length; m++, d++) {
1891         bool error_in_vector = false;
1892 
1893         for (unsigned lane = 0; lane < vd_lane_count; lane++) {
1894           unsigned output_index = (n * inputs_m_length * vd_lane_count) +
1895                                   (m * vd_lane_count) + lane;
1896 
1897           if (results[output_index] != expected[output_index]) {
1898             error_in_vector = true;
1899             break;
1900           }
1901         }
1902 
1903         if (error_in_vector && (++error_count <= kErrorReportLimit)) {
1904           printf("%s\n", name);
1905           printf(" Vd%.*s| Vn%.*s| Vm%.*s| Vd%.*s| Expected\n",
1906                  lane_len_in_hex + 1,
1907                  padding,
1908                  lane_len_in_hex + 1,
1909                  padding,
1910                  lane_len_in_hex + 1,
1911                  padding,
1912                  lane_len_in_hex + 1,
1913                  padding);
1914 
1915           for (unsigned lane = 0; lane < vd_lane_count; lane++) {
1916             unsigned output_index = (n * inputs_m_length * vd_lane_count) +
1917                                     (m * vd_lane_count) + lane;
1918             unsigned input_index_n =
1919                 (inputs_n_length - vd_lane_count + n + 1 + lane) %
1920                 inputs_n_length;
1921             unsigned input_index_m =
1922                 (inputs_m_length - vd_lane_count + m + 1 + lane) %
1923                 inputs_m_length;
1924 
1925             printf("%c0x%0*" PRIx64 " | 0x%0*" PRIx64 " | 0x%0*" PRIx64
1926                    " "
1927                    "| 0x%0*" PRIx64 " | 0x%0*" PRIx64 "\n",
1928                    results[output_index] != expected[output_index] ? '*' : ' ',
1929                    lane_len_in_hex,
1930                    static_cast<uint64_t>(inputs_d[lane]),
1931                    lane_len_in_hex,
1932                    static_cast<uint64_t>(inputs_n[input_index_n]),
1933                    lane_len_in_hex,
1934                    static_cast<uint64_t>(inputs_m[input_index_m]),
1935                    lane_len_in_hex,
1936                    static_cast<uint64_t>(results[output_index]),
1937                    lane_len_in_hex,
1938                    static_cast<uint64_t>(expected[output_index]));
1939           }
1940         }
1941       }
1942     }
1943     VIXL_ASSERT(d == expected_length);
1944     if (error_count > kErrorReportLimit) {
1945       printf("%u other errors follow.\n", error_count - kErrorReportLimit);
1946     }
1947     VIXL_CHECK(error_count == 0);
1948   }
1949   delete[] results;
1950 }
1951 
1952 
1953 // ==== Tests for instructions of the form <INST> Vd, Vn, Vm[<#index>]. ====
1954 
TestByElementNEON_Helper(TestByElementNEONHelper_t helper,uintptr_t inputs_d,uintptr_t inputs_n,unsigned inputs_n_length,uintptr_t inputs_m,unsigned inputs_m_length,const int indices[],unsigned indices_length,uintptr_t results,VectorFormat vd_form,VectorFormat vn_form,VectorFormat vm_form)1955 static void TestByElementNEON_Helper(TestByElementNEONHelper_t helper,
1956                                      uintptr_t inputs_d,
1957                                      uintptr_t inputs_n,
1958                                      unsigned inputs_n_length,
1959                                      uintptr_t inputs_m,
1960                                      unsigned inputs_m_length,
1961                                      const int indices[],
1962                                      unsigned indices_length,
1963                                      uintptr_t results,
1964                                      VectorFormat vd_form,
1965                                      VectorFormat vn_form,
1966                                      VectorFormat vm_form) {
1967   VIXL_ASSERT(vd_form != kFormatUndefined);
1968   VIXL_ASSERT(vn_form != kFormatUndefined);
1969   VIXL_ASSERT(vm_form != kFormatUndefined);
1970 
1971   SETUP();
1972   START();
1973 
1974   // Roll up the loop to keep the code size down.
1975   Label loop_n, loop_m;
1976 
1977   Register out = x0;
1978   Register inputs_n_base = x1;
1979   Register inputs_m_base = x2;
1980   Register inputs_d_base = x3;
1981   Register inputs_n_last_16bytes = x4;
1982   Register inputs_m_last_16bytes = x5;
1983   Register index_n = x6;
1984   Register index_m = x7;
1985 
1986   // TODO: Refactor duplicate definitions below with a VRegister::As() routine.
1987   const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form);
1988   const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
1989 
1990   const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form);
1991   const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
1992   const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
1993   const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form);
1994   const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form);
1995 
1996   const unsigned vm_bits = RegisterSizeInBitsFromFormat(vm_form);
1997   const unsigned vm_lane_count = LaneCountFromFormat(vm_form);
1998   const unsigned vm_lane_bytes = LaneSizeInBytesFromFormat(vm_form);
1999   const unsigned vm_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vm_form);
2000   const unsigned vm_lane_bits = LaneSizeInBitsFromFormat(vm_form);
2001 
2002 
2003   // Always load and store 128 bits regardless of the format.
2004   VRegister vd = v0.V16B();
2005   VRegister vn = v1.V16B();
2006   VRegister vm = v2.V16B();
2007   VRegister vntmp = v3.V16B();
2008   VRegister vmtmp = v4.V16B();
2009   VRegister vres = v5.V16B();
2010 
2011   // These will have the correct format for calling the 'helper'.
2012   VRegister vn_helper = VRegister(1, vn_bits, vn_lane_count);
2013   VRegister vm_helper = VRegister(2, vm_bits, vm_lane_count);
2014   VRegister vres_helper = VRegister(5, vd_bits, vd_lane_count);
2015 
2016   // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'.
2017   VRegister vntmp_single = VRegister(3, vn_lane_bits);
2018   VRegister vmtmp_single = VRegister(4, vm_lane_bits);
2019 
2020   __ Mov(out, results);
2021 
2022   __ Mov(inputs_d_base, inputs_d);
2023 
2024   __ Mov(inputs_n_base, inputs_n);
2025   __ Mov(inputs_n_last_16bytes, inputs_n + (inputs_n_length - 16));
2026   __ Mov(inputs_m_base, inputs_m);
2027   __ Mov(inputs_m_last_16bytes, inputs_m + (inputs_m_length - 16));
2028 
2029   __ Ldr(vd, MemOperand(inputs_d_base));
2030   __ Ldr(vn, MemOperand(inputs_n_last_16bytes));
2031   __ Ldr(vm, MemOperand(inputs_m_last_16bytes));
2032 
2033   __ Mov(index_n, 0);
2034   __ Bind(&loop_n);
2035 
2036   __ Ldr(vntmp_single,
2037          MemOperand(inputs_n_base, index_n, LSL, vn_lane_bytes_log2));
2038   __ Ext(vn, vn, vntmp, vn_lane_bytes);
2039 
2040   __ Mov(index_m, 0);
2041   __ Bind(&loop_m);
2042 
2043   __ Ldr(vmtmp_single,
2044          MemOperand(inputs_m_base, index_m, LSL, vm_lane_bytes_log2));
2045   __ Ext(vm, vm, vmtmp, vm_lane_bytes);
2046 
2047   __ Mov(vres, vd);
2048   {
2049     for (unsigned i = 0; i < indices_length; i++) {
2050       {
2051         SingleEmissionCheckScope guard(&masm);
2052         (masm.*helper)(vres_helper, vn_helper, vm_helper, indices[i]);
2053       }
2054       __ Str(vres, MemOperand(out, vd.GetSizeInBytes(), PostIndex));
2055     }
2056   }
2057 
2058   __ Add(index_m, index_m, 1);
2059   __ Cmp(index_m, inputs_m_length);
2060   __ B(lo, &loop_m);
2061 
2062   __ Add(index_n, index_n, 1);
2063   __ Cmp(index_n, inputs_n_length);
2064   __ B(lo, &loop_n);
2065 
2066   END();
2067   RUN();
2068   TEARDOWN();
2069 }
2070 
2071 
2072 // Test NEON instructions. The inputs_*[] and expected[] arrays should be
2073 // arrays of rawbit representation of input values. This ensures that
2074 // exact bit comparisons can be performed.
2075 template <typename Td, typename Tn, typename Tm>
TestByElementNEON(const char * name,TestByElementNEONHelper_t helper,const Td inputs_d[],const Tn inputs_n[],unsigned inputs_n_length,const Tm inputs_m[],unsigned inputs_m_length,const int indices[],unsigned indices_length,const Td expected[],unsigned expected_length,VectorFormat vd_form,VectorFormat vn_form,VectorFormat vm_form)2076 static void TestByElementNEON(const char* name,
2077                               TestByElementNEONHelper_t helper,
2078                               const Td inputs_d[],
2079                               const Tn inputs_n[],
2080                               unsigned inputs_n_length,
2081                               const Tm inputs_m[],
2082                               unsigned inputs_m_length,
2083                               const int indices[],
2084                               unsigned indices_length,
2085                               const Td expected[],
2086                               unsigned expected_length,
2087                               VectorFormat vd_form,
2088                               VectorFormat vn_form,
2089                               VectorFormat vm_form) {
2090   VIXL_ASSERT(inputs_n_length > 0);
2091   VIXL_ASSERT(inputs_m_length > 0);
2092   VIXL_ASSERT(indices_length > 0);
2093 
2094   const unsigned vd_lane_count = MaxLaneCountFromFormat(vd_form);
2095 
2096   const unsigned results_length =
2097       inputs_n_length * inputs_m_length * indices_length;
2098   Td* results = new Td[results_length * vd_lane_count];
2099   const unsigned lane_bit = sizeof(Td) * 8;
2100   const unsigned lane_len_in_hex = MaxHexCharCount<Td, Tm>();
2101 
2102   TestByElementNEON_Helper(helper,
2103                            reinterpret_cast<uintptr_t>(inputs_d),
2104                            reinterpret_cast<uintptr_t>(inputs_n),
2105                            inputs_n_length,
2106                            reinterpret_cast<uintptr_t>(inputs_m),
2107                            inputs_m_length,
2108                            indices,
2109                            indices_length,
2110                            reinterpret_cast<uintptr_t>(results),
2111                            vd_form,
2112                            vn_form,
2113                            vm_form);
2114 
2115   if (Test::generate_test_trace()) {
2116     // Print the results.
2117     printf("const uint%u_t kExpected_NEON_%s[] = {\n", lane_bit, name);
2118     for (unsigned iteration = 0; iteration < results_length; iteration++) {
2119       printf(" ");
2120       // Output a separate result for each element of the result vector.
2121       for (unsigned lane = 0; lane < vd_lane_count; lane++) {
2122         unsigned index = lane + (iteration * vd_lane_count);
2123         printf(" 0x%0*" PRIx64 ",",
2124                lane_len_in_hex,
2125                static_cast<uint64_t>(results[index]));
2126       }
2127       printf("\n");
2128     }
2129 
2130     printf("};\n");
2131     printf("const unsigned kExpectedCount_NEON_%s = %u;\n",
2132            name,
2133            results_length);
2134   } else {
2135     // Check the results.
2136     VIXL_CHECK(expected_length == results_length);
2137     unsigned error_count = 0;
2138     unsigned d = 0;
2139     const char* padding = "                    ";
2140     VIXL_ASSERT(strlen(padding) >= (lane_len_in_hex + 1));
2141     for (unsigned n = 0; n < inputs_n_length; n++) {
2142       for (unsigned m = 0; m < inputs_m_length; m++) {
2143         for (unsigned index = 0; index < indices_length; index++, d++) {
2144           bool error_in_vector = false;
2145 
2146           for (unsigned lane = 0; lane < vd_lane_count; lane++) {
2147             unsigned output_index =
2148                 (n * inputs_m_length * indices_length * vd_lane_count) +
2149                 (m * indices_length * vd_lane_count) + (index * vd_lane_count) +
2150                 lane;
2151 
2152             if (results[output_index] != expected[output_index]) {
2153               error_in_vector = true;
2154               break;
2155             }
2156           }
2157 
2158           if (error_in_vector && (++error_count <= kErrorReportLimit)) {
2159             printf("%s\n", name);
2160             printf(" Vd%.*s| Vn%.*s| Vm%.*s| Index | Vd%.*s| Expected\n",
2161                    lane_len_in_hex + 1,
2162                    padding,
2163                    lane_len_in_hex + 1,
2164                    padding,
2165                    lane_len_in_hex + 1,
2166                    padding,
2167                    lane_len_in_hex + 1,
2168                    padding);
2169 
2170             for (unsigned lane = 0; lane < vd_lane_count; lane++) {
2171               unsigned output_index =
2172                   (n * inputs_m_length * indices_length * vd_lane_count) +
2173                   (m * indices_length * vd_lane_count) +
2174                   (index * vd_lane_count) + lane;
2175               unsigned input_index_n =
2176                   (inputs_n_length - vd_lane_count + n + 1 + lane) %
2177                   inputs_n_length;
2178               unsigned input_index_m =
2179                   (inputs_m_length - vd_lane_count + m + 1 + lane) %
2180                   inputs_m_length;
2181 
2182               printf("%c0x%0*" PRIx64 " | 0x%0*" PRIx64 " | 0x%0*" PRIx64
2183                      " "
2184                      "| [%3d] | 0x%0*" PRIx64 " | 0x%0*" PRIx64 "\n",
2185                      results[output_index] != expected[output_index] ? '*'
2186                                                                      : ' ',
2187                      lane_len_in_hex,
2188                      static_cast<uint64_t>(inputs_d[lane]),
2189                      lane_len_in_hex,
2190                      static_cast<uint64_t>(inputs_n[input_index_n]),
2191                      lane_len_in_hex,
2192                      static_cast<uint64_t>(inputs_m[input_index_m]),
2193                      indices[index],
2194                      lane_len_in_hex,
2195                      static_cast<uint64_t>(results[output_index]),
2196                      lane_len_in_hex,
2197                      static_cast<uint64_t>(expected[output_index]));
2198             }
2199           }
2200         }
2201       }
2202     }
2203     VIXL_ASSERT(d == expected_length);
2204     if (error_count > kErrorReportLimit) {
2205       printf("%u other errors follow.\n", error_count - kErrorReportLimit);
2206     }
2207     VIXL_CHECK(error_count == 0);
2208   }
2209   delete[] results;
2210 }
2211 
2212 
2213 // ==== Tests for instructions of the form <INST> VReg, VReg, #Immediate. ====
2214 
2215 
2216 template <typename Tm>
Test2OpImmNEON_Helper(typename Test2OpImmediateNEONHelper_t<Tm>::mnemonic helper,uintptr_t inputs_n,unsigned inputs_n_length,const Tm inputs_m[],unsigned inputs_m_length,uintptr_t results,VectorFormat vd_form,VectorFormat vn_form)2217 void Test2OpImmNEON_Helper(
2218     typename Test2OpImmediateNEONHelper_t<Tm>::mnemonic helper,
2219     uintptr_t inputs_n,
2220     unsigned inputs_n_length,
2221     const Tm inputs_m[],
2222     unsigned inputs_m_length,
2223     uintptr_t results,
2224     VectorFormat vd_form,
2225     VectorFormat vn_form) {
2226   VIXL_ASSERT(vd_form != kFormatUndefined && vn_form != kFormatUndefined);
2227 
2228   SETUP();
2229   START();
2230 
2231   // Roll up the loop to keep the code size down.
2232   Label loop_n;
2233 
2234   Register out = x0;
2235   Register inputs_n_base = x1;
2236   Register inputs_n_last_16bytes = x3;
2237   Register index_n = x5;
2238 
2239   // TODO: Refactor duplicate definitions below with a VRegister::As() routine.
2240   const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form);
2241   const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
2242 
2243   const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form);
2244   const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
2245   const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
2246   const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form);
2247   const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form);
2248 
2249 
2250   // These will be either a D- or a Q-register form, with a single lane
2251   // (for use in scalar load and store operations).
2252   VRegister vd = VRegister(0, vd_bits);
2253   VRegister vn = v1.V16B();
2254   VRegister vntmp = v3.V16B();
2255 
2256   // These will have the correct format for use when calling 'helper'.
2257   VRegister vd_helper = VRegister(0, vd_bits, vd_lane_count);
2258   VRegister vn_helper = VRegister(1, vn_bits, vn_lane_count);
2259 
2260   // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'.
2261   VRegister vntmp_single = VRegister(3, vn_lane_bits);
2262 
2263   __ Mov(out, results);
2264 
2265   __ Mov(inputs_n_base, inputs_n);
2266   __ Mov(inputs_n_last_16bytes,
2267          inputs_n + (vn_lane_bytes * inputs_n_length) - 16);
2268 
2269   __ Ldr(vn, MemOperand(inputs_n_last_16bytes));
2270 
2271   __ Mov(index_n, 0);
2272   __ Bind(&loop_n);
2273 
2274   __ Ldr(vntmp_single,
2275          MemOperand(inputs_n_base, index_n, LSL, vn_lane_bytes_log2));
2276   __ Ext(vn, vn, vntmp, vn_lane_bytes);
2277 
2278   // Set the destination to zero for tests such as '[r]shrn2'.
2279   // TODO: Setting the destination to values other than zero might be a better
2280   //       test for shift and accumulate instructions (srsra/ssra/usra/ursra).
2281   __ Movi(vd.V16B(), 0);
2282 
2283   {
2284     for (unsigned i = 0; i < inputs_m_length; i++) {
2285       {
2286         SingleEmissionCheckScope guard(&masm);
2287         (masm.*helper)(vd_helper, vn_helper, inputs_m[i]);
2288       }
2289       __ Str(vd, MemOperand(out, vd.GetSizeInBytes(), PostIndex));
2290     }
2291   }
2292 
2293   __ Add(index_n, index_n, 1);
2294   __ Cmp(index_n, inputs_n_length);
2295   __ B(lo, &loop_n);
2296 
2297   END();
2298   RUN();
2299   TEARDOWN();
2300 }
2301 
2302 
2303 // Test NEON instructions. The inputs_*[] and expected[] arrays should be
2304 // arrays of rawbit representation of input values. This ensures that
2305 // exact bit comparisons can be performed.
2306 template <typename Td, typename Tn, typename Tm>
Test2OpImmNEON(const char * name,typename Test2OpImmediateNEONHelper_t<Tm>::mnemonic helper,const Tn inputs_n[],unsigned inputs_n_length,const Tm inputs_m[],unsigned inputs_m_length,const Td expected[],unsigned expected_length,VectorFormat vd_form,VectorFormat vn_form)2307 static void Test2OpImmNEON(
2308     const char* name,
2309     typename Test2OpImmediateNEONHelper_t<Tm>::mnemonic helper,
2310     const Tn inputs_n[],
2311     unsigned inputs_n_length,
2312     const Tm inputs_m[],
2313     unsigned inputs_m_length,
2314     const Td expected[],
2315     unsigned expected_length,
2316     VectorFormat vd_form,
2317     VectorFormat vn_form) {
2318   VIXL_ASSERT(inputs_n_length > 0 && inputs_m_length > 0);
2319 
2320   const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
2321   const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
2322   const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
2323 
2324   const unsigned results_length = inputs_n_length * inputs_m_length;
2325   Td* results = new Td[results_length * vd_lane_count];
2326   const unsigned lane_bit = sizeof(Td) * 8;
2327   const unsigned lane_len_in_hex = MaxHexCharCount<Td, Tn>();
2328 
2329   Test2OpImmNEON_Helper(helper,
2330                         reinterpret_cast<uintptr_t>(inputs_n),
2331                         inputs_n_length,
2332                         inputs_m,
2333                         inputs_m_length,
2334                         reinterpret_cast<uintptr_t>(results),
2335                         vd_form,
2336                         vn_form);
2337 
2338   if (Test::generate_test_trace()) {
2339     // Print the results.
2340     printf("const uint%u_t kExpected_NEON_%s[] = {\n", lane_bit, name);
2341     for (unsigned iteration = 0; iteration < results_length; iteration++) {
2342       printf(" ");
2343       // Output a separate result for each element of the result vector.
2344       for (unsigned lane = 0; lane < vd_lane_count; lane++) {
2345         unsigned index = lane + (iteration * vd_lane_count);
2346         printf(" 0x%0*" PRIx64 ",",
2347                lane_len_in_hex,
2348                static_cast<uint64_t>(results[index]));
2349       }
2350       printf("\n");
2351     }
2352 
2353     printf("};\n");
2354     printf("const unsigned kExpectedCount_NEON_%s = %u;\n",
2355            name,
2356            results_length);
2357   } else {
2358     // Check the results.
2359     VIXL_CHECK(expected_length == results_length);
2360     unsigned error_count = 0;
2361     unsigned d = 0;
2362     const char* padding = "                    ";
2363     VIXL_ASSERT(strlen(padding) >= (lane_len_in_hex + 1));
2364     for (unsigned n = 0; n < inputs_n_length; n++) {
2365       for (unsigned m = 0; m < inputs_m_length; m++, d++) {
2366         bool error_in_vector = false;
2367 
2368         for (unsigned lane = 0; lane < vd_lane_count; lane++) {
2369           unsigned output_index = (n * inputs_m_length * vd_lane_count) +
2370                                   (m * vd_lane_count) + lane;
2371 
2372           if (results[output_index] != expected[output_index]) {
2373             error_in_vector = true;
2374             break;
2375           }
2376         }
2377 
2378         if (error_in_vector && (++error_count <= kErrorReportLimit)) {
2379           printf("%s\n", name);
2380           printf(" Vn%.*s| Imm%.*s| Vd%.*s| Expected\n",
2381                  lane_len_in_hex + 1,
2382                  padding,
2383                  lane_len_in_hex,
2384                  padding,
2385                  lane_len_in_hex + 1,
2386                  padding);
2387 
2388           const unsigned first_index_n =
2389               inputs_n_length - (16 / vn_lane_bytes) + n + 1;
2390 
2391           for (unsigned lane = 0; lane < std::max(vd_lane_count, vn_lane_count);
2392                lane++) {
2393             unsigned output_index = (n * inputs_m_length * vd_lane_count) +
2394                                     (m * vd_lane_count) + lane;
2395             unsigned input_index_n = (first_index_n + lane) % inputs_n_length;
2396             unsigned input_index_m = m;
2397 
2398             printf("%c0x%0*" PRIx64 " | 0x%0*" PRIx64
2399                    " "
2400                    "| 0x%0*" PRIx64 " | 0x%0*" PRIx64 "\n",
2401                    results[output_index] != expected[output_index] ? '*' : ' ',
2402                    lane_len_in_hex,
2403                    static_cast<uint64_t>(inputs_n[input_index_n]),
2404                    lane_len_in_hex,
2405                    static_cast<uint64_t>(inputs_m[input_index_m]),
2406                    lane_len_in_hex,
2407                    static_cast<uint64_t>(results[output_index]),
2408                    lane_len_in_hex,
2409                    static_cast<uint64_t>(expected[output_index]));
2410           }
2411         }
2412       }
2413     }
2414     VIXL_ASSERT(d == expected_length);
2415     if (error_count > kErrorReportLimit) {
2416       printf("%u other errors follow.\n", error_count - kErrorReportLimit);
2417     }
2418     VIXL_CHECK(error_count == 0);
2419   }
2420   delete[] results;
2421 }
2422 
2423 
2424 // ==== Tests for instructions of the form <INST> VReg, #Imm, VReg, #Imm. ====
2425 
2426 
TestOpImmOpImmNEON_Helper(TestOpImmOpImmVdUpdateNEONHelper_t helper,uintptr_t inputs_d,const int inputs_imm1[],unsigned inputs_imm1_length,uintptr_t inputs_n,unsigned inputs_n_length,const int inputs_imm2[],unsigned inputs_imm2_length,uintptr_t results,VectorFormat vd_form,VectorFormat vn_form)2427 static void TestOpImmOpImmNEON_Helper(TestOpImmOpImmVdUpdateNEONHelper_t helper,
2428                                       uintptr_t inputs_d,
2429                                       const int inputs_imm1[],
2430                                       unsigned inputs_imm1_length,
2431                                       uintptr_t inputs_n,
2432                                       unsigned inputs_n_length,
2433                                       const int inputs_imm2[],
2434                                       unsigned inputs_imm2_length,
2435                                       uintptr_t results,
2436                                       VectorFormat vd_form,
2437                                       VectorFormat vn_form) {
2438   VIXL_ASSERT(vd_form != kFormatUndefined);
2439   VIXL_ASSERT(vn_form != kFormatUndefined);
2440 
2441   SETUP();
2442   START();
2443 
2444   // Roll up the loop to keep the code size down.
2445   Label loop_n;
2446 
2447   Register out = x0;
2448   Register inputs_d_base = x1;
2449   Register inputs_n_base = x2;
2450   Register inputs_n_last_vector = x4;
2451   Register index_n = x6;
2452 
2453   // TODO: Refactor duplicate definitions below with a VRegister::As() routine.
2454   const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form);
2455   const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
2456 
2457   const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form);
2458   const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
2459   const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
2460   const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form);
2461   const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form);
2462 
2463 
2464   // These will be either a D- or a Q-register form, with a single lane
2465   // (for use in scalar load and store operations).
2466   VRegister vd = VRegister(0, vd_bits);
2467   VRegister vn = VRegister(1, vn_bits);
2468   VRegister vntmp = VRegister(4, vn_bits);
2469   VRegister vres = VRegister(5, vn_bits);
2470 
2471   VRegister vn_helper = VRegister(1, vn_bits, vn_lane_count);
2472   VRegister vres_helper = VRegister(5, vd_bits, vd_lane_count);
2473 
2474   // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'.
2475   VRegister vntmp_single = VRegister(4, vn_lane_bits);
2476 
2477   // Same registers for use in the 'ext' instructions.
2478   VRegister vn_ext = (kDRegSize == vn_bits) ? vn.V8B() : vn.V16B();
2479   VRegister vntmp_ext = (kDRegSize == vn_bits) ? vntmp.V8B() : vntmp.V16B();
2480 
2481   __ Mov(out, results);
2482 
2483   __ Mov(inputs_d_base, inputs_d);
2484 
2485   __ Mov(inputs_n_base, inputs_n);
2486   __ Mov(inputs_n_last_vector,
2487          inputs_n + vn_lane_bytes * (inputs_n_length - vn_lane_count));
2488 
2489   __ Ldr(vd, MemOperand(inputs_d_base));
2490 
2491   __ Ldr(vn, MemOperand(inputs_n_last_vector));
2492 
2493   __ Mov(index_n, 0);
2494   __ Bind(&loop_n);
2495 
2496   __ Ldr(vntmp_single,
2497          MemOperand(inputs_n_base, index_n, LSL, vn_lane_bytes_log2));
2498   __ Ext(vn_ext, vn_ext, vntmp_ext, vn_lane_bytes);
2499 
2500   {
2501     EmissionCheckScope guard(&masm,
2502                              kInstructionSize * inputs_imm1_length *
2503                                  inputs_imm2_length * 3);
2504     for (unsigned i = 0; i < inputs_imm1_length; i++) {
2505       for (unsigned j = 0; j < inputs_imm2_length; j++) {
2506         __ Mov(vres, vd);
2507         (masm.*helper)(vres_helper, inputs_imm1[i], vn_helper, inputs_imm2[j]);
2508         __ Str(vres, MemOperand(out, vd.GetSizeInBytes(), PostIndex));
2509       }
2510     }
2511   }
2512 
2513   __ Add(index_n, index_n, 1);
2514   __ Cmp(index_n, inputs_n_length);
2515   __ B(lo, &loop_n);
2516 
2517   END();
2518   RUN();
2519   TEARDOWN();
2520 }
2521 
2522 
2523 // Test NEON instructions. The inputs_*[] and expected[] arrays should be
2524 // arrays of rawbit representation of input values. This ensures that
2525 // exact bit comparisons can be performed.
2526 template <typename Td, typename Tn>
TestOpImmOpImmNEON(const char * name,TestOpImmOpImmVdUpdateNEONHelper_t helper,const Td inputs_d[],const int inputs_imm1[],unsigned inputs_imm1_length,const Tn inputs_n[],unsigned inputs_n_length,const int inputs_imm2[],unsigned inputs_imm2_length,const Td expected[],unsigned expected_length,VectorFormat vd_form,VectorFormat vn_form)2527 static void TestOpImmOpImmNEON(const char* name,
2528                                TestOpImmOpImmVdUpdateNEONHelper_t helper,
2529                                const Td inputs_d[],
2530                                const int inputs_imm1[],
2531                                unsigned inputs_imm1_length,
2532                                const Tn inputs_n[],
2533                                unsigned inputs_n_length,
2534                                const int inputs_imm2[],
2535                                unsigned inputs_imm2_length,
2536                                const Td expected[],
2537                                unsigned expected_length,
2538                                VectorFormat vd_form,
2539                                VectorFormat vn_form) {
2540   VIXL_ASSERT(inputs_n_length > 0);
2541   VIXL_ASSERT(inputs_imm1_length > 0);
2542   VIXL_ASSERT(inputs_imm2_length > 0);
2543 
2544   const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
2545 
2546   const unsigned results_length =
2547       inputs_n_length * inputs_imm1_length * inputs_imm2_length;
2548 
2549   Td* results = new Td[results_length * vd_lane_count];
2550   const unsigned lane_bit = sizeof(Td) * 8;
2551   const unsigned lane_len_in_hex = MaxHexCharCount<Td, Tn>();
2552 
2553   TestOpImmOpImmNEON_Helper(helper,
2554                             reinterpret_cast<uintptr_t>(inputs_d),
2555                             inputs_imm1,
2556                             inputs_imm1_length,
2557                             reinterpret_cast<uintptr_t>(inputs_n),
2558                             inputs_n_length,
2559                             inputs_imm2,
2560                             inputs_imm2_length,
2561                             reinterpret_cast<uintptr_t>(results),
2562                             vd_form,
2563                             vn_form);
2564 
2565   if (Test::generate_test_trace()) {
2566     // Print the results.
2567     printf("const uint%u_t kExpected_NEON_%s[] = {\n", lane_bit, name);
2568     for (unsigned iteration = 0; iteration < results_length; iteration++) {
2569       printf(" ");
2570       // Output a separate result for each element of the result vector.
2571       for (unsigned lane = 0; lane < vd_lane_count; lane++) {
2572         unsigned index = lane + (iteration * vd_lane_count);
2573         printf(" 0x%0*" PRIx64 ",",
2574                lane_len_in_hex,
2575                static_cast<uint64_t>(results[index]));
2576       }
2577       printf("\n");
2578     }
2579 
2580     printf("};\n");
2581     printf("const unsigned kExpectedCount_NEON_%s = %u;\n",
2582            name,
2583            results_length);
2584   } else {
2585     // Check the results.
2586     VIXL_CHECK(expected_length == results_length);
2587     unsigned error_count = 0;
2588     unsigned counted_length = 0;
2589     const char* padding = "                    ";
2590     VIXL_ASSERT(strlen(padding) >= (lane_len_in_hex + 1));
2591     for (unsigned n = 0; n < inputs_n_length; n++) {
2592       for (unsigned imm1 = 0; imm1 < inputs_imm1_length; imm1++) {
2593         for (unsigned imm2 = 0; imm2 < inputs_imm2_length; imm2++) {
2594           bool error_in_vector = false;
2595 
2596           counted_length++;
2597 
2598           for (unsigned lane = 0; lane < vd_lane_count; lane++) {
2599             unsigned output_index =
2600                 (n * inputs_imm1_length * inputs_imm2_length * vd_lane_count) +
2601                 (imm1 * inputs_imm2_length * vd_lane_count) +
2602                 (imm2 * vd_lane_count) + lane;
2603 
2604             if (results[output_index] != expected[output_index]) {
2605               error_in_vector = true;
2606               break;
2607             }
2608           }
2609 
2610           if (error_in_vector && (++error_count <= kErrorReportLimit)) {
2611             printf("%s\n", name);
2612             printf(" Vd%.*s| Imm%.*s| Vn%.*s| Imm%.*s| Vd%.*s| Expected\n",
2613                    lane_len_in_hex + 1,
2614                    padding,
2615                    lane_len_in_hex,
2616                    padding,
2617                    lane_len_in_hex + 1,
2618                    padding,
2619                    lane_len_in_hex,
2620                    padding,
2621                    lane_len_in_hex + 1,
2622                    padding);
2623 
2624             for (unsigned lane = 0; lane < vd_lane_count; lane++) {
2625               unsigned output_index =
2626                   (n * inputs_imm1_length * inputs_imm2_length *
2627                    vd_lane_count) +
2628                   (imm1 * inputs_imm2_length * vd_lane_count) +
2629                   (imm2 * vd_lane_count) + lane;
2630               unsigned input_index_n =
2631                   (inputs_n_length - vd_lane_count + n + 1 + lane) %
2632                   inputs_n_length;
2633               unsigned input_index_imm1 = imm1;
2634               unsigned input_index_imm2 = imm2;
2635 
2636               printf("%c0x%0*" PRIx64 " | 0x%0*" PRIx64 " | 0x%0*" PRIx64
2637                      " "
2638                      "| 0x%0*" PRIx64 " | 0x%0*" PRIx64 " | 0x%0*" PRIx64 "\n",
2639                      results[output_index] != expected[output_index] ? '*'
2640                                                                      : ' ',
2641                      lane_len_in_hex,
2642                      static_cast<uint64_t>(inputs_d[lane]),
2643                      lane_len_in_hex,
2644                      static_cast<uint64_t>(inputs_imm1[input_index_imm1]),
2645                      lane_len_in_hex,
2646                      static_cast<uint64_t>(inputs_n[input_index_n]),
2647                      lane_len_in_hex,
2648                      static_cast<uint64_t>(inputs_imm2[input_index_imm2]),
2649                      lane_len_in_hex,
2650                      static_cast<uint64_t>(results[output_index]),
2651                      lane_len_in_hex,
2652                      static_cast<uint64_t>(expected[output_index]));
2653             }
2654           }
2655         }
2656       }
2657     }
2658     VIXL_ASSERT(counted_length == expected_length);
2659     if (error_count > kErrorReportLimit) {
2660       printf("%u other errors follow.\n", error_count - kErrorReportLimit);
2661     }
2662     VIXL_CHECK(error_count == 0);
2663   }
2664   delete[] results;
2665 }
2666 
2667 
2668 // ==== Floating-point tests. ====
2669 
2670 
2671 // Standard floating-point test expansion for both double- and single-precision
2672 // operations.
// Converts the given token into a string literal.
#define STRINGIFY(token) #token
2674 
// Calls Test<test_kind>() for the MacroAssembler member `mnemonic`. The test
// is named "<mnemonic>_<variant>", is fed `inputs` together with its element
// count, and is checked against kExpected_<mnemonic>_<variant> and
// kExpectedCount_<mnemonic>_<variant>.
#define CALL_TEST_FP_HELPER(mnemonic, variant, test_kind, inputs) \
  Test##test_kind(STRINGIFY(mnemonic) "_" STRINGIFY(variant), \
                  &MacroAssembler::mnemonic, \
                  inputs, \
                  sizeof(inputs) / sizeof(inputs[0]), \
                  kExpected_##mnemonic##_##variant, \
                  kExpectedCount_##mnemonic##_##variant)
2682 
2683 #define DEFINE_TEST_FP(mnemonic, type, input)                    \
2684   TEST(mnemonic##_d) {                                           \
2685     CALL_TEST_FP_HELPER(mnemonic, d, type, kInputDouble##input); \
2686   }                                                              \
2687   TEST(mnemonic##_s) {                                           \
2688     CALL_TEST_FP_HELPER(mnemonic, s, type, kInputFloat##input);  \
2689   }
2690 
2691 // TODO: Test with a newer version of valgrind.
2692 //
2693 // Note: valgrind-3.10.0 does not properly interpret libm's fma() on x86_64.
2694 // Therefore this test will be exiting though an ASSERT and thus leaking
2695 // memory.
2696 DEFINE_TEST_FP(fmadd, 3Op, Basic)
2697 DEFINE_TEST_FP(fmsub, 3Op, Basic)
2698 DEFINE_TEST_FP(fnmadd, 3Op, Basic)
2699 DEFINE_TEST_FP(fnmsub, 3Op, Basic)
2700 
2701 DEFINE_TEST_FP(fadd, 2Op, Basic)
2702 DEFINE_TEST_FP(fdiv, 2Op, Basic)
2703 DEFINE_TEST_FP(fmax, 2Op, Basic)
2704 DEFINE_TEST_FP(fmaxnm, 2Op, Basic)
2705 DEFINE_TEST_FP(fmin, 2Op, Basic)
2706 DEFINE_TEST_FP(fminnm, 2Op, Basic)
2707 DEFINE_TEST_FP(fmul, 2Op, Basic)
2708 DEFINE_TEST_FP(fsub, 2Op, Basic)
2709 DEFINE_TEST_FP(fnmul, 2Op, Basic)
2710 
2711 DEFINE_TEST_FP(fabs, 1Op, Basic)
2712 DEFINE_TEST_FP(fmov, 1Op, Basic)
2713 DEFINE_TEST_FP(fneg, 1Op, Basic)
2714 DEFINE_TEST_FP(fsqrt, 1Op, Basic)
2715 DEFINE_TEST_FP(frinta, 1Op, Conversions)
2716 DEFINE_TEST_FP(frinti, 1Op, Conversions)
2717 DEFINE_TEST_FP(frintm, 1Op, Conversions)
2718 DEFINE_TEST_FP(frintn, 1Op, Conversions)
2719 DEFINE_TEST_FP(frintp, 1Op, Conversions)
2720 DEFINE_TEST_FP(frintx, 1Op, Conversions)
2721 DEFINE_TEST_FP(frintz, 1Op, Conversions)
2722 
// fcmp tests: the "d"/"s" variants go through TestCmp(), the "dz"/"sz"
// variants through TestCmpZero().
TEST(fcmp_d) {
  CALL_TEST_FP_HELPER(fcmp, d, Cmp, kInputDoubleBasic);
}
TEST(fcmp_s) {
  CALL_TEST_FP_HELPER(fcmp, s, Cmp, kInputFloatBasic);
}
TEST(fcmp_dz) {
  CALL_TEST_FP_HELPER(fcmp, dz, CmpZero, kInputDoubleBasic);
}
TEST(fcmp_sz) {
  CALL_TEST_FP_HELPER(fcmp, sz, CmpZero, kInputFloatBasic);
}

// fcvt tests: "sd" is run on the double conversion inputs and "ds" on the
// float conversion inputs, both through Test1Op().
TEST(fcvt_sd) {
  CALL_TEST_FP_HELPER(fcvt, sd, 1Op, kInputDoubleConversions);
}
TEST(fcvt_ds) {
  CALL_TEST_FP_HELPER(fcvt, ds, 1Op, kInputFloatConversions);
}
2730 
2731 #define DEFINE_TEST_FP_TO_INT(mnemonic, type, input)              \
2732   TEST(mnemonic##_xd) {                                           \
2733     CALL_TEST_FP_HELPER(mnemonic, xd, type, kInputDouble##input); \
2734   }                                                               \
2735   TEST(mnemonic##_xs) {                                           \
2736     CALL_TEST_FP_HELPER(mnemonic, xs, type, kInputFloat##input);  \
2737   }                                                               \
2738   TEST(mnemonic##_wd) {                                           \
2739     CALL_TEST_FP_HELPER(mnemonic, wd, type, kInputDouble##input); \
2740   }                                                               \
2741   TEST(mnemonic##_ws) {                                           \
2742     CALL_TEST_FP_HELPER(mnemonic, ws, type, kInputFloat##input);  \
2743   }
2744 
DEFINE_TEST_FP_TO_INT(fcvtas,FPToS,Conversions)2745 DEFINE_TEST_FP_TO_INT(fcvtas, FPToS, Conversions)
2746 DEFINE_TEST_FP_TO_INT(fcvtau, FPToU, Conversions)
2747 DEFINE_TEST_FP_TO_INT(fcvtms, FPToS, Conversions)
2748 DEFINE_TEST_FP_TO_INT(fcvtmu, FPToU, Conversions)
2749 DEFINE_TEST_FP_TO_INT(fcvtns, FPToS, Conversions)
2750 DEFINE_TEST_FP_TO_INT(fcvtnu, FPToU, Conversions)
2751 DEFINE_TEST_FP_TO_INT(fcvtzs, FPToFixedS, Conversions)
2752 DEFINE_TEST_FP_TO_INT(fcvtzu, FPToFixedU, Conversions)
2753 
2754 // TODO: Scvtf-fixed-point
2755 // TODO: Scvtf-integer
2756 // TODO: Ucvtf-fixed-point
2757 // TODO: Ucvtf-integer
2758 
2759 // TODO: Fccmp
2760 // TODO: Fcsel
2761 
2762 
2763 // ==== NEON Tests. ====
2764 
// Calls Test1OpNEON() for the MacroAssembler member `mnemonic`. The test is
// named "<mnemonic>_<vd_fmt>", is fed `src_n` with its element count, and is
// checked against kExpected_NEON_<mnemonic>_<vd_fmt> and its
// kExpectedCount_ counterpart. The formats are passed as kFormat<vd_fmt> and
// kFormat<vn_fmt>.
#define CALL_TEST_NEON_HELPER_1Op(mnemonic, vd_fmt, vn_fmt, src_n) \
  Test1OpNEON(STRINGIFY(mnemonic) "_" STRINGIFY(vd_fmt), \
              &MacroAssembler::mnemonic, \
              src_n, \
              (sizeof(src_n) / sizeof(src_n[0])), \
              kExpected_NEON_##mnemonic##_##vd_fmt, \
              kExpectedCount_NEON_##mnemonic##_##vd_fmt, \
              kFormat##vd_fmt, \
              kFormat##vn_fmt)
2774 
// Calls Test1OpAcrossNEON() for the MacroAssembler member `mnemonic`. Both
// formats are part of the test name ("<mnemonic>_<vd_fmt>_<vn_fmt>") and of
// the kExpected_NEON_ / kExpectedCount_NEON_ symbol names.
#define CALL_TEST_NEON_HELPER_1OpAcross(mnemonic, vd_fmt, vn_fmt, src_n) \
  Test1OpAcrossNEON( \
      STRINGIFY(mnemonic) "_" STRINGIFY(vd_fmt) "_" STRINGIFY(vn_fmt), \
      &MacroAssembler::mnemonic, \
      src_n, \
      (sizeof(src_n) / sizeof(src_n[0])), \
      kExpected_NEON_##mnemonic##_##vd_fmt##_##vn_fmt, \
      kExpectedCount_NEON_##mnemonic##_##vd_fmt##_##vn_fmt, \
      kFormat##vd_fmt, \
      kFormat##vn_fmt)
2785 
// Calls Test2OpNEON() for the MacroAssembler member `mnemonic`. The test is
// named "<mnemonic>_<vd_fmt>". `src_d` is passed as is, while `src_n` and
// `src_m` are each followed by their element count. Results are checked
// against kExpected_NEON_<mnemonic>_<vd_fmt> and its kExpectedCount_
// counterpart.
#define CALL_TEST_NEON_HELPER_2Op( \
    mnemonic, vd_fmt, vn_fmt, vm_fmt, src_d, src_n, src_m) \
  Test2OpNEON(STRINGIFY(mnemonic) "_" STRINGIFY(vd_fmt), \
              &MacroAssembler::mnemonic, \
              src_d, \
              src_n, \
              (sizeof(src_n) / sizeof(src_n[0])), \
              src_m, \
              (sizeof(src_m) / sizeof(src_m[0])), \
              kExpected_NEON_##mnemonic##_##vd_fmt, \
              kExpectedCount_NEON_##mnemonic##_##vd_fmt, \
              kFormat##vd_fmt, \
              kFormat##vn_fmt, \
              kFormat##vm_fmt)
2800 
// Calls Test2OpImmNEON() for the MacroAssembler member `mnemonic`. The test
// name and the expected-trace symbols carry a "_2OPIMM" suffix:
// "<mnemonic>_<vd_fmt>_2OPIMM". `src_n` holds the Vn inputs and `src_m` the
// immediates; each is followed by its element count.
#define CALL_TEST_NEON_HELPER_2OpImm(mnemonic, vd_fmt, vn_fmt, src_n, src_m) \
  Test2OpImmNEON(STRINGIFY(mnemonic) "_" STRINGIFY(vd_fmt) "_2OPIMM", \
                 &MacroAssembler::mnemonic, \
                 src_n, \
                 (sizeof(src_n) / sizeof(src_n[0])), \
                 src_m, \
                 (sizeof(src_m) / sizeof(src_m[0])), \
                 kExpected_NEON_##mnemonic##_##vd_fmt##_2OPIMM, \
                 kExpectedCount_NEON_##mnemonic##_##vd_fmt##_2OPIMM, \
                 kFormat##vd_fmt, \
                 kFormat##vn_fmt)
2813 
// Calls TestByElementNEON() for the MacroAssembler member `mnemonic`. All
// three formats appear in the test name
// ("<mnemonic>_<vd_fmt>_<vn_fmt>_<vm_fmt>") and in the expected-trace symbol
// names. `src_n`, `src_m` and `lane_indices` are each followed by their
// element count.
#define CALL_TEST_NEON_HELPER_ByElement( \
    mnemonic, vd_fmt, vn_fmt, vm_fmt, src_d, src_n, src_m, lane_indices) \
  TestByElementNEON( \
      STRINGIFY(mnemonic) "_" STRINGIFY(vd_fmt) "_" STRINGIFY(vn_fmt) "_" \
      STRINGIFY(vm_fmt), \
      &MacroAssembler::mnemonic, \
      src_d, \
      src_n, \
      (sizeof(src_n) / sizeof(src_n[0])), \
      src_m, \
      (sizeof(src_m) / sizeof(src_m[0])), \
      lane_indices, \
      (sizeof(lane_indices) / sizeof(lane_indices[0])), \
      kExpected_NEON_##mnemonic##_##vd_fmt##_##vn_fmt##_##vm_fmt, \
      kExpectedCount_NEON_##mnemonic##_##vd_fmt##_##vn_fmt##_##vm_fmt, \
      kFormat##vd_fmt, \
      kFormat##vn_fmt, \
      kFormat##vm_fmt)
2832 
// Calls TestOpImmOpImmNEON(). Unlike the other CALL_TEST_NEON_HELPER_* macros,
// the code-generating function is passed explicitly as `emit_fn`; `mnemonic`
// is only used to build the test name ("<mnemonic>_<vd_fmt>") and the
// expected-trace symbol names. `imm1_set`, `src_n` and `imm2_set` are each
// followed by their element count.
#define CALL_TEST_NEON_HELPER_OpImmOpImm( \
    emit_fn, mnemonic, vd_fmt, vn_fmt, src_d, imm1_set, src_n, imm2_set) \
  TestOpImmOpImmNEON(STRINGIFY(mnemonic) "_" STRINGIFY(vd_fmt), \
                     emit_fn, \
                     src_d, \
                     imm1_set, \
                     (sizeof(imm1_set) / sizeof(imm1_set[0])), \
                     src_n, \
                     (sizeof(src_n) / sizeof(src_n[0])), \
                     imm2_set, \
                     (sizeof(imm2_set) / sizeof(imm2_set[0])), \
                     kExpected_NEON_##mnemonic##_##vd_fmt, \
                     kExpectedCount_NEON_##mnemonic##_##vd_fmt, \
                     kFormat##vd_fmt, \
                     kFormat##vn_fmt)
2854 
// One-operand NEON test in which the destination and the source share the
// same format: forwards to CALL_TEST_NEON_HELPER_1Op with `fmt` used twice.
#define CALL_TEST_NEON_HELPER_2SAME(mnemonic, fmt, src_n) \
  CALL_TEST_NEON_HELPER_1Op(mnemonic, fmt, fmt, src_n)
2857 
// Each macro below defines one same-format test per listed vector format,
// named <mnemonic>_<format>, using the kInput<N>bits<input_set> array whose
// lane width matches that format.

// 8B and 16B, on kInput8bits<input_set>.
#define DEFINE_TEST_NEON_2SAME_8B_16B(mnemonic, input_set) \
  TEST(mnemonic##_8B) { \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, 8B, kInput8bits##input_set); \
  } \
  TEST(mnemonic##_16B) { \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, 16B, kInput8bits##input_set); \
  }

// 4H and 8H, on kInput16bits<input_set>.
#define DEFINE_TEST_NEON_2SAME_4H_8H(mnemonic, input_set) \
  TEST(mnemonic##_4H) { \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, 4H, kInput16bits##input_set); \
  } \
  TEST(mnemonic##_8H) { \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, 8H, kInput16bits##input_set); \
  }

// 2S and 4S, on kInput32bits<input_set>.
#define DEFINE_TEST_NEON_2SAME_2S_4S(mnemonic, input_set) \
  TEST(mnemonic##_2S) { \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, 2S, kInput32bits##input_set); \
  } \
  TEST(mnemonic##_4S) { \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, 4S, kInput32bits##input_set); \
  }
2881 
// Combinations of the per-lane-size macros.

// 8B, 16B, 4H and 8H.
#define DEFINE_TEST_NEON_2SAME_BH(mnemonic, input_set) \
  DEFINE_TEST_NEON_2SAME_8B_16B(mnemonic, input_set) \
  DEFINE_TEST_NEON_2SAME_4H_8H(mnemonic, input_set)

// Every vector format except 2D.
#define DEFINE_TEST_NEON_2SAME_NO2D(mnemonic, input_set) \
  DEFINE_TEST_NEON_2SAME_BH(mnemonic, input_set) \
  DEFINE_TEST_NEON_2SAME_2S_4S(mnemonic, input_set)

// Every vector format, with 2D run on kInput64bits<input_set>.
#define DEFINE_TEST_NEON_2SAME(mnemonic, input_set) \
  DEFINE_TEST_NEON_2SAME_NO2D(mnemonic, input_set) \
  TEST(mnemonic##_2D) { \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, 2D, kInput64bits##input_set); \
  }

// 2S, 4S and 2D only.
#define DEFINE_TEST_NEON_2SAME_SD(mnemonic, input_set) \
  DEFINE_TEST_NEON_2SAME_2S_4S(mnemonic, input_set) \
  TEST(mnemonic##_2D) { \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, 2D, kInput64bits##input_set); \
  }
2900 
// Floating-point same-format vector tests: 2S and 4S are run on
// kInputFloat<input_set>, and 2D on kInputDouble<input_set>.
#define DEFINE_TEST_NEON_2SAME_FP(mnemonic, input_set) \
  TEST(mnemonic##_2S) { \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, 2S, kInputFloat##input_set); \
  } \
  TEST(mnemonic##_4S) { \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, 4S, kInputFloat##input_set); \
  } \
  TEST(mnemonic##_2D) { \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, 2D, kInputDouble##input_set); \
  }

// Floating-point same-format scalar tests: S is run on kInputFloat<input_set>
// and D on kInputDouble<input_set>.
#define DEFINE_TEST_NEON_2SAME_FP_SCALAR(mnemonic, input_set) \
  TEST(mnemonic##_S) { \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, S, kInputFloat##input_set); \
  } \
  TEST(mnemonic##_D) { \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, D, kInputDouble##input_set); \
  }
2919 
// Integer same-format scalar tests. Each single-format macro defines
// TEST(<mnemonic>_<format>) on the kInput<N>bits<input_set> array of matching
// width.
#define DEFINE_TEST_NEON_2SAME_SCALAR_B(mnemonic, input_set) \
  TEST(mnemonic##_B) { \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, B, kInput8bits##input_set); \
  }
#define DEFINE_TEST_NEON_2SAME_SCALAR_H(mnemonic, input_set) \
  TEST(mnemonic##_H) { \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, H, kInput16bits##input_set); \
  }
#define DEFINE_TEST_NEON_2SAME_SCALAR_S(mnemonic, input_set) \
  TEST(mnemonic##_S) { \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, S, kInput32bits##input_set); \
  }
#define DEFINE_TEST_NEON_2SAME_SCALAR_D(mnemonic, input_set) \
  TEST(mnemonic##_D) { \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, D, kInput64bits##input_set); \
  }

// All four scalar formats: B, H, S and D.
#define DEFINE_TEST_NEON_2SAME_SCALAR(mnemonic, input_set) \
  DEFINE_TEST_NEON_2SAME_SCALAR_B(mnemonic, input_set) \
  DEFINE_TEST_NEON_2SAME_SCALAR_H(mnemonic, input_set) \
  DEFINE_TEST_NEON_2SAME_SCALAR_S(mnemonic, input_set) \
  DEFINE_TEST_NEON_2SAME_SCALAR_D(mnemonic, input_set)

// S and D only.
#define DEFINE_TEST_NEON_2SAME_SCALAR_SD(mnemonic, input_set) \
  DEFINE_TEST_NEON_2SAME_SCALAR_S(mnemonic, input_set) \
  DEFINE_TEST_NEON_2SAME_SCALAR_D(mnemonic, input_set)
2946 
2947 
// Thin alias: forwards all four arguments, unchanged and in the same order,
// to CALL_TEST_NEON_HELPER_1OpAcross.
#define CALL_TEST_NEON_HELPER_ACROSS(mnemonic, vd_form, vn_form, input_n) \
  CALL_TEST_NEON_HELPER_1OpAcross(mnemonic, vd_form, vn_form, input_n)
2950 
// Defines five tests named `<mnemonic>_<vd>_<vn>`, each invoking
// CALL_TEST_NEON_HELPER_ACROSS. The (vd_form, vn_form) pairs are B/8B, B/16B,
// H/4H, H/8H and S/4S; the input is kInput8bits<input>, kInput16bits<input>
// or kInput32bits<input> according to the vn_form lane letter (B, H or S).
#define DEFINE_TEST_NEON_ACROSS(mnemonic, input) \
  TEST(mnemonic##_B_8B) { \
    CALL_TEST_NEON_HELPER_ACROSS(mnemonic, B, 8B, kInput8bits##input); \
  } \
  TEST(mnemonic##_B_16B) { \
    CALL_TEST_NEON_HELPER_ACROSS(mnemonic, B, 16B, kInput8bits##input); \
  } \
  TEST(mnemonic##_H_4H) { \
    CALL_TEST_NEON_HELPER_ACROSS(mnemonic, H, 4H, kInput16bits##input); \
  } \
  TEST(mnemonic##_H_8H) { \
    CALL_TEST_NEON_HELPER_ACROSS(mnemonic, H, 8H, kInput16bits##input); \
  } \
  TEST(mnemonic##_S_4S) { \
    CALL_TEST_NEON_HELPER_ACROSS(mnemonic, S, 4S, kInput32bits##input); \
  }
2967 
// Defines five tests named `<mnemonic>_<vd>_<vn>`, each invoking
// CALL_TEST_NEON_HELPER_ACROSS. The (vd_form, vn_form) pairs are H/8B, H/16B,
// S/4H, S/8H and D/4S; the input is kInput8bits<input>, kInput16bits<input>
// or kInput32bits<input> according to the vn_form lane letter (B, H or S).
#define DEFINE_TEST_NEON_ACROSS_LONG(mnemonic, input) \
  TEST(mnemonic##_H_8B) { \
    CALL_TEST_NEON_HELPER_ACROSS(mnemonic, H, 8B, kInput8bits##input); \
  } \
  TEST(mnemonic##_H_16B) { \
    CALL_TEST_NEON_HELPER_ACROSS(mnemonic, H, 16B, kInput8bits##input); \
  } \
  TEST(mnemonic##_S_4H) { \
    CALL_TEST_NEON_HELPER_ACROSS(mnemonic, S, 4H, kInput16bits##input); \
  } \
  TEST(mnemonic##_S_8H) { \
    CALL_TEST_NEON_HELPER_ACROSS(mnemonic, S, 8H, kInput16bits##input); \
  } \
  TEST(mnemonic##_D_4S) { \
    CALL_TEST_NEON_HELPER_ACROSS(mnemonic, D, 4S, kInput32bits##input); \
  }
2984 
// Defines the single test `<mnemonic>_S_4S`, which invokes
// CALL_TEST_NEON_HELPER_ACROSS with forms S/4S and kInputFloat<input>.
#define DEFINE_TEST_NEON_ACROSS_FP(mnemonic, input) \
  TEST(mnemonic##_S_4S) { \
    CALL_TEST_NEON_HELPER_ACROSS(mnemonic, S, 4S, kInputFloat##input); \
  }
2989 
// Thin alias: forwards all four arguments, unchanged and in the same order,
// to CALL_TEST_NEON_HELPER_1Op.
#define CALL_TEST_NEON_HELPER_2DIFF(mnemonic, vdform, vnform, input_n) \
  CALL_TEST_NEON_HELPER_1Op(mnemonic, vdform, vnform, input_n)
2992 
// Defines six tests named `<mnemonic>_<vdform>`, each invoking
// CALL_TEST_NEON_HELPER_2DIFF. The (vdform, vnform) pairs are 4H/8B, 8H/16B,
// 2S/4H, 4S/8H, 1D/2S and 2D/4S; the input is kInput8bits<input>,
// kInput16bits<input> or kInput32bits<input> according to the vnform lane
// letter (B, H or S).
#define DEFINE_TEST_NEON_2DIFF_LONG(mnemonic, input) \
  TEST(mnemonic##_4H) { \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 4H, 8B, kInput8bits##input); \
  } \
  TEST(mnemonic##_8H) { \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 8H, 16B, kInput8bits##input); \
  } \
  TEST(mnemonic##_2S) { \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 2S, 4H, kInput16bits##input); \
  } \
  TEST(mnemonic##_4S) { \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 4S, 8H, kInput16bits##input); \
  } \
  TEST(mnemonic##_1D) { \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 1D, 2S, kInput32bits##input); \
  } \
  TEST(mnemonic##_2D) { \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 2D, 4S, kInput32bits##input); \
  }
3012 
// Defines six tests, each invoking CALL_TEST_NEON_HELPER_2DIFF:
//   `<mnemonic>_8B`, `_4H`, `_2S`      use `mnemonic` with (vdform, vnform)
//                                      pairs 8B/8H, 4H/4S and 2S/2D;
//   `<mnemonic>2_16B`, `2_8H`, `2_4S`  use `<mnemonic>2` with pairs 16B/8H,
//                                      8H/4S and 4S/2D.
// The input is kInput16bits<input>, kInput32bits<input> or
// kInput64bits<input> according to the vnform lane letter (H, S or D).
#define DEFINE_TEST_NEON_2DIFF_NARROW(mnemonic, input) \
  TEST(mnemonic##_8B) { \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 8B, 8H, kInput16bits##input); \
  } \
  TEST(mnemonic##_4H) { \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 4H, 4S, kInput32bits##input); \
  } \
  TEST(mnemonic##_2S) { \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 2S, 2D, kInput64bits##input); \
  } \
  TEST(mnemonic##2_16B) { \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 16B, 8H, kInput16bits##input); \
  } \
  TEST(mnemonic##2_8H) { \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 8H, 4S, kInput32bits##input); \
  } \
  TEST(mnemonic##2_4S) { \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 4S, 2D, kInput64bits##input); \
  }
3032 
// Defines four tests, each invoking CALL_TEST_NEON_HELPER_2DIFF:
//   `<mnemonic>_4S`  : mnemonic,    4S <- 4H, kInputFloat16<input>
//   `<mnemonic>_2D`  : mnemonic,    2D <- 2S, kInputFloat<input>
//   `<mnemonic>2_4S` : <mnemonic>2, 4S <- 8H, kInputFloat16<input>
//   `<mnemonic>2_2D` : <mnemonic>2, 2D <- 4S, kInputFloat<input>
#define DEFINE_TEST_NEON_2DIFF_FP_LONG(mnemonic, input) \
  TEST(mnemonic##_4S) { \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 4S, 4H, kInputFloat16##input); \
  } \
  TEST(mnemonic##_2D) { \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 2D, 2S, kInputFloat##input); \
  } \
  TEST(mnemonic##2_4S) { \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 4S, 8H, kInputFloat16##input); \
  } \
  TEST(mnemonic##2_2D) { \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 2D, 4S, kInputFloat##input); \
  }
3046 
// Defines four tests, each invoking CALL_TEST_NEON_HELPER_2DIFF:
//   `<mnemonic>_4H`  : mnemonic,    4H <- 4S, kInputFloat<input>
//   `<mnemonic>_2S`  : mnemonic,    2S <- 2D, kInputDouble<input>
//   `<mnemonic>2_8H` : <mnemonic>2, 8H <- 4S, kInputFloat<input>
//   `<mnemonic>2_4S` : <mnemonic>2, 4S <- 2D, kInputDouble<input>
#define DEFINE_TEST_NEON_2DIFF_FP_NARROW(mnemonic, input) \
  TEST(mnemonic##_4H) { \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 4H, 4S, kInputFloat##input); \
  } \
  TEST(mnemonic##_2S) { \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 2S, 2D, kInputDouble##input); \
  } \
  TEST(mnemonic##2_8H) { \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 8H, 4S, kInputFloat##input); \
  } \
  TEST(mnemonic##2_4S) { \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 4S, 2D, kInputDouble##input); \
  }
3060 
// Defines two tests, each invoking CALL_TEST_NEON_HELPER_2DIFF with
// kInputDouble<input>:
//   `<mnemonic>_2S`  : mnemonic,    2S <- 2D
//   `<mnemonic>2_4S` : <mnemonic>2, 4S <- 2D
#define DEFINE_TEST_NEON_2DIFF_FP_NARROW_2S(mnemonic, input) \
  TEST(mnemonic##_2S) { \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 2S, 2D, kInputDouble##input); \
  } \
  TEST(mnemonic##2_4S) { \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 4S, 2D, kInputDouble##input); \
  }
3068 
// Defines three tests, `<mnemonic>_B`, `<mnemonic>_H` and `<mnemonic>_S`,
// each invoking CALL_TEST_NEON_HELPER_2DIFF. The (vdform, vnform) pairs are
// B/H, H/S and S/D, with kInput16bits<input>, kInput32bits<input> and
// kInput64bits<input> respectively.
#define DEFINE_TEST_NEON_2DIFF_SCALAR_NARROW(mnemonic, input) \
  TEST(mnemonic##_B) { \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, B, H, kInput16bits##input); \
  } \
  TEST(mnemonic##_H) { \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, H, S, kInput32bits##input); \
  } \
  TEST(mnemonic##_S) { \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, S, D, kInput64bits##input); \
  }
3079 
// Defines two tests, each invoking CALL_TEST_NEON_HELPER_2DIFF:
//   `<mnemonic>_S` : S <- 2S, kInputFloat<input>
//   `<mnemonic>_D` : D <- 2D, kInputDouble<input>
#define DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(mnemonic, input) \
  TEST(mnemonic##_S) { \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, S, 2S, kInputFloat##input); \
  } \
  TEST(mnemonic##_D) { \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, D, 2D, kInputDouble##input); \
  }
3087 
// Same-form wrapper around CALL_TEST_NEON_HELPER_2Op: `variant` is passed for
// all three form arguments and `input_nm` for both of the last two input
// arguments, while `input_d` is forwarded unchanged. The expansion is
// enclosed in its own brace block.
#define CALL_TEST_NEON_HELPER_3SAME(mnemonic, variant, input_d, input_nm) \
  { \
    CALL_TEST_NEON_HELPER_2Op( \
        mnemonic, variant, variant, variant, input_d, input_nm, input_nm); \
  }
3098 
// Defines tests `<mnemonic>_8B` and `<mnemonic>_16B`. Both invoke
// CALL_TEST_NEON_HELPER_3SAME with kInput8bitsAccDestination as `input_d`
// and kInput8bits<input> as `input_nm`.
#define DEFINE_TEST_NEON_3SAME_8B_16B(mnemonic, input) \
  TEST(mnemonic##_8B) { \
    CALL_TEST_NEON_HELPER_3SAME( \
        mnemonic, 8B, kInput8bitsAccDestination, kInput8bits##input); \
  } \
  TEST(mnemonic##_16B) { \
    CALL_TEST_NEON_HELPER_3SAME( \
        mnemonic, 16B, kInput8bitsAccDestination, kInput8bits##input); \
  }
3112 
// Defines tests `<mnemonic>_4H`, `_8H`, `_2S` and `_4S`, each invoking
// CALL_TEST_NEON_HELPER_3SAME. The H variants use kInput16bitsAccDestination
// and kInput16bits<input>; the S variants use kInput32bitsAccDestination and
// kInput32bits<input>.
#define DEFINE_TEST_NEON_3SAME_HS(mnemonic, input) \
  TEST(mnemonic##_4H) { \
    CALL_TEST_NEON_HELPER_3SAME( \
        mnemonic, 4H, kInput16bitsAccDestination, kInput16bits##input); \
  } \
  TEST(mnemonic##_8H) { \
    CALL_TEST_NEON_HELPER_3SAME( \
        mnemonic, 8H, kInput16bitsAccDestination, kInput16bits##input); \
  } \
  TEST(mnemonic##_2S) { \
    CALL_TEST_NEON_HELPER_3SAME( \
        mnemonic, 2S, kInput32bitsAccDestination, kInput32bits##input); \
  } \
  TEST(mnemonic##_4S) { \
    CALL_TEST_NEON_HELPER_3SAME( \
        mnemonic, 4S, kInput32bitsAccDestination, kInput32bits##input); \
  }
3138 
// Defines the 3SAME tests for every vector variant except 2D: the 8B/16B
// tests followed by the 4H/8H/2S/4S tests.
#define DEFINE_TEST_NEON_3SAME_NO2D(mnemonic, input) \
  DEFINE_TEST_NEON_3SAME_8B_16B(mnemonic, input) \
  DEFINE_TEST_NEON_3SAME_HS(mnemonic, input)
3142 
// Defines everything DEFINE_TEST_NEON_3SAME_NO2D does, then adds test
// `<mnemonic>_2D`, which invokes CALL_TEST_NEON_HELPER_3SAME with
// kInput64bitsAccDestination and kInput64bits<input>.
#define DEFINE_TEST_NEON_3SAME(mnemonic, input) \
  DEFINE_TEST_NEON_3SAME_NO2D(mnemonic, input) \
  TEST(mnemonic##_2D) { \
    CALL_TEST_NEON_HELPER_3SAME( \
        mnemonic, 2D, kInput64bitsAccDestination, kInput64bits##input); \
  }
3151 
// Defines tests `<mnemonic>_2S`, `_4S` and `_2D`, each invoking
// CALL_TEST_NEON_HELPER_3SAME. The S variants use kInputFloatAccDestination
// and kInputFloat<input>; the 2D variant uses kInputDoubleAccDestination and
// kInputDouble<input>.
#define DEFINE_TEST_NEON_3SAME_FP(mnemonic, input) \
  TEST(mnemonic##_2S) { \
    CALL_TEST_NEON_HELPER_3SAME( \
        mnemonic, 2S, kInputFloatAccDestination, kInputFloat##input); \
  } \
  TEST(mnemonic##_4S) { \
    CALL_TEST_NEON_HELPER_3SAME( \
        mnemonic, 4S, kInputFloatAccDestination, kInputFloat##input); \
  } \
  TEST(mnemonic##_2D) { \
    CALL_TEST_NEON_HELPER_3SAME( \
        mnemonic, 2D, kInputDoubleAccDestination, kInputDouble##input); \
  }
3171 
// Defines test `<mnemonic>_D`, which invokes CALL_TEST_NEON_HELPER_3SAME for
// the D variant with kInput64bitsAccDestination and kInput64bits<input>.
#define DEFINE_TEST_NEON_3SAME_SCALAR_D(mnemonic, input) \
  TEST(mnemonic##_D) { \
    CALL_TEST_NEON_HELPER_3SAME( \
        mnemonic, D, kInput64bitsAccDestination, kInput64bits##input); \
  }
3179 
// Defines tests `<mnemonic>_H` and `<mnemonic>_S`, each invoking
// CALL_TEST_NEON_HELPER_3SAME. The H variant uses kInput16bitsAccDestination
// and kInput16bits<input>; the S variant uses kInput32bitsAccDestination and
// kInput32bits<input>.
#define DEFINE_TEST_NEON_3SAME_SCALAR_HS(mnemonic, input) \
  TEST(mnemonic##_H) { \
    CALL_TEST_NEON_HELPER_3SAME( \
        mnemonic, H, kInput16bitsAccDestination, kInput16bits##input); \
  } \
  TEST(mnemonic##_S) { \
    CALL_TEST_NEON_HELPER_3SAME( \
        mnemonic, S, kInput32bitsAccDestination, kInput32bits##input); \
  }
3193 
// Defines the scalar 3SAME tests `<mnemonic>_B`, `_H`, `_S` and `_D`, in that
// order. The B test is spelled out here (kInput8bitsAccDestination and
// kInput8bits<input>); the H/S and D tests are produced by
// DEFINE_TEST_NEON_3SAME_SCALAR_HS and DEFINE_TEST_NEON_3SAME_SCALAR_D, which
// expand to the 16-, 32- and 64-bit equivalents.
#define DEFINE_TEST_NEON_3SAME_SCALAR(mnemonic, input) \
  TEST(mnemonic##_B) { \
    CALL_TEST_NEON_HELPER_3SAME( \
        mnemonic, B, kInput8bitsAccDestination, kInput8bits##input); \
  } \
  DEFINE_TEST_NEON_3SAME_SCALAR_HS(mnemonic, input) \
  DEFINE_TEST_NEON_3SAME_SCALAR_D(mnemonic, input)
3219 
// Defines tests `<mnemonic>_S` and `<mnemonic>_D`, each invoking
// CALL_TEST_NEON_HELPER_3SAME. The S variant uses kInputFloatAccDestination
// and kInputFloat<input>; the D variant uses kInputDoubleAccDestination and
// kInputDouble<input>.
#define DEFINE_TEST_NEON_3SAME_FP_SCALAR(mnemonic, input) \
  TEST(mnemonic##_S) { \
    CALL_TEST_NEON_HELPER_3SAME( \
        mnemonic, S, kInputFloatAccDestination, kInputFloat##input); \
  } \
  TEST(mnemonic##_D) { \
    CALL_TEST_NEON_HELPER_3SAME( \
        mnemonic, D, kInputDoubleAccDestination, kInputDouble##input); \
  }
3233 
// Wrapper around CALL_TEST_NEON_HELPER_2Op for cases where the three forms
// and the three inputs are given separately: all seven arguments are
// forwarded unchanged and in order. The expansion is enclosed in its own
// brace block.
#define CALL_TEST_NEON_HELPER_3DIFF( \
    mnemonic, vdform, vnform, vmform, input_d, input_n, input_m) \
  { \
    CALL_TEST_NEON_HELPER_2Op( \
        mnemonic, vdform, vnform, vmform, input_d, input_n, input_m); \
  }
3245 
// Defines two tests, each invoking CALL_TEST_NEON_HELPER_3DIFF with
// kInput16bitsAccDestination as `input_d` and kInput8bits<input> as both
// `input_n` and `input_m`:
//   `<mnemonic>_8H`  : mnemonic,    forms 8H, 8B, 8B
//   `<mnemonic>2_8H` : <mnemonic>2, forms 8H, 16B, 16B
#define DEFINE_TEST_NEON_3DIFF_LONG_8H(mnemonic, input) \
  TEST(mnemonic##_8H) { \
    CALL_TEST_NEON_HELPER_3DIFF( \
        mnemonic, 8H, 8B, 8B, \
        kInput16bitsAccDestination, kInput8bits##input, kInput8bits##input); \
  } \
  TEST(mnemonic##2_8H) { \
    CALL_TEST_NEON_HELPER_3DIFF( \
        mnemonic##2, 8H, 16B, 16B, \
        kInput16bitsAccDestination, kInput8bits##input, kInput8bits##input); \
  }
3265 
// Defines two tests, each invoking CALL_TEST_NEON_HELPER_3DIFF with
// kInput32bitsAccDestination as `input_d` and kInput16bits<input> as both
// `input_n` and `input_m`:
//   `<mnemonic>_4S`  : mnemonic,    forms 4S, 4H, 4H
//   `<mnemonic>2_4S` : <mnemonic>2, forms 4S, 8H, 8H
#define DEFINE_TEST_NEON_3DIFF_LONG_4S(mnemonic, input) \
  TEST(mnemonic##_4S) { \
    CALL_TEST_NEON_HELPER_3DIFF( \
        mnemonic, 4S, 4H, 4H, \
        kInput32bitsAccDestination, kInput16bits##input, kInput16bits##input); \
  } \
  TEST(mnemonic##2_4S) { \
    CALL_TEST_NEON_HELPER_3DIFF( \
        mnemonic##2, 4S, 8H, 8H, \
        kInput32bitsAccDestination, kInput16bits##input, kInput16bits##input); \
  }
3285 
// Defines two tests, each invoking CALL_TEST_NEON_HELPER_3DIFF with
// kInput64bitsAccDestination as `input_d` and kInput32bits<input> as both
// `input_n` and `input_m`:
//   `<mnemonic>_2D`  : mnemonic,    forms 2D, 2S, 2S
//   `<mnemonic>2_2D` : <mnemonic>2, forms 2D, 4S, 4S
#define DEFINE_TEST_NEON_3DIFF_LONG_2D(mnemonic, input) \
  TEST(mnemonic##_2D) { \
    CALL_TEST_NEON_HELPER_3DIFF( \
        mnemonic, 2D, 2S, 2S, \
        kInput64bitsAccDestination, kInput32bits##input, kInput32bits##input); \
  } \
  TEST(mnemonic##2_2D) { \
    CALL_TEST_NEON_HELPER_3DIFF( \
        mnemonic##2, 2D, 4S, 4S, \
        kInput64bitsAccDestination, kInput32bits##input, kInput32bits##input); \
  }
3305 
// Defines only the 4S- and 2D-destination long tests for `mnemonic`, in that
// order.
#define DEFINE_TEST_NEON_3DIFF_LONG_SD(mnemonic, input) \
  DEFINE_TEST_NEON_3DIFF_LONG_4S(mnemonic, input) \
  DEFINE_TEST_NEON_3DIFF_LONG_2D(mnemonic, input)
3309 
// Defines the complete set of long tests for `mnemonic`: the 8H-destination
// tests first, then the 4S- and 2D-destination tests via
// DEFINE_TEST_NEON_3DIFF_LONG_SD (which expands to the _4S and _2D macros).
#define DEFINE_TEST_NEON_3DIFF_LONG(mnemonic, input) \
  DEFINE_TEST_NEON_3DIFF_LONG_8H(mnemonic, input) \
  DEFINE_TEST_NEON_3DIFF_LONG_SD(mnemonic, input)
3314 
// Defines test `<mnemonic>_S`, which invokes CALL_TEST_NEON_HELPER_3DIFF with
// forms S, H, H, kInput32bitsAccDestination as `input_d`, and
// kInput16bits<input> as both `input_n` and `input_m`.
#define DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_S(mnemonic, input) \
  TEST(mnemonic##_S) { \
    CALL_TEST_NEON_HELPER_3DIFF( \
        mnemonic, S, H, H, \
        kInput32bitsAccDestination, kInput16bits##input, kInput16bits##input); \
  }
3325 
// Defines test `<mnemonic>_D`, which invokes CALL_TEST_NEON_HELPER_3DIFF with
// forms D, S, S, kInput64bitsAccDestination as `input_d`, and
// kInput32bits<input> as both `input_n` and `input_m`.
#define DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_D(mnemonic, input) \
  TEST(mnemonic##_D) { \
    CALL_TEST_NEON_HELPER_3DIFF( \
        mnemonic, D, S, S, \
        kInput64bitsAccDestination, kInput32bits##input, kInput32bits##input); \
  }
3336 
// Defines both scalar long tests for `mnemonic`: the S-destination test
// followed by the D-destination test.
#define DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_SD(mnemonic, input) \
  DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_S(mnemonic, input) \
  DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_D(mnemonic, input)
3340 
// Defines six tests, each invoking CALL_TEST_NEON_HELPER_3DIFF. In every test
// vdform and vnform are the same, `input_d` is the AccDestination input of
// that lane size, `input_n` is the <input> set of that lane size, and
// `input_m` is the <input> set of the vmform lane size:
//   `<mnemonic>_8H`  : mnemonic,    forms 8H, 8H, 8B
//   `<mnemonic>_4S`  : mnemonic,    forms 4S, 4S, 4H
//   `<mnemonic>_2D`  : mnemonic,    forms 2D, 2D, 2S
//   `<mnemonic>2_8H` : <mnemonic>2, forms 8H, 8H, 16B
//   `<mnemonic>2_4S` : <mnemonic>2, forms 4S, 4S, 8H
//   `<mnemonic>2_2D` : <mnemonic>2, forms 2D, 2D, 4S
#define DEFINE_TEST_NEON_3DIFF_WIDE(mnemonic, input) \
  TEST(mnemonic##_8H) { \
    CALL_TEST_NEON_HELPER_3DIFF( \
        mnemonic, 8H, 8H, 8B, \
        kInput16bitsAccDestination, kInput16bits##input, kInput8bits##input); \
  } \
  TEST(mnemonic##_4S) { \
    CALL_TEST_NEON_HELPER_3DIFF( \
        mnemonic, 4S, 4S, 4H, \
        kInput32bitsAccDestination, kInput32bits##input, kInput16bits##input); \
  } \
  TEST(mnemonic##_2D) { \
    CALL_TEST_NEON_HELPER_3DIFF( \
        mnemonic, 2D, 2D, 2S, \
        kInput64bitsAccDestination, kInput64bits##input, kInput32bits##input); \
  } \
  TEST(mnemonic##2_8H) { \
    CALL_TEST_NEON_HELPER_3DIFF( \
        mnemonic##2, 8H, 8H, 16B, \
        kInput16bitsAccDestination, kInput16bits##input, kInput8bits##input); \
  } \
  TEST(mnemonic##2_4S) { \
    CALL_TEST_NEON_HELPER_3DIFF( \
        mnemonic##2, 4S, 4S, 8H, \
        kInput32bitsAccDestination, kInput32bits##input, kInput16bits##input); \
  } \
  TEST(mnemonic##2_2D) { \
    CALL_TEST_NEON_HELPER_3DIFF( \
        mnemonic##2, 2D, 2D, 4S, \
        kInput64bitsAccDestination, kInput64bits##input, kInput32bits##input); \
  }
3396 
// Defines six tests, each invoking CALL_TEST_NEON_HELPER_3DIFF. In every test
// vnform and vmform are the same, `input_d` is the AccDestination input of
// the vdform lane size, and `input_n`/`input_m` are both the <input> set of
// the source lane size:
//   `<mnemonic>_8B`   : mnemonic,    forms 8B, 8H, 8H
//   `<mnemonic>_4H`   : mnemonic,    forms 4H, 4S, 4S
//   `<mnemonic>_2S`   : mnemonic,    forms 2S, 2D, 2D
//   `<mnemonic>2_16B` : <mnemonic>2, forms 16B, 8H, 8H
//   `<mnemonic>2_8H`  : <mnemonic>2, forms 8H, 4S, 4S
//   `<mnemonic>2_4S`  : <mnemonic>2, forms 4S, 2D, 2D
#define DEFINE_TEST_NEON_3DIFF_NARROW(mnemonic, input) \
  TEST(mnemonic##_8B) { \
    CALL_TEST_NEON_HELPER_3DIFF( \
        mnemonic, 8B, 8H, 8H, \
        kInput8bitsAccDestination, kInput16bits##input, kInput16bits##input); \
  } \
  TEST(mnemonic##_4H) { \
    CALL_TEST_NEON_HELPER_3DIFF( \
        mnemonic, 4H, 4S, 4S, \
        kInput16bitsAccDestination, kInput32bits##input, kInput32bits##input); \
  } \
  TEST(mnemonic##_2S) { \
    CALL_TEST_NEON_HELPER_3DIFF( \
        mnemonic, 2S, 2D, 2D, \
        kInput32bitsAccDestination, kInput64bits##input, kInput64bits##input); \
  } \
  TEST(mnemonic##2_16B) { \
    CALL_TEST_NEON_HELPER_3DIFF( \
        mnemonic##2, 16B, 8H, 8H, \
        kInput8bitsAccDestination, kInput16bits##input, kInput16bits##input); \
  } \
  TEST(mnemonic##2_8H) { \
    CALL_TEST_NEON_HELPER_3DIFF( \
        mnemonic##2, 8H, 4S, 4S, \
        kInput16bitsAccDestination, kInput32bits##input, kInput32bits##input); \
  } \
  TEST(mnemonic##2_4S) { \
    CALL_TEST_NEON_HELPER_3DIFF( \
        mnemonic##2, 4S, 2D, 2D, \
        kInput32bitsAccDestination, kInput64bits##input, kInput64bits##input); \
  }
3452 
// Wrapper around CALL_TEST_NEON_HELPER_2OpImm: all five arguments are
// forwarded unchanged and in order. The expansion is enclosed in its own
// brace block.
#define CALL_TEST_NEON_HELPER_2OPIMM( \
    mnemonic, vdform, vnform, input_n, input_imm) \
  { \
    CALL_TEST_NEON_HELPER_2OpImm( \
        mnemonic, vdform, vnform, input_n, input_imm); \
  }
3462 
// Expands to seven TEST definitions named <mnemonic>_<form>_2OPIMM, one for
// each of the forms 8B, 16B, 4H, 8H, 2S, 4S and 2D. Each test passes the same
// form as both vdform and vnform to CALL_TEST_NEON_HELPER_2OPIMM, together
// with kInput<N>bits<input> and kInput<N>bitsImm<input_imm>, where N is 8 for
// the B forms, 16 for H, 32 for S and 64 for D.
#define DEFINE_TEST_NEON_2OPIMM(mnemonic, input, input_imm) \
  TEST(mnemonic##_8B_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, 8B, 8B, kInput8bits##input, kInput8bitsImm##input_imm); \
  } \
  TEST(mnemonic##_16B_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, 16B, 16B, kInput8bits##input, kInput8bitsImm##input_imm); \
  } \
  TEST(mnemonic##_4H_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, 4H, 4H, kInput16bits##input, kInput16bitsImm##input_imm); \
  } \
  TEST(mnemonic##_8H_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, 8H, 8H, kInput16bits##input, kInput16bitsImm##input_imm); \
  } \
  TEST(mnemonic##_2S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, 2S, 2S, kInput32bits##input, kInput32bitsImm##input_imm); \
  } \
  TEST(mnemonic##_4S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, 4S, 4S, kInput32bits##input, kInput32bitsImm##input_imm); \
  } \
  TEST(mnemonic##_2D_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, 2D, 2D, kInput64bits##input, kInput64bitsImm##input_imm); \
  }
3513 
// Expands to seven TEST definitions named <mnemonic>_<form>_2OPIMM for the
// vdforms 8B, 16B, 4H, 8H, 2S, 4S and 2D. Unlike DEFINE_TEST_NEON_2OPIMM, the
// vnform handed to CALL_TEST_NEON_HELPER_2OPIMM is the bare lane letter (B, H,
// S or D) matching the vdform. Inputs are kInput<N>bits<input> and
// kInput<N>bitsImm<input_imm> with N chosen by that lane letter.
#define DEFINE_TEST_NEON_2OPIMM_COPY(mnemonic, input, input_imm) \
  TEST(mnemonic##_8B_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, 8B, B, kInput8bits##input, kInput8bitsImm##input_imm); \
  } \
  TEST(mnemonic##_16B_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, 16B, B, kInput8bits##input, kInput8bitsImm##input_imm); \
  } \
  TEST(mnemonic##_4H_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, 4H, H, kInput16bits##input, kInput16bitsImm##input_imm); \
  } \
  TEST(mnemonic##_8H_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, 8H, H, kInput16bits##input, kInput16bitsImm##input_imm); \
  } \
  TEST(mnemonic##_2S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, 2S, S, kInput32bits##input, kInput32bitsImm##input_imm); \
  } \
  TEST(mnemonic##_4S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, 4S, S, kInput32bits##input, kInput32bitsImm##input_imm); \
  } \
  TEST(mnemonic##_2D_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, 2D, D, kInput64bits##input, kInput64bitsImm##input_imm); \
  }
3564 
// Expands to six TEST definitions in which the vnform has lanes twice as wide
// as the vdform:
//   <mnemonic>_{8B,4H,2S}_2OPIMM    with vnform 8H, 4S, 2D respectively, and
//   <mnemonic>2_{16B,8H,4S}_2OPIMM  with the same vnforms, using the mnemonic
//                                   with a '2' appended.
// The source input is kInput<N>bits<input> sized for the vnform lanes; the
// immediate input is kInput<N>bitsImm<input_imm> sized for the vdform lanes.
#define DEFINE_TEST_NEON_2OPIMM_NARROW(mnemonic, input, input_imm) \
  TEST(mnemonic##_8B_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, 8B, 8H, kInput16bits##input, kInput8bitsImm##input_imm); \
  } \
  TEST(mnemonic##_4H_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, 4H, 4S, kInput32bits##input, kInput16bitsImm##input_imm); \
  } \
  TEST(mnemonic##_2S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, 2S, 2D, kInput64bits##input, kInput32bitsImm##input_imm); \
  } \
  TEST(mnemonic##2_16B_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic##2, 16B, 8H, kInput16bits##input, kInput8bitsImm##input_imm); \
  } \
  TEST(mnemonic##2_8H_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic##2, 8H, 4S, kInput32bits##input, kInput16bitsImm##input_imm); \
  } \
  TEST(mnemonic##2_4S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic##2, 4S, 2D, kInput64bits##input, kInput32bitsImm##input_imm); \
  }
3608 
// Expands to three TEST definitions, <mnemonic>_{B,H,S}_2OPIMM, whose vnform
// is the next wider lane letter (H, S, D). The source input is
// kInput<N>bits<input> sized for the vnform; the immediate input is
// kInput<N>bitsImm<input_imm> sized for the vdform.
#define DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(mnemonic, input, input_imm) \
  TEST(mnemonic##_B_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, B, H, kInput16bits##input, kInput8bitsImm##input_imm); \
  } \
  TEST(mnemonic##_H_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, H, S, kInput32bits##input, kInput16bitsImm##input_imm); \
  } \
  TEST(mnemonic##_S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, S, D, kInput64bits##input, kInput32bitsImm##input_imm); \
  }
3631 
// Expands to three TEST definitions, <mnemonic>_{2S,4S,2D}_2OPIMM. Each passes
// the same form as vdform and vnform to CALL_TEST_NEON_HELPER_2OPIMM, with
// kInputDoubleImm<input_imm> as the immediate input for all three forms. The
// source input is kInputFloat<input> for 4S and kInputDouble<input> for 2D.
//
// Every helper invocation is terminated with ';' so the expansion stays valid
// even if the helper does not expand to a brace-delimited block.
//
// NOTE(review): the 2S test uses kInputFloatBasic regardless of `input`, while
// the 4S test honours `input`. This is kept as is; presumably any expected
// results recorded for the 2S test were produced with this input -- confirm
// before switching it to kInputFloat##input.
#define DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(mnemonic, input, input_imm) \
  TEST(mnemonic##_2S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, 2S, 2S, kInputFloatBasic, kInputDoubleImm##input_imm); \
  } \
  TEST(mnemonic##_4S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, 4S, 4S, kInputFloat##input, kInputDoubleImm##input_imm); \
  } \
  TEST(mnemonic##_2D_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, 2D, 2D, kInputDouble##input, kInputDoubleImm##input_imm); \
  }
3654 
// Expands to three TEST definitions, <mnemonic>_{2S,4S,2D}_2OPIMM. Each passes
// the same form as vdform and vnform to CALL_TEST_NEON_HELPER_2OPIMM. The
// immediate input is kInput32bitsImm<input_imm> for the S forms and
// kInput64bitsImm<input_imm> for 2D; the source input is kInputFloat<input>
// for 4S and kInputDouble<input> for 2D.
//
// Every helper invocation is terminated with ';' so the expansion stays valid
// even if the helper does not expand to a brace-delimited block.
//
// NOTE(review): the 2S test uses kInputFloatBasic regardless of `input`, while
// the 4S test honours `input`. This is kept as is; presumably any expected
// results recorded for the 2S test were produced with this input -- confirm
// before switching it to kInputFloat##input.
#define DEFINE_TEST_NEON_2OPIMM_FP(mnemonic, input, input_imm) \
  TEST(mnemonic##_2S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, 2S, 2S, kInputFloatBasic, kInput32bitsImm##input_imm); \
  } \
  TEST(mnemonic##_4S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, 4S, 4S, kInputFloat##input, kInput32bitsImm##input_imm); \
  } \
  TEST(mnemonic##_2D_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, 2D, 2D, kInputDouble##input, kInput64bitsImm##input_imm); \
  }
3677 
// Expands to two TEST definitions, <mnemonic>_S_2OPIMM and
// <mnemonic>_D_2OPIMM. Each passes the same lane letter as vdform and vnform
// to CALL_TEST_NEON_HELPER_2OPIMM. The immediate input is
// kInput32bitsImm<input_imm> for S and kInput64bitsImm<input_imm> for D; the D
// test reads its source from kInputDouble<input>.
//
// Every helper invocation is terminated with ';' so the expansion stays valid
// even if the helper does not expand to a brace-delimited block.
//
// NOTE(review): the S test uses kInputFloatBasic regardless of `input`. This
// is kept as is; presumably any expected results recorded for the S test were
// produced with this input -- confirm before switching it to
// kInputFloat##input.
#define DEFINE_TEST_NEON_2OPIMM_FP_SCALAR(mnemonic, input, input_imm) \
  TEST(mnemonic##_S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, S, S, kInputFloatBasic, kInput32bitsImm##input_imm); \
  } \
  TEST(mnemonic##_D_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, D, D, kInputDouble##input, kInput64bitsImm##input_imm); \
  }
3693 
// Expands to three TEST definitions, <mnemonic>_{2S,4S,2D}_2OPIMM. Each passes
// the same form as vdform and vnform to CALL_TEST_NEON_HELPER_2OPIMM, with
// kInput32bits<input> / kInput32bitsImm<input_imm> for the S forms and
// kInput64bits<input> / kInput64bitsImm<input_imm> for 2D.
#define DEFINE_TEST_NEON_2OPIMM_SD(mnemonic, input, input_imm) \
  TEST(mnemonic##_2S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, 2S, 2S, kInput32bits##input, kInput32bitsImm##input_imm); \
  } \
  TEST(mnemonic##_4S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, 4S, 4S, kInput32bits##input, kInput32bitsImm##input_imm); \
  } \
  TEST(mnemonic##_2D_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, 2D, 2D, kInput64bits##input, kInput64bitsImm##input_imm); \
  }
3716 
// Expands to a single TEST definition, <mnemonic>_D_2OPIMM, which invokes
// CALL_TEST_NEON_HELPER_2OPIMM with D as both vdform and vnform and with
// kInput64bits<input> / kInput64bitsImm<input_imm> as inputs.
#define DEFINE_TEST_NEON_2OPIMM_SCALAR_D(mnemonic, input, input_imm) \
  TEST(mnemonic##_D_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, D, D, kInput64bits##input, kInput64bitsImm##input_imm); \
  }
3725 
// Expands to a TEST definition <mnemonic>_S_2OPIMM (S as both vdform and
// vnform, inputs kInput32bits<input> / kInput32bitsImm<input_imm>), followed
// by whatever DEFINE_TEST_NEON_2OPIMM_SCALAR_D expands to for the same three
// arguments.
#define DEFINE_TEST_NEON_2OPIMM_SCALAR_SD(mnemonic, input, input_imm) \
  TEST(mnemonic##_S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, S, S, kInput32bits##input, kInput32bitsImm##input_imm); \
  } \
  DEFINE_TEST_NEON_2OPIMM_SCALAR_D(mnemonic, input, input_imm)
3735 
// Expands to a single TEST definition, <mnemonic>_D_2OPIMM, which invokes
// CALL_TEST_NEON_HELPER_2OPIMM with D as both vdform and vnform and with
// kInputDouble<input> / kInputDoubleImm<input_imm> as inputs.
#define DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_D(mnemonic, input, input_imm) \
  TEST(mnemonic##_D_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, D, D, kInputDouble##input, kInputDoubleImm##input_imm); \
  }
3744 
// Expands to a TEST definition <mnemonic>_S_2OPIMM (S as both vdform and
// vnform, inputs kInputFloat<input> / kInputDoubleImm<input_imm>), followed by
// whatever DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_D expands to for the same three
// arguments.
#define DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_SD(mnemonic, input, input_imm) \
  TEST(mnemonic##_S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, S, S, kInputFloat##input, kInputDoubleImm##input_imm); \
  } \
  DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_D(mnemonic, input, input_imm)
3754 
// Expands to TEST definitions <mnemonic>_B_2OPIMM and <mnemonic>_H_2OPIMM
// (same lane letter as vdform and vnform; inputs kInput<N>bits<input> /
// kInput<N>bitsImm<input_imm> with N = 8 for B and 16 for H), followed by
// whatever DEFINE_TEST_NEON_2OPIMM_SCALAR_SD expands to for the same three
// arguments.
#define DEFINE_TEST_NEON_2OPIMM_SCALAR(mnemonic, input, input_imm) \
  TEST(mnemonic##_B_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, B, B, kInput8bits##input, kInput8bitsImm##input_imm); \
  } \
  TEST(mnemonic##_H_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, H, H, kInput16bits##input, kInput16bitsImm##input_imm); \
  } \
  DEFINE_TEST_NEON_2OPIMM_SCALAR_SD(mnemonic, input, input_imm)
3771 
// Expands to six TEST definitions in which the vdform has lanes twice as wide
// as the vnform:
//   <mnemonic>_{8H,4S,2D}_2OPIMM   with vnform 8B, 4H, 2S respectively, and
//   <mnemonic>2_{8H,4S,2D}_2OPIMM  with vnform 16B, 8H, 4S, using the mnemonic
//                                  with a '2' appended.
// Both the source input (kInput<N>bits<input>) and the immediate input
// (kInput<N>bitsImm<input_imm>) are sized for the vnform lanes.
#define DEFINE_TEST_NEON_2OPIMM_LONG(mnemonic, input, input_imm) \
  TEST(mnemonic##_8H_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, 8H, 8B, kInput8bits##input, kInput8bitsImm##input_imm); \
  } \
  TEST(mnemonic##_4S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, 4S, 4H, kInput16bits##input, kInput16bitsImm##input_imm); \
  } \
  TEST(mnemonic##_2D_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, 2D, 2S, kInput32bits##input, kInput32bitsImm##input_imm); \
  } \
  TEST(mnemonic##2_8H_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic##2, 8H, 16B, kInput8bits##input, kInput8bitsImm##input_imm); \
  } \
  TEST(mnemonic##2_4S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic##2, 4S, 8H, kInput16bits##input, kInput16bitsImm##input_imm); \
  } \
  TEST(mnemonic##2_2D_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic##2, 2D, 4S, kInput32bits##input, kInput32bitsImm##input_imm); \
  }
3815 
// Forwards its eight arguments, unchanged and in order, to
// CALL_TEST_NEON_HELPER_ByElement. The forwarded call sits inside its own pair
// of braces, so the expansion is a complete compound statement.
#define CALL_TEST_NEON_HELPER_BYELEMENT( \
    mnemonic, vdform, vnform, vmform, input_d, input_n, input_m, indices) \
  { \
    CALL_TEST_NEON_HELPER_ByElement( \
        mnemonic, vdform, vnform, vmform, input_d, input_n, input_m, indices); \
  }
3828 
// Expands to four TEST definitions named <mnemonic>_<vd>_<vn>_<vm>:
//   4H_4H_H and 8H_8H_H  use kInput16bits<input_*> and kInputHIndices;
//   2S_2S_S and 4S_4S_S  use kInput32bits<input_*> and kInputSIndices.
// Each test hands its forms, the three inputs selected by input_d, input_n and
// input_m, and the index list to CALL_TEST_NEON_HELPER_BYELEMENT.
#define DEFINE_TEST_NEON_BYELEMENT(mnemonic, input_d, input_n, input_m) \
  TEST(mnemonic##_4H_4H_H) { \
    CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, 4H, 4H, H, \
                                    kInput16bits##input_d, \
                                    kInput16bits##input_n, \
                                    kInput16bits##input_m, \
                                    kInputHIndices); \
  } \
  TEST(mnemonic##_8H_8H_H) { \
    CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, 8H, 8H, H, \
                                    kInput16bits##input_d, \
                                    kInput16bits##input_n, \
                                    kInput16bits##input_m, \
                                    kInputHIndices); \
  } \
  TEST(mnemonic##_2S_2S_S) { \
    CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, 2S, 2S, S, \
                                    kInput32bits##input_d, \
                                    kInput32bits##input_n, \
                                    kInput32bits##input_m, \
                                    kInputSIndices); \
  } \
  TEST(mnemonic##_4S_4S_S) { \
    CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, 4S, 4S, S, \
                                    kInput32bits##input_d, \
                                    kInput32bits##input_n, \
                                    kInput32bits##input_m, \
                                    kInputSIndices); \
  }
3870 
// Expands to two TEST definitions:
//   <mnemonic>_H_H_H  uses kInput16bits<input_*> and kInputHIndices;
//   <mnemonic>_S_S_S  uses kInput32bits<input_*> and kInputSIndices.
// Each test passes the same lane letter as vdform, vnform and vmform to
// CALL_TEST_NEON_HELPER_BYELEMENT.
#define DEFINE_TEST_NEON_BYELEMENT_SCALAR(mnemonic, input_d, input_n, input_m) \
  TEST(mnemonic##_H_H_H) { \
    CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, H, H, H, \
                                    kInput16bits##input_d, \
                                    kInput16bits##input_n, \
                                    kInput16bits##input_m, \
                                    kInputHIndices); \
  } \
  TEST(mnemonic##_S_S_S) { \
    CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, S, S, S, \
                                    kInput32bits##input_d, \
                                    kInput32bits##input_n, \
                                    kInput32bits##input_m, \
                                    kInputSIndices); \
  }
3892 
// Expands to three TEST definitions named <mnemonic>_<vd>_<vn>_<vm>:
//   2S_2S_S and 4S_4S_S  use kInputFloat<input_*> and kInputSIndices;
//   2D_2D_D              uses kInputDouble<input_*> and kInputDIndices.
// Each test hands its forms, the three inputs selected by input_d, input_n and
// input_m, and the index list to CALL_TEST_NEON_HELPER_BYELEMENT.
#define DEFINE_TEST_NEON_FP_BYELEMENT(mnemonic, input_d, input_n, input_m) \
  TEST(mnemonic##_2S_2S_S) { \
    CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, 2S, 2S, S, \
                                    kInputFloat##input_d, \
                                    kInputFloat##input_n, \
                                    kInputFloat##input_m, \
                                    kInputSIndices); \
  } \
  TEST(mnemonic##_4S_4S_S) { \
    CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, 4S, 4S, S, \
                                    kInputFloat##input_d, \
                                    kInputFloat##input_n, \
                                    kInputFloat##input_m, \
                                    kInputSIndices); \
  } \
  TEST(mnemonic##_2D_2D_D) { \
    CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, 2D, 2D, D, \
                                    kInputDouble##input_d, \
                                    kInputDouble##input_n, \
                                    kInputDouble##input_m, \
                                    kInputDIndices); \
  }
3924 
// Scalar counterpart of the floating-point "by element" tests. Defines
// <mnemonic>_S_S_S (kInputFloat* inputs, kInputSIndices) and
// <mnemonic>_D_D_D (kInputDouble* inputs, kInputDIndices).
// `inp_d`, `inp_n` and `inp_m` are suffixes pasted onto the input-array
// prefix.
3925 #define DEFINE_TEST_NEON_FP_BYELEMENT_SCALAR(mnemonic, inp_d, inp_n, inp_m) \
3926   TEST(mnemonic##_S_S_S) {                                                  \
3927     CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic,                               \
3928                                     S,                                      \
3929                                     S,                                      \
3930                                     S,                                      \
3931                                     kInputFloat##inp_d,                     \
3932                                     kInputFloat##inp_n,                     \
3933                                     kInputFloat##inp_m,                     \
3934                                     kInputSIndices);                        \
3935   }                                                                         \
3936   TEST(mnemonic##_D_D_D) {                                                  \
3937     CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic,                               \
3938                                     D,                                      \
3939                                     D,                                      \
3940                                     D,                                      \
3941                                     kInputDouble##inp_d,                    \
3942                                     kInputDouble##inp_n,                    \
3943                                     kInputDouble##inp_m,                    \
3944                                     kInputDIndices);                        \
3945   }
3946 
3947 
// Defines four integer "by element" tests whose destination lanes are twice
// as wide as the source lanes:
//   <mnemonic>_4S_4H_H  and <mnemonic>2_4S_8H_H  (16-bit sources, H indices)
//   <mnemonic>_2D_2S_S  and <mnemonic>2_2D_4S_S  (32-bit sources, S indices)
// The "2" tests invoke the helper with `mnemonic##2` (e.g. smlal -> smlal2)
// and the 8H/4S source arrangement. The destination input array uses the
// wider element prefix (kInput32bits / kInput64bits); the n and m inputs use
// the narrower one.
3948 #define DEFINE_TEST_NEON_BYELEMENT_DIFF(mnemonic, input_d, input_n, input_m) \
3949   TEST(mnemonic##_4S_4H_H) {                                                 \
3950     CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic,                                \
3951                                     4S,                                      \
3952                                     4H,                                      \
3953                                     H,                                       \
3954                                     kInput32bits##input_d,                   \
3955                                     kInput16bits##input_n,                   \
3956                                     kInput16bits##input_m,                   \
3957                                     kInputHIndices);                         \
3958   }                                                                          \
3959   TEST(mnemonic##2_4S_8H_H) {                                                \
3960     CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic##2,                             \
3961                                     4S,                                      \
3962                                     8H,                                      \
3963                                     H,                                       \
3964                                     kInput32bits##input_d,                   \
3965                                     kInput16bits##input_n,                   \
3966                                     kInput16bits##input_m,                   \
3967                                     kInputHIndices);                         \
3968   }                                                                          \
3969   TEST(mnemonic##_2D_2S_S) {                                                 \
3970     CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic,                                \
3971                                     2D,                                      \
3972                                     2S,                                      \
3973                                     S,                                       \
3974                                     kInput64bits##input_d,                   \
3975                                     kInput32bits##input_n,                   \
3976                                     kInput32bits##input_m,                   \
3977                                     kInputSIndices);                         \
3978   }                                                                          \
3979   TEST(mnemonic##2_2D_4S_S) {                                                \
3980     CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic##2,                             \
3981                                     2D,                                      \
3982                                     4S,                                      \
3983                                     S,                                       \
3984                                     kInput64bits##input_d,                   \
3985                                     kInput32bits##input_n,                   \
3986                                     kInput32bits##input_m,                   \
3987                                     kInputSIndices);                         \
3988   }
3989 
// Scalar counterpart of the widening "by element" tests. Defines
// <mnemonic>_S_H_H (32-bit destination input, 16-bit n/m inputs, H indices)
// and <mnemonic>_D_S_S (64-bit destination input, 32-bit n/m inputs,
// S indices). The input_* arguments are suffixes pasted onto the
// kInput<width>bits prefix.
3990 #define DEFINE_TEST_NEON_BYELEMENT_DIFF_SCALAR(            \
3991     mnemonic, input_d, input_n, input_m)                   \
3992   TEST(mnemonic##_S_H_H) {                                 \
3993     CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic,              \
3994                                     S,                     \
3995                                     H,                     \
3996                                     H,                     \
3997                                     kInput32bits##input_d, \
3998                                     kInput16bits##input_n, \
3999                                     kInput16bits##input_m, \
4000                                     kInputHIndices);       \
4001   }                                                        \
4002   TEST(mnemonic##_D_S_S) {                                 \
4003     CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic,              \
4004                                     D,                     \
4005                                     S,                     \
4006                                     S,                     \
4007                                     kInput64bits##input_d, \
4008                                     kInput32bits##input_n, \
4009                                     kInput32bits##input_m, \
4010                                     kInputSIndices);       \
4011   }
4012 
4013 
// Thin wrapper over CALL_TEST_NEON_HELPER_OpImmOpImm for instructions that
// take two register operands, each paired with an immediate. It passes the
// MacroAssembler member function for `mnemonic`, the mnemonic token itself,
// and `variant` twice, followed by the four input arrays.
// NOTE(review): `variant` is presumably used as both the destination and the
// source arrangement; the helper is defined elsewhere, so confirm there.
4014 #define CALL_TEST_NEON_HELPER_2OP2IMM(                           \
4015     mnemonic, variant, input_d, input_imm1, input_n, input_imm2) \
4016   {                                                              \
4017     CALL_TEST_NEON_HELPER_OpImmOpImm(&MacroAssembler::mnemonic,  \
4018                                      mnemonic,                   \
4019                                      variant,                    \
4020                                      variant,                    \
4021                                      input_d,                    \
4022                                      input_imm1,                 \
4023                                      input_n,                    \
4024                                      input_imm2);                \
4025   }
4026 
// Defines four two-operand/two-immediate tests for `mnemonic`, one per
// element size: <mnemonic>_B (16B), _H (8H), _S (4S) and _D (2D).
// `input_d` / `input_n` are suffixes pasted onto kInput<width>bits, and
// `input_imm1` / `input_imm2` are suffixes pasted onto kInput<width>bitsImm,
// so every test uses input and immediate arrays matching its element width.
4027 #define DEFINE_TEST_NEON_2OP2IMM(                               \
4028     mnemonic, input_d, input_imm1, input_n, input_imm2)         \
4029   TEST(mnemonic##_B) {                                          \
4030     CALL_TEST_NEON_HELPER_2OP2IMM(mnemonic,                     \
4031                                   16B,                          \
4032                                   kInput8bits##input_d,         \
4033                                   kInput8bitsImm##input_imm1,   \
4034                                   kInput8bits##input_n,         \
4035                                   kInput8bitsImm##input_imm2);  \
4036   }                                                             \
4037   TEST(mnemonic##_H) {                                          \
4038     CALL_TEST_NEON_HELPER_2OP2IMM(mnemonic,                     \
4039                                   8H,                           \
4040                                   kInput16bits##input_d,        \
4041                                   kInput16bitsImm##input_imm1,  \
4042                                   kInput16bits##input_n,        \
4043                                   kInput16bitsImm##input_imm2); \
4044   }                                                             \
4045   TEST(mnemonic##_S) {                                          \
4046     CALL_TEST_NEON_HELPER_2OP2IMM(mnemonic,                     \
4047                                   4S,                           \
4048                                   kInput32bits##input_d,        \
4049                                   kInput32bitsImm##input_imm1,  \
4050                                   kInput32bits##input_n,        \
4051                                   kInput32bitsImm##input_imm2); \
4052   }                                                             \
4053   TEST(mnemonic##_D) {                                          \
4054     CALL_TEST_NEON_HELPER_2OP2IMM(mnemonic,                     \
4055                                   2D,                           \
4056                                   kInput64bits##input_d,        \
4057                                   kInput64bitsImm##input_imm1,  \
4058                                   kInput64bits##input_n,        \
4059                                   kInput64bitsImm##input_imm2); \
4060   }
4061 
4062 
4063 // Advanced SIMD copy.
// `ins` goes through DEFINE_TEST_NEON_2OP2IMM, so both of its immediates are
// taken from the kInput<width>bitsImmLaneCountFromZero arrays.
4064 DEFINE_TEST_NEON_2OP2IMM(
4065     ins, Basic, LaneCountFromZero, Basic, LaneCountFromZero)
4066 DEFINE_TEST_NEON_2OPIMM_COPY(dup, Basic, LaneCountFromZero)
4067 
4068 
4069 // Advanced SIMD scalar copy.
// Scalar form of `dup`, instantiated with the same Basic /
// LaneCountFromZero arguments as the vector form.
4070 DEFINE_TEST_NEON_2OPIMM_SCALAR(dup, Basic, LaneCountFromZero)
4071 
4072 
4073 // Advanced SIMD three same.
// One macro invocation per instruction. The macro suffix (_NO2D, _HS, _FP,
// _8B_16B, or none) selects which DEFINE_TEST_NEON_3SAME* generator is used;
// those generators are defined earlier in this file.
// NOTE(review): the suffixes presumably name the set of vector arrangements
// each instruction is tested with; confirm against the macro definitions.
4074 DEFINE_TEST_NEON_3SAME_NO2D(shadd, Basic)
4075 DEFINE_TEST_NEON_3SAME(sqadd, Basic)
4076 DEFINE_TEST_NEON_3SAME_NO2D(srhadd, Basic)
4077 DEFINE_TEST_NEON_3SAME_NO2D(shsub, Basic)
4078 DEFINE_TEST_NEON_3SAME(sqsub, Basic)
4079 DEFINE_TEST_NEON_3SAME(cmgt, Basic)
4080 DEFINE_TEST_NEON_3SAME(cmge, Basic)
4081 DEFINE_TEST_NEON_3SAME(sshl, Basic)
4082 DEFINE_TEST_NEON_3SAME(sqshl, Basic)
4083 DEFINE_TEST_NEON_3SAME(srshl, Basic)
4084 DEFINE_TEST_NEON_3SAME(sqrshl, Basic)
4085 DEFINE_TEST_NEON_3SAME_NO2D(smax, Basic)
4086 DEFINE_TEST_NEON_3SAME_NO2D(smin, Basic)
4087 DEFINE_TEST_NEON_3SAME_NO2D(sabd, Basic)
4088 DEFINE_TEST_NEON_3SAME_NO2D(saba, Basic)
4089 DEFINE_TEST_NEON_3SAME(add, Basic)
4090 DEFINE_TEST_NEON_3SAME(cmtst, Basic)
4091 DEFINE_TEST_NEON_3SAME_NO2D(mla, Basic)
4092 DEFINE_TEST_NEON_3SAME_NO2D(mul, Basic)
4093 DEFINE_TEST_NEON_3SAME_NO2D(smaxp, Basic)
4094 DEFINE_TEST_NEON_3SAME_NO2D(sminp, Basic)
4095 DEFINE_TEST_NEON_3SAME_HS(sqdmulh, Basic)
4096 DEFINE_TEST_NEON_3SAME(addp, Basic)
4097 DEFINE_TEST_NEON_3SAME_FP(fmaxnm, Basic)
4098 DEFINE_TEST_NEON_3SAME_FP(fmla, Basic)
4099 DEFINE_TEST_NEON_3SAME_FP(fadd, Basic)
4100 DEFINE_TEST_NEON_3SAME_FP(fmulx, Basic)
4101 DEFINE_TEST_NEON_3SAME_FP(fcmeq, Basic)
4102 DEFINE_TEST_NEON_3SAME_FP(fmax, Basic)
4103 DEFINE_TEST_NEON_3SAME_FP(frecps, Basic)
4104 DEFINE_TEST_NEON_3SAME_8B_16B(and_, Basic)
4105 DEFINE_TEST_NEON_3SAME_8B_16B(bic, Basic)
4106 DEFINE_TEST_NEON_3SAME_FP(fminnm, Basic)
4107 DEFINE_TEST_NEON_3SAME_FP(fmls, Basic)
4108 DEFINE_TEST_NEON_3SAME_FP(fsub, Basic)
4109 DEFINE_TEST_NEON_3SAME_FP(fmin, Basic)
4110 DEFINE_TEST_NEON_3SAME_FP(frsqrts, Basic)
4111 DEFINE_TEST_NEON_3SAME_8B_16B(orr, Basic)
4112 DEFINE_TEST_NEON_3SAME_8B_16B(orn, Basic)
4113 DEFINE_TEST_NEON_3SAME_NO2D(uhadd, Basic)
4114 DEFINE_TEST_NEON_3SAME(uqadd, Basic)
4115 DEFINE_TEST_NEON_3SAME_NO2D(urhadd, Basic)
4116 DEFINE_TEST_NEON_3SAME_NO2D(uhsub, Basic)
4117 DEFINE_TEST_NEON_3SAME(uqsub, Basic)
4118 DEFINE_TEST_NEON_3SAME(cmhi, Basic)
4119 DEFINE_TEST_NEON_3SAME(cmhs, Basic)
4120 DEFINE_TEST_NEON_3SAME(ushl, Basic)
4121 DEFINE_TEST_NEON_3SAME(uqshl, Basic)
4122 DEFINE_TEST_NEON_3SAME(urshl, Basic)
4123 DEFINE_TEST_NEON_3SAME(uqrshl, Basic)
4124 DEFINE_TEST_NEON_3SAME_NO2D(umax, Basic)
4125 DEFINE_TEST_NEON_3SAME_NO2D(umin, Basic)
4126 DEFINE_TEST_NEON_3SAME_NO2D(uabd, Basic)
4127 DEFINE_TEST_NEON_3SAME_NO2D(uaba, Basic)
4128 DEFINE_TEST_NEON_3SAME(sub, Basic)
4129 DEFINE_TEST_NEON_3SAME(cmeq, Basic)
4130 DEFINE_TEST_NEON_3SAME_NO2D(mls, Basic)
4131 DEFINE_TEST_NEON_3SAME_8B_16B(pmul, Basic)
4132 DEFINE_TEST_NEON_3SAME_NO2D(uminp, Basic)
4133 DEFINE_TEST_NEON_3SAME_NO2D(umaxp, Basic)
4134 DEFINE_TEST_NEON_3SAME_HS(sqrdmulh, Basic)
4135 DEFINE_TEST_NEON_3SAME_FP(fmaxnmp, Basic)
4136 DEFINE_TEST_NEON_3SAME_FP(faddp, Basic)
4137 DEFINE_TEST_NEON_3SAME_FP(fmul, Basic)
4138 DEFINE_TEST_NEON_3SAME_FP(fcmge, Basic)
4139 DEFINE_TEST_NEON_3SAME_FP(facge, Basic)
4140 DEFINE_TEST_NEON_3SAME_FP(fmaxp, Basic)
4141 DEFINE_TEST_NEON_3SAME_FP(fdiv, Basic)
4142 DEFINE_TEST_NEON_3SAME_8B_16B(eor, Basic)
4143 DEFINE_TEST_NEON_3SAME_8B_16B(bsl, Basic)
4144 DEFINE_TEST_NEON_3SAME_FP(fminnmp, Basic)
4145 DEFINE_TEST_NEON_3SAME_FP(fabd, Basic)
4146 DEFINE_TEST_NEON_3SAME_FP(fcmgt, Basic)
4147 DEFINE_TEST_NEON_3SAME_FP(facgt, Basic)
4148 DEFINE_TEST_NEON_3SAME_FP(fminp, Basic)
4149 DEFINE_TEST_NEON_3SAME_8B_16B(bit, Basic)
4150 DEFINE_TEST_NEON_3SAME_8B_16B(bif, Basic)
4151 
4152 
4153 // Advanced SIMD scalar three same.
// Scalar variants of the three-same tests. The generator suffix (_SCALAR,
// _SCALAR_D, _SCALAR_HS, _FP_SCALAR) differs per instruction.
// NOTE(review): the suffix presumably reflects which scalar register sizes
// are exercised; the generators are defined outside this section.
4154 DEFINE_TEST_NEON_3SAME_SCALAR(sqadd, Basic)
4155 DEFINE_TEST_NEON_3SAME_SCALAR(sqsub, Basic)
4156 DEFINE_TEST_NEON_3SAME_SCALAR_D(cmgt, Basic)
4157 DEFINE_TEST_NEON_3SAME_SCALAR_D(cmge, Basic)
4158 DEFINE_TEST_NEON_3SAME_SCALAR_D(sshl, Basic)
4159 DEFINE_TEST_NEON_3SAME_SCALAR(sqshl, Basic)
4160 DEFINE_TEST_NEON_3SAME_SCALAR_D(srshl, Basic)
4161 DEFINE_TEST_NEON_3SAME_SCALAR(sqrshl, Basic)
4162 DEFINE_TEST_NEON_3SAME_SCALAR_D(add, Basic)
4163 DEFINE_TEST_NEON_3SAME_SCALAR_D(cmtst, Basic)
4164 DEFINE_TEST_NEON_3SAME_SCALAR_HS(sqdmulh, Basic)
4165 DEFINE_TEST_NEON_3SAME_FP_SCALAR(fmulx, Basic)
4166 DEFINE_TEST_NEON_3SAME_FP_SCALAR(fcmeq, Basic)
4167 DEFINE_TEST_NEON_3SAME_FP_SCALAR(frecps, Basic)
4168 DEFINE_TEST_NEON_3SAME_FP_SCALAR(frsqrts, Basic)
4169 DEFINE_TEST_NEON_3SAME_SCALAR_D(uqadd, Basic)
4170 DEFINE_TEST_NEON_3SAME_SCALAR_D(uqsub, Basic)
4171 DEFINE_TEST_NEON_3SAME_SCALAR_D(cmhi, Basic)
4172 DEFINE_TEST_NEON_3SAME_SCALAR_D(cmhs, Basic)
4173 DEFINE_TEST_NEON_3SAME_SCALAR_D(ushl, Basic)
4174 DEFINE_TEST_NEON_3SAME_SCALAR(uqshl, Basic)
4175 DEFINE_TEST_NEON_3SAME_SCALAR_D(urshl, Basic)
4176 DEFINE_TEST_NEON_3SAME_SCALAR(uqrshl, Basic)
4177 DEFINE_TEST_NEON_3SAME_SCALAR_D(sub, Basic)
4178 DEFINE_TEST_NEON_3SAME_SCALAR_D(cmeq, Basic)
4179 DEFINE_TEST_NEON_3SAME_SCALAR_HS(sqrdmulh, Basic)
4180 DEFINE_TEST_NEON_3SAME_FP_SCALAR(fcmge, Basic)
4181 DEFINE_TEST_NEON_3SAME_FP_SCALAR(facge, Basic)
4182 DEFINE_TEST_NEON_3SAME_FP_SCALAR(fabd, Basic)
4183 DEFINE_TEST_NEON_3SAME_FP_SCALAR(fcmgt, Basic)
4184 DEFINE_TEST_NEON_3SAME_FP_SCALAR(facgt, Basic)
4185 
4186 
4187 // Advanced SIMD three different.
// Instructions are grouped by generator: _LONG, _WIDE, _NARROW, plus the
// restricted _LONG_SD (sqdml*l / sqdmull) and _LONG_8H (pmull) forms.
// NOTE(review): generator names presumably describe the relationship between
// source and destination lane widths; confirm against their definitions.
4188 DEFINE_TEST_NEON_3DIFF_LONG(saddl, Basic)
4189 DEFINE_TEST_NEON_3DIFF_WIDE(saddw, Basic)
4190 DEFINE_TEST_NEON_3DIFF_LONG(ssubl, Basic)
4191 DEFINE_TEST_NEON_3DIFF_WIDE(ssubw, Basic)
4192 DEFINE_TEST_NEON_3DIFF_NARROW(addhn, Basic)
4193 DEFINE_TEST_NEON_3DIFF_LONG(sabal, Basic)
4194 DEFINE_TEST_NEON_3DIFF_NARROW(subhn, Basic)
4195 DEFINE_TEST_NEON_3DIFF_LONG(sabdl, Basic)
4196 DEFINE_TEST_NEON_3DIFF_LONG(smlal, Basic)
4197 DEFINE_TEST_NEON_3DIFF_LONG_SD(sqdmlal, Basic)
4198 DEFINE_TEST_NEON_3DIFF_LONG(smlsl, Basic)
4199 DEFINE_TEST_NEON_3DIFF_LONG_SD(sqdmlsl, Basic)
4200 DEFINE_TEST_NEON_3DIFF_LONG(smull, Basic)
4201 DEFINE_TEST_NEON_3DIFF_LONG_SD(sqdmull, Basic)
4202 DEFINE_TEST_NEON_3DIFF_LONG_8H(pmull, Basic)
4203 DEFINE_TEST_NEON_3DIFF_LONG(uaddl, Basic)
4204 DEFINE_TEST_NEON_3DIFF_WIDE(uaddw, Basic)
4205 DEFINE_TEST_NEON_3DIFF_LONG(usubl, Basic)
4206 DEFINE_TEST_NEON_3DIFF_WIDE(usubw, Basic)
4207 DEFINE_TEST_NEON_3DIFF_NARROW(raddhn, Basic)
4208 DEFINE_TEST_NEON_3DIFF_LONG(uabal, Basic)
4209 DEFINE_TEST_NEON_3DIFF_NARROW(rsubhn, Basic)
4210 DEFINE_TEST_NEON_3DIFF_LONG(uabdl, Basic)
4211 DEFINE_TEST_NEON_3DIFF_LONG(umlal, Basic)
4212 DEFINE_TEST_NEON_3DIFF_LONG(umlsl, Basic)
4213 DEFINE_TEST_NEON_3DIFF_LONG(umull, Basic)
4214 
4215 
4216 // Advanced SIMD scalar three different.
// Only the saturating doubling multiply family is instantiated here, all
// through the _SCALAR_LONG_SD generator.
4217 DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_SD(sqdmlal, Basic)
4218 DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_SD(sqdmlsl, Basic)
4219 DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_SD(sqdmull, Basic)
4220 
4221 
4222 // Advanced SIMD scalar pairwise.
// `addp` gets a hand-written TEST (D destination, 2D source, 64-bit Basic
// inputs). The floating-point pairwise instructions are each instantiated
// exactly once through DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD; a second
// instantiation of the same mnemonic would define its tests twice.
4223 TEST(addp_SCALAR) {
4224   CALL_TEST_NEON_HELPER_2DIFF(addp, D, 2D, kInput64bitsBasic);
4225 }
4226 DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(fmaxnmp, Basic)
4227 DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(faddp, Basic)
4228 DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(fmaxp, Basic)
4229 DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(fminnmp, Basic)
4230 DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(fminp, Basic)
4231 
4232 
4233 // Advanced SIMD shift by immediate.
// The third macro argument names the immediate set. In this list, right
// shifts (sshr, srshr, shrn, ...) use TypeWidth while left shifts (shl,
// sqshl, sli, sshll, ...) use TypeWidthFromZero. The fixed-point conversions
// (scvtf / ucvtf / fcvtzs / fcvtzu) use TypeWidthFromZeroToWidth.
// NOTE(review): the exact immediate values behind each name live in the
// input tables, not in this chunk.
4234 DEFINE_TEST_NEON_2OPIMM(sshr, Basic, TypeWidth)
4235 DEFINE_TEST_NEON_2OPIMM(ssra, Basic, TypeWidth)
4236 DEFINE_TEST_NEON_2OPIMM(srshr, Basic, TypeWidth)
4237 DEFINE_TEST_NEON_2OPIMM(srsra, Basic, TypeWidth)
4238 DEFINE_TEST_NEON_2OPIMM(shl, Basic, TypeWidthFromZero)
4239 DEFINE_TEST_NEON_2OPIMM(sqshl, Basic, TypeWidthFromZero)
4240 DEFINE_TEST_NEON_2OPIMM_NARROW(shrn, Basic, TypeWidth)
4241 DEFINE_TEST_NEON_2OPIMM_NARROW(rshrn, Basic, TypeWidth)
4242 DEFINE_TEST_NEON_2OPIMM_NARROW(sqshrn, Basic, TypeWidth)
4243 DEFINE_TEST_NEON_2OPIMM_NARROW(sqrshrn, Basic, TypeWidth)
4244 DEFINE_TEST_NEON_2OPIMM_LONG(sshll, Basic, TypeWidthFromZero)
4245 DEFINE_TEST_NEON_2OPIMM_SD(scvtf,
4246                            FixedPointConversions,
4247                            TypeWidthFromZeroToWidth)
4248 DEFINE_TEST_NEON_2OPIMM_FP(fcvtzs, Conversions, TypeWidthFromZeroToWidth)
4249 DEFINE_TEST_NEON_2OPIMM(ushr, Basic, TypeWidth)
4250 DEFINE_TEST_NEON_2OPIMM(usra, Basic, TypeWidth)
4251 DEFINE_TEST_NEON_2OPIMM(urshr, Basic, TypeWidth)
4252 DEFINE_TEST_NEON_2OPIMM(ursra, Basic, TypeWidth)
4253 DEFINE_TEST_NEON_2OPIMM(sri, Basic, TypeWidth)
4254 DEFINE_TEST_NEON_2OPIMM(sli, Basic, TypeWidthFromZero)
4255 DEFINE_TEST_NEON_2OPIMM(sqshlu, Basic, TypeWidthFromZero)
4256 DEFINE_TEST_NEON_2OPIMM(uqshl, Basic, TypeWidthFromZero)
4257 DEFINE_TEST_NEON_2OPIMM_NARROW(sqshrun, Basic, TypeWidth)
4258 DEFINE_TEST_NEON_2OPIMM_NARROW(sqrshrun, Basic, TypeWidth)
4259 DEFINE_TEST_NEON_2OPIMM_NARROW(uqshrn, Basic, TypeWidth)
4260 DEFINE_TEST_NEON_2OPIMM_NARROW(uqrshrn, Basic, TypeWidth)
4261 DEFINE_TEST_NEON_2OPIMM_LONG(ushll, Basic, TypeWidthFromZero)
4262 DEFINE_TEST_NEON_2OPIMM_SD(ucvtf,
4263                            FixedPointConversions,
4264                            TypeWidthFromZeroToWidth)
4265 DEFINE_TEST_NEON_2OPIMM_FP(fcvtzu, Conversions, TypeWidthFromZeroToWidth)
4266 
4267 
4268 // Advanced SIMD scalar shift by immediate.
// Scalar variants of the shift-by-immediate tests. They use the same
// immediate-set names as the vector list (TypeWidth, TypeWidthFromZero,
// TypeWidthFromZeroToWidth) with the *_SCALAR* generators.
4269 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(sshr, Basic, TypeWidth)
4270 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(ssra, Basic, TypeWidth)
4271 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(srshr, Basic, TypeWidth)
4272 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(srsra, Basic, TypeWidth)
4273 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(shl, Basic, TypeWidthFromZero)
4274 DEFINE_TEST_NEON_2OPIMM_SCALAR(sqshl, Basic, TypeWidthFromZero)
4275 DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(sqshrn, Basic, TypeWidth)
4276 DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(sqrshrn, Basic, TypeWidth)
4277 DEFINE_TEST_NEON_2OPIMM_SCALAR_SD(scvtf,
4278                                   FixedPointConversions,
4279                                   TypeWidthFromZeroToWidth)
4280 DEFINE_TEST_NEON_2OPIMM_FP_SCALAR(fcvtzs, Conversions, TypeWidthFromZeroToWidth)
4281 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(ushr, Basic, TypeWidth)
4282 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(usra, Basic, TypeWidth)
4283 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(urshr, Basic, TypeWidth)
4284 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(ursra, Basic, TypeWidth)
4285 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(sri, Basic, TypeWidth)
4286 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(sli, Basic, TypeWidthFromZero)
4287 DEFINE_TEST_NEON_2OPIMM_SCALAR(sqshlu, Basic, TypeWidthFromZero)
4288 DEFINE_TEST_NEON_2OPIMM_SCALAR(uqshl, Basic, TypeWidthFromZero)
4289 DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(sqshrun, Basic, TypeWidth)
4290 DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(sqrshrun, Basic, TypeWidth)
4291 DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(uqshrn, Basic, TypeWidth)
4292 DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(uqrshrn, Basic, TypeWidth)
4293 DEFINE_TEST_NEON_2OPIMM_SCALAR_SD(ucvtf,
4294                                   FixedPointConversions,
4295                                   TypeWidthFromZeroToWidth)
4296 DEFINE_TEST_NEON_2OPIMM_FP_SCALAR(fcvtzu, Conversions, TypeWidthFromZeroToWidth)
4297 
4298 
4299 // Advanced SIMD two-register miscellaneous.
// Compare-against-zero instructions (cmgt, cmeq, cmlt, cmge, cmle and the
// fcm* equivalents) are instantiated through the 2OPIMM generators with the
// `Zero` immediate set. FP rounding and conversion instructions use the
// `Conversions` inputs instead of `Basic`. Integer<->FP conversions without
// an explicit fbits operand have no entry here; the inline comments point to
// the fixed-point tests that cover them.
4300 DEFINE_TEST_NEON_2SAME_NO2D(rev64, Basic)
4301 DEFINE_TEST_NEON_2SAME_8B_16B(rev16, Basic)
4302 DEFINE_TEST_NEON_2DIFF_LONG(saddlp, Basic)
4303 DEFINE_TEST_NEON_2SAME(suqadd, Basic)
4304 DEFINE_TEST_NEON_2SAME_NO2D(cls, Basic)
4305 DEFINE_TEST_NEON_2SAME_8B_16B(cnt, Basic)
4306 DEFINE_TEST_NEON_2DIFF_LONG(sadalp, Basic)
4307 DEFINE_TEST_NEON_2SAME(sqabs, Basic)
4308 DEFINE_TEST_NEON_2OPIMM(cmgt, Basic, Zero)
4309 DEFINE_TEST_NEON_2OPIMM(cmeq, Basic, Zero)
4310 DEFINE_TEST_NEON_2OPIMM(cmlt, Basic, Zero)
4311 DEFINE_TEST_NEON_2SAME(abs, Basic)
4312 DEFINE_TEST_NEON_2DIFF_NARROW(xtn, Basic)
4313 DEFINE_TEST_NEON_2DIFF_NARROW(sqxtn, Basic)
4314 DEFINE_TEST_NEON_2DIFF_FP_NARROW(fcvtn, Conversions)
4315 DEFINE_TEST_NEON_2DIFF_FP_LONG(fcvtl, Conversions)
4316 DEFINE_TEST_NEON_2SAME_FP(frintn, Conversions)
4317 DEFINE_TEST_NEON_2SAME_FP(frintm, Conversions)
4318 DEFINE_TEST_NEON_2SAME_FP(fcvtns, Conversions)
4319 DEFINE_TEST_NEON_2SAME_FP(fcvtms, Conversions)
4320 DEFINE_TEST_NEON_2SAME_FP(fcvtas, Conversions)
4321 // SCVTF (vector, integer) covered by SCVTF(vector, fixed point) with fbits 0.
4322 DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(fcmgt, Basic, Zero)
4323 DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(fcmeq, Basic, Zero)
4324 DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(fcmlt, Basic, Zero)
4325 DEFINE_TEST_NEON_2SAME_FP(fabs, Basic)
4326 DEFINE_TEST_NEON_2SAME_FP(frintp, Conversions)
4327 DEFINE_TEST_NEON_2SAME_FP(frintz, Conversions)
4328 DEFINE_TEST_NEON_2SAME_FP(fcvtps, Conversions)
4329 // FCVTZS(vector, integer) covered by FCVTZS(vector, fixed point) with fbits 0.
4330 DEFINE_TEST_NEON_2SAME_2S_4S(urecpe, Basic)
4331 DEFINE_TEST_NEON_2SAME_FP(frecpe, Basic)
4332 DEFINE_TEST_NEON_2SAME_BH(rev32, Basic)
4333 DEFINE_TEST_NEON_2DIFF_LONG(uaddlp, Basic)
4334 DEFINE_TEST_NEON_2SAME(usqadd, Basic)
4335 DEFINE_TEST_NEON_2SAME_NO2D(clz, Basic)
4336 DEFINE_TEST_NEON_2DIFF_LONG(uadalp, Basic)
4337 DEFINE_TEST_NEON_2SAME(sqneg, Basic)
4338 DEFINE_TEST_NEON_2OPIMM(cmge, Basic, Zero)
4339 DEFINE_TEST_NEON_2OPIMM(cmle, Basic, Zero)
4340 DEFINE_TEST_NEON_2SAME(neg, Basic)
4341 DEFINE_TEST_NEON_2DIFF_NARROW(sqxtun, Basic)
4342 DEFINE_TEST_NEON_2OPIMM_LONG(shll, Basic, SHLL)
4343 DEFINE_TEST_NEON_2DIFF_NARROW(uqxtn, Basic)
4344 DEFINE_TEST_NEON_2DIFF_FP_NARROW_2S(fcvtxn, Conversions)
4345 DEFINE_TEST_NEON_2SAME_FP(frinta, Conversions)
4346 DEFINE_TEST_NEON_2SAME_FP(frintx, Conversions)
4347 DEFINE_TEST_NEON_2SAME_FP(fcvtnu, Conversions)
4348 DEFINE_TEST_NEON_2SAME_FP(fcvtmu, Conversions)
4349 DEFINE_TEST_NEON_2SAME_FP(fcvtau, Conversions)
4350 // UCVTF (vector, integer) covered by UCVTF(vector, fixed point) with fbits 0.
4351 DEFINE_TEST_NEON_2SAME_8B_16B(not_, Basic)
4352 DEFINE_TEST_NEON_2SAME_8B_16B(rbit, Basic)
4353 DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(fcmge, Basic, Zero)
4354 DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(fcmle, Basic, Zero)
4355 DEFINE_TEST_NEON_2SAME_FP(fneg, Basic)
4356 DEFINE_TEST_NEON_2SAME_FP(frinti, Conversions)
4357 DEFINE_TEST_NEON_2SAME_FP(fcvtpu, Conversions)
4358 // FCVTZU(vector, integer) covered by FCVTZU(vector, fixed point) with fbits 0.
4359 DEFINE_TEST_NEON_2SAME_2S_4S(ursqrte, Basic)
4360 DEFINE_TEST_NEON_2SAME_FP(frsqrte, Basic)
4361 DEFINE_TEST_NEON_2SAME_FP(fsqrt, Basic)
4362 
4363 
4364 // Advanced SIMD scalar two-register miscellaneous.
// Scalar variants of the two-register miscellaneous tests. `fcvtxn` is the
// only entry written out by hand: a single S <- D test fed with
// kInputDoubleConversions.
// NOTE(review): the "covered by" comments below say "vector" although this
// is the scalar list; presumably the scalar fixed-point tests are meant.
4365 DEFINE_TEST_NEON_2SAME_SCALAR(suqadd, Basic)
4366 DEFINE_TEST_NEON_2SAME_SCALAR(sqabs, Basic)
4367 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(cmgt, Basic, Zero)
4368 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(cmeq, Basic, Zero)
4369 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(cmlt, Basic, Zero)
4370 DEFINE_TEST_NEON_2SAME_SCALAR_D(abs, Basic)
4371 DEFINE_TEST_NEON_2DIFF_SCALAR_NARROW(sqxtn, Basic)
4372 DEFINE_TEST_NEON_2SAME_FP_SCALAR(fcvtns, Conversions)
4373 DEFINE_TEST_NEON_2SAME_FP_SCALAR(fcvtms, Conversions)
4374 DEFINE_TEST_NEON_2SAME_FP_SCALAR(fcvtas, Conversions)
4375 // SCVTF (vector, integer) covered by SCVTF(vector, fixed point) with fbits 0.
4376 DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_SD(fcmgt, Basic, Zero)
4377 DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_SD(fcmeq, Basic, Zero)
4378 DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_SD(fcmlt, Basic, Zero)
4379 DEFINE_TEST_NEON_2SAME_FP_SCALAR(fcvtps, Conversions)
4380 // FCVTZS(vector, integer) covered by FCVTZS(vector, fixed point) with fbits 0.
4381 DEFINE_TEST_NEON_2SAME_FP_SCALAR(frecpe, Basic)
4382 DEFINE_TEST_NEON_2SAME_FP_SCALAR(frecpx, Basic)
4383 DEFINE_TEST_NEON_2SAME_SCALAR(usqadd, Basic)
4384 DEFINE_TEST_NEON_2SAME_SCALAR(sqneg, Basic)
4385 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(cmge, Basic, Zero)
4386 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(cmle, Basic, Zero)
4387 DEFINE_TEST_NEON_2SAME_SCALAR_D(neg, Basic)
4388 DEFINE_TEST_NEON_2DIFF_SCALAR_NARROW(sqxtun, Basic)
4389 DEFINE_TEST_NEON_2DIFF_SCALAR_NARROW(uqxtn, Basic)
4390 TEST(fcvtxn_SCALAR) {
4391   CALL_TEST_NEON_HELPER_2DIFF(fcvtxn, S, D, kInputDoubleConversions);
4392 }
4393 DEFINE_TEST_NEON_2SAME_FP_SCALAR(fcvtnu, Conversions)
4394 DEFINE_TEST_NEON_2SAME_FP_SCALAR(fcvtmu, Conversions)
4395 DEFINE_TEST_NEON_2SAME_FP_SCALAR(fcvtau, Conversions)
4396 // UCVTF (vector, integer) covered by UCVTF(vector, fixed point) with fbits 0.
4397 DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_SD(fcmge, Basic, Zero)
4398 DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_SD(fcmle, Basic, Zero)
4399 DEFINE_TEST_NEON_2SAME_FP_SCALAR(fcvtpu, Conversions)
4400 // FCVTZU(vector, integer) covered by FCVTZU(vector, fixed point) with fbits 0.
4401 DEFINE_TEST_NEON_2SAME_FP_SCALAR(frsqrte, Basic)
4402 
4403 
4404 // Advanced SIMD across lanes.
// Integer reductions use the _ACROSS generator, the widening add reductions
// (saddlv / uaddlv) use _ACROSS_LONG, and the floating-point min/max
// reductions use _ACROSS_FP.
4405 DEFINE_TEST_NEON_ACROSS_LONG(saddlv, Basic)
4406 DEFINE_TEST_NEON_ACROSS(smaxv, Basic)
4407 DEFINE_TEST_NEON_ACROSS(sminv, Basic)
4408 DEFINE_TEST_NEON_ACROSS(addv, Basic)
4409 DEFINE_TEST_NEON_ACROSS_LONG(uaddlv, Basic)
4410 DEFINE_TEST_NEON_ACROSS(umaxv, Basic)
4411 DEFINE_TEST_NEON_ACROSS(uminv, Basic)
4412 DEFINE_TEST_NEON_ACROSS_FP(fmaxnmv, Basic)
4413 DEFINE_TEST_NEON_ACROSS_FP(fmaxv, Basic)
4414 DEFINE_TEST_NEON_ACROSS_FP(fminnmv, Basic)
4415 DEFINE_TEST_NEON_ACROSS_FP(fminv, Basic)
4416 
4417 
4418 // Advanced SIMD permute.
// The six permute instructions reuse the unrestricted three-same generator.
4419 DEFINE_TEST_NEON_3SAME(uzp1, Basic)
4420 DEFINE_TEST_NEON_3SAME(trn1, Basic)
4421 DEFINE_TEST_NEON_3SAME(zip1, Basic)
4422 DEFINE_TEST_NEON_3SAME(uzp2, Basic)
4423 DEFINE_TEST_NEON_3SAME(trn2, Basic)
4424 DEFINE_TEST_NEON_3SAME(zip2, Basic)
4425 
4426 
4427 // Advanced SIMD vector x indexed element.
// The three trailing arguments select the destination, n and m input sets
// (all `Basic` here). Widening instructions use the _BYELEMENT_DIFF
// generator, which also emits the "2" (upper-half) variants; floating-point
// instructions use _FP_BYELEMENT; the rest use _BYELEMENT.
4428 DEFINE_TEST_NEON_BYELEMENT_DIFF(smlal, Basic, Basic, Basic)
4429 DEFINE_TEST_NEON_BYELEMENT_DIFF(sqdmlal, Basic, Basic, Basic)
4430 DEFINE_TEST_NEON_BYELEMENT_DIFF(smlsl, Basic, Basic, Basic)
4431 DEFINE_TEST_NEON_BYELEMENT_DIFF(sqdmlsl, Basic, Basic, Basic)
4432 DEFINE_TEST_NEON_BYELEMENT(mul, Basic, Basic, Basic)
4433 DEFINE_TEST_NEON_BYELEMENT_DIFF(smull, Basic, Basic, Basic)
4434 DEFINE_TEST_NEON_BYELEMENT_DIFF(sqdmull, Basic, Basic, Basic)
4435 DEFINE_TEST_NEON_BYELEMENT(sqdmulh, Basic, Basic, Basic)
4436 DEFINE_TEST_NEON_BYELEMENT(sqrdmulh, Basic, Basic, Basic)
4437 DEFINE_TEST_NEON_FP_BYELEMENT(fmla, Basic, Basic, Basic)
4438 DEFINE_TEST_NEON_FP_BYELEMENT(fmls, Basic, Basic, Basic)
4439 DEFINE_TEST_NEON_FP_BYELEMENT(fmul, Basic, Basic, Basic)
4440 DEFINE_TEST_NEON_BYELEMENT(mla, Basic, Basic, Basic)
4441 DEFINE_TEST_NEON_BYELEMENT_DIFF(umlal, Basic, Basic, Basic)
4442 DEFINE_TEST_NEON_BYELEMENT(mls, Basic, Basic, Basic)
4443 DEFINE_TEST_NEON_BYELEMENT_DIFF(umlsl, Basic, Basic, Basic)
4444 DEFINE_TEST_NEON_BYELEMENT_DIFF(umull, Basic, Basic, Basic)
4445 DEFINE_TEST_NEON_FP_BYELEMENT(fmulx, Basic, Basic, Basic)
4446 
4447 
4448 // Advanced SIMD scalar x indexed element.
// Scalar variants of the by-element tests, again with `Basic` inputs for the
// destination, n and m operands.
4449 DEFINE_TEST_NEON_BYELEMENT_DIFF_SCALAR(sqdmlal, Basic, Basic, Basic)
4450 DEFINE_TEST_NEON_BYELEMENT_DIFF_SCALAR(sqdmlsl, Basic, Basic, Basic)
4451 DEFINE_TEST_NEON_BYELEMENT_DIFF_SCALAR(sqdmull, Basic, Basic, Basic)
4452 DEFINE_TEST_NEON_BYELEMENT_SCALAR(sqdmulh, Basic, Basic, Basic)
4453 DEFINE_TEST_NEON_BYELEMENT_SCALAR(sqrdmulh, Basic, Basic, Basic)
4454 DEFINE_TEST_NEON_FP_BYELEMENT_SCALAR(fmla, Basic, Basic, Basic)
4455 DEFINE_TEST_NEON_FP_BYELEMENT_SCALAR(fmls, Basic, Basic, Basic)
4456 DEFINE_TEST_NEON_FP_BYELEMENT_SCALAR(fmul, Basic, Basic, Basic)
4457 DEFINE_TEST_NEON_FP_BYELEMENT_SCALAR(fmulx, Basic, Basic, Basic)
4458 
4459 }  // namespace aarch64
4460 }  // namespace vixl
4461