// Copyright 2015, VIXL authors
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
//   * Redistributions of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//   * Redistributions in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//   * Neither the name of ARM Limited nor the names of its contributors may be
//     used to endorse or promote products derived from this software without
//     specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.

#include <sys/mman.h>

#include <cfloat>
#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <cstring>

#include "test-runner.h"
#include "test-utils.h"
#include "aarch64/test-utils-aarch64.h"

#include "aarch64/cpu-aarch64.h"
#include "aarch64/disasm-aarch64.h"
#include "aarch64/macro-assembler-aarch64.h"
#include "aarch64/simulator-aarch64.h"

namespace vixl {
namespace aarch64 {

// Test infrastructure.
//
// Tests are functions which accept no parameters and have no return values.
// The testing code should not perform an explicit return once completed. For
// example, to test the mov immediate instruction, a very simple test would be:
//
//   TEST(mov_x0_one) {
//     SETUP();
//
//     START();
//     __ mov(x0, Operand(1));
//     END();
//
//     RUN();
//
//     ASSERT_EQUAL_64(1, x0);
//
//     TEARDOWN();
//   }
//
// Within a START ... END block all registers but sp can be modified. sp has to
// be explicitly saved/restored. The END() macro replaces the function return
// so it may appear multiple times in a test if the test has multiple exit
// points.
//
// Once the test has been run, all integer and floating point registers, as
// well as flags, are accessible through a RegisterDump instance; see
// test-utils-aarch64.cc for more info on RegisterDump.
//
// We provide some helper asserts to handle common cases:
//
//   ASSERT_EQUAL_32(int32_t, int32_t)
//   ASSERT_EQUAL_FP32(float, float)
//   ASSERT_EQUAL_32(int32_t, W register)
//   ASSERT_EQUAL_FP32(float, S register)
//   ASSERT_EQUAL_64(int64_t, int64_t)
//   ASSERT_EQUAL_FP64(double, double)
//   ASSERT_EQUAL_64(int64_t, X register)
//   ASSERT_EQUAL_64(X register, X register)
//   ASSERT_EQUAL_FP64(double, D register)
//
// e.g. ASSERT_EQUAL_FP64(0.5, d30);
//
// If more advanced computation is required before the assert then access the
// RegisterDump named core directly:
//
//   ASSERT_EQUAL_64(0x1234, core.xreg(0) & 0xffff);
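//
// Tests that rely on optional CPU features can request them up front with
// SETUP_WITH_FEATURES instead of SETUP. A minimal sketch (the feature list and
// instructions here are illustrative, not taken from a real test):
//
//   TEST(fp16_example) {
//     SETUP_WITH_FEATURES(CPUFeatures::kFP, CPUFeatures::kFPHalf);
//
//     START();
//     __ Fmov(h0, Float16(1.0));
//     END();
//
//     RUN();
//
//     ASSERT_EQUAL_FP16(Float16(1.0), h0);
//
//     TEARDOWN();
//   }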


#define __ masm.
#define TEST(name) TEST_(AARCH64_ASM_##name)

// PushCalleeSavedRegisters(), PopCalleeSavedRegisters() and Dump() use NEON, so
// we need to enable it in the infrastructure code for each test.
const CPUFeatures kInfrastructureCPUFeatures(CPUFeatures::kNEON);

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
// Run tests with the simulator.

#define SETUP()        \
  MacroAssembler masm; \
  SETUP_COMMON()

#define SETUP_WITH_FEATURES(...)                 \
  MacroAssembler masm;                           \
  SETUP_COMMON();                                \
  masm.SetCPUFeatures(CPUFeatures(__VA_ARGS__)); \
  simulator.SetCPUFeatures(CPUFeatures(__VA_ARGS__))

#define SETUP_CUSTOM(size, pic)                                       \
  byte* buf = new byte[size + CodeBuffer::kDefaultCapacity];          \
  MacroAssembler masm(buf, size + CodeBuffer::kDefaultCapacity, pic); \
  SETUP_COMMON()

#define SETUP_COMMON()                                      \
  masm.SetCPUFeatures(CPUFeatures::None());                 \
  masm.SetGenerateSimulatorCode(true);                      \
  Decoder simulator_decoder;                                \
  Simulator simulator(&simulator_decoder);                  \
  simulator.SetColouredTrace(Test::coloured_trace());       \
  simulator.SetInstructionStats(Test::instruction_stats()); \
  simulator.SetCPUFeatures(CPUFeatures::None());            \
  RegisterDump core;                                        \
  ptrdiff_t offset_after_infrastructure_start;              \
  ptrdiff_t offset_before_infrastructure_end

#define START()                                                               \
  masm.Reset();                                                               \
  simulator.ResetState();                                                     \
  {                                                                           \
    SimulationCPUFeaturesScope cpu(&masm, kInfrastructureCPUFeatures);        \
    __ PushCalleeSavedRegisters();                                            \
  }                                                                           \
  {                                                                           \
    int trace_parameters = 0;                                                 \
    if (Test::trace_reg()) trace_parameters |= LOG_STATE;                     \
    if (Test::trace_write()) trace_parameters |= LOG_WRITE;                   \
    if (Test::trace_sim()) trace_parameters |= LOG_DISASM;                    \
    if (Test::trace_branch()) trace_parameters |= LOG_BRANCH;                 \
    if (trace_parameters != 0) {                                              \
      __ Trace(static_cast<TraceParameters>(trace_parameters), TRACE_ENABLE); \
    }                                                                         \
  }                                                                           \
  if (Test::instruction_stats()) {                                            \
    __ EnableInstrumentation();                                               \
  }                                                                           \
  offset_after_infrastructure_start = masm.GetCursorOffset();                 \
  /* Avoid unused-variable warnings in case a test never calls RUN(). */      \
  USE(offset_after_infrastructure_start)

#define END()                                                            \
  offset_before_infrastructure_end = masm.GetCursorOffset();             \
  /* Avoid unused-variable warnings in case a test never calls RUN(). */ \
  USE(offset_before_infrastructure_end);                                 \
  if (Test::instruction_stats()) {                                       \
    __ DisableInstrumentation();                                         \
  }                                                                      \
  __ Trace(LOG_ALL, TRACE_DISABLE);                                      \
  {                                                                      \
    SimulationCPUFeaturesScope cpu(&masm, kInfrastructureCPUFeatures);   \
    core.Dump(&masm);                                                    \
    __ PopCalleeSavedRegisters();                                        \
  }                                                                      \
  __ Ret();                                                              \
  masm.FinalizeCode()

#define RUN()                                                                  \
  RUN_WITHOUT_SEEN_FEATURE_CHECK();                                            \
  {                                                                            \
    /* We expect the test to use all of the features it requested, plus the */ \
    /* features that the infrastructure code requires.                      */ \
    CPUFeatures const& expected =                                              \
        simulator.GetCPUFeatures()->With(CPUFeatures::kNEON);                  \
    CPUFeatures const& seen = simulator.GetSeenFeatures();                     \
    /* This gives three broad categories of features that we care about:    */ \
    /*  1. Things both expected and seen.                                   */ \
    /*  2. Things seen, but not expected. The simulator catches these.      */ \
    /*  3. Things expected, but not seen. We check these here.              */ \
    /* In a valid, passing test, categories 2 and 3 should be empty.        */ \
    if (seen != expected) {                                                    \
      /* The Simulator should have caught anything in category 2 already.   */ \
      VIXL_ASSERT(expected.Has(seen));                                         \
      /* Anything left is category 3: things expected, but not seen. This   */ \
      /* is not necessarily a bug in VIXL itself, but indicates that the    */ \
      /* test is less strict than it could be.                              */ \
      CPUFeatures missing = expected.Without(seen);                            \
      VIXL_ASSERT(missing.Count() > 0);                                        \
      std::cout << "Error: expected to see CPUFeatures { " << missing          \
                << " }\n";                                                     \
      VIXL_ABORT();                                                            \
    }                                                                          \
  }

#define RUN_WITHOUT_SEEN_FEATURE_CHECK() \
  DISASSEMBLE();                         \
  simulator.RunFrom(masm.GetBuffer()->GetStartAddress<Instruction*>())

#define RUN_CUSTOM() RUN()

#define TEARDOWN()

#define TEARDOWN_CUSTOM() delete[] buf;

#else  // ifdef VIXL_INCLUDE_SIMULATOR_AARCH64.
#define SETUP()        \
  MacroAssembler masm; \
  SETUP_COMMON()

#define SETUP_WITH_FEATURES(...) \
  MacroAssembler masm;           \
  SETUP_COMMON();                \
  masm.SetCPUFeatures(CPUFeatures(__VA_ARGS__))

#define SETUP_CUSTOM(size, pic)                                         \
  byte* buffer =                                                        \
      reinterpret_cast<byte*>(mmap(NULL,                                \
                                   size + CodeBuffer::kDefaultCapacity, \
                                   PROT_READ | PROT_WRITE,              \
                                   MAP_PRIVATE | MAP_ANONYMOUS,         \
                                   -1,                                  \
                                   0));                                 \
  size_t buffer_size = size + CodeBuffer::kDefaultCapacity;             \
  MacroAssembler masm(buffer, buffer_size, pic);                        \
  SETUP_COMMON()

#define SETUP_COMMON()                               \
  masm.GetCPUFeatures()->Remove(CPUFeatures::All()); \
  masm.SetGenerateSimulatorCode(false);              \
  RegisterDump core;                                 \
  CPU::SetUp();                                      \
  ptrdiff_t offset_after_infrastructure_start;       \
  ptrdiff_t offset_before_infrastructure_end

#define START()                                                          \
  masm.Reset();                                                          \
  {                                                                      \
    CPUFeaturesScope cpu(&masm, kInfrastructureCPUFeatures);             \
    __ PushCalleeSavedRegisters();                                       \
  }                                                                      \
  offset_after_infrastructure_start = masm.GetCursorOffset();            \
  /* Avoid unused-variable warnings in case a test never calls RUN(). */ \
  USE(offset_after_infrastructure_start)

#define END()                                                            \
  offset_before_infrastructure_end = masm.GetCursorOffset();             \
  /* Avoid unused-variable warnings in case a test never calls RUN(). */ \
  USE(offset_before_infrastructure_end);                                 \
  {                                                                      \
    CPUFeaturesScope cpu(&masm, kInfrastructureCPUFeatures);             \
    core.Dump(&masm);                                                    \
    __ PopCalleeSavedRegisters();                                        \
  }                                                                      \
  __ Ret();                                                              \
  masm.FinalizeCode()

// Execute the generated code from the memory area.
#define RUN()                                               \
  DISASSEMBLE();                                            \
  masm.GetBuffer()->SetExecutable();                        \
  ExecuteMemory(masm.GetBuffer()->GetStartAddress<byte*>(), \
                masm.GetSizeOfCodeGenerated());             \
  masm.GetBuffer()->SetWritable()

// The generated code was written directly into `buffer`; execute it directly.
#define RUN_CUSTOM()                                    \
  DISASSEMBLE();                                        \
  mprotect(buffer, buffer_size, PROT_READ | PROT_EXEC); \
  ExecuteMemory(buffer, buffer_size);                   \
  mprotect(buffer, buffer_size, PROT_READ | PROT_WRITE)

#define TEARDOWN()

#define TEARDOWN_CUSTOM()

#endif  // ifdef VIXL_INCLUDE_SIMULATOR_AARCH64.

#define DISASSEMBLE()                                                     \
  if (Test::disassemble()) {                                              \
    PrintDisassembler disasm(stdout);                                     \
    CodeBuffer* buffer = masm.GetBuffer();                                \
    Instruction* start = buffer->GetOffsetAddress<Instruction*>(          \
        offset_after_infrastructure_start);                               \
    Instruction* end = buffer->GetOffsetAddress<Instruction*>(            \
        offset_before_infrastructure_end);                                \
                                                                          \
    if (Test::disassemble_infrastructure()) {                             \
      Instruction* infra_start = buffer->GetStartAddress<Instruction*>(); \
      printf("# Infrastructure code (prologue)\n");                       \
      disasm.DisassembleBuffer(infra_start, start);                       \
      printf("# Test code\n");                                            \
    } else {                                                              \
      printf(                                                             \
          "# Warning: Omitting infrastructure code. "                     \
          "Use --disassemble to see it.\n");                              \
    }                                                                     \
                                                                          \
    disasm.DisassembleBuffer(start, end);                                 \
                                                                          \
    if (Test::disassemble_infrastructure()) {                             \
      printf("# Infrastructure code (epilogue)\n");                       \
      Instruction* infra_end = buffer->GetEndAddress<Instruction*>();     \
      disasm.DisassembleBuffer(end, infra_end);                           \
    }                                                                     \
  }

#define ASSERT_EQUAL_NZCV(expected) \
  VIXL_CHECK(EqualNzcv(expected, core.flags_nzcv()))

#define ASSERT_EQUAL_REGISTERS(expected) \
  VIXL_CHECK(EqualRegisters(&expected, &core))

#define ASSERT_EQUAL_FP16(expected, result) \
  VIXL_CHECK(EqualFP16(expected, &core, result))

#define ASSERT_EQUAL_32(expected, result) \
  VIXL_CHECK(Equal32(static_cast<uint32_t>(expected), &core, result))

#define ASSERT_EQUAL_FP32(expected, result) \
  VIXL_CHECK(EqualFP32(expected, &core, result))

#define ASSERT_EQUAL_64(expected, result) \
  VIXL_CHECK(Equal64(expected, &core, result))

#define ASSERT_NOT_EQUAL_64(expected, result) \
  VIXL_CHECK(!Equal64(expected, &core, result))

#define ASSERT_EQUAL_FP64(expected, result) \
  VIXL_CHECK(EqualFP64(expected, &core, result))

#define ASSERT_EQUAL_128(expected_h, expected_l, result) \
  VIXL_CHECK(Equal128(expected_h, expected_l, &core, result))

#define ASSERT_LITERAL_POOL_SIZE(expected) \
  VIXL_CHECK((expected + kInstructionSize) == (masm.GetLiteralPoolSize()))

#define MUST_FAIL_WITH_MESSAGE(code, message)                           \
  {                                                                     \
    bool aborted = false;                                               \
    try {                                                               \
      code;                                                             \
    } catch (const std::runtime_error& e) {                             \
      const char* expected_error = message;                             \
      size_t error_length = strlen(expected_error);                     \
      VIXL_CHECK(strncmp(expected_error, e.what(), error_length) == 0); \
      aborted = true;                                                   \
    }                                                                   \
    VIXL_CHECK(aborted);                                                \
  }
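
// MUST_FAIL_WITH_MESSAGE is used by negative tests: `code` must abort with a
// diagnostic that starts with `message`. A minimal sketch of its use, with a
// made-up operation and message (illustrative only):
//
//   MUST_FAIL_WITH_MESSAGE(SomeInvalidMacroUse(), "expected message prefix");
//
// This relies on VIXL aborts being surfaced as std::runtime_error in this
// test configuration.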


TEST(preshift_immediates) {
  SETUP();

  START();
  // Test operations involving immediates that could be generated using a
  // pre-shifted encodable immediate followed by a post-shift applied to
  // the arithmetic or logical operation.
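  //
  // For example, 0x1f7de is not directly encodable, but 0x1f7de == 0xfbef << 1
  // and 0xfbef fits in a single movz, so the macro assembler can emit
  // something like (an illustrative expansion; the actual sequence is chosen
  // by the macro assembler):
  //   movz <temp>, #0xfbef
  //   add  x1, x0, <temp>, lsl #1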

  // Save sp.
  __ Mov(x29, sp);

  // Set the registers to known values.
  __ Mov(x0, 0x1000);
  __ Mov(sp, 0x1004);

  // Arithmetic ops.
  __ Add(x1, x0, 0x1f7de);
  __ Add(w2, w0, 0xffffff1);
  __ Adds(x3, x0, 0x18001);
  __ Adds(w4, w0, 0xffffff1);
  __ Sub(x5, x0, 0x1f7de);
  __ Sub(w6, w0, 0xffffff1);
  __ Subs(x7, x0, 0x18001);
  __ Subs(w8, w0, 0xffffff1);

  // Logical ops.
  __ And(x9, x0, 0x1f7de);
  __ Orr(w10, w0, 0xffffff1);
  __ Eor(x11, x0, 0x18001);

  // Ops using the stack pointer.
  __ Add(sp, sp, 0x18001);
  __ Mov(x12, sp);
  __ Mov(sp, 0x1004);

  __ Add(sp, sp, 0x1f7de);
  __ Mov(x13, sp);
  __ Mov(sp, 0x1004);

  __ Adds(x14, sp, 0x1f7de);

  __ Orr(sp, x0, 0x1f7de);
  __ Mov(x15, sp);

  // Restore sp.
  __ Mov(sp, x29);
  END();

  RUN();

  ASSERT_EQUAL_64(0x1000, x0);
  ASSERT_EQUAL_64(0x207de, x1);
  ASSERT_EQUAL_64(0x10000ff1, x2);
  ASSERT_EQUAL_64(0x19001, x3);
  ASSERT_EQUAL_64(0x10000ff1, x4);
  ASSERT_EQUAL_64(0xfffffffffffe1822, x5);
  ASSERT_EQUAL_64(0xf000100f, x6);
  ASSERT_EQUAL_64(0xfffffffffffe8fff, x7);
  ASSERT_EQUAL_64(0xf000100f, x8);
  ASSERT_EQUAL_64(0x1000, x9);
  ASSERT_EQUAL_64(0xffffff1, x10);
  ASSERT_EQUAL_64(0x19001, x11);
  ASSERT_EQUAL_64(0x19005, x12);
  ASSERT_EQUAL_64(0x207e2, x13);
  ASSERT_EQUAL_64(0x207e2, x14);
  ASSERT_EQUAL_64(0x1f7de, x15);

  TEARDOWN();
}


TEST(stack_ops) {
  SETUP();

  START();
  // Save sp.
  __ Mov(x29, sp);

  // Set the sp to a known value.
  __ Mov(sp, 0x1004);
  __ Mov(x0, sp);

  // Add immediate to the sp, and move the result to a normal register.
  __ Add(sp, sp, 0x50);
  __ Mov(x1, sp);

  // Add extended to the sp, and move the result to a normal register.
  __ Mov(x17, 0xfff);
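  // (SXTB takes the low byte of x17, 0xff, and sign-extends it to -1, so the
  // Add below subtracts one from sp.)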
  __ Add(sp, sp, Operand(x17, SXTB));
  __ Mov(x2, sp);

  // Set sp using a logical instruction, and move the result to a normal
  // register.
  __ Orr(sp, xzr, 0x1fff);
  __ Mov(x3, sp);

  // Write wsp using a logical instruction.
  __ Orr(wsp, wzr, 0xfffffff8);
  __ Mov(x4, sp);

  // Write sp, and read back wsp.
  __ Orr(sp, xzr, 0xfffffff8);
  __ Mov(w5, wsp);

  // Restore sp.
  __ Mov(sp, x29);
  END();

  RUN();

  ASSERT_EQUAL_64(0x1004, x0);
  ASSERT_EQUAL_64(0x1054, x1);
  ASSERT_EQUAL_64(0x1053, x2);
  ASSERT_EQUAL_64(0x1fff, x3);
  ASSERT_EQUAL_64(0xfffffff8, x4);
  ASSERT_EQUAL_64(0xfffffff8, x5);

  TEARDOWN();
}


TEST(mvn) {
  SETUP();

  START();
  __ Mvn(w0, 0xfff);
  __ Mvn(x1, 0xfff);
  __ Mvn(w2, Operand(w0, LSL, 1));
  __ Mvn(x3, Operand(x1, LSL, 2));
  __ Mvn(w4, Operand(w0, LSR, 3));
  __ Mvn(x5, Operand(x1, LSR, 4));
  __ Mvn(w6, Operand(w0, ASR, 11));
  __ Mvn(x7, Operand(x1, ASR, 12));
  __ Mvn(w8, Operand(w0, ROR, 13));
  __ Mvn(x9, Operand(x1, ROR, 14));
  __ Mvn(w10, Operand(w2, UXTB));
  __ Mvn(x11, Operand(x2, SXTB, 1));
  __ Mvn(w12, Operand(w2, UXTH, 2));
  __ Mvn(x13, Operand(x2, SXTH, 3));
  __ Mvn(x14, Operand(w2, UXTW, 4));
  __ Mvn(x15, Operand(w2, SXTW, 4));
  END();

  RUN();

  ASSERT_EQUAL_64(0xfffff000, x0);
  ASSERT_EQUAL_64(0xfffffffffffff000, x1);
  ASSERT_EQUAL_64(0x00001fff, x2);
  ASSERT_EQUAL_64(0x0000000000003fff, x3);
  ASSERT_EQUAL_64(0xe00001ff, x4);
  ASSERT_EQUAL_64(0xf0000000000000ff, x5);
  ASSERT_EQUAL_64(0x00000001, x6);
  ASSERT_EQUAL_64(0x0000000000000000, x7);
  ASSERT_EQUAL_64(0x7ff80000, x8);
  ASSERT_EQUAL_64(0x3ffc000000000000, x9);
  ASSERT_EQUAL_64(0xffffff00, x10);
  ASSERT_EQUAL_64(0x0000000000000001, x11);
  ASSERT_EQUAL_64(0xffff8003, x12);
  ASSERT_EQUAL_64(0xffffffffffff0007, x13);
  ASSERT_EQUAL_64(0xfffffffffffe000f, x14);
  ASSERT_EQUAL_64(0xfffffffffffe000f, x15);

  TEARDOWN();
}


TEST(mov_imm_w) {
  SETUP();

  START();
  __ Mov(w0, 0xffffffff);
  __ Mov(w1, 0xffff1234);
  __ Mov(w2, 0x1234ffff);
  __ Mov(w3, 0x00000000);
  __ Mov(w4, 0x00001234);
  __ Mov(w5, 0x12340000);
  __ Mov(w6, 0x12345678);
  __ Mov(w7, (int32_t)0x80000000);
  __ Mov(w8, (int32_t)0xffff0000);
  __ Mov(w9, kWMinInt);
  END();

  RUN();

  ASSERT_EQUAL_64(0xffffffff, x0);
  ASSERT_EQUAL_64(0xffff1234, x1);
  ASSERT_EQUAL_64(0x1234ffff, x2);
  ASSERT_EQUAL_64(0x00000000, x3);
  ASSERT_EQUAL_64(0x00001234, x4);
  ASSERT_EQUAL_64(0x12340000, x5);
  ASSERT_EQUAL_64(0x12345678, x6);
  ASSERT_EQUAL_64(0x80000000, x7);
  ASSERT_EQUAL_64(0xffff0000, x8);
  ASSERT_EQUAL_32(kWMinInt, w9);

  TEARDOWN();
}


TEST(mov_imm_x) {
  SETUP();

  START();
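  // Each 64-bit constant below mixes 0x0000, 0xffff and arbitrary half-words
  // in a different pattern, exercising the macro assembler's movz/movn/movk
  // selection logic. (A note on the apparent intent of these values.)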
  __ Mov(x0, 0xffffffffffffffff);
  __ Mov(x1, 0xffffffffffff1234);
  __ Mov(x2, 0xffffffff12345678);
  __ Mov(x3, 0xffff1234ffff5678);
  __ Mov(x4, 0x1234ffffffff5678);
  __ Mov(x5, 0x1234ffff5678ffff);
  __ Mov(x6, 0x12345678ffffffff);
  __ Mov(x7, 0x1234ffffffffffff);
  __ Mov(x8, 0x123456789abcffff);
  __ Mov(x9, 0x12345678ffff9abc);
  __ Mov(x10, 0x1234ffff56789abc);
  __ Mov(x11, 0xffff123456789abc);
  __ Mov(x12, 0x0000000000000000);
  __ Mov(x13, 0x0000000000001234);
  __ Mov(x14, 0x0000000012345678);
  __ Mov(x15, 0x0000123400005678);
  __ Mov(x18, 0x1234000000005678);
  __ Mov(x19, 0x1234000056780000);
  __ Mov(x20, 0x1234567800000000);
  __ Mov(x21, 0x1234000000000000);
  __ Mov(x22, 0x123456789abc0000);
  __ Mov(x23, 0x1234567800009abc);
  __ Mov(x24, 0x1234000056789abc);
  __ Mov(x25, 0x0000123456789abc);
  __ Mov(x26, 0x123456789abcdef0);
  __ Mov(x27, 0xffff000000000001);
  __ Mov(x28, 0x8000ffff00000000);
  END();

  RUN();

  ASSERT_EQUAL_64(0xffffffffffff1234, x1);
  ASSERT_EQUAL_64(0xffffffff12345678, x2);
  ASSERT_EQUAL_64(0xffff1234ffff5678, x3);
  ASSERT_EQUAL_64(0x1234ffffffff5678, x4);
  ASSERT_EQUAL_64(0x1234ffff5678ffff, x5);
  ASSERT_EQUAL_64(0x12345678ffffffff, x6);
  ASSERT_EQUAL_64(0x1234ffffffffffff, x7);
  ASSERT_EQUAL_64(0x123456789abcffff, x8);
  ASSERT_EQUAL_64(0x12345678ffff9abc, x9);
  ASSERT_EQUAL_64(0x1234ffff56789abc, x10);
  ASSERT_EQUAL_64(0xffff123456789abc, x11);
  ASSERT_EQUAL_64(0x0000000000000000, x12);
  ASSERT_EQUAL_64(0x0000000000001234, x13);
  ASSERT_EQUAL_64(0x0000000012345678, x14);
  ASSERT_EQUAL_64(0x0000123400005678, x15);
  ASSERT_EQUAL_64(0x1234000000005678, x18);
  ASSERT_EQUAL_64(0x1234000056780000, x19);
  ASSERT_EQUAL_64(0x1234567800000000, x20);
  ASSERT_EQUAL_64(0x1234000000000000, x21);
  ASSERT_EQUAL_64(0x123456789abc0000, x22);
  ASSERT_EQUAL_64(0x1234567800009abc, x23);
  ASSERT_EQUAL_64(0x1234000056789abc, x24);
  ASSERT_EQUAL_64(0x0000123456789abc, x25);
  ASSERT_EQUAL_64(0x123456789abcdef0, x26);
  ASSERT_EQUAL_64(0xffff000000000001, x27);
  ASSERT_EQUAL_64(0x8000ffff00000000, x28);


  TEARDOWN();
}


TEST(mov) {
  SETUP();

  START();
  __ Mov(x0, 0xffffffffffffffff);
  __ Mov(x1, 0xffffffffffffffff);
  __ Mov(x2, 0xffffffffffffffff);
  __ Mov(x3, 0xffffffffffffffff);

  __ Mov(x0, 0x0123456789abcdef);

  {
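    // The raw movz/movk/movn below are emitted inside an ExactAssemblyScope,
    // which checks that exactly three instructions are generated and disables
    // macro-assembler expansion.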
    ExactAssemblyScope scope(&masm, 3 * kInstructionSize);
    __ movz(x1, UINT64_C(0xabcd) << 16);
    __ movk(x2, UINT64_C(0xabcd) << 32);
    __ movn(x3, UINT64_C(0xabcd) << 48);
  }

  __ Mov(x4, 0x0123456789abcdef);
  __ Mov(x5, x4);

  __ Mov(w6, -1);

  // Test that moves back to the same register have the desired effect. This
  // is a no-op for X registers, and a truncation for W registers.
  __ Mov(x7, 0x0123456789abcdef);
  __ Mov(x7, x7);
  __ Mov(x8, 0x0123456789abcdef);
  __ Mov(w8, w8);
  __ Mov(x9, 0x0123456789abcdef);
  __ Mov(x9, Operand(x9));
  __ Mov(x10, 0x0123456789abcdef);
  __ Mov(w10, Operand(w10));

  __ Mov(w11, 0xfff);
  __ Mov(x12, 0xfff);
  __ Mov(w13, Operand(w11, LSL, 1));
  __ Mov(x14, Operand(x12, LSL, 2));
  __ Mov(w15, Operand(w11, LSR, 3));
  __ Mov(x18, Operand(x12, LSR, 4));
  __ Mov(w19, Operand(w11, ASR, 11));
  __ Mov(x20, Operand(x12, ASR, 12));
  __ Mov(w21, Operand(w11, ROR, 13));
  __ Mov(x22, Operand(x12, ROR, 14));
  __ Mov(w23, Operand(w13, UXTB));
  __ Mov(x24, Operand(x13, SXTB, 1));
  __ Mov(w25, Operand(w13, UXTH, 2));
  __ Mov(x26, Operand(x13, SXTH, 3));
  __ Mov(x27, Operand(w13, UXTW, 4));

  __ Mov(x28, 0x0123456789abcdef);
  __ Mov(w28, w28, kDiscardForSameWReg);
  END();

  RUN();

  ASSERT_EQUAL_64(0x0123456789abcdef, x0);
  ASSERT_EQUAL_64(0x00000000abcd0000, x1);
  ASSERT_EQUAL_64(0xffffabcdffffffff, x2);
  ASSERT_EQUAL_64(0x5432ffffffffffff, x3);
  ASSERT_EQUAL_64(x4, x5);
  ASSERT_EQUAL_32(-1, w6);
  ASSERT_EQUAL_64(0x0123456789abcdef, x7);
  ASSERT_EQUAL_32(0x89abcdef, w8);
  ASSERT_EQUAL_64(0x0123456789abcdef, x9);
  ASSERT_EQUAL_32(0x89abcdef, w10);
  ASSERT_EQUAL_64(0x00000fff, x11);
  ASSERT_EQUAL_64(0x0000000000000fff, x12);
  ASSERT_EQUAL_64(0x00001ffe, x13);
  ASSERT_EQUAL_64(0x0000000000003ffc, x14);
  ASSERT_EQUAL_64(0x000001ff, x15);
  ASSERT_EQUAL_64(0x00000000000000ff, x18);
  ASSERT_EQUAL_64(0x00000001, x19);
  ASSERT_EQUAL_64(0x0000000000000000, x20);
  ASSERT_EQUAL_64(0x7ff80000, x21);
  ASSERT_EQUAL_64(0x3ffc000000000000, x22);
  ASSERT_EQUAL_64(0x000000fe, x23);
  ASSERT_EQUAL_64(0xfffffffffffffffc, x24);
  ASSERT_EQUAL_64(0x00007ff8, x25);
  ASSERT_EQUAL_64(0x000000000000fff0, x26);
  ASSERT_EQUAL_64(0x000000000001ffe0, x27);
  ASSERT_EQUAL_64(0x0123456789abcdef, x28);

  TEARDOWN();
}


TEST(mov_negative) {
  SETUP();

  START();
  __ Mov(w11, 0xffffffff);
  __ Mov(x12, 0xffffffffffffffff);

  __ Mov(w13, Operand(w11, LSL, 1));
  __ Mov(w14, Operand(w11, LSR, 1));
  __ Mov(w15, Operand(w11, ASR, 1));
  __ Mov(w18, Operand(w11, ROR, 1));
  __ Mov(w19, Operand(w11, UXTB, 1));
  __ Mov(w20, Operand(w11, SXTB, 1));
  __ Mov(w21, Operand(w11, UXTH, 1));
  __ Mov(w22, Operand(w11, SXTH, 1));

  __ Mov(x23, Operand(x12, LSL, 1));
  __ Mov(x24, Operand(x12, LSR, 1));
  __ Mov(x25, Operand(x12, ASR, 1));
  __ Mov(x26, Operand(x12, ROR, 1));
  __ Mov(x27, Operand(x12, UXTH, 1));
  __ Mov(x28, Operand(x12, SXTH, 1));
  __ Mov(x29, Operand(x12, UXTW, 1));
  __ Mov(x30, Operand(x12, SXTW, 1));
  END();

  RUN();

  ASSERT_EQUAL_64(0xfffffffe, x13);
  ASSERT_EQUAL_64(0x7fffffff, x14);
  ASSERT_EQUAL_64(0xffffffff, x15);
  ASSERT_EQUAL_64(0xffffffff, x18);
  ASSERT_EQUAL_64(0x000001fe, x19);
  ASSERT_EQUAL_64(0xfffffffe, x20);
  ASSERT_EQUAL_64(0x0001fffe, x21);
  ASSERT_EQUAL_64(0xfffffffe, x22);

  ASSERT_EQUAL_64(0xfffffffffffffffe, x23);
  ASSERT_EQUAL_64(0x7fffffffffffffff, x24);
  ASSERT_EQUAL_64(0xffffffffffffffff, x25);
  ASSERT_EQUAL_64(0xffffffffffffffff, x26);
  ASSERT_EQUAL_64(0x000000000001fffe, x27);
  ASSERT_EQUAL_64(0xfffffffffffffffe, x28);
  ASSERT_EQUAL_64(0x00000001fffffffe, x29);
  ASSERT_EQUAL_64(0xfffffffffffffffe, x30);

  TEARDOWN();
}


TEST(orr) {
  SETUP();

  START();
  __ Mov(x0, 0xf0f0);
  __ Mov(x1, 0xf00000ff);

  __ Orr(x2, x0, Operand(x1));
  __ Orr(w3, w0, Operand(w1, LSL, 28));
  __ Orr(x4, x0, Operand(x1, LSL, 32));
  __ Orr(x5, x0, Operand(x1, LSR, 4));
  __ Orr(w6, w0, Operand(w1, ASR, 4));
  __ Orr(x7, x0, Operand(x1, ASR, 4));
  __ Orr(w8, w0, Operand(w1, ROR, 12));
  __ Orr(x9, x0, Operand(x1, ROR, 12));
  __ Orr(w10, w0, 0xf);
  __ Orr(x11, x0, 0xf0000000f0000000);
  END();

  RUN();

  ASSERT_EQUAL_64(0x00000000f000f0ff, x2);
  ASSERT_EQUAL_64(0xf000f0f0, x3);
  ASSERT_EQUAL_64(0xf00000ff0000f0f0, x4);
  ASSERT_EQUAL_64(0x000000000f00f0ff, x5);
  ASSERT_EQUAL_64(0xff00f0ff, x6);
  ASSERT_EQUAL_64(0x000000000f00f0ff, x7);
  ASSERT_EQUAL_64(0x0ffff0f0, x8);
  ASSERT_EQUAL_64(0x0ff00000000ff0f0, x9);
  ASSERT_EQUAL_64(0x0000f0ff, x10);
  ASSERT_EQUAL_64(0xf0000000f000f0f0, x11);

  TEARDOWN();
}


TEST(orr_extend) {
  SETUP();

  START();
  __ Mov(x0, 1);
  __ Mov(x1, 0x8000000080008080);
  __ Orr(w6, w0, Operand(w1, UXTB));
  __ Orr(x7, x0, Operand(x1, UXTH, 1));
  __ Orr(w8, w0, Operand(w1, UXTW, 2));
  __ Orr(x9, x0, Operand(x1, UXTX, 3));
  __ Orr(w10, w0, Operand(w1, SXTB));
  __ Orr(x11, x0, Operand(x1, SXTH, 1));
  __ Orr(x12, x0, Operand(x1, SXTW, 2));
  __ Orr(x13, x0, Operand(x1, SXTX, 3));
  END();

  RUN();

  ASSERT_EQUAL_64(0x00000081, x6);
  ASSERT_EQUAL_64(0x0000000000010101, x7);
  ASSERT_EQUAL_64(0x00020201, x8);
  ASSERT_EQUAL_64(0x0000000400040401, x9);
  ASSERT_EQUAL_64(0xffffff81, x10);
  ASSERT_EQUAL_64(0xffffffffffff0101, x11);
  ASSERT_EQUAL_64(0xfffffffe00020201, x12);
  ASSERT_EQUAL_64(0x0000000400040401, x13);

  TEARDOWN();
}


TEST(bitwise_wide_imm) {
  SETUP();

  START();
  __ Mov(x0, 0);
  __ Mov(x1, 0xf0f0f0f0f0f0f0f0);

  __ Orr(x10, x0, 0x1234567890abcdef);
  __ Orr(w11, w1, 0x90abcdef);

  __ Orr(w12, w0, kWMinInt);
  __ Eor(w13, w0, kWMinInt);
  END();

  RUN();

  ASSERT_EQUAL_64(0, x0);
  ASSERT_EQUAL_64(0xf0f0f0f0f0f0f0f0, x1);
  ASSERT_EQUAL_64(0x1234567890abcdef, x10);
  ASSERT_EQUAL_64(0x00000000f0fbfdff, x11);
  ASSERT_EQUAL_32(kWMinInt, w12);
  ASSERT_EQUAL_32(kWMinInt, w13);

  TEARDOWN();
}


TEST(orn) {
  SETUP();

  START();
  __ Mov(x0, 0xf0f0);
  __ Mov(x1, 0xf00000ff);

  __ Orn(x2, x0, Operand(x1));
  __ Orn(w3, w0, Operand(w1, LSL, 4));
  __ Orn(x4, x0, Operand(x1, LSL, 4));
  __ Orn(x5, x0, Operand(x1, LSR, 1));
  __ Orn(w6, w0, Operand(w1, ASR, 1));
  __ Orn(x7, x0, Operand(x1, ASR, 1));
  __ Orn(w8, w0, Operand(w1, ROR, 16));
  __ Orn(x9, x0, Operand(x1, ROR, 16));
  __ Orn(w10, w0, 0x0000ffff);
  __ Orn(x11, x0, 0x0000ffff0000ffff);
  END();

  RUN();

  ASSERT_EQUAL_64(0xffffffff0ffffff0, x2);
  ASSERT_EQUAL_64(0xfffff0ff, x3);
  ASSERT_EQUAL_64(0xfffffff0fffff0ff, x4);
  ASSERT_EQUAL_64(0xffffffff87fffff0, x5);
  ASSERT_EQUAL_64(0x07fffff0, x6);
  ASSERT_EQUAL_64(0xffffffff87fffff0, x7);
  ASSERT_EQUAL_64(0xff00ffff, x8);
  ASSERT_EQUAL_64(0xff00ffffffffffff, x9);
  ASSERT_EQUAL_64(0xfffff0f0, x10);
  ASSERT_EQUAL_64(0xffff0000fffff0f0, x11);

  TEARDOWN();
}


TEST(orn_extend) {
  SETUP();

  START();
  __ Mov(x0, 1);
  __ Mov(x1, 0x8000000080008081);
  __ Orn(w6, w0, Operand(w1, UXTB));
  __ Orn(x7, x0, Operand(x1, UXTH, 1));
  __ Orn(w8, w0, Operand(w1, UXTW, 2));
  __ Orn(x9, x0, Operand(x1, UXTX, 3));
  __ Orn(w10, w0, Operand(w1, SXTB));
  __ Orn(x11, x0, Operand(x1, SXTH, 1));
  __ Orn(x12, x0, Operand(x1, SXTW, 2));
  __ Orn(x13, x0, Operand(x1, SXTX, 3));
  END();

  RUN();

  ASSERT_EQUAL_64(0xffffff7f, x6);
  ASSERT_EQUAL_64(0xfffffffffffefefd, x7);
  ASSERT_EQUAL_64(0xfffdfdfb, x8);
  ASSERT_EQUAL_64(0xfffffffbfffbfbf7, x9);
  ASSERT_EQUAL_64(0x0000007f, x10);
  ASSERT_EQUAL_64(0x000000000000fefd, x11);
  ASSERT_EQUAL_64(0x00000001fffdfdfb, x12);
  ASSERT_EQUAL_64(0xfffffffbfffbfbf7, x13);

  TEARDOWN();
}


TEST(and_) {
  SETUP();

  START();
  __ Mov(x0, 0xfff0);
  __ Mov(x1, 0xf00000ff);

  __ And(x2, x0, Operand(x1));
  __ And(w3, w0, Operand(w1, LSL, 4));
  __ And(x4, x0, Operand(x1, LSL, 4));
  __ And(x5, x0, Operand(x1, LSR, 1));
  __ And(w6, w0, Operand(w1, ASR, 20));
  __ And(x7, x0, Operand(x1, ASR, 20));
  __ And(w8, w0, Operand(w1, ROR, 28));
  __ And(x9, x0, Operand(x1, ROR, 28));
  __ And(w10, w0, Operand(0xff00));
  __ And(x11, x0, Operand(0xff));
  END();

  RUN();

  ASSERT_EQUAL_64(0x000000f0, x2);
  ASSERT_EQUAL_64(0x00000ff0, x3);
  ASSERT_EQUAL_64(0x00000ff0, x4);
  ASSERT_EQUAL_64(0x00000070, x5);
  ASSERT_EQUAL_64(0x0000ff00, x6);
  ASSERT_EQUAL_64(0x00000f00, x7);
  ASSERT_EQUAL_64(0x00000ff0, x8);
  ASSERT_EQUAL_64(0x00000000, x9);
  ASSERT_EQUAL_64(0x0000ff00, x10);
  ASSERT_EQUAL_64(0x000000f0, x11);

  TEARDOWN();
}


TEST(and_extend) {
  SETUP();

  START();
  __ Mov(x0, 0xffffffffffffffff);
  __ Mov(x1, 0x8000000080008081);
  __ And(w6, w0, Operand(w1, UXTB));
  __ And(x7, x0, Operand(x1, UXTH, 1));
  __ And(w8, w0, Operand(w1, UXTW, 2));
  __ And(x9, x0, Operand(x1, UXTX, 3));
  __ And(w10, w0, Operand(w1, SXTB));
  __ And(x11, x0, Operand(x1, SXTH, 1));
  __ And(x12, x0, Operand(x1, SXTW, 2));
  __ And(x13, x0, Operand(x1, SXTX, 3));
  END();

  RUN();

  ASSERT_EQUAL_64(0x00000081, x6);
  ASSERT_EQUAL_64(0x0000000000010102, x7);
  ASSERT_EQUAL_64(0x00020204, x8);
  ASSERT_EQUAL_64(0x0000000400040408, x9);
  ASSERT_EQUAL_64(0xffffff81, x10);
  ASSERT_EQUAL_64(0xffffffffffff0102, x11);
  ASSERT_EQUAL_64(0xfffffffe00020204, x12);
  ASSERT_EQUAL_64(0x0000000400040408, x13);

  TEARDOWN();
}


TEST(ands) {
  SETUP();

  START();
  __ Mov(x1, 0xf00000ff);
  __ Ands(w0, w1, Operand(w1));
  END();

  RUN();

  ASSERT_EQUAL_NZCV(NFlag);
  ASSERT_EQUAL_64(0xf00000ff, x0);

  START();
  __ Mov(x0, 0xfff0);
  __ Mov(x1, 0xf00000ff);
  __ Ands(w0, w0, Operand(w1, LSR, 4));
  END();

  RUN();

  ASSERT_EQUAL_NZCV(ZFlag);
  ASSERT_EQUAL_64(0x00000000, x0);

  START();
  __ Mov(x0, 0x8000000000000000);
  __ Mov(x1, 0x00000001);
  __ Ands(x0, x0, Operand(x1, ROR, 1));
  END();

  RUN();

  ASSERT_EQUAL_NZCV(NFlag);
  ASSERT_EQUAL_64(0x8000000000000000, x0);

  START();
  __ Mov(x0, 0xfff0);
  __ Ands(w0, w0, Operand(0xf));
  END();

  RUN();

  ASSERT_EQUAL_NZCV(ZFlag);
  ASSERT_EQUAL_64(0x00000000, x0);

  START();
  __ Mov(x0, 0xff000000);
  __ Ands(w0, w0, Operand(0x80000000));
  END();

  RUN();

  ASSERT_EQUAL_NZCV(NFlag);
  ASSERT_EQUAL_64(0x80000000, x0);

  TEARDOWN();
}


TEST(bic) {
  SETUP();

  START();
  __ Mov(x0, 0xfff0);
  __ Mov(x1, 0xf00000ff);

  __ Bic(x2, x0, Operand(x1));
  __ Bic(w3, w0, Operand(w1, LSL, 4));
  __ Bic(x4, x0, Operand(x1, LSL, 4));
  __ Bic(x5, x0, Operand(x1, LSR, 1));
  __ Bic(w6, w0, Operand(w1, ASR, 20));
  __ Bic(x7, x0, Operand(x1, ASR, 20));
  __ Bic(w8, w0, Operand(w1, ROR, 28));
  __ Bic(x9, x0, Operand(x1, ROR, 24));
  __ Bic(x10, x0, Operand(0x1f));
  __ Bic(x11, x0, Operand(0x100));

  // Test bic into sp when the constant cannot be encoded in the immediate
  // field.
  // Use x20 to preserve sp. We check the result via x21 because the
  // test infrastructure requires that sp be restored to its original value.
  __ Mov(x20, sp);
  __ Mov(x0, 0xffffff);
  __ Bic(sp, x0, Operand(0xabcdef));
  __ Mov(x21, sp);
  __ Mov(sp, x20);
  END();

  RUN();

  ASSERT_EQUAL_64(0x0000ff00, x2);
  ASSERT_EQUAL_64(0x0000f000, x3);
  ASSERT_EQUAL_64(0x0000f000, x4);
  ASSERT_EQUAL_64(0x0000ff80, x5);
  ASSERT_EQUAL_64(0x000000f0, x6);
  ASSERT_EQUAL_64(0x0000f0f0, x7);
  ASSERT_EQUAL_64(0x0000f000, x8);
  ASSERT_EQUAL_64(0x0000ff00, x9);
  ASSERT_EQUAL_64(0x0000ffe0, x10);
  ASSERT_EQUAL_64(0x0000fef0, x11);

  ASSERT_EQUAL_64(0x543210, x21);

  TEARDOWN();
}


TEST(bic_extend) {
  SETUP();

  START();
  __ Mov(x0, 0xffffffffffffffff);
  __ Mov(x1, 0x8000000080008081);
  __ Bic(w6, w0, Operand(w1, UXTB));
  __ Bic(x7, x0, Operand(x1, UXTH, 1));
  __ Bic(w8, w0, Operand(w1, UXTW, 2));
  __ Bic(x9, x0, Operand(x1, UXTX, 3));
  __ Bic(w10, w0, Operand(w1, SXTB));
  __ Bic(x11, x0, Operand(x1, SXTH, 1));
  __ Bic(x12, x0, Operand(x1, SXTW, 2));
  __ Bic(x13, x0, Operand(x1, SXTX, 3));
  END();

  RUN();

  ASSERT_EQUAL_64(0xffffff7e, x6);
  ASSERT_EQUAL_64(0xfffffffffffefefd, x7);
  ASSERT_EQUAL_64(0xfffdfdfb, x8);
  ASSERT_EQUAL_64(0xfffffffbfffbfbf7, x9);
  ASSERT_EQUAL_64(0x0000007e, x10);
  ASSERT_EQUAL_64(0x000000000000fefd, x11);
  ASSERT_EQUAL_64(0x00000001fffdfdfb, x12);
  ASSERT_EQUAL_64(0xfffffffbfffbfbf7, x13);

  TEARDOWN();
}


TEST(bics) {
  SETUP();

  START();
  __ Mov(x1, 0xffff);
  __ Bics(w0, w1, Operand(w1));
  END();

  RUN();

  ASSERT_EQUAL_NZCV(ZFlag);
  ASSERT_EQUAL_64(0x00000000, x0);

  START();
  __ Mov(x0, 0xffffffff);
  __ Bics(w0, w0, Operand(w0, LSR, 1));
  END();

  RUN();

  ASSERT_EQUAL_NZCV(NFlag);
  ASSERT_EQUAL_64(0x80000000, x0);

  START();
  __ Mov(x0, 0x8000000000000000);
  __ Mov(x1, 0x00000001);
  __ Bics(x0, x0, Operand(x1, ROR, 1));
  END();

  RUN();

  ASSERT_EQUAL_NZCV(ZFlag);
  ASSERT_EQUAL_64(0x00000000, x0);

  START();
  __ Mov(x0, 0xffffffffffffffff);
  __ Bics(x0, x0, 0x7fffffffffffffff);
  END();

  RUN();

  ASSERT_EQUAL_NZCV(NFlag);
  ASSERT_EQUAL_64(0x8000000000000000, x0);

  START();
  __ Mov(w0, 0xffff0000);
  __ Bics(w0, w0, 0xfffffff0);
  END();

  RUN();

  ASSERT_EQUAL_NZCV(ZFlag);
  ASSERT_EQUAL_64(0x00000000, x0);

  TEARDOWN();
}


TEST(eor) {
  SETUP();

  START();
  __ Mov(x0, 0xfff0);
  __ Mov(x1, 0xf00000ff);

  __ Eor(x2, x0, Operand(x1));
  __ Eor(w3, w0, Operand(w1, LSL, 4));
  __ Eor(x4, x0, Operand(x1, LSL, 4));
  __ Eor(x5, x0, Operand(x1, LSR, 1));
  __ Eor(w6, w0, Operand(w1, ASR, 20));
  __ Eor(x7, x0, Operand(x1, ASR, 20));
  __ Eor(w8, w0, Operand(w1, ROR, 28));
  __ Eor(x9, x0, Operand(x1, ROR, 28));
  __ Eor(w10, w0, 0xff00ff00);
  __ Eor(x11, x0, 0xff00ff00ff00ff00);
  END();

  RUN();

  ASSERT_EQUAL_64(0x00000000f000ff0f, x2);
  ASSERT_EQUAL_64(0x0000f000, x3);
  ASSERT_EQUAL_64(0x0000000f0000f000, x4);
  ASSERT_EQUAL_64(0x000000007800ff8f, x5);
  ASSERT_EQUAL_64(0xffff00f0, x6);
  ASSERT_EQUAL_64(0x000000000000f0f0, x7);
  ASSERT_EQUAL_64(0x0000f00f, x8);
  ASSERT_EQUAL_64(0x00000ff00000ffff, x9);
  ASSERT_EQUAL_64(0xff0000f0, x10);
  ASSERT_EQUAL_64(0xff00ff00ff0000f0, x11);

  TEARDOWN();
}

TEST(eor_extend) {
  SETUP();

  START();
  __ Mov(x0, 0x1111111111111111);
  __ Mov(x1, 0x8000000080008081);
  __ Eor(w6, w0, Operand(w1, UXTB));
  __ Eor(x7, x0, Operand(x1, UXTH, 1));
  __ Eor(w8, w0, Operand(w1, UXTW, 2));
  __ Eor(x9, x0, Operand(x1, UXTX, 3));
  __ Eor(w10, w0, Operand(w1, SXTB));
  __ Eor(x11, x0, Operand(x1, SXTH, 1));
  __ Eor(x12, x0, Operand(x1, SXTW, 2));
  __ Eor(x13, x0, Operand(x1, SXTX, 3));
  END();

  RUN();

  ASSERT_EQUAL_64(0x11111190, x6);
  ASSERT_EQUAL_64(0x1111111111101013, x7);
  ASSERT_EQUAL_64(0x11131315, x8);
  ASSERT_EQUAL_64(0x1111111511151519, x9);
  ASSERT_EQUAL_64(0xeeeeee90, x10);
  ASSERT_EQUAL_64(0xeeeeeeeeeeee1013, x11);
  ASSERT_EQUAL_64(0xeeeeeeef11131315, x12);
  ASSERT_EQUAL_64(0x1111111511151519, x13);

  TEARDOWN();
}


TEST(eon) {
  SETUP();

  START();
  __ Mov(x0, 0xfff0);
  __ Mov(x1, 0xf00000ff);

  __ Eon(x2, x0, Operand(x1));
  __ Eon(w3, w0, Operand(w1, LSL, 4));
  __ Eon(x4, x0, Operand(x1, LSL, 4));
  __ Eon(x5, x0, Operand(x1, LSR, 1));
  __ Eon(w6, w0, Operand(w1, ASR, 20));
  __ Eon(x7, x0, Operand(x1, ASR, 20));
  __ Eon(w8, w0, Operand(w1, ROR, 28));
  __ Eon(x9, x0, Operand(x1, ROR, 28));
  __ Eon(w10, w0, 0x03c003c0);
  __ Eon(x11, x0, 0x0000100000001000);
  END();

  RUN();

  ASSERT_EQUAL_64(0xffffffff0fff00f0, x2);
  ASSERT_EQUAL_64(0xffff0fff, x3);
  ASSERT_EQUAL_64(0xfffffff0ffff0fff, x4);
  ASSERT_EQUAL_64(0xffffffff87ff0070, x5);
  ASSERT_EQUAL_64(0x0000ff0f, x6);
  ASSERT_EQUAL_64(0xffffffffffff0f0f, x7);
  ASSERT_EQUAL_64(0xffff0ff0, x8);
  ASSERT_EQUAL_64(0xfffff00fffff0000, x9);
  ASSERT_EQUAL_64(0xfc3f03cf, x10);
  ASSERT_EQUAL_64(0xffffefffffff100f, x11);

  TEARDOWN();
}


TEST(eon_extend) {
  SETUP();

  START();
  __ Mov(x0, 0x1111111111111111);
  __ Mov(x1, 0x8000000080008081);
  __ Eon(w6, w0, Operand(w1, UXTB));
  __ Eon(x7, x0, Operand(x1, UXTH, 1));
  __ Eon(w8, w0, Operand(w1, UXTW, 2));
  __ Eon(x9, x0, Operand(x1, UXTX, 3));
  __ Eon(w10, w0, Operand(w1, SXTB));
  __ Eon(x11, x0, Operand(x1, SXTH, 1));
  __ Eon(x12, x0, Operand(x1, SXTW, 2));
  __ Eon(x13, x0, Operand(x1, SXTX, 3));
  END();

  RUN();

  ASSERT_EQUAL_64(0xeeeeee6f, x6);
  ASSERT_EQUAL_64(0xeeeeeeeeeeefefec, x7);
  ASSERT_EQUAL_64(0xeeececea, x8);
  ASSERT_EQUAL_64(0xeeeeeeeaeeeaeae6, x9);
  ASSERT_EQUAL_64(0x1111116f, x10);
  ASSERT_EQUAL_64(0x111111111111efec, x11);
  ASSERT_EQUAL_64(0x11111110eeececea, x12);
  ASSERT_EQUAL_64(0xeeeeeeeaeeeaeae6, x13);

  TEARDOWN();
}


TEST(mul) {
  SETUP();

  START();
  __ Mov(x25, 0);
  __ Mov(x26, 1);
  __ Mov(x18, 0xffffffff);
  __ Mov(x19, 0xffffffffffffffff);

  __ Mul(w0, w25, w25);
  __ Mul(w1, w25, w26);
  __ Mul(w2, w26, w18);
  __ Mul(w3, w18, w19);
  __ Mul(x4, x25, x25);
  __ Mul(x5, x26, x18);
  __ Mul(x6, x18, x19);
  __ Mul(x7, x19, x19);
  __ Smull(x8, w26, w18);
  __ Smull(x9, w18, w18);
  __ Smull(x10, w19, w19);
  __ Mneg(w11, w25, w25);
  __ Mneg(w12, w25, w26);
  __ Mneg(w13, w26, w18);
  __ Mneg(w14, w18, w19);
  __ Mneg(x20, x25, x25);
  __ Mneg(x21, x26, x18);
  __ Mneg(x22, x18, x19);
  __ Mneg(x23, x19, x19);
  END();

  RUN();

  ASSERT_EQUAL_64(0, x0);
  ASSERT_EQUAL_64(0, x1);
  ASSERT_EQUAL_64(0xffffffff, x2);
  ASSERT_EQUAL_64(1, x3);
  ASSERT_EQUAL_64(0, x4);
  ASSERT_EQUAL_64(0xffffffff, x5);
  ASSERT_EQUAL_64(0xffffffff00000001, x6);
  ASSERT_EQUAL_64(1, x7);
  ASSERT_EQUAL_64(0xffffffffffffffff, x8);
  ASSERT_EQUAL_64(1, x9);
  ASSERT_EQUAL_64(1, x10);
  ASSERT_EQUAL_64(0, x11);
  ASSERT_EQUAL_64(0, x12);
  ASSERT_EQUAL_64(1, x13);
  ASSERT_EQUAL_64(0xffffffff, x14);
  ASSERT_EQUAL_64(0, x20);
  ASSERT_EQUAL_64(0xffffffff00000001, x21);
  ASSERT_EQUAL_64(0xffffffff, x22);
  ASSERT_EQUAL_64(0xffffffffffffffff, x23);

  TEARDOWN();
}


static void SmullHelper(int64_t expected, int64_t a, int64_t b) {
  SETUP();
  START();
  __ Mov(w0, a);
  __ Mov(w1, b);
  __ Smull(x2, w0, w1);
  END();
  RUN();
  ASSERT_EQUAL_64(expected, x2);
  TEARDOWN();
}


TEST(smull) {
  SmullHelper(0, 0, 0);
  SmullHelper(1, 1, 1);
  SmullHelper(-1, -1, 1);
  SmullHelper(1, -1, -1);
  SmullHelper(0xffffffff80000000, 0x80000000, 1);
  SmullHelper(0x0000000080000000, 0x00010000, 0x00008000);
}


TEST(madd) {
  SETUP();

  START();
  __ Mov(x16, 0);
  __ Mov(x17, 1);
  __ Mov(x18, 0xffffffff);
  __ Mov(x19, 0xffffffffffffffff);

  __ Madd(w0, w16, w16, w16);
  __ Madd(w1, w16, w16, w17);
  __ Madd(w2, w16, w16, w18);
  __ Madd(w3, w16, w16, w19);
  __ Madd(w4, w16, w17, w17);
  __ Madd(w5, w17, w17, w18);
  __ Madd(w6, w17, w17, w19);
  __ Madd(w7, w17, w18, w16);
  __ Madd(w8, w17, w18, w18);
  __ Madd(w9, w18, w18, w17);
  __ Madd(w10, w18, w19, w18);
  __ Madd(w11, w19, w19, w19);

  __ Madd(x12, x16, x16, x16);
  __ Madd(x13, x16, x16, x17);
  __ Madd(x14, x16, x16, x18);
  __ Madd(x15, x16, x16, x19);
  __ Madd(x20, x16, x17, x17);
  __ Madd(x21, x17, x17, x18);
  __ Madd(x22, x17, x17, x19);
  __ Madd(x23, x17, x18, x16);
  __ Madd(x24, x17, x18, x18);
  __ Madd(x25, x18, x18, x17);
  __ Madd(x26, x18, x19, x18);
  __ Madd(x27, x19, x19, x19);

  END();

  RUN();

  ASSERT_EQUAL_64(0, x0);
  ASSERT_EQUAL_64(1, x1);
  ASSERT_EQUAL_64(0xffffffff, x2);
  ASSERT_EQUAL_64(0xffffffff, x3);
  ASSERT_EQUAL_64(1, x4);
  ASSERT_EQUAL_64(0, x5);
  ASSERT_EQUAL_64(0, x6);
  ASSERT_EQUAL_64(0xffffffff, x7);
  ASSERT_EQUAL_64(0xfffffffe, x8);
  ASSERT_EQUAL_64(2, x9);
  ASSERT_EQUAL_64(0, x10);
  ASSERT_EQUAL_64(0, x11);

  ASSERT_EQUAL_64(0, x12);
  ASSERT_EQUAL_64(1, x13);
  ASSERT_EQUAL_64(0x00000000ffffffff, x14);
  ASSERT_EQUAL_64(0xffffffffffffffff, x15);
  ASSERT_EQUAL_64(1, x20);
  ASSERT_EQUAL_64(0x0000000100000000, x21);
  ASSERT_EQUAL_64(0, x22);
  ASSERT_EQUAL_64(0x00000000ffffffff, x23);
  ASSERT_EQUAL_64(0x00000001fffffffe, x24);
  ASSERT_EQUAL_64(0xfffffffe00000002, x25);
  ASSERT_EQUAL_64(0, x26);
  ASSERT_EQUAL_64(0, x27);

  TEARDOWN();
}


TEST(msub) {
  SETUP();

  START();
  __ Mov(x16, 0);
  __ Mov(x17, 1);
  __ Mov(x18, 0xffffffff);
  __ Mov(x19, 0xffffffffffffffff);

  __ Msub(w0, w16, w16, w16);
  __ Msub(w1, w16, w16, w17);
  __ Msub(w2, w16, w16, w18);
  __ Msub(w3, w16, w16, w19);
  __ Msub(w4, w16, w17, w17);
  __ Msub(w5, w17, w17, w18);
  __ Msub(w6, w17, w17, w19);
  __ Msub(w7, w17, w18, w16);
  __ Msub(w8, w17, w18, w18);
  __ Msub(w9, w18, w18, w17);
  __ Msub(w10, w18, w19, w18);
  __ Msub(w11, w19, w19, w19);

  __ Msub(x12, x16, x16, x16);
  __ Msub(x13, x16, x16, x17);
  __ Msub(x14, x16, x16, x18);
  __ Msub(x15, x16, x16, x19);
  __ Msub(x20, x16, x17, x17);
  __ Msub(x21, x17, x17, x18);
  __ Msub(x22, x17, x17, x19);
  __ Msub(x23, x17, x18, x16);
  __ Msub(x24, x17, x18, x18);
  __ Msub(x25, x18, x18, x17);
  __ Msub(x26, x18, x19, x18);
  __ Msub(x27, x19, x19, x19);

  END();

  RUN();

  ASSERT_EQUAL_64(0, x0);
  ASSERT_EQUAL_64(1, x1);
  ASSERT_EQUAL_64(0xffffffff, x2);
  ASSERT_EQUAL_64(0xffffffff, x3);
  ASSERT_EQUAL_64(1, x4);
  ASSERT_EQUAL_64(0xfffffffe, x5);
  ASSERT_EQUAL_64(0xfffffffe, x6);
  ASSERT_EQUAL_64(1, x7);
  ASSERT_EQUAL_64(0, x8);
  ASSERT_EQUAL_64(0, x9);
  ASSERT_EQUAL_64(0xfffffffe, x10);
  ASSERT_EQUAL_64(0xfffffffe, x11);

  ASSERT_EQUAL_64(0, x12);
  ASSERT_EQUAL_64(1, x13);
  ASSERT_EQUAL_64(0x00000000ffffffff, x14);
  ASSERT_EQUAL_64(0xffffffffffffffff, x15);
  ASSERT_EQUAL_64(1, x20);
  ASSERT_EQUAL_64(0x00000000fffffffe, x21);
  ASSERT_EQUAL_64(0xfffffffffffffffe, x22);
  ASSERT_EQUAL_64(0xffffffff00000001, x23);
  ASSERT_EQUAL_64(0, x24);
  ASSERT_EQUAL_64(0x0000000200000000, x25);
  ASSERT_EQUAL_64(0x00000001fffffffe, x26);
  ASSERT_EQUAL_64(0xfffffffffffffffe, x27);

  TEARDOWN();
}


TEST(smulh) {
  SETUP();

  START();
  __ Mov(x20, 0);
  __ Mov(x21, 1);
  __ Mov(x22, 0x0000000100000000);
  __ Mov(x23, 0x0000000012345678);
  __ Mov(x24, 0x0123456789abcdef);
  __ Mov(x25, 0x0000000200000000);
  __ Mov(x26, 0x8000000000000000);
  __ Mov(x27, 0xffffffffffffffff);
  __ Mov(x28, 0x5555555555555555);
  __ Mov(x29, 0xaaaaaaaaaaaaaaaa);

  __ Smulh(x0, x20, x24);
  __ Smulh(x1, x21, x24);
  __ Smulh(x2, x22, x23);
  __ Smulh(x3, x22, x24);
  __ Smulh(x4, x24, x25);
  __ Smulh(x5, x23, x27);
  __ Smulh(x6, x26, x26);
  __ Smulh(x7, x26, x27);
  __ Smulh(x8, x27, x27);
  __ Smulh(x9, x28, x28);
  __ Smulh(x10, x28, x29);
  __ Smulh(x11, x29, x29);
  END();

  RUN();

  ASSERT_EQUAL_64(0, x0);
  ASSERT_EQUAL_64(0, x1);
  ASSERT_EQUAL_64(0, x2);
  ASSERT_EQUAL_64(0x0000000001234567, x3);
  ASSERT_EQUAL_64(0x0000000002468acf, x4);
  ASSERT_EQUAL_64(0xffffffffffffffff, x5);
  ASSERT_EQUAL_64(0x4000000000000000, x6);
  ASSERT_EQUAL_64(0, x7);
  ASSERT_EQUAL_64(0, x8);
  ASSERT_EQUAL_64(0x1c71c71c71c71c71, x9);
  ASSERT_EQUAL_64(0xe38e38e38e38e38e, x10);
  ASSERT_EQUAL_64(0x1c71c71c71c71c72, x11);

  TEARDOWN();
}


TEST(umulh) {
  SETUP();

  START();
  __ Mov(x20, 0);
  __ Mov(x21, 1);
  __ Mov(x22, 0x0000000100000000);
  __ Mov(x23, 0x0000000012345678);
  __ Mov(x24, 0x0123456789abcdef);
  __ Mov(x25, 0x0000000200000000);
  __ Mov(x26, 0x8000000000000000);
  __ Mov(x27, 0xffffffffffffffff);
  __ Mov(x28, 0x5555555555555555);
  __ Mov(x29, 0xaaaaaaaaaaaaaaaa);

  __ Umulh(x0, x20, x24);
  __ Umulh(x1, x21, x24);
  __ Umulh(x2, x22, x23);
  __ Umulh(x3, x22, x24);
  __ Umulh(x4, x24, x25);
  __ Umulh(x5, x23, x27);
  __ Umulh(x6, x26, x26);
  __ Umulh(x7, x26, x27);
  __ Umulh(x8, x27, x27);
  __ Umulh(x9, x28, x28);
  __ Umulh(x10, x28, x29);
  __ Umulh(x11, x29, x29);
  END();

  RUN();

  ASSERT_EQUAL_64(0, x0);
  ASSERT_EQUAL_64(0, x1);
  ASSERT_EQUAL_64(0, x2);
  ASSERT_EQUAL_64(0x0000000001234567, x3);
  ASSERT_EQUAL_64(0x0000000002468acf, x4);
  ASSERT_EQUAL_64(0x0000000012345677, x5);
  ASSERT_EQUAL_64(0x4000000000000000, x6);
  ASSERT_EQUAL_64(0x7fffffffffffffff, x7);
  ASSERT_EQUAL_64(0xfffffffffffffffe, x8);
  ASSERT_EQUAL_64(0x1c71c71c71c71c71, x9);
  ASSERT_EQUAL_64(0x38e38e38e38e38e3, x10);
  ASSERT_EQUAL_64(0x71c71c71c71c71c6, x11);

  TEARDOWN();
}

1628 
TEST(smaddl_umaddl_umull)1629 TEST(smaddl_umaddl_umull) {
1630   SETUP();
1631 
1632   START();
1633   __ Mov(x17, 1);
1634   __ Mov(x18, 0x00000000ffffffff);
1635   __ Mov(x19, 0xffffffffffffffff);
1636   __ Mov(x20, 4);
1637   __ Mov(x21, 0x0000000200000000);
1638 
  __ Smaddl(x9, w17, w18, x20);
  __ Smaddl(x10, w18, w18, x20);
  __ Smaddl(x11, w19, w19, x20);
  __ Smaddl(x12, w19, w19, x21);
  __ Umaddl(x13, w17, w18, x20);
  __ Umaddl(x14, w18, w18, x20);
  __ Umaddl(x15, w19, w19, x20);
  __ Umaddl(x22, w19, w19, x21);
  __ Umull(x24, w19, w19);
  __ Umull(x25, w17, w18);
  END();

  RUN();

  ASSERT_EQUAL_64(3, x9);
  ASSERT_EQUAL_64(5, x10);
  ASSERT_EQUAL_64(5, x11);
  ASSERT_EQUAL_64(0x0000000200000001, x12);
  ASSERT_EQUAL_64(0x0000000100000003, x13);
  ASSERT_EQUAL_64(0xfffffffe00000005, x14);
  ASSERT_EQUAL_64(0xfffffffe00000005, x15);
  ASSERT_EQUAL_64(1, x22);
  ASSERT_EQUAL_64(0xfffffffe00000001, x24);
  ASSERT_EQUAL_64(0x00000000ffffffff, x25);

  TEARDOWN();
}


TEST(smsubl_umsubl) {
  SETUP();

  START();
  __ Mov(x17, 1);
  __ Mov(x18, 0x00000000ffffffff);
  __ Mov(x19, 0xffffffffffffffff);
  __ Mov(x20, 4);
  __ Mov(x21, 0x0000000200000000);

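  // Smsubl and Umsubl widen their W sources in the same way, but subtract the
  // product from the accumulator: Smsubl(x9, w17, w18, x20) computes
  // 4 - (1 * -1) = 5.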
  __ Smsubl(x9, w17, w18, x20);
  __ Smsubl(x10, w18, w18, x20);
  __ Smsubl(x11, w19, w19, x20);
  __ Smsubl(x12, w19, w19, x21);
  __ Umsubl(x13, w17, w18, x20);
  __ Umsubl(x14, w18, w18, x20);
  __ Umsubl(x15, w19, w19, x20);
  __ Umsubl(x22, w19, w19, x21);
  END();

  RUN();

  ASSERT_EQUAL_64(5, x9);
  ASSERT_EQUAL_64(3, x10);
  ASSERT_EQUAL_64(3, x11);
  ASSERT_EQUAL_64(0x00000001ffffffff, x12);
  ASSERT_EQUAL_64(0xffffffff00000005, x13);
  ASSERT_EQUAL_64(0x0000000200000003, x14);
  ASSERT_EQUAL_64(0x0000000200000003, x15);
  ASSERT_EQUAL_64(0x00000003ffffffff, x22);

  TEARDOWN();
}


TEST(div) {
  SETUP();

  START();
  __ Mov(x16, 1);
  __ Mov(x17, 0xffffffff);
  __ Mov(x18, 0xffffffffffffffff);
  __ Mov(x19, 0x80000000);
  __ Mov(x20, 0x8000000000000000);
  __ Mov(x21, 2);

  __ Udiv(w0, w16, w16);
  __ Udiv(w1, w17, w16);
  __ Sdiv(w2, w16, w16);
  __ Sdiv(w3, w16, w17);
  __ Sdiv(w4, w17, w18);

  __ Udiv(x5, x16, x16);
  __ Udiv(x6, x17, x18);
  __ Sdiv(x7, x16, x16);
  __ Sdiv(x8, x16, x17);
  __ Sdiv(x9, x17, x18);

  __ Udiv(w10, w19, w21);
  __ Sdiv(w11, w19, w21);
  __ Udiv(x12, x19, x21);
  __ Sdiv(x13, x19, x21);
  __ Udiv(x14, x20, x21);
  __ Sdiv(x15, x20, x21);

  __ Udiv(w22, w19, w17);
  __ Sdiv(w23, w19, w17);
  __ Udiv(x24, x20, x18);
  __ Sdiv(x25, x20, x18);

  __ Udiv(x26, x16, x21);
  __ Sdiv(x27, x16, x21);
  __ Udiv(x28, x18, x21);
  __ Sdiv(x29, x18, x21);

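  // AArch64 integer division never traps: dividing by zero produces zero
  // (checked below), and the INT_MIN / -1 overflow cases wrap back to
  // INT_MIN (w23 and x25 above).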
  __ Mov(x17, 0);
  __ Udiv(w18, w16, w17);
  __ Sdiv(w19, w16, w17);
  __ Udiv(x20, x16, x17);
  __ Sdiv(x21, x16, x17);
  END();

  RUN();

  ASSERT_EQUAL_64(1, x0);
  ASSERT_EQUAL_64(0xffffffff, x1);
  ASSERT_EQUAL_64(1, x2);
  ASSERT_EQUAL_64(0xffffffff, x3);
  ASSERT_EQUAL_64(1, x4);
  ASSERT_EQUAL_64(1, x5);
  ASSERT_EQUAL_64(0, x6);
  ASSERT_EQUAL_64(1, x7);
  ASSERT_EQUAL_64(0, x8);
  ASSERT_EQUAL_64(0xffffffff00000001, x9);
  ASSERT_EQUAL_64(0x40000000, x10);
  ASSERT_EQUAL_64(0xc0000000, x11);
  ASSERT_EQUAL_64(0x0000000040000000, x12);
  ASSERT_EQUAL_64(0x0000000040000000, x13);
  ASSERT_EQUAL_64(0x4000000000000000, x14);
  ASSERT_EQUAL_64(0xc000000000000000, x15);
  ASSERT_EQUAL_64(0, x22);
  ASSERT_EQUAL_64(0x80000000, x23);
  ASSERT_EQUAL_64(0, x24);
  ASSERT_EQUAL_64(0x8000000000000000, x25);
  ASSERT_EQUAL_64(0, x26);
  ASSERT_EQUAL_64(0, x27);
  ASSERT_EQUAL_64(0x7fffffffffffffff, x28);
  ASSERT_EQUAL_64(0, x29);
  ASSERT_EQUAL_64(0, x18);
  ASSERT_EQUAL_64(0, x19);
  ASSERT_EQUAL_64(0, x20);
  ASSERT_EQUAL_64(0, x21);

  TEARDOWN();
}


TEST(rbit_rev) {
  SETUP();

  START();
  __ Mov(x24, 0xfedcba9876543210);
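  // Rbit reverses the bit order of the register. Rev16 reverses the bytes in
  // each halfword, Rev32 the bytes in each word, and Rev/Rev64 the bytes in
  // the whole register.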
  __ Rbit(w0, w24);
  __ Rbit(x1, x24);
  __ Rev16(w2, w24);
  __ Rev16(x3, x24);
  __ Rev(w4, w24);
  __ Rev32(x5, x24);
  __ Rev64(x6, x24);
  __ Rev(x7, x24);
  END();

  RUN();

  ASSERT_EQUAL_64(0x084c2a6e, x0);
  ASSERT_EQUAL_64(0x084c2a6e195d3b7f, x1);
  ASSERT_EQUAL_64(0x54761032, x2);
  ASSERT_EQUAL_64(0xdcfe98ba54761032, x3);
  ASSERT_EQUAL_64(0x10325476, x4);
  ASSERT_EQUAL_64(0x98badcfe10325476, x5);
  ASSERT_EQUAL_64(0x1032547698badcfe, x6);
  ASSERT_EQUAL_64(0x1032547698badcfe, x7);

  TEARDOWN();
}

typedef void (MacroAssembler::*TestBranchSignature)(const Register& rt,
                                                    unsigned bit_pos,
                                                    Label* label);

static void TbzRangePoolLimitHelper(TestBranchSignature test_branch) {
  const int kTbzRange = 32768;
  const int kNumLdrLiteral = kTbzRange / 4;
  const int fuzzRange = 2;
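  // Each literal load below adds a four-byte entry to the literal pool, so
  // kNumLdrLiteral loads fill the 32KB range of TBZ; fuzzRange jitters the
  // count around that boundary.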
  for (int n = kNumLdrLiteral - fuzzRange; n <= kNumLdrLiteral + fuzzRange;
       ++n) {
    for (int margin = -32; margin < 32; margin += 4) {
      SETUP();

      START();

      // Emit 32KB of literals (equal to the range of TBZ).
      for (int i = 0; i < n; ++i) {
        __ Ldr(w0, 0x12345678);
      }

      const int kLiteralMargin = 128 * KBytes;

      // Emit enough NOPs to be just about to emit the literal pool.
      ptrdiff_t end =
          masm.GetCursorOffset() + (kLiteralMargin - n * 4 + margin);
      while (masm.GetCursorOffset() < end) {
        __ Nop();
      }

      // Add a TBZ instruction.
      Label label;

      (masm.*test_branch)(x0, 2, &label);

      // Add enough NOPs to surpass its range, to make sure we can encode the
      // veneer.
      end = masm.GetCursorOffset() + (kTbzRange - 4);
      {
        ExactAssemblyScope scope(&masm,
                                 kTbzRange,
                                 ExactAssemblyScope::kMaximumSize);
        while (masm.GetCursorOffset() < end) __ nop();
      }

      // Finally, bind the label.
      __ Bind(&label);

      END();

      RUN();

      TEARDOWN();
    }
  }
}

TEST(test_branch_limits_literal_pool_size) {
  TbzRangePoolLimitHelper(&MacroAssembler::Tbz);
  TbzRangePoolLimitHelper(&MacroAssembler::Tbnz);
}

TEST(clz_cls) {
  SETUP();

  START();
  __ Mov(x24, 0x0008000000800000);
  __ Mov(x25, 0xff800000fff80000);
  __ Mov(x26, 0);
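  // Clz counts leading zero bits. Cls counts leading bits that match the sign
  // bit, excluding the sign bit itself, so the result for zero is 31 (W) or
  // 63 (X).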
  __ Clz(w0, w24);
  __ Clz(x1, x24);
  __ Clz(w2, w25);
  __ Clz(x3, x25);
  __ Clz(w4, w26);
  __ Clz(x5, x26);
  __ Cls(w6, w24);
  __ Cls(x7, x24);
  __ Cls(w8, w25);
  __ Cls(x9, x25);
  __ Cls(w10, w26);
  __ Cls(x11, x26);
  END();

  RUN();

  ASSERT_EQUAL_64(8, x0);
  ASSERT_EQUAL_64(12, x1);
  ASSERT_EQUAL_64(0, x2);
  ASSERT_EQUAL_64(0, x3);
  ASSERT_EQUAL_64(32, x4);
  ASSERT_EQUAL_64(64, x5);
  ASSERT_EQUAL_64(7, x6);
  ASSERT_EQUAL_64(11, x7);
  ASSERT_EQUAL_64(12, x8);
  ASSERT_EQUAL_64(8, x9);
  ASSERT_EQUAL_64(31, x10);
  ASSERT_EQUAL_64(63, x11);

  TEARDOWN();
}


TEST(pacia_pacib_autia_autib) {
  SETUP_WITH_FEATURES(CPUFeatures::kPAuth);

  START();

  Register pointer = x24;
  Register modifier = x25;

  __ Mov(pointer, 0x0000000012345678);
  __ Mov(modifier, 0x477d469dec0b8760);

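  // Pac* instructions insert a pointer authentication code, derived from the
  // pointer, the modifier and a key, into the unused high bits of the
  // pointer. Aut* instructions restore the original pointer if the code
  // matches, and corrupt it in a recognisable way otherwise.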
  // Generate PACs using keys A and B.
  __ Mov(x0, pointer);
  __ Pacia(x0, modifier);

  __ Mov(x1, pointer);
  __ Pacib(x1, modifier);

  // Authenticate the pointers above.
  __ Mov(x2, x0);
  __ Autia(x2, modifier);

  __ Mov(x3, x1);
  __ Autib(x3, modifier);

  // Attempt to authenticate incorrect pointers.
  __ Mov(x4, x1);
  __ Autia(x4, modifier);

  __ Mov(x5, x0);
  __ Autib(x5, modifier);

  // Mask out just the PAC code bits.
  // TODO: use Simulator::CalculatePACMask in a nice way.
  __ And(x0, x0, 0x007f000000000000);
  __ And(x1, x1, 0x007f000000000000);

  END();

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
  RUN();

  // Check PAC codes have been generated and aren't equal.
  // NOTE: with a different ComputePAC implementation, there may be a collision.
  ASSERT_NOT_EQUAL_64(0, x0);
  ASSERT_NOT_EQUAL_64(0, x1);
  ASSERT_NOT_EQUAL_64(x0, x1);

  // Pointers correctly authenticated.
  ASSERT_EQUAL_64(pointer, x2);
  ASSERT_EQUAL_64(pointer, x3);

  // Pointers corrupted after failing to authenticate.
  ASSERT_EQUAL_64(0x0020000012345678, x4);
  ASSERT_EQUAL_64(0x0040000012345678, x5);
#endif

  TEARDOWN();
}


TEST(paciza_pacizb_autiza_autizb) {
  SETUP_WITH_FEATURES(CPUFeatures::kPAuth);

  START();

  Register pointer = x24;

  __ Mov(pointer, 0x0000000012345678);

  // Generate PACs using keys A and B.
  __ Mov(x0, pointer);
  __ Paciza(x0);

  __ Mov(x1, pointer);
  __ Pacizb(x1);

  // Authenticate the pointers above.
  __ Mov(x2, x0);
  __ Autiza(x2);

  __ Mov(x3, x1);
  __ Autizb(x3);

  // Attempt to authenticate incorrect pointers.
  __ Mov(x4, x1);
  __ Autiza(x4);

  __ Mov(x5, x0);
  __ Autizb(x5);

  // Mask out just the PAC code bits.
  // TODO: use Simulator::CalculatePACMask in a nice way.
  __ And(x0, x0, 0x007f000000000000);
  __ And(x1, x1, 0x007f000000000000);

  END();

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
  RUN();

  // Check PAC codes have been generated and aren't equal.
  // NOTE: with a different ComputePAC implementation, there may be a collision.
  ASSERT_NOT_EQUAL_64(0, x0);
  ASSERT_NOT_EQUAL_64(0, x1);
  ASSERT_NOT_EQUAL_64(x0, x1);

  // Pointers correctly authenticated.
  ASSERT_EQUAL_64(pointer, x2);
  ASSERT_EQUAL_64(pointer, x3);

  // Pointers corrupted after failing to authenticate.
  ASSERT_EQUAL_64(0x0020000012345678, x4);
  ASSERT_EQUAL_64(0x0040000012345678, x5);
#endif

  TEARDOWN();
}


TEST(pacda_pacdb_autda_autdb) {
  SETUP_WITH_FEATURES(CPUFeatures::kPAuth);

  START();

  Register pointer = x24;
  Register modifier = x25;

  __ Mov(pointer, 0x0000000012345678);
  __ Mov(modifier, 0x477d469dec0b8760);

  // Generate PACs using keys A and B.
  __ Mov(x0, pointer);
  __ Pacda(x0, modifier);

  __ Mov(x1, pointer);
  __ Pacdb(x1, modifier);

  // Authenticate the pointers above.
  __ Mov(x2, x0);
  __ Autda(x2, modifier);

  __ Mov(x3, x1);
  __ Autdb(x3, modifier);

  // Attempt to authenticate incorrect pointers.
  __ Mov(x4, x1);
  __ Autda(x4, modifier);

  __ Mov(x5, x0);
  __ Autdb(x5, modifier);

  // Mask out just the PAC code bits.
  // TODO: use Simulator::CalculatePACMask in a nice way.
  __ And(x0, x0, 0x007f000000000000);
  __ And(x1, x1, 0x007f000000000000);

  END();

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
  RUN();

  // Check PAC codes have been generated and aren't equal.
  // NOTE: with a different ComputePAC implementation, there may be a collision.
  ASSERT_NOT_EQUAL_64(0, x0);
  ASSERT_NOT_EQUAL_64(0, x1);
  ASSERT_NOT_EQUAL_64(x0, x1);

  // Pointers correctly authenticated.
  ASSERT_EQUAL_64(pointer, x2);
  ASSERT_EQUAL_64(pointer, x3);

  // Pointers corrupted after failing to authenticate.
  ASSERT_EQUAL_64(0x0020000012345678, x4);
  ASSERT_EQUAL_64(0x0040000012345678, x5);
#endif

  TEARDOWN();
}


TEST(pacdza_pacdzb_autdza_autdzb) {
  SETUP_WITH_FEATURES(CPUFeatures::kPAuth);

  START();

  Register pointer = x24;

  __ Mov(pointer, 0x0000000012345678);

  // Generate PACs using keys A and B.
  __ Mov(x0, pointer);
  __ Pacdza(x0);

  __ Mov(x1, pointer);
  __ Pacdzb(x1);

  // Authenticate the pointers above.
  __ Mov(x2, x0);
  __ Autdza(x2);

  __ Mov(x3, x1);
  __ Autdzb(x3);

  // Attempt to authenticate incorrect pointers.
  __ Mov(x4, x1);
  __ Autdza(x4);

  __ Mov(x5, x0);
  __ Autdzb(x5);

  // Mask out just the PAC code bits.
  // TODO: use Simulator::CalculatePACMask in a nice way.
  __ And(x0, x0, 0x007f000000000000);
  __ And(x1, x1, 0x007f000000000000);

  END();

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
  RUN();

  // Check PAC codes have been generated and aren't equal.
  // NOTE: with a different ComputePAC implementation, there may be a collision.
  ASSERT_NOT_EQUAL_64(0, x0);
  ASSERT_NOT_EQUAL_64(0, x1);
  ASSERT_NOT_EQUAL_64(x0, x1);

  // Pointers correctly authenticated.
  ASSERT_EQUAL_64(pointer, x2);
  ASSERT_EQUAL_64(pointer, x3);

  // Pointers corrupted after failing to authenticate.
  ASSERT_EQUAL_64(0x0020000012345678, x4);
  ASSERT_EQUAL_64(0x0040000012345678, x5);
#endif

  TEARDOWN();
}


TEST(pacga_xpaci_xpacd) {
  SETUP_WITH_FEATURES(CPUFeatures::kPAuth, CPUFeatures::kPAuthGeneric);

  START();

  Register pointer = x24;
  Register modifier = x25;

  __ Mov(pointer, 0x0000000012345678);
  __ Mov(modifier, 0x477d469dec0b8760);

  // Generate generic PAC.
  __ Pacga(x0, pointer, modifier);

  // Generate PACs using key A.
  __ Mov(x1, pointer);
  __ Mov(x2, pointer);
  __ Pacia(x1, modifier);
  __ Pacda(x2, modifier);

  // Strip PACs.
  __ Mov(x3, x1);
  __ Mov(x4, x2);
  __ Xpaci(x3);
  __ Xpacd(x4);

  // Mask out just the PAC code bits.
  // TODO: use Simulator::CalculatePACMask in a nice way.
  __ And(x0, x0, 0xffffffff00000000);
  __ And(x1, x1, 0x007f000000000000);
  __ And(x2, x2, 0x007f000000000000);

  END();

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
  RUN();

  // Check PAC codes have been generated and aren't equal.
  // NOTE: with a different ComputePAC implementation, there may be a collision.
  ASSERT_NOT_EQUAL_64(0, x0);

  ASSERT_NOT_EQUAL_64(0, x1);
  ASSERT_NOT_EQUAL_64(0, x2);
  ASSERT_NOT_EQUAL_64(x1, x2);

  ASSERT_EQUAL_64(pointer, x3);
  ASSERT_EQUAL_64(pointer, x4);
#endif

  TEARDOWN();
}


TEST(label) {
  SETUP();

  Label label_1, label_2, label_3, label_4;

  START();
  __ Mov(x0, 0x1);
  __ Mov(x1, 0x0);
  __ Mov(x22, lr);  // Save lr.

  __ B(&label_1);
  __ B(&label_1);
  __ B(&label_1);  // Multiple branches to the same label.
  __ Mov(x0, 0x0);
  __ Bind(&label_2);
  __ B(&label_3);  // Forward branch.
  __ Mov(x0, 0x0);
  __ Bind(&label_1);
  __ B(&label_2);  // Backward branch.
  __ Mov(x0, 0x0);
  __ Bind(&label_3);
  __ Bl(&label_4);
  END();

  __ Bind(&label_4);
  __ Mov(x1, 0x1);
  __ Mov(lr, x22);
  END();

  RUN();

  ASSERT_EQUAL_64(0x1, x0);
  ASSERT_EQUAL_64(0x1, x1);

  TEARDOWN();
}


TEST(label_2) {
  SETUP();

  Label label_1, label_2, label_3;
  Label first_jump_to_3;

  START();
  __ Mov(x0, 0x0);

  __ B(&label_1);
  ptrdiff_t offset_2 = masm.GetCursorOffset();
  __ Orr(x0, x0, 1 << 1);
  __ B(&label_3);
  ptrdiff_t offset_1 = masm.GetCursorOffset();
  __ Orr(x0, x0, 1 << 0);
  __ B(&label_2);
  ptrdiff_t offset_3 = masm.GetCursorOffset();
  __ Tbz(x0, 2, &first_jump_to_3);
  __ Orr(x0, x0, 1 << 3);
  __ Bind(&first_jump_to_3);
  __ Orr(x0, x0, 1 << 2);
  __ Tbz(x0, 3, &label_3);

  // Labels 1, 2, and 3 are bound before the current buffer offset. Branches to
  // label_1 and label_2 branch respectively forward and backward. Branches to
  // label 3 include both forward and backward branches.
  masm.BindToOffset(&label_1, offset_1);
  masm.BindToOffset(&label_2, offset_2);
  masm.BindToOffset(&label_3, offset_3);

  END();

  RUN();

  ASSERT_EQUAL_64(0xf, x0);

  TEARDOWN();
}


TEST(adr) {
  SETUP();

  Label label_1, label_2, label_3, label_4;

  START();
  __ Mov(x0, 0x0);       // Set to non-zero to indicate failure.
  __ Adr(x1, &label_3);  // Set to zero to indicate success.

  __ Adr(x2, &label_1);  // Multiple forward references to the same label.
  __ Adr(x3, &label_1);
  __ Adr(x4, &label_1);

  __ Bind(&label_2);
  __ Eor(x5, x2, Operand(x3));  // Ensure that x2, x3 and x4 are identical.
  __ Eor(x6, x2, Operand(x4));
  __ Orr(x0, x0, Operand(x5));
  __ Orr(x0, x0, Operand(x6));
  __ Br(x2);  // label_1, label_3

  __ Bind(&label_3);
  __ Adr(x2, &label_3);  // Self-reference (offset 0).
  __ Eor(x1, x1, Operand(x2));
  __ Adr(x2, &label_4);  // Simple forward reference.
  __ Br(x2);             // label_4

  __ Bind(&label_1);
  __ Adr(x2, &label_3);  // Multiple reverse references to the same label.
  __ Adr(x3, &label_3);
  __ Adr(x4, &label_3);
  __ Adr(x5, &label_2);  // Simple reverse reference.
  __ Br(x5);             // label_2

  __ Bind(&label_4);
  END();

  RUN();

  ASSERT_EQUAL_64(0x0, x0);
  ASSERT_EQUAL_64(0x0, x1);

  TEARDOWN();
}


// Simple adrp tests: check that labels are linked and handled properly.
// This is similar to the adr test, but all the adrp instructions are put on the
// same page so that they return the same value.
TEST(adrp) {
  Label start;
  Label label_1, label_2, label_3;

  SETUP_CUSTOM(2 * kPageSize, PageOffsetDependentCode);
  START();

  // Waste space until the start of a page.
  {
    ExactAssemblyScope scope(&masm,
                             kPageSize,
                             ExactAssemblyScope::kMaximumSize);
    const uintptr_t kPageOffsetMask = kPageSize - 1;
    while ((masm.GetCursorAddress<uintptr_t>() & kPageOffsetMask) != 0) {
      __ b(&start);
    }
    __ bind(&start);
  }

  // Simple forward reference.
  __ Adrp(x0, &label_2);

  __ Bind(&label_1);

  // Multiple forward references to the same label.
  __ Adrp(x1, &label_3);
  __ Adrp(x2, &label_3);
  __ Adrp(x3, &label_3);

  __ Bind(&label_2);

  // Self-reference (offset 0).
  __ Adrp(x4, &label_2);

  __ Bind(&label_3);

  // Simple reverse reference.
  __ Adrp(x5, &label_1);

  // Multiple reverse references to the same label.
  __ Adrp(x6, &label_2);
  __ Adrp(x7, &label_2);
  __ Adrp(x8, &label_2);

  VIXL_ASSERT(masm.GetSizeOfCodeGeneratedSince(&start) < kPageSize);
  END();
  RUN_CUSTOM();

  uint64_t expected = reinterpret_cast<uint64_t>(
      AlignDown(masm.GetLabelAddress<uint64_t*>(&start), kPageSize));
  ASSERT_EQUAL_64(expected, x0);
  ASSERT_EQUAL_64(expected, x1);
  ASSERT_EQUAL_64(expected, x2);
  ASSERT_EQUAL_64(expected, x3);
  ASSERT_EQUAL_64(expected, x4);
  ASSERT_EQUAL_64(expected, x5);
  ASSERT_EQUAL_64(expected, x6);
  ASSERT_EQUAL_64(expected, x7);
  ASSERT_EQUAL_64(expected, x8);

  TEARDOWN_CUSTOM();
}


static void AdrpPageBoundaryHelper(unsigned offset_into_page) {
  VIXL_ASSERT(offset_into_page < kPageSize);
  VIXL_ASSERT((offset_into_page % kInstructionSize) == 0);

  const uintptr_t kPageOffsetMask = kPageSize - 1;

  // The test label is always bound on page 0. Adrp instructions are generated
  // on pages from kStartPage to kEndPage (inclusive).
  const int kStartPage = -16;
  const int kEndPage = 16;
  const int kMaxCodeSize = (kEndPage - kStartPage + 2) * kPageSize;
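  // That is (kEndPage - kStartPage + 1) pages of adrp code, plus what is
  // presumably one extra page of slack for the alignment padding emitted
  // before the first page.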

  SETUP_CUSTOM(kMaxCodeSize, PageOffsetDependentCode);
  START();

  Label test;
  Label start;

  {
    ExactAssemblyScope scope(&masm,
                             kMaxCodeSize,
                             ExactAssemblyScope::kMaximumSize);
    // Initialize NZCV with `eq` flags.
    __ cmp(wzr, wzr);
    // Waste space until the start of a page.
    while ((masm.GetCursorAddress<uintptr_t>() & kPageOffsetMask) != 0) {
      __ b(&start);
    }

    // The first page.
    VIXL_STATIC_ASSERT(kStartPage < 0);
    {
      ExactAssemblyScope scope_page(&masm, kPageSize);
      __ bind(&start);
      __ adrp(x0, &test);
      __ adrp(x1, &test);
      for (size_t i = 2; i < (kPageSize / kInstructionSize); i += 2) {
        __ ccmp(x0, x1, NoFlag, eq);
        __ adrp(x1, &test);
      }
    }

    // Subsequent pages.
    VIXL_STATIC_ASSERT(kEndPage >= 0);
    for (int page = (kStartPage + 1); page <= kEndPage; page++) {
      ExactAssemblyScope scope_page(&masm, kPageSize);
      if (page == 0) {
        for (size_t i = 0; i < (kPageSize / kInstructionSize);) {
          if (i++ == (offset_into_page / kInstructionSize)) __ bind(&test);
          __ ccmp(x0, x1, NoFlag, eq);
          if (i++ == (offset_into_page / kInstructionSize)) __ bind(&test);
          __ adrp(x1, &test);
        }
      } else {
        for (size_t i = 0; i < (kPageSize / kInstructionSize); i += 2) {
          __ ccmp(x0, x1, NoFlag, eq);
          __ adrp(x1, &test);
        }
      }
    }
  }

  // Every adrp instruction pointed to the same label (`test`), so they should
  // all have produced the same result.

  END();
  RUN_CUSTOM();

  uintptr_t expected =
      AlignDown(masm.GetLabelAddress<uintptr_t>(&test), kPageSize);
  ASSERT_EQUAL_64(expected, x0);
  ASSERT_EQUAL_64(expected, x1);
  ASSERT_EQUAL_NZCV(ZCFlag);

  TEARDOWN_CUSTOM();
}


// Test that labels are correctly referenced by adrp across page boundaries.
TEST(adrp_page_boundaries) {
  VIXL_STATIC_ASSERT(kPageSize == 4096);
  AdrpPageBoundaryHelper(kInstructionSize * 0);
  AdrpPageBoundaryHelper(kInstructionSize * 1);
  AdrpPageBoundaryHelper(kInstructionSize * 512);
  AdrpPageBoundaryHelper(kInstructionSize * 1022);
  AdrpPageBoundaryHelper(kInstructionSize * 1023);
}


static void AdrpOffsetHelper(int64_t offset) {
  const size_t kPageOffsetMask = kPageSize - 1;
  const int kMaxCodeSize = 2 * kPageSize;

  SETUP_CUSTOM(kMaxCodeSize, PageOffsetDependentCode);
  START();

  Label page;

  {
    ExactAssemblyScope scope(&masm,
                             kMaxCodeSize,
                             ExactAssemblyScope::kMaximumSize);
    // Initialize NZCV with `eq` flags.
    __ cmp(wzr, wzr);
    // Waste space until the start of a page.
    while ((masm.GetCursorAddress<uintptr_t>() & kPageOffsetMask) != 0) {
      __ b(&page);
    }
    __ bind(&page);

    {
      ExactAssemblyScope scope_page(&masm, kPageSize);
      // Every adrp instruction on this page should return the same value.
      __ adrp(x0, offset);
      __ adrp(x1, offset);
      for (size_t i = 2; i < kPageSize / kInstructionSize; i += 2) {
        __ ccmp(x0, x1, NoFlag, eq);
        __ adrp(x1, offset);
      }
    }
  }

  END();
  RUN_CUSTOM();

  uintptr_t expected =
      masm.GetLabelAddress<uintptr_t>(&page) + (kPageSize * offset);
  ASSERT_EQUAL_64(expected, x0);
  ASSERT_EQUAL_64(expected, x1);
  ASSERT_EQUAL_NZCV(ZCFlag);

  TEARDOWN_CUSTOM();
}


// Check that adrp produces the correct result for a specific offset.
TEST(adrp_offset) {
  AdrpOffsetHelper(0);
  AdrpOffsetHelper(1);
  AdrpOffsetHelper(-1);
  AdrpOffsetHelper(4);
  AdrpOffsetHelper(-4);
  AdrpOffsetHelper(0x000fffff);
  AdrpOffsetHelper(-0x000fffff);
  AdrpOffsetHelper(-0x00100000);
}


TEST(branch_cond) {
  SETUP();

  Label done, wrong;

  START();
  __ Mov(x0, 0x1);
  __ Mov(x1, 0x1);
  __ Mov(x2, 0x8000000000000000);

  // After each 'cmp' instruction below, the branches listed must not be
  // taken; any other condition code would branch.

  __ Cmp(x1, 0);
  __ B(&wrong, eq);
  __ B(&wrong, lo);
  __ B(&wrong, mi);
  __ B(&wrong, vs);
  __ B(&wrong, ls);
  __ B(&wrong, lt);
  __ B(&wrong, le);
  Label ok_1;
  __ B(&ok_1, ne);
  __ Mov(x0, 0x0);
  __ Bind(&ok_1);

  __ Cmp(x1, 1);
  __ B(&wrong, ne);
  __ B(&wrong, lo);
  __ B(&wrong, mi);
  __ B(&wrong, vs);
  __ B(&wrong, hi);
  __ B(&wrong, lt);
  __ B(&wrong, gt);
  Label ok_2;
  __ B(&ok_2, pl);
  __ Mov(x0, 0x0);
  __ Bind(&ok_2);

  __ Cmp(x1, 2);
  __ B(&wrong, eq);
  __ B(&wrong, hs);
  __ B(&wrong, pl);
  __ B(&wrong, vs);
  __ B(&wrong, hi);
  __ B(&wrong, ge);
  __ B(&wrong, gt);
  Label ok_3;
  __ B(&ok_3, vc);
  __ Mov(x0, 0x0);
  __ Bind(&ok_3);

  __ Cmp(x2, 1);
  __ B(&wrong, eq);
  __ B(&wrong, lo);
  __ B(&wrong, mi);
  __ B(&wrong, vc);
  __ B(&wrong, ls);
  __ B(&wrong, ge);
  __ B(&wrong, gt);
  Label ok_4;
  __ B(&ok_4, le);
  __ Mov(x0, 0x0);
  __ Bind(&ok_4);

  // The MacroAssembler does not allow al as a branch condition.
  Label ok_5;
  {
    ExactAssemblyScope scope(&masm, kInstructionSize);
    __ b(&ok_5, al);
  }
  __ Mov(x0, 0x0);
  __ Bind(&ok_5);

  // The MacroAssembler does not allow nv as a branch condition.
  Label ok_6;
  {
    ExactAssemblyScope scope(&masm, kInstructionSize);
    __ b(&ok_6, nv);
  }
  __ Mov(x0, 0x0);
  __ Bind(&ok_6);

  __ B(&done);

  __ Bind(&wrong);
  __ Mov(x0, 0x0);

  __ Bind(&done);
  END();

  RUN();

  ASSERT_EQUAL_64(0x1, x0);

  TEARDOWN();
}


TEST(branch_to_reg) {
  SETUP();

  // Test br.
  Label fn1, after_fn1;

  START();
  __ Mov(x29, lr);

  __ Mov(x1, 0);
  __ B(&after_fn1);

  __ Bind(&fn1);
  __ Mov(x0, lr);
  __ Mov(x1, 42);
  __ Br(x0);

  __ Bind(&after_fn1);
  __ Bl(&fn1);

  // Test blr.
  Label fn2, after_fn2, after_bl2;

  __ Mov(x2, 0);
  __ B(&after_fn2);

  __ Bind(&fn2);
  __ Mov(x0, lr);
  __ Mov(x2, 84);
  __ Blr(x0);

  __ Bind(&after_fn2);
  __ Bl(&fn2);
  __ Bind(&after_bl2);
  __ Mov(x3, lr);
  __ Adr(x4, &after_bl2);
  __ Adr(x5, &after_fn2);

  __ Mov(lr, x29);
  END();

  RUN();

  ASSERT_EQUAL_64(x4, x0);
  ASSERT_EQUAL_64(x5, x3);
  ASSERT_EQUAL_64(42, x1);
  ASSERT_EQUAL_64(84, x2);

  TEARDOWN();
}

TEST(branch_to_reg_auth_a) {
  SETUP_WITH_FEATURES(CPUFeatures::kPAuth);

  START();

  Label fn1, after_fn1;

  __ Mov(x28, 0x477d469dec0b8760);
  __ Mov(x29, lr);

  __ Mov(x1, 0);
  __ B(&after_fn1);

  __ Bind(&fn1);
  __ Mov(x0, lr);
  __ Mov(x1, 42);
  __ Pacia(x0, x28);
  __ Braa(x0, x28);

  __ Bind(&after_fn1);
  __ Bl(&fn1);

  Label fn2, after_fn2, after_bl2;

  __ Mov(x2, 0);
  __ B(&after_fn2);

  __ Bind(&fn2);
  __ Mov(x0, lr);
  __ Mov(x2, 84);
  __ Pacia(x0, x28);
  __ Blraa(x0, x28);

  __ Bind(&after_fn2);
  __ Bl(&fn2);
  __ Bind(&after_bl2);
  __ Mov(x3, lr);
  __ Adr(x4, &after_bl2);
  __ Adr(x5, &after_fn2);

  __ Xpaci(x0);
  __ Mov(lr, x29);
  END();

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
  RUN();

  ASSERT_EQUAL_64(x4, x0);
  ASSERT_EQUAL_64(x5, x3);
  ASSERT_EQUAL_64(42, x1);
  ASSERT_EQUAL_64(84, x2);
#endif

  TEARDOWN();
}

TEST(return_to_reg_auth) {
  SETUP_WITH_FEATURES(CPUFeatures::kPAuth);

  START();

  Label fn1, after_fn1;

  __ Mov(x28, sp);
  __ Mov(x29, lr);
  __ Mov(sp, 0x477d469dec0b8760);
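  // Paciasp/Pacibsp sign lr using sp as the modifier, and Retaa/Retab
  // authenticate lr against sp before returning, so sp is given a fixed,
  // known value for the duration of the test.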

  __ Mov(x0, 0);
  __ B(&after_fn1);

  __ Bind(&fn1);
  __ Mov(x0, 42);
  __ Paciasp();
  __ Retaa();

  __ Bind(&after_fn1);
  __ Bl(&fn1);

  Label fn2, after_fn2;

  __ Mov(x1, 0);
  __ B(&after_fn2);

  __ Bind(&fn2);
  __ Mov(x1, 84);
  __ Pacibsp();
  __ Retab();

  __ Bind(&after_fn2);
  __ Bl(&fn2);

  __ Mov(sp, x28);
  __ Mov(lr, x29);
  END();

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
  RUN();

  ASSERT_EQUAL_64(42, x0);
  ASSERT_EQUAL_64(84, x1);
#endif

  TEARDOWN();
}

#ifdef VIXL_NEGATIVE_TESTING
TEST(branch_to_reg_auth_fail) {
  SETUP_WITH_FEATURES(CPUFeatures::kPAuth);

  START();

  Label fn1, after_fn1;

  __ Mov(x29, lr);

  __ B(&after_fn1);

  __ Bind(&fn1);
  __ Mov(x0, lr);
  __ Pacizb(x0);
  __ Blraaz(x0);

  __ Bind(&after_fn1);
  __ Bl(&fn1);

  __ Mov(lr, x29);
  END();

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
  MUST_FAIL_WITH_MESSAGE(RUN(), "Failed to authenticate pointer.");
#endif  // VIXL_INCLUDE_SIMULATOR_AARCH64

  TEARDOWN();
}
#endif  // VIXL_NEGATIVE_TESTING

#ifdef VIXL_NEGATIVE_TESTING
TEST(return_to_reg_auth_fail) {
  SETUP_WITH_FEATURES(CPUFeatures::kPAuth);

  START();

  Label fn1, after_fn1;

  __ Mov(x28, sp);
  __ Mov(x29, lr);
  __ Mov(sp, 0x477d469dec0b8760);

  __ B(&after_fn1);

  __ Bind(&fn1);
  __ Paciasp();
  __ Retab();

  __ Bind(&after_fn1);
  __ Bl(&fn1);

  __ Mov(sp, x28);
  __ Mov(lr, x29);
  END();

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
  MUST_FAIL_WITH_MESSAGE(RUN(), "Failed to authenticate pointer.");
#endif  // VIXL_INCLUDE_SIMULATOR_AARCH64

  TEARDOWN();
}
#endif  // VIXL_NEGATIVE_TESTING

TEST(branch_to_reg_auth_a_zero) {
  SETUP_WITH_FEATURES(CPUFeatures::kPAuth);

  START();

  Label fn1, after_fn1;

  __ Mov(x29, lr);

  __ Mov(x1, 0);
  __ B(&after_fn1);

  __ Bind(&fn1);
  __ Mov(x0, lr);
  __ Mov(x1, 42);
  __ Paciza(x0);
  __ Braaz(x0);

  __ Bind(&after_fn1);
  __ Bl(&fn1);

  Label fn2, after_fn2, after_bl2;

  __ Mov(x2, 0);
  __ B(&after_fn2);

  __ Bind(&fn2);
  __ Mov(x0, lr);
  __ Mov(x2, 84);
  __ Paciza(x0);
  __ Blraaz(x0);

  __ Bind(&after_fn2);
  __ Bl(&fn2);
  __ Bind(&after_bl2);
  __ Mov(x3, lr);
  __ Adr(x4, &after_bl2);
  __ Adr(x5, &after_fn2);

  __ Xpaci(x0);
  __ Mov(lr, x29);
  END();

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
  RUN();

  ASSERT_EQUAL_64(x4, x0);
  ASSERT_EQUAL_64(x5, x3);
  ASSERT_EQUAL_64(42, x1);
  ASSERT_EQUAL_64(84, x2);
#endif

  TEARDOWN();
}


TEST(compare_branch) {
  SETUP();

  START();
  __ Mov(x0, 0);
  __ Mov(x1, 0);
  __ Mov(x2, 0);
  __ Mov(x3, 0);
  __ Mov(x4, 0);
  __ Mov(x5, 0);
  __ Mov(x16, 0);
  __ Mov(x17, 42);

  Label zt, zt_end;
  __ Cbz(w16, &zt);
  __ B(&zt_end);
  __ Bind(&zt);
  __ Mov(x0, 1);
  __ Bind(&zt_end);

  Label zf, zf_end;
  __ Cbz(x17, &zf);
  __ B(&zf_end);
  __ Bind(&zf);
  __ Mov(x1, 1);
  __ Bind(&zf_end);

  Label nzt, nzt_end;
  __ Cbnz(w17, &nzt);
  __ B(&nzt_end);
  __ Bind(&nzt);
  __ Mov(x2, 1);
  __ Bind(&nzt_end);

  Label nzf, nzf_end;
  __ Cbnz(x16, &nzf);
  __ B(&nzf_end);
  __ Bind(&nzf);
  __ Mov(x3, 1);
  __ Bind(&nzf_end);

  __ Mov(x18, 0xffffffff00000000);

  Label a, a_end;
  __ Cbz(w18, &a);
  __ B(&a_end);
  __ Bind(&a);
  __ Mov(x4, 1);
  __ Bind(&a_end);

  Label b, b_end;
  __ Cbnz(w18, &b);
  __ B(&b_end);
  __ Bind(&b);
  __ Mov(x5, 1);
  __ Bind(&b_end);

  END();

  RUN();

  ASSERT_EQUAL_64(1, x0);
  ASSERT_EQUAL_64(0, x1);
  ASSERT_EQUAL_64(1, x2);
  ASSERT_EQUAL_64(0, x3);
  ASSERT_EQUAL_64(1, x4);
  ASSERT_EQUAL_64(0, x5);

  TEARDOWN();
}


TEST(test_branch) {
  SETUP();

  START();
  __ Mov(x0, 0);
  __ Mov(x1, 0);
  __ Mov(x2, 0);
  __ Mov(x3, 0);
  __ Mov(x16, 0xaaaaaaaaaaaaaaaa);
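  // In 0xaaaaaaaaaaaaaaaa the even-numbered bits are clear and the
  // odd-numbered bits are set, so Tbz is taken for even bit positions and
  // Tbnz for odd ones.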

  Label bz, bz_end;
  __ Tbz(w16, 0, &bz);
  __ B(&bz_end);
  __ Bind(&bz);
  __ Mov(x0, 1);
  __ Bind(&bz_end);

  Label bo, bo_end;
  __ Tbz(x16, 63, &bo);
  __ B(&bo_end);
  __ Bind(&bo);
  __ Mov(x1, 1);
  __ Bind(&bo_end);

  Label nbz, nbz_end;
  __ Tbnz(x16, 61, &nbz);
  __ B(&nbz_end);
  __ Bind(&nbz);
  __ Mov(x2, 1);
  __ Bind(&nbz_end);

  Label nbo, nbo_end;
  __ Tbnz(w16, 2, &nbo);
  __ B(&nbo_end);
  __ Bind(&nbo);
  __ Mov(x3, 1);
  __ Bind(&nbo_end);
  END();

  RUN();

  ASSERT_EQUAL_64(1, x0);
  ASSERT_EQUAL_64(0, x1);
  ASSERT_EQUAL_64(1, x2);
  ASSERT_EQUAL_64(0, x3);

  TEARDOWN();
}


TEST(branch_type) {
  SETUP();

  Label fail, done;

  START();
  __ Mov(x0, 0x0);
  __ Mov(x10, 0x7);
  __ Mov(x11, 0x0);
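  // The reg_zero, reg_not_zero, reg_bit_clear and reg_bit_set branch types
  // let B expand to cbz, cbnz, tbz and tbnz respectively.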

  // Test non-taken branches.
  __ Cmp(x10, 0x7);
  __ B(&fail, ne);
  __ B(&fail, never);
  __ B(&fail, reg_zero, x10);
  __ B(&fail, reg_not_zero, x11);
  __ B(&fail, reg_bit_clear, x10, 0);
  __ B(&fail, reg_bit_set, x10, 3);

  // Test taken branches.
  Label l1, l2, l3, l4, l5;
  __ Cmp(x10, 0x7);
  __ B(&l1, eq);
  __ B(&fail);
  __ Bind(&l1);
  __ B(&l2, always);
  __ B(&fail);
  __ Bind(&l2);
  __ B(&l3, reg_not_zero, x10);
  __ B(&fail);
  __ Bind(&l3);
  __ B(&l4, reg_bit_clear, x10, 15);
  __ B(&fail);
  __ Bind(&l4);
  __ B(&l5, reg_bit_set, x10, 1);
  __ B(&fail);
  __ Bind(&l5);

  __ B(&done);

  __ Bind(&fail);
  __ Mov(x0, 0x1);

  __ Bind(&done);

  END();

  RUN();

  ASSERT_EQUAL_64(0x0, x0);

  TEARDOWN();
}


TEST(ldr_str_offset) {
  SETUP();

  uint64_t src[2] = {0xfedcba9876543210, 0x0123456789abcdef};
  uint64_t dst[5] = {0, 0, 0, 0, 0};
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
  uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);
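  // The expectations below assume a little-endian environment: byte 1 of
  // src[0] (0xfedcba9876543210) is 0x32 and the halfword at offset 2 is
  // 0x7654.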

  START();
  __ Mov(x17, src_base);
  __ Mov(x18, dst_base);
  __ Ldr(w0, MemOperand(x17));
  __ Str(w0, MemOperand(x18));
  __ Ldr(w1, MemOperand(x17, 4));
  __ Str(w1, MemOperand(x18, 12));
  __ Ldr(x2, MemOperand(x17, 8));
  __ Str(x2, MemOperand(x18, 16));
  __ Ldrb(w3, MemOperand(x17, 1));
  __ Strb(w3, MemOperand(x18, 25));
  __ Ldrh(w4, MemOperand(x17, 2));
  __ Strh(w4, MemOperand(x18, 33));
  END();

  RUN();

  ASSERT_EQUAL_64(0x76543210, x0);
  ASSERT_EQUAL_64(0x76543210, dst[0]);
  ASSERT_EQUAL_64(0xfedcba98, x1);
  ASSERT_EQUAL_64(0xfedcba9800000000, dst[1]);
  ASSERT_EQUAL_64(0x0123456789abcdef, x2);
  ASSERT_EQUAL_64(0x0123456789abcdef, dst[2]);
  ASSERT_EQUAL_64(0x32, x3);
  ASSERT_EQUAL_64(0x3200, dst[3]);
  ASSERT_EQUAL_64(0x7654, x4);
  ASSERT_EQUAL_64(0x765400, dst[4]);
  ASSERT_EQUAL_64(src_base, x17);
  ASSERT_EQUAL_64(dst_base, x18);

  TEARDOWN();
}


TEST(ldr_str_wide) {
  SETUP();

  uint32_t src[8192];
  uint32_t dst[8192];
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
  uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);
  memset(src, 0xaa, 8192 * sizeof(src[0]));
  memset(dst, 0xaa, 8192 * sizeof(dst[0]));
  src[0] = 0;
  src[6144] = 6144;
  src[8191] = 8191;
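  // The offsets used below (up to 8191 * 4 = 32764 bytes) do not fit in the
  // scaled 12-bit unsigned immediate of ldr/str word accesses (maximum
  // 16380), nor in the 9-bit signed immediate of the pre- and post-index
  // forms, so the MacroAssembler presumably has to materialise them with a
  // scratch register.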

  START();
  __ Mov(x22, src_base);
  __ Mov(x23, dst_base);
  __ Mov(x24, src_base);
  __ Mov(x25, dst_base);
  __ Mov(x26, src_base);
  __ Mov(x27, dst_base);

  __ Ldr(w0, MemOperand(x22, 8191 * sizeof(src[0])));
  __ Str(w0, MemOperand(x23, 8191 * sizeof(dst[0])));
  __ Ldr(w1, MemOperand(x24, 4096 * sizeof(src[0]), PostIndex));
  __ Str(w1, MemOperand(x25, 4096 * sizeof(dst[0]), PostIndex));
  __ Ldr(w2, MemOperand(x26, 6144 * sizeof(src[0]), PreIndex));
  __ Str(w2, MemOperand(x27, 6144 * sizeof(dst[0]), PreIndex));
  END();

  RUN();

  ASSERT_EQUAL_32(8191, w0);
  ASSERT_EQUAL_32(8191, dst[8191]);
  ASSERT_EQUAL_64(src_base, x22);
  ASSERT_EQUAL_64(dst_base, x23);
  ASSERT_EQUAL_32(0, w1);
  ASSERT_EQUAL_32(0, dst[0]);
  ASSERT_EQUAL_64(src_base + 4096 * sizeof(src[0]), x24);
  ASSERT_EQUAL_64(dst_base + 4096 * sizeof(dst[0]), x25);
  ASSERT_EQUAL_32(6144, w2);
  ASSERT_EQUAL_32(6144, dst[6144]);
  ASSERT_EQUAL_64(src_base + 6144 * sizeof(src[0]), x26);
  ASSERT_EQUAL_64(dst_base + 6144 * sizeof(dst[0]), x27);

  TEARDOWN();
}


TEST(ldr_str_preindex) {
  SETUP();

  uint64_t src[2] = {0xfedcba9876543210, 0x0123456789abcdef};
  uint64_t dst[6] = {0, 0, 0, 0, 0, 0};
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
  uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);

  START();
  __ Mov(x17, src_base);
  __ Mov(x18, dst_base);
  __ Mov(x19, src_base);
  __ Mov(x20, dst_base);
  __ Mov(x21, src_base + 16);
  __ Mov(x22, dst_base + 40);
  __ Mov(x23, src_base);
  __ Mov(x24, dst_base);
  __ Mov(x25, src_base);
  __ Mov(x26, dst_base);
  __ Ldr(w0, MemOperand(x17, 4, PreIndex));
  __ Str(w0, MemOperand(x18, 12, PreIndex));
  __ Ldr(x1, MemOperand(x19, 8, PreIndex));
  __ Str(x1, MemOperand(x20, 16, PreIndex));
  __ Ldr(w2, MemOperand(x21, -4, PreIndex));
  __ Str(w2, MemOperand(x22, -4, PreIndex));
  __ Ldrb(w3, MemOperand(x23, 1, PreIndex));
  __ Strb(w3, MemOperand(x24, 25, PreIndex));
  __ Ldrh(w4, MemOperand(x25, 3, PreIndex));
  __ Strh(w4, MemOperand(x26, 41, PreIndex));
  END();

  RUN();

  ASSERT_EQUAL_64(0xfedcba98, x0);
  ASSERT_EQUAL_64(0xfedcba9800000000, dst[1]);
  ASSERT_EQUAL_64(0x0123456789abcdef, x1);
  ASSERT_EQUAL_64(0x0123456789abcdef, dst[2]);
  ASSERT_EQUAL_64(0x01234567, x2);
  ASSERT_EQUAL_64(0x0123456700000000, dst[4]);
  ASSERT_EQUAL_64(0x32, x3);
  ASSERT_EQUAL_64(0x3200, dst[3]);
  ASSERT_EQUAL_64(0x9876, x4);
  ASSERT_EQUAL_64(0x987600, dst[5]);
  ASSERT_EQUAL_64(src_base + 4, x17);
  ASSERT_EQUAL_64(dst_base + 12, x18);
  ASSERT_EQUAL_64(src_base + 8, x19);
  ASSERT_EQUAL_64(dst_base + 16, x20);
  ASSERT_EQUAL_64(src_base + 12, x21);
  ASSERT_EQUAL_64(dst_base + 36, x22);
  ASSERT_EQUAL_64(src_base + 1, x23);
  ASSERT_EQUAL_64(dst_base + 25, x24);
  ASSERT_EQUAL_64(src_base + 3, x25);
  ASSERT_EQUAL_64(dst_base + 41, x26);

  TEARDOWN();
}


TEST(ldr_str_postindex) {
  SETUP();

  uint64_t src[2] = {0xfedcba9876543210, 0x0123456789abcdef};
  uint64_t dst[6] = {0, 0, 0, 0, 0, 0};
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
  uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);

  START();
  __ Mov(x17, src_base + 4);
  __ Mov(x18, dst_base + 12);
  __ Mov(x19, src_base + 8);
  __ Mov(x20, dst_base + 16);
  __ Mov(x21, src_base + 8);
  __ Mov(x22, dst_base + 32);
  __ Mov(x23, src_base + 1);
  __ Mov(x24, dst_base + 25);
  __ Mov(x25, src_base + 3);
  __ Mov(x26, dst_base + 41);
  __ Ldr(w0, MemOperand(x17, 4, PostIndex));
  __ Str(w0, MemOperand(x18, 12, PostIndex));
  __ Ldr(x1, MemOperand(x19, 8, PostIndex));
  __ Str(x1, MemOperand(x20, 16, PostIndex));
  __ Ldr(x2, MemOperand(x21, -8, PostIndex));
  __ Str(x2, MemOperand(x22, -32, PostIndex));
  __ Ldrb(w3, MemOperand(x23, 1, PostIndex));
  __ Strb(w3, MemOperand(x24, 5, PostIndex));
  __ Ldrh(w4, MemOperand(x25, -3, PostIndex));
  __ Strh(w4, MemOperand(x26, -41, PostIndex));
  END();

  RUN();

  ASSERT_EQUAL_64(0xfedcba98, x0);
  ASSERT_EQUAL_64(0xfedcba9800000000, dst[1]);
  ASSERT_EQUAL_64(0x0123456789abcdef, x1);
  ASSERT_EQUAL_64(0x0123456789abcdef, dst[2]);
  ASSERT_EQUAL_64(0x0123456789abcdef, x2);
  ASSERT_EQUAL_64(0x0123456789abcdef, dst[4]);
  ASSERT_EQUAL_64(0x32, x3);
  ASSERT_EQUAL_64(0x3200, dst[3]);
  ASSERT_EQUAL_64(0x9876, x4);
  ASSERT_EQUAL_64(0x987600, dst[5]);
  ASSERT_EQUAL_64(src_base + 8, x17);
  ASSERT_EQUAL_64(dst_base + 24, x18);
  ASSERT_EQUAL_64(src_base + 16, x19);
  ASSERT_EQUAL_64(dst_base + 32, x20);
  ASSERT_EQUAL_64(src_base, x21);
  ASSERT_EQUAL_64(dst_base, x22);
  ASSERT_EQUAL_64(src_base + 2, x23);
  ASSERT_EQUAL_64(dst_base + 30, x24);
  ASSERT_EQUAL_64(src_base, x25);
  ASSERT_EQUAL_64(dst_base, x26);

  TEARDOWN();
}


TEST(ldr_str_largeindex) {
  SETUP();

  // This value won't fit in the immediate offset field of ldr/str instructions.
  int largeoffset = 0xabcdef;

  int64_t data[3] = {0x1122334455667788, 0, 0};
  uint64_t base_addr = reinterpret_cast<uintptr_t>(data);
  uint64_t drifted_addr = base_addr - largeoffset;

  // This test checks that we can use large immediate offsets with the
  // PreIndex and PostIndex addressing modes of the MacroAssembler Ldr/Str
  // instructions.
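  // The hardware pre- and post-index forms only encode a 9-bit signed
  // immediate, so presumably the MacroAssembler materialises 0xabcdef in a
  // scratch register and updates the base with explicit address arithmetic.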

  START();
  __ Mov(x19, drifted_addr);
  __ Ldr(x0, MemOperand(x19, largeoffset, PreIndex));

  __ Mov(x20, base_addr);
  __ Ldr(x1, MemOperand(x20, largeoffset, PostIndex));

  __ Mov(x21, drifted_addr);
  __ Str(x0, MemOperand(x21, largeoffset + 8, PreIndex));

  __ Mov(x22, base_addr + 16);
  __ Str(x0, MemOperand(x22, largeoffset, PostIndex));
  END();

  RUN();

  ASSERT_EQUAL_64(0x1122334455667788, data[0]);
  ASSERT_EQUAL_64(0x1122334455667788, data[1]);
  ASSERT_EQUAL_64(0x1122334455667788, data[2]);
  ASSERT_EQUAL_64(0x1122334455667788, x0);
  ASSERT_EQUAL_64(0x1122334455667788, x1);

  ASSERT_EQUAL_64(base_addr, x19);
  ASSERT_EQUAL_64(base_addr + largeoffset, x20);
  ASSERT_EQUAL_64(base_addr + 8, x21);
  ASSERT_EQUAL_64(base_addr + 16 + largeoffset, x22);

  TEARDOWN();
}


TEST(load_signed) {
  SETUP();

  uint32_t src[2] = {0x80008080, 0x7fff7f7f};
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
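  // Ldrsb/Ldrsh/Ldrsw sign-extend the loaded value to the width of the
  // destination register, so loading the byte 0x80 into a W register yields
  // 0xffffff80.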

  START();
  __ Mov(x24, src_base);
  __ Ldrsb(w0, MemOperand(x24));
  __ Ldrsb(w1, MemOperand(x24, 4));
  __ Ldrsh(w2, MemOperand(x24));
  __ Ldrsh(w3, MemOperand(x24, 4));
  __ Ldrsb(x4, MemOperand(x24));
  __ Ldrsb(x5, MemOperand(x24, 4));
  __ Ldrsh(x6, MemOperand(x24));
  __ Ldrsh(x7, MemOperand(x24, 4));
  __ Ldrsw(x8, MemOperand(x24));
  __ Ldrsw(x9, MemOperand(x24, 4));
  END();

  RUN();

  ASSERT_EQUAL_64(0xffffff80, x0);
  ASSERT_EQUAL_64(0x0000007f, x1);
  ASSERT_EQUAL_64(0xffff8080, x2);
  ASSERT_EQUAL_64(0x00007f7f, x3);
  ASSERT_EQUAL_64(0xffffffffffffff80, x4);
  ASSERT_EQUAL_64(0x000000000000007f, x5);
  ASSERT_EQUAL_64(0xffffffffffff8080, x6);
  ASSERT_EQUAL_64(0x0000000000007f7f, x7);
  ASSERT_EQUAL_64(0xffffffff80008080, x8);
  ASSERT_EQUAL_64(0x000000007fff7f7f, x9);

  TEARDOWN();
}


TEST(load_store_regoffset) {
  SETUP();

  uint32_t src[3] = {1, 2, 3};
  uint32_t dst[4] = {0, 0, 0, 0};
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
  uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);

  START();
  __ Mov(x16, src_base);
  __ Mov(x17, dst_base);
  __ Mov(x18, src_base + 3 * sizeof(src[0]));
  __ Mov(x19, dst_base + 3 * sizeof(dst[0]));
  __ Mov(x20, dst_base + 4 * sizeof(dst[0]));
  __ Mov(x24, 0);
  __ Mov(x25, 4);
  __ Mov(x26, -4);
  __ Mov(x27, 0xfffffffc);  // 32-bit -4.
  __ Mov(x28, 0xfffffffe);  // 32-bit -2.
  __ Mov(x29, 0xffffffff);  // 32-bit -1.
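  // Register-offset addressing can extend a W offset with UXTW or SXTW and
  // optionally scale it by the access size: SXTW turns the 32-bit -4 above
  // into a byte offset of -4, or -16 when scaled by 2 for word accesses.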

  __ Ldr(w0, MemOperand(x16, x24));
  __ Ldr(x1, MemOperand(x16, x25));
  __ Ldr(w2, MemOperand(x18, x26));
  __ Ldr(w3, MemOperand(x18, x27, SXTW));
  __ Ldr(w4, MemOperand(x18, x28, SXTW, 2));
  __ Str(w0, MemOperand(x17, x24));
  __ Str(x1, MemOperand(x17, x25));
  __ Str(w2, MemOperand(x20, x29, SXTW, 2));
  END();

  RUN();

  ASSERT_EQUAL_64(1, x0);
  ASSERT_EQUAL_64(0x0000000300000002, x1);
  ASSERT_EQUAL_64(3, x2);
  ASSERT_EQUAL_64(3, x3);
  ASSERT_EQUAL_64(2, x4);
  ASSERT_EQUAL_32(1, dst[0]);
  ASSERT_EQUAL_32(2, dst[1]);
  ASSERT_EQUAL_32(3, dst[2]);
  ASSERT_EQUAL_32(3, dst[3]);

  TEARDOWN();
}


TEST(load_store_float) {
  SETUP_WITH_FEATURES(CPUFeatures::kFP);

  float src[3] = {1.0, 2.0, 3.0};
  float dst[3] = {0.0, 0.0, 0.0};
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
  uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);

  START();
  __ Mov(x17, src_base);
  __ Mov(x18, dst_base);
  __ Mov(x19, src_base);
  __ Mov(x20, dst_base);
  __ Mov(x21, src_base);
  __ Mov(x22, dst_base);
  __ Ldr(s0, MemOperand(x17, sizeof(src[0])));
  __ Str(s0, MemOperand(x18, sizeof(dst[0]), PostIndex));
  __ Ldr(s1, MemOperand(x19, sizeof(src[0]), PostIndex));
  __ Str(s1, MemOperand(x20, 2 * sizeof(dst[0]), PreIndex));
  __ Ldr(s2, MemOperand(x21, 2 * sizeof(src[0]), PreIndex));
  __ Str(s2, MemOperand(x22, sizeof(dst[0])));
  END();

  RUN();

  ASSERT_EQUAL_FP32(2.0, s0);
  ASSERT_EQUAL_FP32(2.0, dst[0]);
  ASSERT_EQUAL_FP32(1.0, s1);
  ASSERT_EQUAL_FP32(1.0, dst[2]);
  ASSERT_EQUAL_FP32(3.0, s2);
  ASSERT_EQUAL_FP32(3.0, dst[1]);
  ASSERT_EQUAL_64(src_base, x17);
  ASSERT_EQUAL_64(dst_base + sizeof(dst[0]), x18);
  ASSERT_EQUAL_64(src_base + sizeof(src[0]), x19);
  ASSERT_EQUAL_64(dst_base + 2 * sizeof(dst[0]), x20);
  ASSERT_EQUAL_64(src_base + 2 * sizeof(src[0]), x21);
  ASSERT_EQUAL_64(dst_base, x22);

  TEARDOWN();
}


TEST(load_store_double) {
  SETUP_WITH_FEATURES(CPUFeatures::kFP);

  double src[3] = {1.0, 2.0, 3.0};
  double dst[3] = {0.0, 0.0, 0.0};
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
  uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);

  START();
  __ Mov(x17, src_base);
  __ Mov(x18, dst_base);
  __ Mov(x19, src_base);
  __ Mov(x20, dst_base);
  __ Mov(x21, src_base);
  __ Mov(x22, dst_base);
  __ Ldr(d0, MemOperand(x17, sizeof(src[0])));
  __ Str(d0, MemOperand(x18, sizeof(dst[0]), PostIndex));
  __ Ldr(d1, MemOperand(x19, sizeof(src[0]), PostIndex));
  __ Str(d1, MemOperand(x20, 2 * sizeof(dst[0]), PreIndex));
  __ Ldr(d2, MemOperand(x21, 2 * sizeof(src[0]), PreIndex));
  __ Str(d2, MemOperand(x22, sizeof(dst[0])));
  END();

  RUN();

  ASSERT_EQUAL_FP64(2.0, d0);
  ASSERT_EQUAL_FP64(2.0, dst[0]);
  ASSERT_EQUAL_FP64(1.0, d1);
  ASSERT_EQUAL_FP64(1.0, dst[2]);
  ASSERT_EQUAL_FP64(3.0, d2);
  ASSERT_EQUAL_FP64(3.0, dst[1]);
  ASSERT_EQUAL_64(src_base, x17);
  ASSERT_EQUAL_64(dst_base + sizeof(dst[0]), x18);
  ASSERT_EQUAL_64(src_base + sizeof(src[0]), x19);
  ASSERT_EQUAL_64(dst_base + 2 * sizeof(dst[0]), x20);
  ASSERT_EQUAL_64(src_base + 2 * sizeof(src[0]), x21);
  ASSERT_EQUAL_64(dst_base, x22);

  TEARDOWN();
}


TEST(load_store_b) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint8_t src[3] = {0x12, 0x23, 0x34};
  uint8_t dst[3] = {0, 0, 0};
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
  uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);

  START();
  __ Mov(x17, src_base);
  __ Mov(x18, dst_base);
  __ Mov(x19, src_base);
  __ Mov(x20, dst_base);
  __ Mov(x21, src_base);
  __ Mov(x22, dst_base);
  __ Ldr(b0, MemOperand(x17, sizeof(src[0])));
  __ Str(b0, MemOperand(x18, sizeof(dst[0]), PostIndex));
  __ Ldr(b1, MemOperand(x19, sizeof(src[0]), PostIndex));
  __ Str(b1, MemOperand(x20, 2 * sizeof(dst[0]), PreIndex));
  __ Ldr(b2, MemOperand(x21, 2 * sizeof(src[0]), PreIndex));
  __ Str(b2, MemOperand(x22, sizeof(dst[0])));
  END();

  RUN();

  ASSERT_EQUAL_128(0, 0x23, q0);
  ASSERT_EQUAL_64(0x23, dst[0]);
  ASSERT_EQUAL_128(0, 0x12, q1);
  ASSERT_EQUAL_64(0x12, dst[2]);
  ASSERT_EQUAL_128(0, 0x34, q2);
  ASSERT_EQUAL_64(0x34, dst[1]);
  ASSERT_EQUAL_64(src_base, x17);
  ASSERT_EQUAL_64(dst_base + sizeof(dst[0]), x18);
  ASSERT_EQUAL_64(src_base + sizeof(src[0]), x19);
  ASSERT_EQUAL_64(dst_base + 2 * sizeof(dst[0]), x20);
  ASSERT_EQUAL_64(src_base + 2 * sizeof(src[0]), x21);
  ASSERT_EQUAL_64(dst_base, x22);

  TEARDOWN();
}


TEST(load_store_h) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint16_t src[3] = {0x1234, 0x2345, 0x3456};
  uint16_t dst[3] = {0, 0, 0};
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
  uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);

  START();
  __ Mov(x17, src_base);
  __ Mov(x18, dst_base);
  __ Mov(x19, src_base);
  __ Mov(x20, dst_base);
  __ Mov(x21, src_base);
  __ Mov(x22, dst_base);
  __ Ldr(h0, MemOperand(x17, sizeof(src[0])));
  __ Str(h0, MemOperand(x18, sizeof(dst[0]), PostIndex));
  __ Ldr(h1, MemOperand(x19, sizeof(src[0]), PostIndex));
  __ Str(h1, MemOperand(x20, 2 * sizeof(dst[0]), PreIndex));
  __ Ldr(h2, MemOperand(x21, 2 * sizeof(src[0]), PreIndex));
  __ Str(h2, MemOperand(x22, sizeof(dst[0])));
  END();

  RUN();

  ASSERT_EQUAL_128(0, 0x2345, q0);
  ASSERT_EQUAL_64(0x2345, dst[0]);
  ASSERT_EQUAL_128(0, 0x1234, q1);
  ASSERT_EQUAL_64(0x1234, dst[2]);
  ASSERT_EQUAL_128(0, 0x3456, q2);
  ASSERT_EQUAL_64(0x3456, dst[1]);
  ASSERT_EQUAL_64(src_base, x17);
  ASSERT_EQUAL_64(dst_base + sizeof(dst[0]), x18);
  ASSERT_EQUAL_64(src_base + sizeof(src[0]), x19);
  ASSERT_EQUAL_64(dst_base + 2 * sizeof(dst[0]), x20);
  ASSERT_EQUAL_64(src_base + 2 * sizeof(src[0]), x21);
  ASSERT_EQUAL_64(dst_base, x22);

  TEARDOWN();
}


TEST(load_store_q) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint8_t src[48] = {0x10, 0x32, 0x54, 0x76, 0x98, 0xba, 0xdc, 0xfe, 0x01, 0x23,
                     0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, 0x21, 0x43, 0x65, 0x87,
                     0xa9, 0xcb, 0xed, 0x0f, 0x12, 0x34, 0x56, 0x78, 0x9a, 0xbc,
                     0xde, 0xf0, 0x24, 0x46, 0x68, 0x8a, 0xac, 0xce, 0xe0, 0x02,
                     0x42, 0x64, 0x86, 0xa8, 0xca, 0xec, 0x0e, 0x20};

  uint64_t dst[6] = {0, 0, 0, 0, 0, 0};
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
  uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);

  START();
  __ Mov(x17, src_base);
  __ Mov(x18, dst_base);
  __ Mov(x19, src_base);
  __ Mov(x20, dst_base);
  __ Mov(x21, src_base);
  __ Mov(x22, dst_base);
  __ Ldr(q0, MemOperand(x17, 16));
  __ Str(q0, MemOperand(x18, 16, PostIndex));
  __ Ldr(q1, MemOperand(x19, 16, PostIndex));
  __ Str(q1, MemOperand(x20, 32, PreIndex));
  __ Ldr(q2, MemOperand(x21, 32, PreIndex));
  __ Str(q2, MemOperand(x22, 16));
  END();

  RUN();

  ASSERT_EQUAL_128(0xf0debc9a78563412, 0x0fedcba987654321, q0);
  ASSERT_EQUAL_64(0x0fedcba987654321, dst[0]);
  ASSERT_EQUAL_64(0xf0debc9a78563412, dst[1]);
  ASSERT_EQUAL_128(0xefcdab8967452301, 0xfedcba9876543210, q1);
  ASSERT_EQUAL_64(0xfedcba9876543210, dst[4]);
  ASSERT_EQUAL_64(0xefcdab8967452301, dst[5]);
  ASSERT_EQUAL_128(0x200eeccaa8866442, 0x02e0ceac8a684624, q2);
  ASSERT_EQUAL_64(0x02e0ceac8a684624, dst[2]);
  ASSERT_EQUAL_64(0x200eeccaa8866442, dst[3]);
  ASSERT_EQUAL_64(src_base, x17);
  ASSERT_EQUAL_64(dst_base + 16, x18);
  ASSERT_EQUAL_64(src_base + 16, x19);
  ASSERT_EQUAL_64(dst_base + 32, x20);
  ASSERT_EQUAL_64(src_base + 32, x21);
  ASSERT_EQUAL_64(dst_base, x22);

  TEARDOWN();
}


TEST(load_store_v_regoffset) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint8_t src[64];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uint8_t dst[64];
  memset(dst, 0, sizeof(dst));

  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
  uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);

  START();
  __ Mov(x17, src_base + 16);
  __ Mov(x18, 1);
  __ Mov(w19, -1);
  __ Mov(x20, dst_base - 1);

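  // Register-offset addressing: the index register may be extended (UXTW,
  // SXTW) and/or scaled (LSL) by log2 of the access size.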
  __ Ldr(b0, MemOperand(x17, x18));
  __ Ldr(b1, MemOperand(x17, x19, SXTW));

  __ Ldr(h2, MemOperand(x17, x18));
  __ Ldr(h3, MemOperand(x17, x18, UXTW, 1));
  __ Ldr(h4, MemOperand(x17, x19, SXTW, 1));
  __ Ldr(h5, MemOperand(x17, x18, LSL, 1));

  __ Ldr(s16, MemOperand(x17, x18));
  __ Ldr(s17, MemOperand(x17, x18, UXTW, 2));
  __ Ldr(s18, MemOperand(x17, x19, SXTW, 2));
  __ Ldr(s19, MemOperand(x17, x18, LSL, 2));

  __ Ldr(d20, MemOperand(x17, x18));
  __ Ldr(d21, MemOperand(x17, x18, UXTW, 3));
  __ Ldr(d22, MemOperand(x17, x19, SXTW, 3));
  __ Ldr(d23, MemOperand(x17, x18, LSL, 3));

  __ Ldr(q24, MemOperand(x17, x18));
  __ Ldr(q25, MemOperand(x17, x18, UXTW, 4));
  __ Ldr(q26, MemOperand(x17, x19, SXTW, 4));
  __ Ldr(q27, MemOperand(x17, x18, LSL, 4));

  // Store [bhsdq]27 to adjacent memory locations, then load again to check.
  __ Str(b27, MemOperand(x20, x18));
  __ Str(h27, MemOperand(x20, x18, UXTW, 1));
  __ Add(x20, x20, 8);
  __ Str(s27, MemOperand(x20, x19, SXTW, 2));
  __ Sub(x20, x20, 8);
  __ Str(d27, MemOperand(x20, x18, LSL, 3));
  __ Add(x20, x20, 32);
  __ Str(q27, MemOperand(x20, x19, SXTW, 4));

  __ Sub(x20, x20, 32);
  __ Ldr(q6, MemOperand(x20, x18));
  __ Ldr(q7, MemOperand(x20, x18, LSL, 4));

  END();

  RUN();

  ASSERT_EQUAL_128(0, 0x11, q0);
  ASSERT_EQUAL_128(0, 0x0f, q1);
  ASSERT_EQUAL_128(0, 0x1211, q2);
  ASSERT_EQUAL_128(0, 0x1312, q3);
  ASSERT_EQUAL_128(0, 0x0f0e, q4);
  ASSERT_EQUAL_128(0, 0x1312, q5);
  ASSERT_EQUAL_128(0, 0x14131211, q16);
  ASSERT_EQUAL_128(0, 0x17161514, q17);
  ASSERT_EQUAL_128(0, 0x0f0e0d0c, q18);
  ASSERT_EQUAL_128(0, 0x17161514, q19);
  ASSERT_EQUAL_128(0, 0x1817161514131211, q20);
  ASSERT_EQUAL_128(0, 0x1f1e1d1c1b1a1918, q21);
  ASSERT_EQUAL_128(0, 0x0f0e0d0c0b0a0908, q22);
  ASSERT_EQUAL_128(0, 0x1f1e1d1c1b1a1918, q23);
  ASSERT_EQUAL_128(0x201f1e1d1c1b1a19, 0x1817161514131211, q24);
  ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726252423222120, q25);
  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q26);
  ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726252423222120, q27);
  ASSERT_EQUAL_128(0x2027262524232221, 0x2023222120212020, q6);
  ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726252423222120, q7);

  TEARDOWN();
}


TEST(neon_ld1_d) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint8_t src[32 + 5];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x17, src_base);
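  // The base is advanced by one byte between loads to vary the alignment of
  // the accesses; src has five bytes of slack to allow for this.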
  __ Ldr(q2, MemOperand(x17));  // Initialise top 64-bits of Q register.
  __ Ld1(v2.V8B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld1(v3.V8B(), v4.V8B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld1(v5.V4H(), v6.V4H(), v7.V4H(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld1(v16.V2S(), v17.V2S(), v18.V2S(), v19.V2S(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld1(v30.V2S(), v31.V2S(), v0.V2S(), v1.V2S(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld1(v20.V1D(), v21.V1D(), v22.V1D(), v23.V1D(), MemOperand(x17));
  END();

  RUN();

  ASSERT_EQUAL_128(0, 0x0706050403020100, q2);
  ASSERT_EQUAL_128(0, 0x0807060504030201, q3);
  ASSERT_EQUAL_128(0, 0x100f0e0d0c0b0a09, q4);
  ASSERT_EQUAL_128(0, 0x0908070605040302, q5);
  ASSERT_EQUAL_128(0, 0x11100f0e0d0c0b0a, q6);
  ASSERT_EQUAL_128(0, 0x1918171615141312, q7);
  ASSERT_EQUAL_128(0, 0x0a09080706050403, q16);
  ASSERT_EQUAL_128(0, 0x1211100f0e0d0c0b, q17);
  ASSERT_EQUAL_128(0, 0x1a19181716151413, q18);
  ASSERT_EQUAL_128(0, 0x2221201f1e1d1c1b, q19);
  ASSERT_EQUAL_128(0, 0x0b0a090807060504, q30);
  ASSERT_EQUAL_128(0, 0x131211100f0e0d0c, q31);
  ASSERT_EQUAL_128(0, 0x1b1a191817161514, q0);
  ASSERT_EQUAL_128(0, 0x232221201f1e1d1c, q1);
  ASSERT_EQUAL_128(0, 0x0c0b0a0908070605, q20);
  ASSERT_EQUAL_128(0, 0x14131211100f0e0d, q21);
  ASSERT_EQUAL_128(0, 0x1c1b1a1918171615, q22);
  ASSERT_EQUAL_128(0, 0x24232221201f1e1d, q23);

  TEARDOWN();
}


TEST(neon_ld1_d_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint8_t src[32 + 5];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x17, src_base);
  __ Mov(x18, src_base + 1);
  __ Mov(x19, src_base + 2);
  __ Mov(x20, src_base + 3);
  __ Mov(x21, src_base + 4);
  __ Mov(x22, src_base + 5);
  __ Mov(x23, 1);
  __ Ldr(q2, MemOperand(x17));  // Initialise top 64-bits of Q register.
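  // For post-indexed Ld1, an immediate offset must equal the total transfer
  // size, whereas a register offset (x23 here) may hold any byte count.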
  __ Ld1(v2.V8B(), MemOperand(x17, x23, PostIndex));
  __ Ld1(v3.V8B(), v4.V8B(), MemOperand(x18, 16, PostIndex));
  __ Ld1(v5.V4H(), v6.V4H(), v7.V4H(), MemOperand(x19, 24, PostIndex));
  __ Ld1(v16.V2S(),
         v17.V2S(),
         v18.V2S(),
         v19.V2S(),
         MemOperand(x20, 32, PostIndex));
  __ Ld1(v30.V2S(),
         v31.V2S(),
         v0.V2S(),
         v1.V2S(),
         MemOperand(x21, 32, PostIndex));
  __ Ld1(v20.V1D(),
         v21.V1D(),
         v22.V1D(),
         v23.V1D(),
         MemOperand(x22, 32, PostIndex));
  END();

  RUN();

  ASSERT_EQUAL_128(0, 0x0706050403020100, q2);
  ASSERT_EQUAL_128(0, 0x0807060504030201, q3);
  ASSERT_EQUAL_128(0, 0x100f0e0d0c0b0a09, q4);
  ASSERT_EQUAL_128(0, 0x0908070605040302, q5);
  ASSERT_EQUAL_128(0, 0x11100f0e0d0c0b0a, q6);
  ASSERT_EQUAL_128(0, 0x1918171615141312, q7);
  ASSERT_EQUAL_128(0, 0x0a09080706050403, q16);
  ASSERT_EQUAL_128(0, 0x1211100f0e0d0c0b, q17);
  ASSERT_EQUAL_128(0, 0x1a19181716151413, q18);
  ASSERT_EQUAL_128(0, 0x2221201f1e1d1c1b, q19);
  ASSERT_EQUAL_128(0, 0x0b0a090807060504, q30);
  ASSERT_EQUAL_128(0, 0x131211100f0e0d0c, q31);
  ASSERT_EQUAL_128(0, 0x1b1a191817161514, q0);
  ASSERT_EQUAL_128(0, 0x232221201f1e1d1c, q1);
  ASSERT_EQUAL_128(0, 0x0c0b0a0908070605, q20);
  ASSERT_EQUAL_128(0, 0x14131211100f0e0d, q21);
  ASSERT_EQUAL_128(0, 0x1c1b1a1918171615, q22);
  ASSERT_EQUAL_128(0, 0x24232221201f1e1d, q23);
  ASSERT_EQUAL_64(src_base + 1, x17);
  ASSERT_EQUAL_64(src_base + 1 + 16, x18);
  ASSERT_EQUAL_64(src_base + 2 + 24, x19);
  ASSERT_EQUAL_64(src_base + 3 + 32, x20);
  ASSERT_EQUAL_64(src_base + 4 + 32, x21);
  ASSERT_EQUAL_64(src_base + 5 + 32, x22);

  TEARDOWN();
}


TEST(neon_ld1_q) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint8_t src[64 + 4];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x17, src_base);
  __ Ld1(v2.V16B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld1(v3.V16B(), v4.V16B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld1(v5.V8H(), v6.V8H(), v7.V8H(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld1(v16.V4S(), v17.V4S(), v18.V4S(), v19.V4S(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld1(v30.V2D(), v31.V2D(), v0.V2D(), v1.V2D(), MemOperand(x17));
  END();

  RUN();

  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q2);
  ASSERT_EQUAL_128(0x100f0e0d0c0b0a09, 0x0807060504030201, q3);
  ASSERT_EQUAL_128(0x201f1e1d1c1b1a19, 0x1817161514131211, q4);
  ASSERT_EQUAL_128(0x11100f0e0d0c0b0a, 0x0908070605040302, q5);
  ASSERT_EQUAL_128(0x21201f1e1d1c1b1a, 0x1918171615141312, q6);
  ASSERT_EQUAL_128(0x31302f2e2d2c2b2a, 0x2928272625242322, q7);
  ASSERT_EQUAL_128(0x1211100f0e0d0c0b, 0x0a09080706050403, q16);
  ASSERT_EQUAL_128(0x2221201f1e1d1c1b, 0x1a19181716151413, q17);
  ASSERT_EQUAL_128(0x3231302f2e2d2c2b, 0x2a29282726252423, q18);
  ASSERT_EQUAL_128(0x4241403f3e3d3c3b, 0x3a39383736353433, q19);
  ASSERT_EQUAL_128(0x131211100f0e0d0c, 0x0b0a090807060504, q30);
  ASSERT_EQUAL_128(0x232221201f1e1d1c, 0x1b1a191817161514, q31);
  ASSERT_EQUAL_128(0x333231302f2e2d2c, 0x2b2a292827262524, q0);
  ASSERT_EQUAL_128(0x434241403f3e3d3c, 0x3b3a393837363534, q1);

  TEARDOWN();
}


TEST(neon_ld1_q_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint8_t src[64 + 4];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x17, src_base);
  __ Mov(x18, src_base + 1);
  __ Mov(x19, src_base + 2);
  __ Mov(x20, src_base + 3);
  __ Mov(x21, src_base + 4);
  __ Mov(x22, 1);
  __ Ld1(v2.V16B(), MemOperand(x17, x22, PostIndex));
  __ Ld1(v3.V16B(), v4.V16B(), MemOperand(x18, 32, PostIndex));
  __ Ld1(v5.V8H(), v6.V8H(), v7.V8H(), MemOperand(x19, 48, PostIndex));
  __ Ld1(v16.V4S(),
         v17.V4S(),
         v18.V4S(),
         v19.V4S(),
         MemOperand(x20, 64, PostIndex));
  __ Ld1(v30.V2D(),
         v31.V2D(),
         v0.V2D(),
         v1.V2D(),
         MemOperand(x21, 64, PostIndex));
  END();

  RUN();

  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q2);
  ASSERT_EQUAL_128(0x100f0e0d0c0b0a09, 0x0807060504030201, q3);
  ASSERT_EQUAL_128(0x201f1e1d1c1b1a19, 0x1817161514131211, q4);
  ASSERT_EQUAL_128(0x11100f0e0d0c0b0a, 0x0908070605040302, q5);
  ASSERT_EQUAL_128(0x21201f1e1d1c1b1a, 0x1918171615141312, q6);
  ASSERT_EQUAL_128(0x31302f2e2d2c2b2a, 0x2928272625242322, q7);
  ASSERT_EQUAL_128(0x1211100f0e0d0c0b, 0x0a09080706050403, q16);
  ASSERT_EQUAL_128(0x2221201f1e1d1c1b, 0x1a19181716151413, q17);
  ASSERT_EQUAL_128(0x3231302f2e2d2c2b, 0x2a29282726252423, q18);
  ASSERT_EQUAL_128(0x4241403f3e3d3c3b, 0x3a39383736353433, q19);
  ASSERT_EQUAL_128(0x131211100f0e0d0c, 0x0b0a090807060504, q30);
  ASSERT_EQUAL_128(0x232221201f1e1d1c, 0x1b1a191817161514, q31);
  ASSERT_EQUAL_128(0x333231302f2e2d2c, 0x2b2a292827262524, q0);
  ASSERT_EQUAL_128(0x434241403f3e3d3c, 0x3b3a393837363534, q1);
  ASSERT_EQUAL_64(src_base + 1, x17);
  ASSERT_EQUAL_64(src_base + 1 + 32, x18);
  ASSERT_EQUAL_64(src_base + 2 + 48, x19);
  ASSERT_EQUAL_64(src_base + 3 + 64, x20);
  ASSERT_EQUAL_64(src_base + 4 + 64, x21);

  TEARDOWN();
}


TEST(neon_ld1_lane) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint8_t src[64];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();

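  // Ld1 (single structure) writes one lane and leaves the remaining lanes of
  // the destination unchanged.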
  // Test loading whole register by element.
  __ Mov(x17, src_base);
  for (int i = 15; i >= 0; i--) {
    __ Ld1(v0.B(), i, MemOperand(x17));
    __ Add(x17, x17, 1);
  }

  __ Mov(x17, src_base);
  for (int i = 7; i >= 0; i--) {
    __ Ld1(v1.H(), i, MemOperand(x17));
    __ Add(x17, x17, 1);
  }

  __ Mov(x17, src_base);
  for (int i = 3; i >= 0; i--) {
    __ Ld1(v2.S(), i, MemOperand(x17));
    __ Add(x17, x17, 1);
  }

  __ Mov(x17, src_base);
  for (int i = 1; i >= 0; i--) {
    __ Ld1(v3.D(), i, MemOperand(x17));
    __ Add(x17, x17, 1);
  }

  // Test loading a single element into an initialised register.
  __ Mov(x17, src_base);
  __ Ldr(q4, MemOperand(x17));
  __ Ld1(v4.B(), 4, MemOperand(x17));
  __ Ldr(q5, MemOperand(x17));
  __ Ld1(v5.H(), 3, MemOperand(x17));
  __ Ldr(q6, MemOperand(x17));
  __ Ld1(v6.S(), 2, MemOperand(x17));
  __ Ldr(q7, MemOperand(x17));
  __ Ld1(v7.D(), 1, MemOperand(x17));

  END();

  RUN();

  ASSERT_EQUAL_128(0x0001020304050607, 0x08090a0b0c0d0e0f, q0);
  ASSERT_EQUAL_128(0x0100020103020403, 0x0504060507060807, q1);
  ASSERT_EQUAL_128(0x0302010004030201, 0x0504030206050403, q2);
  ASSERT_EQUAL_128(0x0706050403020100, 0x0807060504030201, q3);
  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050003020100, q4);
  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0100050403020100, q5);
  ASSERT_EQUAL_128(0x0f0e0d0c03020100, 0x0706050403020100, q6);
  ASSERT_EQUAL_128(0x0706050403020100, 0x0706050403020100, q7);

  TEARDOWN();
}

TEST(neon_ld2_d) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint8_t src[64 + 4];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x17, src_base);
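  // Ld2 de-interleaves element pairs: even-indexed elements go to the first
  // register, odd-indexed elements to the second.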
  __ Ld2(v2.V8B(), v3.V8B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld2(v4.V8B(), v5.V8B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld2(v6.V4H(), v7.V4H(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld2(v31.V2S(), v0.V2S(), MemOperand(x17));
  END();

  RUN();

  ASSERT_EQUAL_128(0, 0x0e0c0a0806040200, q2);
  ASSERT_EQUAL_128(0, 0x0f0d0b0907050301, q3);
  ASSERT_EQUAL_128(0, 0x0f0d0b0907050301, q4);
  ASSERT_EQUAL_128(0, 0x100e0c0a08060402, q5);
  ASSERT_EQUAL_128(0, 0x0f0e0b0a07060302, q6);
  ASSERT_EQUAL_128(0, 0x11100d0c09080504, q7);
  ASSERT_EQUAL_128(0, 0x0e0d0c0b06050403, q31);
  ASSERT_EQUAL_128(0, 0x1211100f0a090807, q0);

  TEARDOWN();
}

TEST(neon_ld2_d_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint8_t src[32 + 4];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x17, src_base);
  __ Mov(x18, src_base + 1);
  __ Mov(x19, src_base + 2);
  __ Mov(x20, src_base + 3);
  __ Mov(x21, src_base + 4);
  __ Mov(x22, 1);
  __ Ld2(v2.V8B(), v3.V8B(), MemOperand(x17, x22, PostIndex));
  __ Ld2(v4.V8B(), v5.V8B(), MemOperand(x18, 16, PostIndex));
  __ Ld2(v5.V4H(), v6.V4H(), MemOperand(x19, 16, PostIndex));
  __ Ld2(v16.V2S(), v17.V2S(), MemOperand(x20, 16, PostIndex));
  __ Ld2(v31.V2S(), v0.V2S(), MemOperand(x21, 16, PostIndex));
  END();

  RUN();

  ASSERT_EQUAL_128(0, 0x0e0c0a0806040200, q2);
  ASSERT_EQUAL_128(0, 0x0f0d0b0907050301, q3);
  ASSERT_EQUAL_128(0, 0x0f0d0b0907050301, q4);
  ASSERT_EQUAL_128(0, 0x0f0e0b0a07060302, q5);
  ASSERT_EQUAL_128(0, 0x11100d0c09080504, q6);
  ASSERT_EQUAL_128(0, 0x0e0d0c0b06050403, q16);
  ASSERT_EQUAL_128(0, 0x1211100f0a090807, q17);
  ASSERT_EQUAL_128(0, 0x0f0e0d0c07060504, q31);
  ASSERT_EQUAL_128(0, 0x131211100b0a0908, q0);

  ASSERT_EQUAL_64(src_base + 1, x17);
  ASSERT_EQUAL_64(src_base + 1 + 16, x18);
  ASSERT_EQUAL_64(src_base + 2 + 16, x19);
  ASSERT_EQUAL_64(src_base + 3 + 16, x20);
  ASSERT_EQUAL_64(src_base + 4 + 16, x21);

  TEARDOWN();
}


TEST(neon_ld2_q) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint8_t src[64 + 4];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x17, src_base);
  __ Ld2(v2.V16B(), v3.V16B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld2(v4.V16B(), v5.V16B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld2(v6.V8H(), v7.V8H(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld2(v16.V4S(), v17.V4S(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld2(v31.V2D(), v0.V2D(), MemOperand(x17));
  END();

  RUN();

  ASSERT_EQUAL_128(0x1e1c1a1816141210, 0x0e0c0a0806040200, q2);
  ASSERT_EQUAL_128(0x1f1d1b1917151311, 0x0f0d0b0907050301, q3);
  ASSERT_EQUAL_128(0x1f1d1b1917151311, 0x0f0d0b0907050301, q4);
  ASSERT_EQUAL_128(0x201e1c1a18161412, 0x100e0c0a08060402, q5);
  ASSERT_EQUAL_128(0x1f1e1b1a17161312, 0x0f0e0b0a07060302, q6);
  ASSERT_EQUAL_128(0x21201d1c19181514, 0x11100d0c09080504, q7);
  ASSERT_EQUAL_128(0x1e1d1c1b16151413, 0x0e0d0c0b06050403, q16);
  ASSERT_EQUAL_128(0x2221201f1a191817, 0x1211100f0a090807, q17);
  ASSERT_EQUAL_128(0x1b1a191817161514, 0x0b0a090807060504, q31);
  ASSERT_EQUAL_128(0x232221201f1e1d1c, 0x131211100f0e0d0c, q0);

  TEARDOWN();
}


TEST(neon_ld2_q_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint8_t src[64 + 4];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x17, src_base);
  __ Mov(x18, src_base + 1);
  __ Mov(x19, src_base + 2);
  __ Mov(x20, src_base + 3);
  __ Mov(x21, src_base + 4);
  __ Mov(x22, 1);
  __ Ld2(v2.V16B(), v3.V16B(), MemOperand(x17, x22, PostIndex));
  __ Ld2(v4.V16B(), v5.V16B(), MemOperand(x18, 32, PostIndex));
  __ Ld2(v6.V8H(), v7.V8H(), MemOperand(x19, 32, PostIndex));
  __ Ld2(v16.V4S(), v17.V4S(), MemOperand(x20, 32, PostIndex));
  __ Ld2(v31.V2D(), v0.V2D(), MemOperand(x21, 32, PostIndex));
  END();

  RUN();

  ASSERT_EQUAL_128(0x1e1c1a1816141210, 0x0e0c0a0806040200, q2);
  ASSERT_EQUAL_128(0x1f1d1b1917151311, 0x0f0d0b0907050301, q3);
  ASSERT_EQUAL_128(0x1f1d1b1917151311, 0x0f0d0b0907050301, q4);
  ASSERT_EQUAL_128(0x201e1c1a18161412, 0x100e0c0a08060402, q5);
  ASSERT_EQUAL_128(0x1f1e1b1a17161312, 0x0f0e0b0a07060302, q6);
  ASSERT_EQUAL_128(0x21201d1c19181514, 0x11100d0c09080504, q7);
  ASSERT_EQUAL_128(0x1e1d1c1b16151413, 0x0e0d0c0b06050403, q16);
  ASSERT_EQUAL_128(0x2221201f1a191817, 0x1211100f0a090807, q17);
  ASSERT_EQUAL_128(0x1b1a191817161514, 0x0b0a090807060504, q31);
  ASSERT_EQUAL_128(0x232221201f1e1d1c, 0x131211100f0e0d0c, q0);

  ASSERT_EQUAL_64(src_base + 1, x17);
  ASSERT_EQUAL_64(src_base + 1 + 32, x18);
  ASSERT_EQUAL_64(src_base + 2 + 32, x19);
  ASSERT_EQUAL_64(src_base + 3 + 32, x20);
  ASSERT_EQUAL_64(src_base + 4 + 32, x21);

  TEARDOWN();
}


TEST(neon_ld2_lane) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint8_t src[64];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();

  // Test loading whole register by element.
  __ Mov(x17, src_base);
  for (int i = 15; i >= 0; i--) {
    __ Ld2(v0.B(), v1.B(), i, MemOperand(x17));
    __ Add(x17, x17, 1);
  }

  __ Mov(x17, src_base);
  for (int i = 7; i >= 0; i--) {
    __ Ld2(v2.H(), v3.H(), i, MemOperand(x17));
    __ Add(x17, x17, 1);
  }

  __ Mov(x17, src_base);
  for (int i = 3; i >= 0; i--) {
    __ Ld2(v4.S(), v5.S(), i, MemOperand(x17));
    __ Add(x17, x17, 1);
  }

  __ Mov(x17, src_base);
  for (int i = 1; i >= 0; i--) {
    __ Ld2(v6.D(), v7.D(), i, MemOperand(x17));
    __ Add(x17, x17, 1);
  }

  // Test loading a single element into an initialised register.
  __ Mov(x17, src_base);
  __ Mov(x4, x17);
  __ Ldr(q8, MemOperand(x4, 16, PostIndex));
  __ Ldr(q9, MemOperand(x4));
  __ Ld2(v8.B(), v9.B(), 4, MemOperand(x17));
  __ Mov(x5, x17);
  __ Ldr(q10, MemOperand(x5, 16, PostIndex));
  __ Ldr(q11, MemOperand(x5));
  __ Ld2(v10.H(), v11.H(), 3, MemOperand(x17));
  __ Mov(x6, x17);
  __ Ldr(q12, MemOperand(x6, 16, PostIndex));
  __ Ldr(q13, MemOperand(x6));
  __ Ld2(v12.S(), v13.S(), 2, MemOperand(x17));
  __ Mov(x7, x17);
  __ Ldr(q14, MemOperand(x7, 16, PostIndex));
  __ Ldr(q15, MemOperand(x7));
  __ Ld2(v14.D(), v15.D(), 1, MemOperand(x17));

  END();

  RUN();

  ASSERT_EQUAL_128(0x0001020304050607, 0x08090a0b0c0d0e0f, q0);
  ASSERT_EQUAL_128(0x0102030405060708, 0x090a0b0c0d0e0f10, q1);
  ASSERT_EQUAL_128(0x0100020103020403, 0x0504060507060807, q2);
  ASSERT_EQUAL_128(0x0302040305040605, 0x0706080709080a09, q3);
  ASSERT_EQUAL_128(0x0302010004030201, 0x0504030206050403, q4);
  ASSERT_EQUAL_128(0x0706050408070605, 0x090807060a090807, q5);
  ASSERT_EQUAL_128(0x0706050403020100, 0x0807060504030201, q6);
  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x100f0e0d0c0b0a09, q7);
  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050003020100, q8);
  ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716150113121110, q9);
  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0100050403020100, q10);
  ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x0302151413121110, q11);
  ASSERT_EQUAL_128(0x0f0e0d0c03020100, 0x0706050403020100, q12);
  ASSERT_EQUAL_128(0x1f1e1d1c07060504, 0x1716151413121110, q13);
  ASSERT_EQUAL_128(0x0706050403020100, 0x0706050403020100, q14);
  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x1716151413121110, q15);

  TEARDOWN();
}


TEST(neon_ld2_lane_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint8_t src[64];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x17, src_base);
  __ Mov(x18, src_base);
  __ Mov(x19, src_base);
  __ Mov(x20, src_base);
  __ Mov(x21, src_base);
  __ Mov(x22, src_base);
  __ Mov(x23, src_base);
  __ Mov(x24, src_base);

  // Test loading whole register by element.
  for (int i = 15; i >= 0; i--) {
    __ Ld2(v0.B(), v1.B(), i, MemOperand(x17, 2, PostIndex));
  }

  for (int i = 7; i >= 0; i--) {
    __ Ld2(v2.H(), v3.H(), i, MemOperand(x18, 4, PostIndex));
  }

  for (int i = 3; i >= 0; i--) {
    __ Ld2(v4.S(), v5.S(), i, MemOperand(x19, 8, PostIndex));
  }

  for (int i = 1; i >= 0; i--) {
    __ Ld2(v6.D(), v7.D(), i, MemOperand(x20, 16, PostIndex));
  }

  // Test loading a single element into an initialised register.
  __ Mov(x25, 1);
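  // x25 grows by one after each lane load below, so each base register
  // advances by a different byte count.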
  __ Mov(x4, x21);
  __ Ldr(q8, MemOperand(x4, 16, PostIndex));
  __ Ldr(q9, MemOperand(x4));
  __ Ld2(v8.B(), v9.B(), 4, MemOperand(x21, x25, PostIndex));
  __ Add(x25, x25, 1);

  __ Mov(x5, x22);
  __ Ldr(q10, MemOperand(x5, 16, PostIndex));
  __ Ldr(q11, MemOperand(x5));
  __ Ld2(v10.H(), v11.H(), 3, MemOperand(x22, x25, PostIndex));
  __ Add(x25, x25, 1);

  __ Mov(x6, x23);
  __ Ldr(q12, MemOperand(x6, 16, PostIndex));
  __ Ldr(q13, MemOperand(x6));
  __ Ld2(v12.S(), v13.S(), 2, MemOperand(x23, x25, PostIndex));
  __ Add(x25, x25, 1);

  __ Mov(x7, x24);
  __ Ldr(q14, MemOperand(x7, 16, PostIndex));
  __ Ldr(q15, MemOperand(x7));
  __ Ld2(v14.D(), v15.D(), 1, MemOperand(x24, x25, PostIndex));

  END();

  RUN();

  ASSERT_EQUAL_128(0x00020406080a0c0e, 0x10121416181a1c1e, q0);
  ASSERT_EQUAL_128(0x01030507090b0d0f, 0x11131517191b1d1f, q1);
  ASSERT_EQUAL_128(0x0100050409080d0c, 0x1110151419181d1c, q2);
  ASSERT_EQUAL_128(0x030207060b0a0f0e, 0x131217161b1a1f1e, q3);
  ASSERT_EQUAL_128(0x030201000b0a0908, 0x131211101b1a1918, q4);
  ASSERT_EQUAL_128(0x070605040f0e0d0c, 0x171615141f1e1d1c, q5);
  ASSERT_EQUAL_128(0x0706050403020100, 0x1716151413121110, q6);
  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x1f1e1d1c1b1a1918, q7);
  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050003020100, q8);
  ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716150113121110, q9);
  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0100050403020100, q10);
  ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x0302151413121110, q11);
  ASSERT_EQUAL_128(0x0f0e0d0c03020100, 0x0706050403020100, q12);
  ASSERT_EQUAL_128(0x1f1e1d1c07060504, 0x1716151413121110, q13);
  ASSERT_EQUAL_128(0x0706050403020100, 0x0706050403020100, q14);
  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x1716151413121110, q15);

  ASSERT_EQUAL_64(src_base + 32, x17);
  ASSERT_EQUAL_64(src_base + 32, x18);
  ASSERT_EQUAL_64(src_base + 32, x19);
  ASSERT_EQUAL_64(src_base + 32, x20);
  ASSERT_EQUAL_64(src_base + 1, x21);
  ASSERT_EQUAL_64(src_base + 2, x22);
  ASSERT_EQUAL_64(src_base + 3, x23);
  ASSERT_EQUAL_64(src_base + 4, x24);

  TEARDOWN();
}


TEST(neon_ld2_alllanes) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint8_t src[64];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x17, src_base + 1);
  __ Mov(x18, 1);
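  // Ld2r loads one two-element structure and replicates it into every lane of
  // both destination registers.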
  __ Ld2r(v0.V8B(), v1.V8B(), MemOperand(x17));
  __ Add(x17, x17, 2);
  __ Ld2r(v2.V16B(), v3.V16B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld2r(v4.V4H(), v5.V4H(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld2r(v6.V8H(), v7.V8H(), MemOperand(x17));
  __ Add(x17, x17, 4);
  __ Ld2r(v8.V2S(), v9.V2S(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld2r(v10.V4S(), v11.V4S(), MemOperand(x17));
  __ Add(x17, x17, 8);
  __ Ld2r(v12.V2D(), v13.V2D(), MemOperand(x17));
  END();

  RUN();

  ASSERT_EQUAL_128(0x0000000000000000, 0x0101010101010101, q0);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0202020202020202, q1);
  ASSERT_EQUAL_128(0x0303030303030303, 0x0303030303030303, q2);
  ASSERT_EQUAL_128(0x0404040404040404, 0x0404040404040404, q3);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0504050405040504, q4);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0706070607060706, q5);
  ASSERT_EQUAL_128(0x0605060506050605, 0x0605060506050605, q6);
  ASSERT_EQUAL_128(0x0807080708070807, 0x0807080708070807, q7);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0c0b0a090c0b0a09, q8);
  ASSERT_EQUAL_128(0x0000000000000000, 0x100f0e0d100f0e0d, q9);
  ASSERT_EQUAL_128(0x0d0c0b0a0d0c0b0a, 0x0d0c0b0a0d0c0b0a, q10);
  ASSERT_EQUAL_128(0x11100f0e11100f0e, 0x11100f0e11100f0e, q11);
  ASSERT_EQUAL_128(0x1918171615141312, 0x1918171615141312, q12);
  ASSERT_EQUAL_128(0x21201f1e1d1c1b1a, 0x21201f1e1d1c1b1a, q13);

  TEARDOWN();
}


TEST(neon_ld2_alllanes_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint8_t src[64];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x17, src_base + 1);
  __ Mov(x18, 1);
  __ Ld2r(v0.V8B(), v1.V8B(), MemOperand(x17, 2, PostIndex));
  __ Ld2r(v2.V16B(), v3.V16B(), MemOperand(x17, x18, PostIndex));
  __ Ld2r(v4.V4H(), v5.V4H(), MemOperand(x17, x18, PostIndex));
  __ Ld2r(v6.V8H(), v7.V8H(), MemOperand(x17, 4, PostIndex));
  __ Ld2r(v8.V2S(), v9.V2S(), MemOperand(x17, x18, PostIndex));
  __ Ld2r(v10.V4S(), v11.V4S(), MemOperand(x17, 8, PostIndex));
  __ Ld2r(v12.V2D(), v13.V2D(), MemOperand(x17, 16, PostIndex));
  END();

  RUN();

  ASSERT_EQUAL_128(0x0000000000000000, 0x0101010101010101, q0);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0202020202020202, q1);
  ASSERT_EQUAL_128(0x0303030303030303, 0x0303030303030303, q2);
  ASSERT_EQUAL_128(0x0404040404040404, 0x0404040404040404, q3);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0504050405040504, q4);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0706070607060706, q5);
  ASSERT_EQUAL_128(0x0605060506050605, 0x0605060506050605, q6);
  ASSERT_EQUAL_128(0x0807080708070807, 0x0807080708070807, q7);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0c0b0a090c0b0a09, q8);
  ASSERT_EQUAL_128(0x0000000000000000, 0x100f0e0d100f0e0d, q9);
  ASSERT_EQUAL_128(0x0d0c0b0a0d0c0b0a, 0x0d0c0b0a0d0c0b0a, q10);
  ASSERT_EQUAL_128(0x11100f0e11100f0e, 0x11100f0e11100f0e, q11);
  ASSERT_EQUAL_128(0x1918171615141312, 0x1918171615141312, q12);
  ASSERT_EQUAL_128(0x21201f1e1d1c1b1a, 0x21201f1e1d1c1b1a, q13);
  ASSERT_EQUAL_64(src_base + 34, x17);

  TEARDOWN();
}


TEST(neon_ld3_d) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint8_t src[64 + 4];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x17, src_base);
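  // Ld3 de-interleaves element triples: elements 0, 3, 6, ... go to the first
  // register, 1, 4, 7, ... to the second and 2, 5, 8, ... to the third.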
  __ Ld3(v2.V8B(), v3.V8B(), v4.V8B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld3(v5.V8B(), v6.V8B(), v7.V8B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld3(v8.V4H(), v9.V4H(), v10.V4H(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld3(v31.V2S(), v0.V2S(), v1.V2S(), MemOperand(x17));
  END();

  RUN();

  ASSERT_EQUAL_128(0, 0x15120f0c09060300, q2);
  ASSERT_EQUAL_128(0, 0x1613100d0a070401, q3);
  ASSERT_EQUAL_128(0, 0x1714110e0b080502, q4);
  ASSERT_EQUAL_128(0, 0x1613100d0a070401, q5);
  ASSERT_EQUAL_128(0, 0x1714110e0b080502, q6);
  ASSERT_EQUAL_128(0, 0x1815120f0c090603, q7);
  ASSERT_EQUAL_128(0, 0x15140f0e09080302, q8);
  ASSERT_EQUAL_128(0, 0x171611100b0a0504, q9);
  ASSERT_EQUAL_128(0, 0x191813120d0c0706, q10);
  ASSERT_EQUAL_128(0, 0x1211100f06050403, q31);
  ASSERT_EQUAL_128(0, 0x161514130a090807, q0);
  ASSERT_EQUAL_128(0, 0x1a1918170e0d0c0b, q1);

  TEARDOWN();
}


TEST(neon_ld3_d_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint8_t src[32 + 4];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x17, src_base);
  __ Mov(x18, src_base + 1);
  __ Mov(x19, src_base + 2);
  __ Mov(x20, src_base + 3);
  __ Mov(x21, src_base + 4);
  __ Mov(x22, 1);
  __ Ld3(v2.V8B(), v3.V8B(), v4.V8B(), MemOperand(x17, x22, PostIndex));
  __ Ld3(v5.V8B(), v6.V8B(), v7.V8B(), MemOperand(x18, 24, PostIndex));
  __ Ld3(v8.V4H(), v9.V4H(), v10.V4H(), MemOperand(x19, 24, PostIndex));
  __ Ld3(v11.V2S(), v12.V2S(), v13.V2S(), MemOperand(x20, 24, PostIndex));
  __ Ld3(v31.V2S(), v0.V2S(), v1.V2S(), MemOperand(x21, 24, PostIndex));
  END();

  RUN();

  ASSERT_EQUAL_128(0, 0x15120f0c09060300, q2);
  ASSERT_EQUAL_128(0, 0x1613100d0a070401, q3);
  ASSERT_EQUAL_128(0, 0x1714110e0b080502, q4);
  ASSERT_EQUAL_128(0, 0x1613100d0a070401, q5);
  ASSERT_EQUAL_128(0, 0x1714110e0b080502, q6);
  ASSERT_EQUAL_128(0, 0x1815120f0c090603, q7);
  ASSERT_EQUAL_128(0, 0x15140f0e09080302, q8);
  ASSERT_EQUAL_128(0, 0x171611100b0a0504, q9);
  ASSERT_EQUAL_128(0, 0x191813120d0c0706, q10);
  ASSERT_EQUAL_128(0, 0x1211100f06050403, q11);
  ASSERT_EQUAL_128(0, 0x161514130a090807, q12);
  ASSERT_EQUAL_128(0, 0x1a1918170e0d0c0b, q13);
  ASSERT_EQUAL_128(0, 0x1312111007060504, q31);
  ASSERT_EQUAL_128(0, 0x171615140b0a0908, q0);
  ASSERT_EQUAL_128(0, 0x1b1a19180f0e0d0c, q1);

  ASSERT_EQUAL_64(src_base + 1, x17);
  ASSERT_EQUAL_64(src_base + 1 + 24, x18);
  ASSERT_EQUAL_64(src_base + 2 + 24, x19);
  ASSERT_EQUAL_64(src_base + 3 + 24, x20);
  ASSERT_EQUAL_64(src_base + 4 + 24, x21);

  TEARDOWN();
}


TEST(neon_ld3_q) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint8_t src[64 + 4];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x17, src_base);
  __ Ld3(v2.V16B(), v3.V16B(), v4.V16B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld3(v5.V16B(), v6.V16B(), v7.V16B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld3(v8.V8H(), v9.V8H(), v10.V8H(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld3(v11.V4S(), v12.V4S(), v13.V4S(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld3(v31.V2D(), v0.V2D(), v1.V2D(), MemOperand(x17));
  END();

  RUN();

  ASSERT_EQUAL_128(0x2d2a2724211e1b18, 0x15120f0c09060300, q2);
  ASSERT_EQUAL_128(0x2e2b2825221f1c19, 0x1613100d0a070401, q3);
  ASSERT_EQUAL_128(0x2f2c292623201d1a, 0x1714110e0b080502, q4);
  ASSERT_EQUAL_128(0x2e2b2825221f1c19, 0x1613100d0a070401, q5);
  ASSERT_EQUAL_128(0x2f2c292623201d1a, 0x1714110e0b080502, q6);
  ASSERT_EQUAL_128(0x302d2a2724211e1b, 0x1815120f0c090603, q7);
  ASSERT_EQUAL_128(0x2d2c272621201b1a, 0x15140f0e09080302, q8);
  ASSERT_EQUAL_128(0x2f2e292823221d1c, 0x171611100b0a0504, q9);
  ASSERT_EQUAL_128(0x31302b2a25241f1e, 0x191813120d0c0706, q10);
  ASSERT_EQUAL_128(0x2a2928271e1d1c1b, 0x1211100f06050403, q11);
  ASSERT_EQUAL_128(0x2e2d2c2b2221201f, 0x161514130a090807, q12);
  ASSERT_EQUAL_128(0x3231302f26252423, 0x1a1918170e0d0c0b, q13);
  ASSERT_EQUAL_128(0x232221201f1e1d1c, 0x0b0a090807060504, q31);
  ASSERT_EQUAL_128(0x2b2a292827262524, 0x131211100f0e0d0c, q0);
  ASSERT_EQUAL_128(0x333231302f2e2d2c, 0x1b1a191817161514, q1);

  TEARDOWN();
}


TEST(neon_ld3_q_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint8_t src[64 + 4];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x17, src_base);
  __ Mov(x18, src_base + 1);
  __ Mov(x19, src_base + 2);
  __ Mov(x20, src_base + 3);
  __ Mov(x21, src_base + 4);
  __ Mov(x22, 1);

  __ Ld3(v2.V16B(), v3.V16B(), v4.V16B(), MemOperand(x17, x22, PostIndex));
  __ Ld3(v5.V16B(), v6.V16B(), v7.V16B(), MemOperand(x18, 48, PostIndex));
  __ Ld3(v8.V8H(), v9.V8H(), v10.V8H(), MemOperand(x19, 48, PostIndex));
  __ Ld3(v11.V4S(), v12.V4S(), v13.V4S(), MemOperand(x20, 48, PostIndex));
  __ Ld3(v31.V2D(), v0.V2D(), v1.V2D(), MemOperand(x21, 48, PostIndex));
  END();

  RUN();

  ASSERT_EQUAL_128(0x2d2a2724211e1b18, 0x15120f0c09060300, q2);
  ASSERT_EQUAL_128(0x2e2b2825221f1c19, 0x1613100d0a070401, q3);
  ASSERT_EQUAL_128(0x2f2c292623201d1a, 0x1714110e0b080502, q4);
  ASSERT_EQUAL_128(0x2e2b2825221f1c19, 0x1613100d0a070401, q5);
  ASSERT_EQUAL_128(0x2f2c292623201d1a, 0x1714110e0b080502, q6);
  ASSERT_EQUAL_128(0x302d2a2724211e1b, 0x1815120f0c090603, q7);
  ASSERT_EQUAL_128(0x2d2c272621201b1a, 0x15140f0e09080302, q8);
  ASSERT_EQUAL_128(0x2f2e292823221d1c, 0x171611100b0a0504, q9);
  ASSERT_EQUAL_128(0x31302b2a25241f1e, 0x191813120d0c0706, q10);
  ASSERT_EQUAL_128(0x2a2928271e1d1c1b, 0x1211100f06050403, q11);
  ASSERT_EQUAL_128(0x2e2d2c2b2221201f, 0x161514130a090807, q12);
  ASSERT_EQUAL_128(0x3231302f26252423, 0x1a1918170e0d0c0b, q13);
  ASSERT_EQUAL_128(0x232221201f1e1d1c, 0x0b0a090807060504, q31);
  ASSERT_EQUAL_128(0x2b2a292827262524, 0x131211100f0e0d0c, q0);
  ASSERT_EQUAL_128(0x333231302f2e2d2c, 0x1b1a191817161514, q1);

  ASSERT_EQUAL_64(src_base + 1, x17);
  ASSERT_EQUAL_64(src_base + 1 + 48, x18);
  ASSERT_EQUAL_64(src_base + 2 + 48, x19);
  ASSERT_EQUAL_64(src_base + 3 + 48, x20);
  ASSERT_EQUAL_64(src_base + 4 + 48, x21);

  TEARDOWN();
}


TEST(neon_ld3_lane) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint8_t src[64];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();

  // Test loading whole register by element.
  __ Mov(x17, src_base);
  for (int i = 15; i >= 0; i--) {
    __ Ld3(v0.B(), v1.B(), v2.B(), i, MemOperand(x17));
    __ Add(x17, x17, 1);
  }

  __ Mov(x17, src_base);
  for (int i = 7; i >= 0; i--) {
    __ Ld3(v3.H(), v4.H(), v5.H(), i, MemOperand(x17));
    __ Add(x17, x17, 1);
  }

  __ Mov(x17, src_base);
  for (int i = 3; i >= 0; i--) {
    __ Ld3(v6.S(), v7.S(), v8.S(), i, MemOperand(x17));
    __ Add(x17, x17, 1);
  }

  __ Mov(x17, src_base);
  for (int i = 1; i >= 0; i--) {
    __ Ld3(v9.D(), v10.D(), v11.D(), i, MemOperand(x17));
    __ Add(x17, x17, 1);
  }

  // Test loading a single element into an initialised register.
  __ Mov(x17, src_base);
  __ Mov(x4, x17);
  __ Ldr(q12, MemOperand(x4, 16, PostIndex));
  __ Ldr(q13, MemOperand(x4, 16, PostIndex));
  __ Ldr(q14, MemOperand(x4));
  __ Ld3(v12.B(), v13.B(), v14.B(), 4, MemOperand(x17));
  __ Mov(x5, x17);
  __ Ldr(q15, MemOperand(x5, 16, PostIndex));
  __ Ldr(q16, MemOperand(x5, 16, PostIndex));
  __ Ldr(q17, MemOperand(x5));
  __ Ld3(v15.H(), v16.H(), v17.H(), 3, MemOperand(x17));
  __ Mov(x6, x17);
  __ Ldr(q18, MemOperand(x6, 16, PostIndex));
  __ Ldr(q19, MemOperand(x6, 16, PostIndex));
  __ Ldr(q20, MemOperand(x6));
  __ Ld3(v18.S(), v19.S(), v20.S(), 2, MemOperand(x17));
  __ Mov(x7, x17);
  __ Ldr(q21, MemOperand(x7, 16, PostIndex));
  __ Ldr(q22, MemOperand(x7, 16, PostIndex));
  __ Ldr(q23, MemOperand(x7));
  __ Ld3(v21.D(), v22.D(), v23.D(), 1, MemOperand(x17));

  END();

  RUN();

  ASSERT_EQUAL_128(0x0001020304050607, 0x08090a0b0c0d0e0f, q0);
  ASSERT_EQUAL_128(0x0102030405060708, 0x090a0b0c0d0e0f10, q1);
  ASSERT_EQUAL_128(0x0203040506070809, 0x0a0b0c0d0e0f1011, q2);
  ASSERT_EQUAL_128(0x0100020103020403, 0x0504060507060807, q3);
  ASSERT_EQUAL_128(0x0302040305040605, 0x0706080709080a09, q4);
  ASSERT_EQUAL_128(0x0504060507060807, 0x09080a090b0a0c0b, q5);
  ASSERT_EQUAL_128(0x0302010004030201, 0x0504030206050403, q6);
  ASSERT_EQUAL_128(0x0706050408070605, 0x090807060a090807, q7);
  ASSERT_EQUAL_128(0x0b0a09080c0b0a09, 0x0d0c0b0a0e0d0c0b, q8);
  ASSERT_EQUAL_128(0x0706050403020100, 0x0807060504030201, q9);
  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x100f0e0d0c0b0a09, q10);
  ASSERT_EQUAL_128(0x1716151413121110, 0x1817161514131211, q11);
  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050003020100, q12);
  ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716150113121110, q13);
  ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726250223222120, q14);
  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0100050403020100, q15);
  ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x0302151413121110, q16);
  ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x0504252423222120, q17);

  TEARDOWN();
}


TEST(neon_ld3_lane_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint8_t src[64];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();

  // Test loading whole register by element.
  __ Mov(x17, src_base);
  __ Mov(x18, src_base);
  __ Mov(x19, src_base);
  __ Mov(x20, src_base);
  __ Mov(x21, src_base);
  __ Mov(x22, src_base);
  __ Mov(x23, src_base);
  __ Mov(x24, src_base);
  for (int i = 15; i >= 0; i--) {
    __ Ld3(v0.B(), v1.B(), v2.B(), i, MemOperand(x17, 3, PostIndex));
  }

  for (int i = 7; i >= 0; i--) {
    __ Ld3(v3.H(), v4.H(), v5.H(), i, MemOperand(x18, 6, PostIndex));
  }

  for (int i = 3; i >= 0; i--) {
    __ Ld3(v6.S(), v7.S(), v8.S(), i, MemOperand(x19, 12, PostIndex));
  }

  for (int i = 1; i >= 0; i--) {
    __ Ld3(v9.D(), v10.D(), v11.D(), i, MemOperand(x20, 24, PostIndex));
  }

  // Test loading a single element into an initialised register.
  __ Mov(x25, 1);
  __ Mov(x4, x21);
  __ Ldr(q12, MemOperand(x4, 16, PostIndex));
  __ Ldr(q13, MemOperand(x4, 16, PostIndex));
  __ Ldr(q14, MemOperand(x4));
  __ Ld3(v12.B(), v13.B(), v14.B(), 4, MemOperand(x21, x25, PostIndex));
  __ Add(x25, x25, 1);

  __ Mov(x5, x22);
  __ Ldr(q15, MemOperand(x5, 16, PostIndex));
  __ Ldr(q16, MemOperand(x5, 16, PostIndex));
  __ Ldr(q17, MemOperand(x5));
  __ Ld3(v15.H(), v16.H(), v17.H(), 3, MemOperand(x22, x25, PostIndex));
  __ Add(x25, x25, 1);

  __ Mov(x6, x23);
  __ Ldr(q18, MemOperand(x6, 16, PostIndex));
  __ Ldr(q19, MemOperand(x6, 16, PostIndex));
  __ Ldr(q20, MemOperand(x6));
  __ Ld3(v18.S(), v19.S(), v20.S(), 2, MemOperand(x23, x25, PostIndex));
  __ Add(x25, x25, 1);

  __ Mov(x7, x24);
  __ Ldr(q21, MemOperand(x7, 16, PostIndex));
  __ Ldr(q22, MemOperand(x7, 16, PostIndex));
  __ Ldr(q23, MemOperand(x7));
  __ Ld3(v21.D(), v22.D(), v23.D(), 1, MemOperand(x24, x25, PostIndex));

  END();

  RUN();

  ASSERT_EQUAL_128(0x000306090c0f1215, 0x181b1e2124272a2d, q0);
  ASSERT_EQUAL_128(0x0104070a0d101316, 0x191c1f2225282b2e, q1);
  ASSERT_EQUAL_128(0x0205080b0e111417, 0x1a1d202326292c2f, q2);
  ASSERT_EQUAL_128(0x010007060d0c1312, 0x19181f1e25242b2a, q3);
  ASSERT_EQUAL_128(0x030209080f0e1514, 0x1b1a212027262d2c, q4);
  ASSERT_EQUAL_128(0x05040b0a11101716, 0x1d1c232229282f2e, q5);
  ASSERT_EQUAL_128(0x030201000f0e0d0c, 0x1b1a191827262524, q6);
  ASSERT_EQUAL_128(0x0706050413121110, 0x1f1e1d1c2b2a2928, q7);
  ASSERT_EQUAL_128(0x0b0a090817161514, 0x232221202f2e2d2c, q8);
  ASSERT_EQUAL_128(0x0706050403020100, 0x1f1e1d1c1b1a1918, q9);
  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x2726252423222120, q10);
  ASSERT_EQUAL_128(0x1716151413121110, 0x2f2e2d2c2b2a2928, q11);
  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050003020100, q12);
  ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716150113121110, q13);
  ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726250223222120, q14);
  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0100050403020100, q15);
  ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x0302151413121110, q16);
  ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x0504252423222120, q17);
  ASSERT_EQUAL_128(0x0f0e0d0c03020100, 0x0706050403020100, q18);
  ASSERT_EQUAL_128(0x1f1e1d1c07060504, 0x1716151413121110, q19);
  ASSERT_EQUAL_128(0x2f2e2d2c0b0a0908, 0x2726252423222120, q20);
  ASSERT_EQUAL_128(0x0706050403020100, 0x0706050403020100, q21);
  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x1716151413121110, q22);
  ASSERT_EQUAL_128(0x1716151413121110, 0x2726252423222120, q23);

  ASSERT_EQUAL_64(src_base + 48, x17);
  ASSERT_EQUAL_64(src_base + 48, x18);
  ASSERT_EQUAL_64(src_base + 48, x19);
  ASSERT_EQUAL_64(src_base + 48, x20);
  ASSERT_EQUAL_64(src_base + 1, x21);
  ASSERT_EQUAL_64(src_base + 2, x22);
  ASSERT_EQUAL_64(src_base + 3, x23);
  ASSERT_EQUAL_64(src_base + 4, x24);

  TEARDOWN();
}


TEST(neon_ld3_alllanes) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint8_t src[64];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x17, src_base + 1);
  __ Mov(x18, 1);
  __ Ld3r(v0.V8B(), v1.V8B(), v2.V8B(), MemOperand(x17));
  __ Add(x17, x17, 3);
  __ Ld3r(v3.V16B(), v4.V16B(), v5.V16B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld3r(v6.V4H(), v7.V4H(), v8.V4H(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld3r(v9.V8H(), v10.V8H(), v11.V8H(), MemOperand(x17));
  __ Add(x17, x17, 6);
  __ Ld3r(v12.V2S(), v13.V2S(), v14.V2S(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld3r(v15.V4S(), v16.V4S(), v17.V4S(), MemOperand(x17));
  __ Add(x17, x17, 12);
  __ Ld3r(v18.V2D(), v19.V2D(), v20.V2D(), MemOperand(x17));
  END();

  RUN();

  ASSERT_EQUAL_128(0x0000000000000000, 0x0101010101010101, q0);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0202020202020202, q1);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0303030303030303, q2);
  ASSERT_EQUAL_128(0x0404040404040404, 0x0404040404040404, q3);
  ASSERT_EQUAL_128(0x0505050505050505, 0x0505050505050505, q4);
  ASSERT_EQUAL_128(0x0606060606060606, 0x0606060606060606, q5);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0605060506050605, q6);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0807080708070807, q7);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0a090a090a090a09, q8);
  ASSERT_EQUAL_128(0x0706070607060706, 0x0706070607060706, q9);
  ASSERT_EQUAL_128(0x0908090809080908, 0x0908090809080908, q10);
  ASSERT_EQUAL_128(0x0b0a0b0a0b0a0b0a, 0x0b0a0b0a0b0a0b0a, q11);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0f0e0d0c0f0e0d0c, q12);
  ASSERT_EQUAL_128(0x0000000000000000, 0x1312111013121110, q13);
  ASSERT_EQUAL_128(0x0000000000000000, 0x1716151417161514, q14);
  ASSERT_EQUAL_128(0x100f0e0d100f0e0d, 0x100f0e0d100f0e0d, q15);
  ASSERT_EQUAL_128(0x1413121114131211, 0x1413121114131211, q16);
  ASSERT_EQUAL_128(0x1817161518171615, 0x1817161518171615, q17);
  ASSERT_EQUAL_128(0x201f1e1d1c1b1a19, 0x201f1e1d1c1b1a19, q18);
  ASSERT_EQUAL_128(0x2827262524232221, 0x2827262524232221, q19);
  ASSERT_EQUAL_128(0x302f2e2d2c2b2a29, 0x302f2e2d2c2b2a29, q20);

  TEARDOWN();
}


TEST(neon_ld3_alllanes_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint8_t src[64];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x17, src_base + 1);
  __ Mov(x18, 1);
  __ Ld3r(v0.V8B(), v1.V8B(), v2.V8B(), MemOperand(x17, 3, PostIndex));
  __ Ld3r(v3.V16B(), v4.V16B(), v5.V16B(), MemOperand(x17, x18, PostIndex));
  __ Ld3r(v6.V4H(), v7.V4H(), v8.V4H(), MemOperand(x17, x18, PostIndex));
  __ Ld3r(v9.V8H(), v10.V8H(), v11.V8H(), MemOperand(x17, 6, PostIndex));
  __ Ld3r(v12.V2S(), v13.V2S(), v14.V2S(), MemOperand(x17, x18, PostIndex));
  __ Ld3r(v15.V4S(), v16.V4S(), v17.V4S(), MemOperand(x17, 12, PostIndex));
  __ Ld3r(v18.V2D(), v19.V2D(), v20.V2D(), MemOperand(x17, 24, PostIndex));
  END();

  RUN();

  ASSERT_EQUAL_128(0x0000000000000000, 0x0101010101010101, q0);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0202020202020202, q1);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0303030303030303, q2);
  ASSERT_EQUAL_128(0x0404040404040404, 0x0404040404040404, q3);
  ASSERT_EQUAL_128(0x0505050505050505, 0x0505050505050505, q4);
  ASSERT_EQUAL_128(0x0606060606060606, 0x0606060606060606, q5);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0605060506050605, q6);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0807080708070807, q7);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0a090a090a090a09, q8);
  ASSERT_EQUAL_128(0x0706070607060706, 0x0706070607060706, q9);
  ASSERT_EQUAL_128(0x0908090809080908, 0x0908090809080908, q10);
  ASSERT_EQUAL_128(0x0b0a0b0a0b0a0b0a, 0x0b0a0b0a0b0a0b0a, q11);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0f0e0d0c0f0e0d0c, q12);
  ASSERT_EQUAL_128(0x0000000000000000, 0x1312111013121110, q13);
  ASSERT_EQUAL_128(0x0000000000000000, 0x1716151417161514, q14);
  ASSERT_EQUAL_128(0x100f0e0d100f0e0d, 0x100f0e0d100f0e0d, q15);
  ASSERT_EQUAL_128(0x1413121114131211, 0x1413121114131211, q16);
  ASSERT_EQUAL_128(0x1817161518171615, 0x1817161518171615, q17);
  ASSERT_EQUAL_128(0x201f1e1d1c1b1a19, 0x201f1e1d1c1b1a19, q18);
  ASSERT_EQUAL_128(0x2827262524232221, 0x2827262524232221, q19);
  ASSERT_EQUAL_128(0x302f2e2d2c2b2a29, 0x302f2e2d2c2b2a29, q20);

  TEARDOWN();
}


TEST(neon_ld4_d) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint8_t src[64 + 4];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x17, src_base);
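  // Ld4 de-interleaves element quadruples, one element to each of the four
  // destination registers in turn.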
  __ Ld4(v2.V8B(), v3.V8B(), v4.V8B(), v5.V8B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld4(v6.V8B(), v7.V8B(), v8.V8B(), v9.V8B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld4(v10.V4H(), v11.V4H(), v12.V4H(), v13.V4H(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld4(v30.V2S(), v31.V2S(), v0.V2S(), v1.V2S(), MemOperand(x17));
  END();

  RUN();

  ASSERT_EQUAL_128(0, 0x1c1814100c080400, q2);
  ASSERT_EQUAL_128(0, 0x1d1915110d090501, q3);
  ASSERT_EQUAL_128(0, 0x1e1a16120e0a0602, q4);
  ASSERT_EQUAL_128(0, 0x1f1b17130f0b0703, q5);
  ASSERT_EQUAL_128(0, 0x1d1915110d090501, q6);
  ASSERT_EQUAL_128(0, 0x1e1a16120e0a0602, q7);
  ASSERT_EQUAL_128(0, 0x1f1b17130f0b0703, q8);
  ASSERT_EQUAL_128(0, 0x201c1814100c0804, q9);
  ASSERT_EQUAL_128(0, 0x1b1a13120b0a0302, q10);
  ASSERT_EQUAL_128(0, 0x1d1c15140d0c0504, q11);
  ASSERT_EQUAL_128(0, 0x1f1e17160f0e0706, q12);
  ASSERT_EQUAL_128(0, 0x2120191811100908, q13);
  ASSERT_EQUAL_128(0, 0x1615141306050403, q30);
  ASSERT_EQUAL_128(0, 0x1a1918170a090807, q31);
  ASSERT_EQUAL_128(0, 0x1e1d1c1b0e0d0c0b, q0);
  ASSERT_EQUAL_128(0, 0x2221201f1211100f, q1);

  TEARDOWN();
}


TEST(neon_ld4_d_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint8_t src[32 + 4];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x17, src_base);
  __ Mov(x18, src_base + 1);
  __ Mov(x19, src_base + 2);
  __ Mov(x20, src_base + 3);
  __ Mov(x21, src_base + 4);
  __ Mov(x22, 1);
  __ Ld4(v2.V8B(),
         v3.V8B(),
         v4.V8B(),
         v5.V8B(),
         MemOperand(x17, x22, PostIndex));
  __ Ld4(v6.V8B(),
         v7.V8B(),
         v8.V8B(),
         v9.V8B(),
         MemOperand(x18, 32, PostIndex));
  __ Ld4(v10.V4H(),
         v11.V4H(),
         v12.V4H(),
         v13.V4H(),
         MemOperand(x19, 32, PostIndex));
  __ Ld4(v14.V2S(),
         v15.V2S(),
         v16.V2S(),
         v17.V2S(),
         MemOperand(x20, 32, PostIndex));
  __ Ld4(v30.V2S(),
         v31.V2S(),
         v0.V2S(),
         v1.V2S(),
         MemOperand(x21, 32, PostIndex));
  END();

  RUN();

  ASSERT_EQUAL_128(0, 0x1c1814100c080400, q2);
  ASSERT_EQUAL_128(0, 0x1d1915110d090501, q3);
  ASSERT_EQUAL_128(0, 0x1e1a16120e0a0602, q4);
  ASSERT_EQUAL_128(0, 0x1f1b17130f0b0703, q5);
  ASSERT_EQUAL_128(0, 0x1d1915110d090501, q6);
  ASSERT_EQUAL_128(0, 0x1e1a16120e0a0602, q7);
  ASSERT_EQUAL_128(0, 0x1f1b17130f0b0703, q8);
  ASSERT_EQUAL_128(0, 0x201c1814100c0804, q9);
  ASSERT_EQUAL_128(0, 0x1b1a13120b0a0302, q10);
  ASSERT_EQUAL_128(0, 0x1d1c15140d0c0504, q11);
  ASSERT_EQUAL_128(0, 0x1f1e17160f0e0706, q12);
  ASSERT_EQUAL_128(0, 0x2120191811100908, q13);
  ASSERT_EQUAL_128(0, 0x1615141306050403, q14);
  ASSERT_EQUAL_128(0, 0x1a1918170a090807, q15);
  ASSERT_EQUAL_128(0, 0x1e1d1c1b0e0d0c0b, q16);
  ASSERT_EQUAL_128(0, 0x2221201f1211100f, q17);
  ASSERT_EQUAL_128(0, 0x1716151407060504, q30);
  ASSERT_EQUAL_128(0, 0x1b1a19180b0a0908, q31);
  ASSERT_EQUAL_128(0, 0x1f1e1d1c0f0e0d0c, q0);
  ASSERT_EQUAL_128(0, 0x2322212013121110, q1);

  ASSERT_EQUAL_64(src_base + 1, x17);
  ASSERT_EQUAL_64(src_base + 1 + 32, x18);
  ASSERT_EQUAL_64(src_base + 2 + 32, x19);
  ASSERT_EQUAL_64(src_base + 3 + 32, x20);
  ASSERT_EQUAL_64(src_base + 4 + 32, x21);

  TEARDOWN();
}


TEST(neon_ld4_q) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint8_t src[64 + 4];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x17, src_base);
  __ Ld4(v2.V16B(), v3.V16B(), v4.V16B(), v5.V16B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld4(v6.V16B(), v7.V16B(), v8.V16B(), v9.V16B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld4(v10.V8H(), v11.V8H(), v12.V8H(), v13.V8H(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld4(v14.V4S(), v15.V4S(), v16.V4S(), v17.V4S(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld4(v18.V2D(), v19.V2D(), v20.V2D(), v21.V2D(), MemOperand(x17));
  END();

  RUN();

  ASSERT_EQUAL_128(0x3c3834302c282420, 0x1c1814100c080400, q2);
  ASSERT_EQUAL_128(0x3d3935312d292521, 0x1d1915110d090501, q3);
  ASSERT_EQUAL_128(0x3e3a36322e2a2622, 0x1e1a16120e0a0602, q4);
  ASSERT_EQUAL_128(0x3f3b37332f2b2723, 0x1f1b17130f0b0703, q5);
  ASSERT_EQUAL_128(0x3d3935312d292521, 0x1d1915110d090501, q6);
  ASSERT_EQUAL_128(0x3e3a36322e2a2622, 0x1e1a16120e0a0602, q7);
  ASSERT_EQUAL_128(0x3f3b37332f2b2723, 0x1f1b17130f0b0703, q8);
  ASSERT_EQUAL_128(0x403c3834302c2824, 0x201c1814100c0804, q9);
  ASSERT_EQUAL_128(0x3b3a33322b2a2322, 0x1b1a13120b0a0302, q10);
  ASSERT_EQUAL_128(0x3d3c35342d2c2524, 0x1d1c15140d0c0504, q11);
  ASSERT_EQUAL_128(0x3f3e37362f2e2726, 0x1f1e17160f0e0706, q12);
  ASSERT_EQUAL_128(0x4140393831302928, 0x2120191811100908, q13);
  ASSERT_EQUAL_128(0x3635343326252423, 0x1615141306050403, q14);
  ASSERT_EQUAL_128(0x3a3938372a292827, 0x1a1918170a090807, q15);
  ASSERT_EQUAL_128(0x3e3d3c3b2e2d2c2b, 0x1e1d1c1b0e0d0c0b, q16);
  ASSERT_EQUAL_128(0x4241403f3231302f, 0x2221201f1211100f, q17);
  ASSERT_EQUAL_128(0x2b2a292827262524, 0x0b0a090807060504, q18);
  ASSERT_EQUAL_128(0x333231302f2e2d2c, 0x131211100f0e0d0c, q19);
  ASSERT_EQUAL_128(0x3b3a393837363534, 0x1b1a191817161514, q20);
  ASSERT_EQUAL_128(0x434241403f3e3d3c, 0x232221201f1e1d1c, q21);

  TEARDOWN();
}


TEST(neon_ld4_q_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint8_t src[64 + 4];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x17, src_base);
  __ Mov(x18, src_base + 1);
  __ Mov(x19, src_base + 2);
  __ Mov(x20, src_base + 3);
  __ Mov(x21, src_base + 4);
  __ Mov(x22, 1);

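  // For Ld4 (multiple structures) the post-index amount is either a register
  // (x22 here, so x17 only advances by 1) or the immediate 64, i.e. the full
  // 4 registers x 16 bytes consumed by the load; the ASSERT_EQUAL_64 checks
  // after RUN() verify both forms.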
  __ Ld4(v2.V16B(),
         v3.V16B(),
         v4.V16B(),
         v5.V16B(),
         MemOperand(x17, x22, PostIndex));
  __ Ld4(v6.V16B(),
         v7.V16B(),
         v8.V16B(),
         v9.V16B(),
         MemOperand(x18, 64, PostIndex));
  __ Ld4(v10.V8H(),
         v11.V8H(),
         v12.V8H(),
         v13.V8H(),
         MemOperand(x19, 64, PostIndex));
  __ Ld4(v14.V4S(),
         v15.V4S(),
         v16.V4S(),
         v17.V4S(),
         MemOperand(x20, 64, PostIndex));
  __ Ld4(v30.V2D(),
         v31.V2D(),
         v0.V2D(),
         v1.V2D(),
         MemOperand(x21, 64, PostIndex));
  END();

  RUN();

  ASSERT_EQUAL_128(0x3c3834302c282420, 0x1c1814100c080400, q2);
  ASSERT_EQUAL_128(0x3d3935312d292521, 0x1d1915110d090501, q3);
  ASSERT_EQUAL_128(0x3e3a36322e2a2622, 0x1e1a16120e0a0602, q4);
  ASSERT_EQUAL_128(0x3f3b37332f2b2723, 0x1f1b17130f0b0703, q5);
  ASSERT_EQUAL_128(0x3d3935312d292521, 0x1d1915110d090501, q6);
  ASSERT_EQUAL_128(0x3e3a36322e2a2622, 0x1e1a16120e0a0602, q7);
  ASSERT_EQUAL_128(0x3f3b37332f2b2723, 0x1f1b17130f0b0703, q8);
  ASSERT_EQUAL_128(0x403c3834302c2824, 0x201c1814100c0804, q9);
  ASSERT_EQUAL_128(0x3b3a33322b2a2322, 0x1b1a13120b0a0302, q10);
  ASSERT_EQUAL_128(0x3d3c35342d2c2524, 0x1d1c15140d0c0504, q11);
  ASSERT_EQUAL_128(0x3f3e37362f2e2726, 0x1f1e17160f0e0706, q12);
  ASSERT_EQUAL_128(0x4140393831302928, 0x2120191811100908, q13);
  ASSERT_EQUAL_128(0x3635343326252423, 0x1615141306050403, q14);
  ASSERT_EQUAL_128(0x3a3938372a292827, 0x1a1918170a090807, q15);
  ASSERT_EQUAL_128(0x3e3d3c3b2e2d2c2b, 0x1e1d1c1b0e0d0c0b, q16);
  ASSERT_EQUAL_128(0x4241403f3231302f, 0x2221201f1211100f, q17);
  ASSERT_EQUAL_128(0x2b2a292827262524, 0x0b0a090807060504, q30);
  ASSERT_EQUAL_128(0x333231302f2e2d2c, 0x131211100f0e0d0c, q31);
  ASSERT_EQUAL_128(0x3b3a393837363534, 0x1b1a191817161514, q0);
  ASSERT_EQUAL_128(0x434241403f3e3d3c, 0x232221201f1e1d1c, q1);


  ASSERT_EQUAL_64(src_base + 1, x17);
  ASSERT_EQUAL_64(src_base + 1 + 64, x18);
  ASSERT_EQUAL_64(src_base + 2 + 64, x19);
  ASSERT_EQUAL_64(src_base + 3 + 64, x20);
  ASSERT_EQUAL_64(src_base + 4 + 64, x21);

  TEARDOWN();
}


TEST(neon_ld4_lane) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint8_t src[64];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();

  // Test loading whole register by element.
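  // Ld4 (single structure) reads one 4-element struct and writes only lane i
  // of each destination. Walking i from the top lane down to 0 while bumping
  // the base address by one byte fills every lane of the four registers with
  // overlapping windows of src.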
  __ Mov(x17, src_base);
  for (int i = 15; i >= 0; i--) {
    __ Ld4(v0.B(), v1.B(), v2.B(), v3.B(), i, MemOperand(x17));
    __ Add(x17, x17, 1);
  }

  __ Mov(x17, src_base);
  for (int i = 7; i >= 0; i--) {
    __ Ld4(v4.H(), v5.H(), v6.H(), v7.H(), i, MemOperand(x17));
    __ Add(x17, x17, 1);
  }

  __ Mov(x17, src_base);
  for (int i = 3; i >= 0; i--) {
    __ Ld4(v8.S(), v9.S(), v10.S(), v11.S(), i, MemOperand(x17));
    __ Add(x17, x17, 1);
  }

  __ Mov(x17, src_base);
  for (int i = 1; i >= 0; i--) {
    __ Ld4(v12.D(), v13.D(), v14.D(), v15.D(), i, MemOperand(x17));
    __ Add(x17, x17, 1);
  }

  // Test loading a single element into an initialised register.
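  // The plain Ldr instructions below give the destinations known contents
  // first, so each subsequent lane load must change exactly one lane per
  // register and leave every other byte untouched.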
  __ Mov(x17, src_base);
  __ Mov(x4, x17);
  __ Ldr(q16, MemOperand(x4, 16, PostIndex));
  __ Ldr(q17, MemOperand(x4, 16, PostIndex));
  __ Ldr(q18, MemOperand(x4, 16, PostIndex));
  __ Ldr(q19, MemOperand(x4));
  __ Ld4(v16.B(), v17.B(), v18.B(), v19.B(), 4, MemOperand(x17));

  __ Mov(x5, x17);
  __ Ldr(q20, MemOperand(x5, 16, PostIndex));
  __ Ldr(q21, MemOperand(x5, 16, PostIndex));
  __ Ldr(q22, MemOperand(x5, 16, PostIndex));
  __ Ldr(q23, MemOperand(x5));
  __ Ld4(v20.H(), v21.H(), v22.H(), v23.H(), 3, MemOperand(x17));

  __ Mov(x6, x17);
  __ Ldr(q24, MemOperand(x6, 16, PostIndex));
  __ Ldr(q25, MemOperand(x6, 16, PostIndex));
  __ Ldr(q26, MemOperand(x6, 16, PostIndex));
  __ Ldr(q27, MemOperand(x6));
  __ Ld4(v24.S(), v25.S(), v26.S(), v27.S(), 2, MemOperand(x17));

  __ Mov(x7, x17);
  __ Ldr(q28, MemOperand(x7, 16, PostIndex));
  __ Ldr(q29, MemOperand(x7, 16, PostIndex));
  __ Ldr(q30, MemOperand(x7, 16, PostIndex));
  __ Ldr(q31, MemOperand(x7));
  __ Ld4(v28.D(), v29.D(), v30.D(), v31.D(), 1, MemOperand(x17));

  END();

  RUN();

  ASSERT_EQUAL_128(0x0001020304050607, 0x08090a0b0c0d0e0f, q0);
  ASSERT_EQUAL_128(0x0102030405060708, 0x090a0b0c0d0e0f10, q1);
  ASSERT_EQUAL_128(0x0203040506070809, 0x0a0b0c0d0e0f1011, q2);
  ASSERT_EQUAL_128(0x030405060708090a, 0x0b0c0d0e0f101112, q3);
  ASSERT_EQUAL_128(0x0100020103020403, 0x0504060507060807, q4);
  ASSERT_EQUAL_128(0x0302040305040605, 0x0706080709080a09, q5);
  ASSERT_EQUAL_128(0x0504060507060807, 0x09080a090b0a0c0b, q6);
  ASSERT_EQUAL_128(0x0706080709080a09, 0x0b0a0c0b0d0c0e0d, q7);
  ASSERT_EQUAL_128(0x0302010004030201, 0x0504030206050403, q8);
  ASSERT_EQUAL_128(0x0706050408070605, 0x090807060a090807, q9);
  ASSERT_EQUAL_128(0x0b0a09080c0b0a09, 0x0d0c0b0a0e0d0c0b, q10);
  ASSERT_EQUAL_128(0x0f0e0d0c100f0e0d, 0x11100f0e1211100f, q11);
  ASSERT_EQUAL_128(0x0706050403020100, 0x0807060504030201, q12);
  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x100f0e0d0c0b0a09, q13);
  ASSERT_EQUAL_128(0x1716151413121110, 0x1817161514131211, q14);
  ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x201f1e1d1c1b1a19, q15);
  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050003020100, q16);
  ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716150113121110, q17);
  ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726250223222120, q18);
  ASSERT_EQUAL_128(0x3f3e3d3c3b3a3938, 0x3736350333323130, q19);
  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0100050403020100, q20);
  ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x0302151413121110, q21);
  ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x0504252423222120, q22);
  ASSERT_EQUAL_128(0x3f3e3d3c3b3a3938, 0x0706353433323130, q23);
  ASSERT_EQUAL_128(0x0f0e0d0c03020100, 0x0706050403020100, q24);
  ASSERT_EQUAL_128(0x1f1e1d1c07060504, 0x1716151413121110, q25);
  ASSERT_EQUAL_128(0x2f2e2d2c0b0a0908, 0x2726252423222120, q26);
  ASSERT_EQUAL_128(0x3f3e3d3c0f0e0d0c, 0x3736353433323130, q27);
  ASSERT_EQUAL_128(0x0706050403020100, 0x0706050403020100, q28);
  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x1716151413121110, q29);
  ASSERT_EQUAL_128(0x1716151413121110, 0x2726252423222120, q30);
  ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x3736353433323130, q31);

  TEARDOWN();
}


TEST(neon_ld4_lane_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint8_t src[64];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();

  // Test loading whole register by element.
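  // For the single-structure form the immediate post-index must equal the
  // struct size, i.e. 4 x the element size: 4 for B, 8 for H, 16 for S and
  // 32 for D, as used below.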
  __ Mov(x17, src_base);
  for (int i = 15; i >= 0; i--) {
    __ Ld4(v0.B(), v1.B(), v2.B(), v3.B(), i, MemOperand(x17, 4, PostIndex));
  }

  __ Mov(x18, src_base);
  for (int i = 7; i >= 0; i--) {
    __ Ld4(v4.H(), v5.H(), v6.H(), v7.H(), i, MemOperand(x18, 8, PostIndex));
  }

  __ Mov(x19, src_base);
  for (int i = 3; i >= 0; i--) {
    __ Ld4(v8.S(), v9.S(), v10.S(), v11.S(), i, MemOperand(x19, 16, PostIndex));
  }

  __ Mov(x20, src_base);
  for (int i = 1; i >= 0; i--) {
    __ Ld4(v12.D(),
           v13.D(),
           v14.D(),
           v15.D(),
           i,
           MemOperand(x20, 32, PostIndex));
  }

  // Test loading a single element into an initialised register.
  __ Mov(x25, 1);
  __ Mov(x21, src_base);
  __ Mov(x22, src_base);
  __ Mov(x23, src_base);
  __ Mov(x24, src_base);

  __ Mov(x4, x21);
  __ Ldr(q16, MemOperand(x4, 16, PostIndex));
  __ Ldr(q17, MemOperand(x4, 16, PostIndex));
  __ Ldr(q18, MemOperand(x4, 16, PostIndex));
  __ Ldr(q19, MemOperand(x4));
  __ Ld4(v16.B(),
         v17.B(),
         v18.B(),
         v19.B(),
         4,
         MemOperand(x21, x25, PostIndex));
  __ Add(x25, x25, 1);

  __ Mov(x5, x22);
  __ Ldr(q20, MemOperand(x5, 16, PostIndex));
  __ Ldr(q21, MemOperand(x5, 16, PostIndex));
  __ Ldr(q22, MemOperand(x5, 16, PostIndex));
  __ Ldr(q23, MemOperand(x5));
  __ Ld4(v20.H(),
         v21.H(),
         v22.H(),
         v23.H(),
         3,
         MemOperand(x22, x25, PostIndex));
  __ Add(x25, x25, 1);

  __ Mov(x6, x23);
  __ Ldr(q24, MemOperand(x6, 16, PostIndex));
  __ Ldr(q25, MemOperand(x6, 16, PostIndex));
  __ Ldr(q26, MemOperand(x6, 16, PostIndex));
  __ Ldr(q27, MemOperand(x6));
  __ Ld4(v24.S(),
         v25.S(),
         v26.S(),
         v27.S(),
         2,
         MemOperand(x23, x25, PostIndex));
  __ Add(x25, x25, 1);

  __ Mov(x7, x24);
  __ Ldr(q28, MemOperand(x7, 16, PostIndex));
  __ Ldr(q29, MemOperand(x7, 16, PostIndex));
  __ Ldr(q30, MemOperand(x7, 16, PostIndex));
  __ Ldr(q31, MemOperand(x7));
  __ Ld4(v28.D(),
         v29.D(),
         v30.D(),
         v31.D(),
         1,
         MemOperand(x24, x25, PostIndex));

  END();

  RUN();

  ASSERT_EQUAL_128(0x0004080c1014181c, 0x2024282c3034383c, q0);
  ASSERT_EQUAL_128(0x0105090d1115191d, 0x2125292d3135393d, q1);
  ASSERT_EQUAL_128(0x02060a0e12161a1e, 0x22262a2e32363a3e, q2);
  ASSERT_EQUAL_128(0x03070b0f13171b1f, 0x23272b2f33373b3f, q3);
  ASSERT_EQUAL_128(0x0100090811101918, 0x2120292831303938, q4);
  ASSERT_EQUAL_128(0x03020b0a13121b1a, 0x23222b2a33323b3a, q5);
  ASSERT_EQUAL_128(0x05040d0c15141d1c, 0x25242d2c35343d3c, q6);
  ASSERT_EQUAL_128(0x07060f0e17161f1e, 0x27262f2e37363f3e, q7);
  ASSERT_EQUAL_128(0x0302010013121110, 0x2322212033323130, q8);
  ASSERT_EQUAL_128(0x0706050417161514, 0x2726252437363534, q9);
  ASSERT_EQUAL_128(0x0b0a09081b1a1918, 0x2b2a29283b3a3938, q10);
  ASSERT_EQUAL_128(0x0f0e0d0c1f1e1d1c, 0x2f2e2d2c3f3e3d3c, q11);
  ASSERT_EQUAL_128(0x0706050403020100, 0x2726252423222120, q12);
  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x2f2e2d2c2b2a2928, q13);
  ASSERT_EQUAL_128(0x1716151413121110, 0x3736353433323130, q14);
  ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x3f3e3d3c3b3a3938, q15);
  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050003020100, q16);
  ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716150113121110, q17);
  ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726250223222120, q18);
  ASSERT_EQUAL_128(0x3f3e3d3c3b3a3938, 0x3736350333323130, q19);
  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0100050403020100, q20);
  ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x0302151413121110, q21);
  ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x0504252423222120, q22);
  ASSERT_EQUAL_128(0x3f3e3d3c3b3a3938, 0x0706353433323130, q23);
  ASSERT_EQUAL_128(0x0f0e0d0c03020100, 0x0706050403020100, q24);
  ASSERT_EQUAL_128(0x1f1e1d1c07060504, 0x1716151413121110, q25);
  ASSERT_EQUAL_128(0x2f2e2d2c0b0a0908, 0x2726252423222120, q26);
  ASSERT_EQUAL_128(0x3f3e3d3c0f0e0d0c, 0x3736353433323130, q27);
  ASSERT_EQUAL_128(0x0706050403020100, 0x0706050403020100, q28);
  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x1716151413121110, q29);
  ASSERT_EQUAL_128(0x1716151413121110, 0x2726252423222120, q30);
  ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x3736353433323130, q31);

  ASSERT_EQUAL_64(src_base + 64, x17);
  ASSERT_EQUAL_64(src_base + 64, x18);
  ASSERT_EQUAL_64(src_base + 64, x19);
  ASSERT_EQUAL_64(src_base + 64, x20);
  ASSERT_EQUAL_64(src_base + 1, x21);
  ASSERT_EQUAL_64(src_base + 2, x22);
  ASSERT_EQUAL_64(src_base + 3, x23);
  ASSERT_EQUAL_64(src_base + 4, x24);

  TEARDOWN();
}


TEST(neon_ld4_alllanes) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint8_t src[64];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
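  // Ld4r loads one 4-element struct and replicates element n to every lane
  // of the n-th destination, e.g. the first load below broadcasts src[1]
  // into all eight bytes of v0 and src[4] into all of v3.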
  __ Mov(x17, src_base + 1);
  __ Mov(x18, 1);
  __ Ld4r(v0.V8B(), v1.V8B(), v2.V8B(), v3.V8B(), MemOperand(x17));
  __ Add(x17, x17, 4);
  __ Ld4r(v4.V16B(), v5.V16B(), v6.V16B(), v7.V16B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld4r(v8.V4H(), v9.V4H(), v10.V4H(), v11.V4H(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld4r(v12.V8H(), v13.V8H(), v14.V8H(), v15.V8H(), MemOperand(x17));
  __ Add(x17, x17, 8);
  __ Ld4r(v16.V2S(), v17.V2S(), v18.V2S(), v19.V2S(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld4r(v20.V4S(), v21.V4S(), v22.V4S(), v23.V4S(), MemOperand(x17));
  __ Add(x17, x17, 16);
  __ Ld4r(v24.V2D(), v25.V2D(), v26.V2D(), v27.V2D(), MemOperand(x17));


  END();

  RUN();

  ASSERT_EQUAL_128(0x0000000000000000, 0x0101010101010101, q0);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0202020202020202, q1);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0303030303030303, q2);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0404040404040404, q3);
  ASSERT_EQUAL_128(0x0505050505050505, 0x0505050505050505, q4);
  ASSERT_EQUAL_128(0x0606060606060606, 0x0606060606060606, q5);
  ASSERT_EQUAL_128(0x0707070707070707, 0x0707070707070707, q6);
  ASSERT_EQUAL_128(0x0808080808080808, 0x0808080808080808, q7);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0706070607060706, q8);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0908090809080908, q9);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0b0a0b0a0b0a0b0a, q10);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0d0c0d0c0d0c0d0c, q11);
  ASSERT_EQUAL_128(0x0807080708070807, 0x0807080708070807, q12);
  ASSERT_EQUAL_128(0x0a090a090a090a09, 0x0a090a090a090a09, q13);
  ASSERT_EQUAL_128(0x0c0b0c0b0c0b0c0b, 0x0c0b0c0b0c0b0c0b, q14);
  ASSERT_EQUAL_128(0x0e0d0e0d0e0d0e0d, 0x0e0d0e0d0e0d0e0d, q15);
  ASSERT_EQUAL_128(0x0000000000000000, 0x1211100f1211100f, q16);
  ASSERT_EQUAL_128(0x0000000000000000, 0x1615141316151413, q17);
  ASSERT_EQUAL_128(0x0000000000000000, 0x1a1918171a191817, q18);
  ASSERT_EQUAL_128(0x0000000000000000, 0x1e1d1c1b1e1d1c1b, q19);
  ASSERT_EQUAL_128(0x1312111013121110, 0x1312111013121110, q20);
  ASSERT_EQUAL_128(0x1716151417161514, 0x1716151417161514, q21);
  ASSERT_EQUAL_128(0x1b1a19181b1a1918, 0x1b1a19181b1a1918, q22);
  ASSERT_EQUAL_128(0x1f1e1d1c1f1e1d1c, 0x1f1e1d1c1f1e1d1c, q23);
  ASSERT_EQUAL_128(0x2726252423222120, 0x2726252423222120, q24);
  ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2f2e2d2c2b2a2928, q25);
  ASSERT_EQUAL_128(0x3736353433323130, 0x3736353433323130, q26);
  ASSERT_EQUAL_128(0x3f3e3d3c3b3a3938, 0x3f3e3d3c3b3a3938, q27);

  TEARDOWN();
}


TEST(neon_ld4_alllanes_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint8_t src[64];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x17, src_base + 1);
  __ Mov(x18, 1);
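  // As with Ld4, the immediate post-index form must step by the full struct
  // size (4 x element size), while the register form (x18 = 1 here) allows
  // an arbitrary stride. The mix below leaves x17 at src_base + 64 on exit.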
  __ Ld4r(v0.V8B(),
          v1.V8B(),
          v2.V8B(),
          v3.V8B(),
          MemOperand(x17, 4, PostIndex));
  __ Ld4r(v4.V16B(),
          v5.V16B(),
          v6.V16B(),
          v7.V16B(),
          MemOperand(x17, x18, PostIndex));
  __ Ld4r(v8.V4H(),
          v9.V4H(),
          v10.V4H(),
          v11.V4H(),
          MemOperand(x17, x18, PostIndex));
  __ Ld4r(v12.V8H(),
          v13.V8H(),
          v14.V8H(),
          v15.V8H(),
          MemOperand(x17, 8, PostIndex));
  __ Ld4r(v16.V2S(),
          v17.V2S(),
          v18.V2S(),
          v19.V2S(),
          MemOperand(x17, x18, PostIndex));
  __ Ld4r(v20.V4S(),
          v21.V4S(),
          v22.V4S(),
          v23.V4S(),
          MemOperand(x17, 16, PostIndex));
  __ Ld4r(v24.V2D(),
          v25.V2D(),
          v26.V2D(),
          v27.V2D(),
          MemOperand(x17, 32, PostIndex));
  END();

  RUN();

  ASSERT_EQUAL_128(0x0000000000000000, 0x0101010101010101, q0);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0202020202020202, q1);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0303030303030303, q2);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0404040404040404, q3);
  ASSERT_EQUAL_128(0x0505050505050505, 0x0505050505050505, q4);
  ASSERT_EQUAL_128(0x0606060606060606, 0x0606060606060606, q5);
  ASSERT_EQUAL_128(0x0707070707070707, 0x0707070707070707, q6);
  ASSERT_EQUAL_128(0x0808080808080808, 0x0808080808080808, q7);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0706070607060706, q8);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0908090809080908, q9);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0b0a0b0a0b0a0b0a, q10);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0d0c0d0c0d0c0d0c, q11);
  ASSERT_EQUAL_128(0x0807080708070807, 0x0807080708070807, q12);
  ASSERT_EQUAL_128(0x0a090a090a090a09, 0x0a090a090a090a09, q13);
  ASSERT_EQUAL_128(0x0c0b0c0b0c0b0c0b, 0x0c0b0c0b0c0b0c0b, q14);
  ASSERT_EQUAL_128(0x0e0d0e0d0e0d0e0d, 0x0e0d0e0d0e0d0e0d, q15);
  ASSERT_EQUAL_128(0x0000000000000000, 0x1211100f1211100f, q16);
  ASSERT_EQUAL_128(0x0000000000000000, 0x1615141316151413, q17);
  ASSERT_EQUAL_128(0x0000000000000000, 0x1a1918171a191817, q18);
  ASSERT_EQUAL_128(0x0000000000000000, 0x1e1d1c1b1e1d1c1b, q19);
  ASSERT_EQUAL_128(0x1312111013121110, 0x1312111013121110, q20);
  ASSERT_EQUAL_128(0x1716151417161514, 0x1716151417161514, q21);
  ASSERT_EQUAL_128(0x1b1a19181b1a1918, 0x1b1a19181b1a1918, q22);
  ASSERT_EQUAL_128(0x1f1e1d1c1f1e1d1c, 0x1f1e1d1c1f1e1d1c, q23);
  ASSERT_EQUAL_128(0x2726252423222120, 0x2726252423222120, q24);
  ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2f2e2d2c2b2a2928, q25);
  ASSERT_EQUAL_128(0x3736353433323130, 0x3736353433323130, q26);
  ASSERT_EQUAL_128(0x3f3e3d3c3b3a3938, 0x3f3e3d3c3b3a3938, q27);
  ASSERT_EQUAL_64(src_base + 64, x17);

  TEARDOWN();
}


TEST(neon_st1_lane) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint8_t src[64];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x17, src_base);
  __ Mov(x18, -16);
  __ Ldr(q0, MemOperand(x17));

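  // St1 (single structure) stores one lane to memory. Each loop below writes
  // lanes 15..0 (or 7..0, etc.) to ascending addresses, then reloads the
  // 16 bytes just written via the x18 = -16 offset to check the result.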
  for (int i = 15; i >= 0; i--) {
    __ St1(v0.B(), i, MemOperand(x17));
    __ Add(x17, x17, 1);
  }
  __ Ldr(q1, MemOperand(x17, x18));

  for (int i = 7; i >= 0; i--) {
    __ St1(v0.H(), i, MemOperand(x17));
    __ Add(x17, x17, 2);
  }
  __ Ldr(q2, MemOperand(x17, x18));

  for (int i = 3; i >= 0; i--) {
    __ St1(v0.S(), i, MemOperand(x17));
    __ Add(x17, x17, 4);
  }
  __ Ldr(q3, MemOperand(x17, x18));

  for (int i = 1; i >= 0; i--) {
    __ St1(v0.D(), i, MemOperand(x17));
    __ Add(x17, x17, 8);
  }
  __ Ldr(q4, MemOperand(x17, x18));

  END();

  RUN();

  ASSERT_EQUAL_128(0x0001020304050607, 0x08090a0b0c0d0e0f, q1);
  ASSERT_EQUAL_128(0x0100030205040706, 0x09080b0a0d0c0f0e, q2);
  ASSERT_EQUAL_128(0x0302010007060504, 0x0b0a09080f0e0d0c, q3);
  ASSERT_EQUAL_128(0x0706050403020100, 0x0f0e0d0c0b0a0908, q4);

  TEARDOWN();
}


TEST(neon_st2_lane) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Struct size * addressing modes * element sizes * vector size.
  uint8_t dst[2 * 2 * 4 * 16];
  memset(dst, 0, sizeof(dst));
  uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);

  START();
  __ Mov(x17, dst_base);
  __ Mov(x18, dst_base);
  __ Movi(v0.V2D(), 0x0001020304050607, 0x08090a0b0c0d0e0f);
  __ Movi(v1.V2D(), 0x1011121314151617, 0x18191a1b1c1d1e1f);

  // Test B stores with and without post index.
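  // St2 with a lane index writes the selected lane of v0 and of v1 as an
  // adjacent pair, so the B loop below produces the byte sequence
  // 00 10 01 11 02 12 ... in memory.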
  for (int i = 15; i >= 0; i--) {
    __ St2(v0.B(), v1.B(), i, MemOperand(x18));
    __ Add(x18, x18, 2);
  }
  for (int i = 15; i >= 0; i--) {
    __ St2(v0.B(), v1.B(), i, MemOperand(x18, 2, PostIndex));
  }
  __ Ldr(q2, MemOperand(x17, 0 * 16));
  __ Ldr(q3, MemOperand(x17, 1 * 16));
  __ Ldr(q4, MemOperand(x17, 2 * 16));
  __ Ldr(q5, MemOperand(x17, 3 * 16));

  // Test H stores with and without post index.
  __ Mov(x0, 4);
  for (int i = 7; i >= 0; i--) {
    __ St2(v0.H(), v1.H(), i, MemOperand(x18));
    __ Add(x18, x18, 4);
  }
  for (int i = 7; i >= 0; i--) {
    __ St2(v0.H(), v1.H(), i, MemOperand(x18, x0, PostIndex));
  }
  __ Ldr(q6, MemOperand(x17, 4 * 16));
  __ Ldr(q7, MemOperand(x17, 5 * 16));
  __ Ldr(q16, MemOperand(x17, 6 * 16));
  __ Ldr(q17, MemOperand(x17, 7 * 16));

  // Test S stores with and without post index.
  for (int i = 3; i >= 0; i--) {
    __ St2(v0.S(), v1.S(), i, MemOperand(x18));
    __ Add(x18, x18, 8);
  }
  for (int i = 3; i >= 0; i--) {
    __ St2(v0.S(), v1.S(), i, MemOperand(x18, 8, PostIndex));
  }
  __ Ldr(q18, MemOperand(x17, 8 * 16));
  __ Ldr(q19, MemOperand(x17, 9 * 16));
  __ Ldr(q20, MemOperand(x17, 10 * 16));
  __ Ldr(q21, MemOperand(x17, 11 * 16));

  // Test D stores with and without post index.
  __ Mov(x0, 16);
  __ St2(v0.D(), v1.D(), 1, MemOperand(x18));
  __ Add(x18, x18, 16);
  __ St2(v0.D(), v1.D(), 0, MemOperand(x18, 16, PostIndex));
  __ St2(v0.D(), v1.D(), 1, MemOperand(x18, x0, PostIndex));
  __ St2(v0.D(), v1.D(), 0, MemOperand(x18, x0, PostIndex));
  __ Ldr(q22, MemOperand(x17, 12 * 16));
  __ Ldr(q23, MemOperand(x17, 13 * 16));
  __ Ldr(q24, MemOperand(x17, 14 * 16));
  __ Ldr(q25, MemOperand(x17, 15 * 16));
  END();

  RUN();

  ASSERT_EQUAL_128(0x1707160615051404, 0x1303120211011000, q2);
  ASSERT_EQUAL_128(0x1f0f1e0e1d0d1c0c, 0x1b0b1a0a19091808, q3);
  ASSERT_EQUAL_128(0x1707160615051404, 0x1303120211011000, q4);
  ASSERT_EQUAL_128(0x1f0f1e0e1d0d1c0c, 0x1b0b1a0a19091808, q5);

  ASSERT_EQUAL_128(0x1617060714150405, 0x1213020310110001, q6);
  ASSERT_EQUAL_128(0x1e1f0e0f1c1d0c0d, 0x1a1b0a0b18190809, q7);
  ASSERT_EQUAL_128(0x1617060714150405, 0x1213020310110001, q16);
  ASSERT_EQUAL_128(0x1e1f0e0f1c1d0c0d, 0x1a1b0a0b18190809, q17);

  ASSERT_EQUAL_128(0x1415161704050607, 0x1011121300010203, q18);
  ASSERT_EQUAL_128(0x1c1d1e1f0c0d0e0f, 0x18191a1b08090a0b, q19);
  ASSERT_EQUAL_128(0x1415161704050607, 0x1011121300010203, q20);
  ASSERT_EQUAL_128(0x1c1d1e1f0c0d0e0f, 0x18191a1b08090a0b, q21);

  ASSERT_EQUAL_128(0x1011121314151617, 0x0001020304050607, q22);
  ASSERT_EQUAL_128(0x18191a1b1c1d1e1f, 0x08090a0b0c0d0e0f, q23);
  ASSERT_EQUAL_128(0x1011121314151617, 0x0001020304050607, q24);
  ASSERT_EQUAL_128(0x18191a1b1c1d1e1f, 0x08090a0b0c0d0e0f, q25);

  TEARDOWN();
}


TEST(neon_st3_lane) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Struct size * addressing modes * element sizes * vector size.
  uint8_t dst[3 * 2 * 4 * 16];
  memset(dst, 0, sizeof(dst));
  uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);

  START();
  __ Mov(x17, dst_base);
  __ Mov(x18, dst_base);
  __ Movi(v0.V2D(), 0x0001020304050607, 0x08090a0b0c0d0e0f);
  __ Movi(v1.V2D(), 0x1011121314151617, 0x18191a1b1c1d1e1f);
  __ Movi(v2.V2D(), 0x2021222324252627, 0x28292a2b2c2d2e2f);

  // Test B stores with and without post index.
  for (int i = 15; i >= 0; i--) {
    __ St3(v0.B(), v1.B(), v2.B(), i, MemOperand(x18));
    __ Add(x18, x18, 3);
  }
  for (int i = 15; i >= 0; i--) {
    __ St3(v0.B(), v1.B(), v2.B(), i, MemOperand(x18, 3, PostIndex));
  }
  __ Ldr(q3, MemOperand(x17, 0 * 16));
  __ Ldr(q4, MemOperand(x17, 1 * 16));
  __ Ldr(q5, MemOperand(x17, 2 * 16));
  __ Ldr(q6, MemOperand(x17, 3 * 16));
  __ Ldr(q7, MemOperand(x17, 4 * 16));
  __ Ldr(q16, MemOperand(x17, 5 * 16));

  // Test H stores with and without post index.
  __ Mov(x0, 6);
  for (int i = 7; i >= 0; i--) {
    __ St3(v0.H(), v1.H(), v2.H(), i, MemOperand(x18));
    __ Add(x18, x18, 6);
  }
  for (int i = 7; i >= 0; i--) {
    __ St3(v0.H(), v1.H(), v2.H(), i, MemOperand(x18, x0, PostIndex));
  }
  __ Ldr(q17, MemOperand(x17, 6 * 16));
  __ Ldr(q18, MemOperand(x17, 7 * 16));
  __ Ldr(q19, MemOperand(x17, 8 * 16));
  __ Ldr(q20, MemOperand(x17, 9 * 16));
  __ Ldr(q21, MemOperand(x17, 10 * 16));
  __ Ldr(q22, MemOperand(x17, 11 * 16));

  // Test S stores with and without post index.
  for (int i = 3; i >= 0; i--) {
    __ St3(v0.S(), v1.S(), v2.S(), i, MemOperand(x18));
    __ Add(x18, x18, 12);
  }
  for (int i = 3; i >= 0; i--) {
    __ St3(v0.S(), v1.S(), v2.S(), i, MemOperand(x18, 12, PostIndex));
  }
  __ Ldr(q23, MemOperand(x17, 12 * 16));
  __ Ldr(q24, MemOperand(x17, 13 * 16));
  __ Ldr(q25, MemOperand(x17, 14 * 16));
  __ Ldr(q26, MemOperand(x17, 15 * 16));
  __ Ldr(q27, MemOperand(x17, 16 * 16));
  __ Ldr(q28, MemOperand(x17, 17 * 16));

  // Test D stores with and without post index.
  __ Mov(x0, 24);
  __ St3(v0.D(), v1.D(), v2.D(), 1, MemOperand(x18));
  __ Add(x18, x18, 24);
  __ St3(v0.D(), v1.D(), v2.D(), 0, MemOperand(x18, 24, PostIndex));
  __ St3(v0.D(), v1.D(), v2.D(), 1, MemOperand(x18, x0, PostIndex));
  __ Ldr(q29, MemOperand(x17, 18 * 16));
  __ Ldr(q30, MemOperand(x17, 19 * 16));
  __ Ldr(q31, MemOperand(x17, 20 * 16));
  END();

  RUN();

  ASSERT_EQUAL_128(0x0524140423130322, 0x1202211101201000, q3);
  ASSERT_EQUAL_128(0x1a0a291909281808, 0x2717072616062515, q4);
  ASSERT_EQUAL_128(0x2f1f0f2e1e0e2d1d, 0x0d2c1c0c2b1b0b2a, q5);
  ASSERT_EQUAL_128(0x0524140423130322, 0x1202211101201000, q6);
  ASSERT_EQUAL_128(0x1a0a291909281808, 0x2717072616062515, q7);
  ASSERT_EQUAL_128(0x2f1f0f2e1e0e2d1d, 0x0d2c1c0c2b1b0b2a, q16);

  ASSERT_EQUAL_128(0x1415040522231213, 0x0203202110110001, q17);
  ASSERT_EQUAL_128(0x0a0b282918190809, 0x2627161706072425, q18);
  ASSERT_EQUAL_128(0x2e2f1e1f0e0f2c2d, 0x1c1d0c0d2a2b1a1b, q19);
  ASSERT_EQUAL_128(0x1415040522231213, 0x0203202110110001, q20);
  ASSERT_EQUAL_128(0x0a0b282918190809, 0x2627161706072425, q21);
  ASSERT_EQUAL_128(0x2e2f1e1f0e0f2c2d, 0x1c1d0c0d2a2b1a1b, q22);

  ASSERT_EQUAL_128(0x0405060720212223, 0x1011121300010203, q23);
  ASSERT_EQUAL_128(0x18191a1b08090a0b, 0x2425262714151617, q24);
  ASSERT_EQUAL_128(0x2c2d2e2f1c1d1e1f, 0x0c0d0e0f28292a2b, q25);
  ASSERT_EQUAL_128(0x0405060720212223, 0x1011121300010203, q26);
  ASSERT_EQUAL_128(0x18191a1b08090a0b, 0x2425262714151617, q27);
  ASSERT_EQUAL_128(0x2c2d2e2f1c1d1e1f, 0x0c0d0e0f28292a2b, q28);
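
  // Check the D stores reloaded into q29..q31. These expected values are
  // derived by hand from the St3 lane semantics (the selected d lane of
  // v0, v1 and v2 is stored consecutively), since the test loads q29..q31
  // but otherwise never verifies them.
  ASSERT_EQUAL_128(0x1011121314151617, 0x0001020304050607, q29);
  ASSERT_EQUAL_128(0x08090a0b0c0d0e0f, 0x2021222324252627, q30);
  ASSERT_EQUAL_128(0x28292a2b2c2d2e2f, 0x18191a1b1c1d1e1f, q31);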

  TEARDOWN();
}


TEST(neon_st4_lane) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Struct size * element sizes * vector size.
  uint8_t dst[4 * 4 * 16];
  memset(dst, 0, sizeof(dst));
  uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);

  START();
  __ Mov(x17, dst_base);
  __ Mov(x18, dst_base);
  __ Movi(v0.V2D(), 0x0001020304050607, 0x08090a0b0c0d0e0f);
  __ Movi(v1.V2D(), 0x1011121314151617, 0x18191a1b1c1d1e1f);
  __ Movi(v2.V2D(), 0x2021222324252627, 0x28292a2b2c2d2e2f);
  __ Movi(v3.V2D(), 0x2021222324252627, 0x28292a2b2c2d2e2f);

  // Test B stores without post index.
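  // Note that v2 and v3 hold identical contents, so each four-byte group
  // stored below ends in two equal 0x2n bytes (e.g. 00 10 20 20).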
  for (int i = 15; i >= 0; i--) {
    __ St4(v0.B(), v1.B(), v2.B(), v3.B(), i, MemOperand(x18));
    __ Add(x18, x18, 4);
  }
  __ Ldr(q4, MemOperand(x17, 0 * 16));
  __ Ldr(q5, MemOperand(x17, 1 * 16));
  __ Ldr(q6, MemOperand(x17, 2 * 16));
  __ Ldr(q7, MemOperand(x17, 3 * 16));

  // Test H stores with post index.
  __ Mov(x0, 8);
  for (int i = 7; i >= 0; i--) {
    __ St4(v0.H(), v1.H(), v2.H(), v3.H(), i, MemOperand(x18, x0, PostIndex));
  }
  __ Ldr(q16, MemOperand(x17, 4 * 16));
  __ Ldr(q17, MemOperand(x17, 5 * 16));
  __ Ldr(q18, MemOperand(x17, 6 * 16));
  __ Ldr(q19, MemOperand(x17, 7 * 16));

  // Test S stores without post index.
  for (int i = 3; i >= 0; i--) {
    __ St4(v0.S(), v1.S(), v2.S(), v3.S(), i, MemOperand(x18));
    __ Add(x18, x18, 16);
  }
  __ Ldr(q20, MemOperand(x17, 8 * 16));
  __ Ldr(q21, MemOperand(x17, 9 * 16));
  __ Ldr(q22, MemOperand(x17, 10 * 16));
  __ Ldr(q23, MemOperand(x17, 11 * 16));

  // Test D stores with post index.
  __ Mov(x0, 32);
  __ St4(v0.D(), v1.D(), v2.D(), v3.D(), 0, MemOperand(x18, 32, PostIndex));
  __ St4(v0.D(), v1.D(), v2.D(), v3.D(), 1, MemOperand(x18, x0, PostIndex));

  __ Ldr(q24, MemOperand(x17, 12 * 16));
  __ Ldr(q25, MemOperand(x17, 13 * 16));
  __ Ldr(q26, MemOperand(x17, 14 * 16));
  __ Ldr(q27, MemOperand(x17, 15 * 16));
  END();

  RUN();

  ASSERT_EQUAL_128(0x2323130322221202, 0x2121110120201000, q4);
  ASSERT_EQUAL_128(0x2727170726261606, 0x2525150524241404, q5);
  ASSERT_EQUAL_128(0x2b2b1b0b2a2a1a0a, 0x2929190928281808, q6);
  ASSERT_EQUAL_128(0x2f2f1f0f2e2e1e0e, 0x2d2d1d0d2c2c1c0c, q7);

  ASSERT_EQUAL_128(0x2223222312130203, 0x2021202110110001, q16);
  ASSERT_EQUAL_128(0x2627262716170607, 0x2425242514150405, q17);
  ASSERT_EQUAL_128(0x2a2b2a2b1a1b0a0b, 0x2829282918190809, q18);
  ASSERT_EQUAL_128(0x2e2f2e2f1e1f0e0f, 0x2c2d2c2d1c1d0c0d, q19);

  ASSERT_EQUAL_128(0x2021222320212223, 0x1011121300010203, q20);
  ASSERT_EQUAL_128(0x2425262724252627, 0x1415161704050607, q21);
  ASSERT_EQUAL_128(0x28292a2b28292a2b, 0x18191a1b08090a0b, q22);
  ASSERT_EQUAL_128(0x2c2d2e2f2c2d2e2f, 0x1c1d1e1f0c0d0e0f, q23);

  ASSERT_EQUAL_128(0x18191a1b1c1d1e1f, 0x08090a0b0c0d0e0f, q24);
  ASSERT_EQUAL_128(0x28292a2b2c2d2e2f, 0x28292a2b2c2d2e2f, q25);
  ASSERT_EQUAL_128(0x1011121314151617, 0x0001020304050607, q26);
  ASSERT_EQUAL_128(0x2021222324252627, 0x2021222324252627, q27);

  TEARDOWN();
}


TEST(neon_ld1_lane_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint8_t src[64];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x17, src_base);
  __ Mov(x18, src_base);
  __ Mov(x19, src_base);
  __ Mov(x20, src_base);
  __ Mov(x21, src_base);
  __ Mov(x22, src_base);
  __ Mov(x23, src_base);
  __ Mov(x24, src_base);

  // Test loading whole register by element.
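  // Ld1 (single structure) post-indexes by the element size (1, 2, 4 or 8
  // bytes), so each loop steps through src one element at a time.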
  for (int i = 15; i >= 0; i--) {
    __ Ld1(v0.B(), i, MemOperand(x17, 1, PostIndex));
  }

  for (int i = 7; i >= 0; i--) {
    __ Ld1(v1.H(), i, MemOperand(x18, 2, PostIndex));
  }

  for (int i = 3; i >= 0; i--) {
    __ Ld1(v2.S(), i, MemOperand(x19, 4, PostIndex));
  }

  for (int i = 1; i >= 0; i--) {
    __ Ld1(v3.D(), i, MemOperand(x20, 8, PostIndex));
  }

  // Test loading a single element into an initialised register.
  __ Mov(x25, 1);
  __ Ldr(q4, MemOperand(x21));
  __ Ld1(v4.B(), 4, MemOperand(x21, x25, PostIndex));
  __ Add(x25, x25, 1);

  __ Ldr(q5, MemOperand(x22));
  __ Ld1(v5.H(), 3, MemOperand(x22, x25, PostIndex));
  __ Add(x25, x25, 1);

  __ Ldr(q6, MemOperand(x23));
  __ Ld1(v6.S(), 2, MemOperand(x23, x25, PostIndex));
  __ Add(x25, x25, 1);

  __ Ldr(q7, MemOperand(x24));
  __ Ld1(v7.D(), 1, MemOperand(x24, x25, PostIndex));

  END();

  RUN();

  ASSERT_EQUAL_128(0x0001020304050607, 0x08090a0b0c0d0e0f, q0);
  ASSERT_EQUAL_128(0x0100030205040706, 0x09080b0a0d0c0f0e, q1);
  ASSERT_EQUAL_128(0x0302010007060504, 0x0b0a09080f0e0d0c, q2);
  ASSERT_EQUAL_128(0x0706050403020100, 0x0f0e0d0c0b0a0908, q3);
  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050003020100, q4);
  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0100050403020100, q5);
  ASSERT_EQUAL_128(0x0f0e0d0c03020100, 0x0706050403020100, q6);
  ASSERT_EQUAL_128(0x0706050403020100, 0x0706050403020100, q7);
  ASSERT_EQUAL_64(src_base + 16, x17);
  ASSERT_EQUAL_64(src_base + 16, x18);
  ASSERT_EQUAL_64(src_base + 16, x19);
  ASSERT_EQUAL_64(src_base + 16, x20);
  ASSERT_EQUAL_64(src_base + 1, x21);
  ASSERT_EQUAL_64(src_base + 2, x22);
  ASSERT_EQUAL_64(src_base + 3, x23);
  ASSERT_EQUAL_64(src_base + 4, x24);

  TEARDOWN();
}


TEST(neon_st1_lane_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint8_t src[64];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x17, src_base);
  __ Mov(x18, -16);
  __ Ldr(q0, MemOperand(x17));

  for (int i = 15; i >= 0; i--) {
    __ St1(v0.B(), i, MemOperand(x17, 1, PostIndex));
  }
  __ Ldr(q1, MemOperand(x17, x18));

  for (int i = 7; i >= 0; i--) {
    __ St1(v0.H(), i, MemOperand(x17, 2, PostIndex));
  }
  __ Ldr(q2, MemOperand(x17, x18));

  for (int i = 3; i >= 0; i--) {
    __ St1(v0.S(), i, MemOperand(x17, 4, PostIndex));
  }
  __ Ldr(q3, MemOperand(x17, x18));

  for (int i = 1; i >= 0; i--) {
    __ St1(v0.D(), i, MemOperand(x17, 8, PostIndex));
  }
  __ Ldr(q4, MemOperand(x17, x18));

  END();

  RUN();

  ASSERT_EQUAL_128(0x0001020304050607, 0x08090a0b0c0d0e0f, q1);
  ASSERT_EQUAL_128(0x0100030205040706, 0x09080b0a0d0c0f0e, q2);
  ASSERT_EQUAL_128(0x0302010007060504, 0x0b0a09080f0e0d0c, q3);
  ASSERT_EQUAL_128(0x0706050403020100, 0x0f0e0d0c0b0a0908, q4);

  TEARDOWN();
}


TEST(neon_ld1_alllanes) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint8_t src[64];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x17, src_base + 1);
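  // Ld1r loads a single element and replicates it to every lane. For the
  // 64-bit forms, V1D fills only d6 (the upper half of q6 stays zero) while
  // V2D fills both halves of q7.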
  __ Ld1r(v0.V8B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld1r(v1.V16B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld1r(v2.V4H(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld1r(v3.V8H(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld1r(v4.V2S(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld1r(v5.V4S(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld1r(v6.V1D(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld1r(v7.V2D(), MemOperand(x17));
  END();

  RUN();

  ASSERT_EQUAL_128(0, 0x0101010101010101, q0);
  ASSERT_EQUAL_128(0x0202020202020202, 0x0202020202020202, q1);
  ASSERT_EQUAL_128(0, 0x0403040304030403, q2);
  ASSERT_EQUAL_128(0x0504050405040504, 0x0504050405040504, q3);
  ASSERT_EQUAL_128(0, 0x0807060508070605, q4);
  ASSERT_EQUAL_128(0x0908070609080706, 0x0908070609080706, q5);
  ASSERT_EQUAL_128(0, 0x0e0d0c0b0a090807, q6);
  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0f0e0d0c0b0a0908, q7);

  TEARDOWN();
}


TEST(neon_ld1_alllanes_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint8_t src[64];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x17, src_base + 1);
  __ Mov(x18, 1);
  __ Ld1r(v0.V8B(), MemOperand(x17, 1, PostIndex));
  __ Ld1r(v1.V16B(), MemOperand(x17, x18, PostIndex));
  __ Ld1r(v2.V4H(), MemOperand(x17, x18, PostIndex));
  __ Ld1r(v3.V8H(), MemOperand(x17, 2, PostIndex));
  __ Ld1r(v4.V2S(), MemOperand(x17, x18, PostIndex));
  __ Ld1r(v5.V4S(), MemOperand(x17, 4, PostIndex));
  __ Ld1r(v6.V2D(), MemOperand(x17, 8, PostIndex));
  END();

  RUN();

  ASSERT_EQUAL_128(0, 0x0101010101010101, q0);
  ASSERT_EQUAL_128(0x0202020202020202, 0x0202020202020202, q1);
  ASSERT_EQUAL_128(0, 0x0403040304030403, q2);
  ASSERT_EQUAL_128(0x0504050405040504, 0x0504050405040504, q3);
  ASSERT_EQUAL_128(0, 0x0908070609080706, q4);
  ASSERT_EQUAL_128(0x0a0908070a090807, 0x0a0908070a090807, q5);
  ASSERT_EQUAL_128(0x1211100f0e0d0c0b, 0x1211100f0e0d0c0b, q6);
  ASSERT_EQUAL_64(src_base + 19, x17);

  TEARDOWN();
}


TEST(neon_st1_d) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint8_t src[14 * kDRegSizeInBytes];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x17, src_base);
  __ Ldr(q0, MemOperand(x17, 16, PostIndex));
  __ Ldr(q1, MemOperand(x17, 16, PostIndex));
  __ Ldr(q2, MemOperand(x17, 16, PostIndex));
  __ Ldr(q3, MemOperand(x17, 16, PostIndex));
  __ Mov(x17, src_base);

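  // St1 with multiple registers stores them back-to-back without any
  // interleaving: the two-register form below writes all of v0.V8B()
  // followed by all of v1.V8B(), which the Ldr of q17 reads back as one
  // 16-byte block.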
  __ St1(v0.V8B(), MemOperand(x17));
  __ Ldr(d16, MemOperand(x17, 8, PostIndex));

  __ St1(v0.V8B(), v1.V8B(), MemOperand(x17));
  __ Ldr(q17, MemOperand(x17, 16, PostIndex));

  __ St1(v0.V4H(), v1.V4H(), v2.V4H(), MemOperand(x17));
  __ Ldr(d18, MemOperand(x17, 8, PostIndex));
  __ Ldr(d19, MemOperand(x17, 8, PostIndex));
  __ Ldr(d20, MemOperand(x17, 8, PostIndex));

  __ St1(v0.V2S(), v1.V2S(), v2.V2S(), v3.V2S(), MemOperand(x17));
  __ Ldr(q21, MemOperand(x17, 16, PostIndex));
  __ Ldr(q22, MemOperand(x17, 16, PostIndex));

  __ St1(v0.V1D(), v1.V1D(), v2.V1D(), v3.V1D(), MemOperand(x17));
  __ Ldr(q23, MemOperand(x17, 16, PostIndex));
  __ Ldr(q24, MemOperand(x17));
  END();

  RUN();

  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q0);
  ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716151413121110, q1);
  ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726252423222120, q2);
  ASSERT_EQUAL_128(0x3f3e3d3c3b3a3938, 0x3736353433323130, q3);
  ASSERT_EQUAL_128(0, 0x0706050403020100, q16);
  ASSERT_EQUAL_128(0x1716151413121110, 0x0706050403020100, q17);
  ASSERT_EQUAL_128(0, 0x0706050403020100, q18);
  ASSERT_EQUAL_128(0, 0x1716151413121110, q19);
  ASSERT_EQUAL_128(0, 0x2726252423222120, q20);
  ASSERT_EQUAL_128(0x1716151413121110, 0x0706050403020100, q21);
  ASSERT_EQUAL_128(0x3736353433323130, 0x2726252423222120, q22);
  ASSERT_EQUAL_128(0x1716151413121110, 0x0706050403020100, q23);
  ASSERT_EQUAL_128(0x3736353433323130, 0x2726252423222120, q24);

  TEARDOWN();
}


TEST(neon_st1_d_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint8_t src[64 + 14 * kDRegSizeInBytes];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x17, src_base);
  __ Mov(x18, -8);
  __ Mov(x19, -16);
  __ Mov(x20, -24);
  __ Mov(x21, -32);
  __ Ldr(q0, MemOperand(x17, 16, PostIndex));
  __ Ldr(q1, MemOperand(x17, 16, PostIndex));
  __ Ldr(q2, MemOperand(x17, 16, PostIndex));
  __ Ldr(q3, MemOperand(x17, 16, PostIndex));
  __ Mov(x17, src_base);

  __ St1(v0.V8B(), MemOperand(x17, 8, PostIndex));
  __ Ldr(d16, MemOperand(x17, x18));

  __ St1(v0.V8B(), v1.V8B(), MemOperand(x17, 16, PostIndex));
  __ Ldr(q17, MemOperand(x17, x19));

  __ St1(v0.V4H(), v1.V4H(), v2.V4H(), MemOperand(x17, 24, PostIndex));
  __ Ldr(d18, MemOperand(x17, x20));
  __ Ldr(d19, MemOperand(x17, x19));
  __ Ldr(d20, MemOperand(x17, x18));

  __ St1(v0.V2S(),
         v1.V2S(),
         v2.V2S(),
         v3.V2S(),
         MemOperand(x17, 32, PostIndex));
  __ Ldr(q21, MemOperand(x17, x21));
  __ Ldr(q22, MemOperand(x17, x19));

  __ St1(v0.V1D(),
         v1.V1D(),
         v2.V1D(),
         v3.V1D(),
         MemOperand(x17, 32, PostIndex));
  __ Ldr(q23, MemOperand(x17, x21));
  __ Ldr(q24, MemOperand(x17, x19));
  END();

  RUN();

  ASSERT_EQUAL_128(0, 0x0706050403020100, q16);
  ASSERT_EQUAL_128(0x1716151413121110, 0x0706050403020100, q17);
  ASSERT_EQUAL_128(0, 0x0706050403020100, q18);
  ASSERT_EQUAL_128(0, 0x1716151413121110, q19);
  ASSERT_EQUAL_128(0, 0x2726252423222120, q20);
  ASSERT_EQUAL_128(0x1716151413121110, 0x0706050403020100, q21);
  ASSERT_EQUAL_128(0x3736353433323130, 0x2726252423222120, q22);
  ASSERT_EQUAL_128(0x1716151413121110, 0x0706050403020100, q23);
  ASSERT_EQUAL_128(0x3736353433323130, 0x2726252423222120, q24);

  TEARDOWN();
}


TEST(neon_st1_q) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint8_t src[64 + 160];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x17, src_base);
  __ Ldr(q0, MemOperand(x17, 16, PostIndex));
  __ Ldr(q1, MemOperand(x17, 16, PostIndex));
  __ Ldr(q2, MemOperand(x17, 16, PostIndex));
  __ Ldr(q3, MemOperand(x17, 16, PostIndex));

  __ St1(v0.V16B(), MemOperand(x17));
  __ Ldr(q16, MemOperand(x17, 16, PostIndex));

  __ St1(v0.V8H(), v1.V8H(), MemOperand(x17));
  __ Ldr(q17, MemOperand(x17, 16, PostIndex));
  __ Ldr(q18, MemOperand(x17, 16, PostIndex));

  __ St1(v0.V4S(), v1.V4S(), v2.V4S(), MemOperand(x17));
  __ Ldr(q19, MemOperand(x17, 16, PostIndex));
  __ Ldr(q20, MemOperand(x17, 16, PostIndex));
  __ Ldr(q21, MemOperand(x17, 16, PostIndex));

  __ St1(v0.V2D(), v1.V2D(), v2.V2D(), v3.V2D(), MemOperand(x17));
  __ Ldr(q22, MemOperand(x17, 16, PostIndex));
  __ Ldr(q23, MemOperand(x17, 16, PostIndex));
  __ Ldr(q24, MemOperand(x17, 16, PostIndex));
  __ Ldr(q25, MemOperand(x17));
  END();

  RUN();

  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q16);
  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q17);
  ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716151413121110, q18);
  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q19);
  ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716151413121110, q20);
  ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726252423222120, q21);
  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q22);
  ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716151413121110, q23);
  ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726252423222120, q24);
  ASSERT_EQUAL_128(0x3f3e3d3c3b3a3938, 0x3736353433323130, q25);

  TEARDOWN();
}


TEST(neon_st1_q_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint8_t src[64 + 160];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x17, src_base);
  __ Mov(x18, -16);
  __ Mov(x19, -32);
  __ Mov(x20, -48);
  __ Mov(x21, -64);
  __ Ldr(q0, MemOperand(x17, 16, PostIndex));
  __ Ldr(q1, MemOperand(x17, 16, PostIndex));
  __ Ldr(q2, MemOperand(x17, 16, PostIndex));
  __ Ldr(q3, MemOperand(x17, 16, PostIndex));

  __ St1(v0.V16B(), MemOperand(x17, 16, PostIndex));
  __ Ldr(q16, MemOperand(x17, x18));

  __ St1(v0.V8H(), v1.V8H(), MemOperand(x17, 32, PostIndex));
  __ Ldr(q17, MemOperand(x17, x19));
  __ Ldr(q18, MemOperand(x17, x18));

  __ St1(v0.V4S(), v1.V4S(), v2.V4S(), MemOperand(x17, 48, PostIndex));
  __ Ldr(q19, MemOperand(x17, x20));
  __ Ldr(q20, MemOperand(x17, x19));
  __ Ldr(q21, MemOperand(x17, x18));

  __ St1(v0.V2D(),
         v1.V2D(),
         v2.V2D(),
         v3.V2D(),
         MemOperand(x17, 64, PostIndex));
  __ Ldr(q22, MemOperand(x17, x21));
  __ Ldr(q23, MemOperand(x17, x20));
  __ Ldr(q24, MemOperand(x17, x19));
  __ Ldr(q25, MemOperand(x17, x18));

  END();

  RUN();

  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q16);
  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q17);
  ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716151413121110, q18);
  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q19);
  ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716151413121110, q20);
  ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726252423222120, q21);
  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q22);
  ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716151413121110, q23);
  ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726252423222120, q24);
  ASSERT_EQUAL_128(0x3f3e3d3c3b3a3938, 0x3736353433323130, q25);

  TEARDOWN();
}


TEST(neon_st2_d) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint8_t src[4 * 16];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x17, src_base);
  __ Mov(x18, src_base);
  __ Ldr(q0, MemOperand(x17, 16, PostIndex));
  __ Ldr(q1, MemOperand(x17, 16, PostIndex));

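  // The odd advances (22, then 11) make the three St2 stores below overlap,
  // so the reloaded q0..q3 mix interleaved bytes from different element
  // sizes.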
  __ St2(v0.V8B(), v1.V8B(), MemOperand(x18));
  __ Add(x18, x18, 22);
  __ St2(v0.V4H(), v1.V4H(), MemOperand(x18));
  __ Add(x18, x18, 11);
  __ St2(v0.V2S(), v1.V2S(), MemOperand(x18));

  __ Mov(x19, src_base);
  __ Ldr(q0, MemOperand(x19, 16, PostIndex));
  __ Ldr(q1, MemOperand(x19, 16, PostIndex));
  __ Ldr(q2, MemOperand(x19, 16, PostIndex));
  __ Ldr(q3, MemOperand(x19, 16, PostIndex));

  END();

  RUN();

  ASSERT_EQUAL_128(0x1707160615051404, 0x1303120211011000, q0);
  ASSERT_EQUAL_128(0x0504131203021110, 0x0100151413121110, q1);
  ASSERT_EQUAL_128(0x1615140706050413, 0x1211100302010014, q2);
  ASSERT_EQUAL_128(0x3f3e3d3c3b3a3938, 0x3736353433323117, q3);

  TEARDOWN();
}


TEST(neon_st2_d_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint8_t src[4 * 16];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x22, 5);
  __ Mov(x17, src_base);
  __ Mov(x18, src_base);
  __ Ldr(q0, MemOperand(x17, 16, PostIndex));
  __ Ldr(q1, MemOperand(x17, 16, PostIndex));

  __ St2(v0.V8B(), v1.V8B(), MemOperand(x18, x22, PostIndex));
  __ St2(v0.V4H(), v1.V4H(), MemOperand(x18, 16, PostIndex));
  __ St2(v0.V2S(), v1.V2S(), MemOperand(x18));


  __ Mov(x19, src_base);
  __ Ldr(q0, MemOperand(x19, 16, PostIndex));
  __ Ldr(q1, MemOperand(x19, 16, PostIndex));
  __ Ldr(q2, MemOperand(x19, 16, PostIndex));

  END();

  RUN();

  ASSERT_EQUAL_128(0x1405041312030211, 0x1001000211011000, q0);
  ASSERT_EQUAL_128(0x0605041312111003, 0x0201001716070615, q1);
  ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726251716151407, q2);

  TEARDOWN();
}


TEST(neon_st2_q) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint8_t src[5 * 16];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x17, src_base);
  __ Mov(x18, src_base);
  __ Ldr(q0, MemOperand(x17, 16, PostIndex));
  __ Ldr(q1, MemOperand(x17, 16, PostIndex));

  __ St2(v0.V16B(), v1.V16B(), MemOperand(x18));
  __ Add(x18, x18, 8);
  __ St2(v0.V8H(), v1.V8H(), MemOperand(x18));
  __ Add(x18, x18, 22);
  __ St2(v0.V4S(), v1.V4S(), MemOperand(x18));
  __ Add(x18, x18, 2);
  __ St2(v0.V2D(), v1.V2D(), MemOperand(x18));

  __ Mov(x19, src_base);
  __ Ldr(q0, MemOperand(x19, 16, PostIndex));
  __ Ldr(q1, MemOperand(x19, 16, PostIndex));
  __ Ldr(q2, MemOperand(x19, 16, PostIndex));
  __ Ldr(q3, MemOperand(x19, 16, PostIndex));

  END();

  RUN();

  ASSERT_EQUAL_128(0x1312030211100100, 0x1303120211011000, q0);
  ASSERT_EQUAL_128(0x01000b0a19180908, 0x1716070615140504, q1);
  ASSERT_EQUAL_128(0x1716151413121110, 0x0706050403020100, q2);
  ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x0f0e0d0c0b0a0908, q3);
  TEARDOWN();
}


TEST(neon_st2_q_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint8_t src[5 * 16];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x22, 5);
  __ Mov(x17, src_base);
  __ Mov(x18, src_base);
  __ Ldr(q0, MemOperand(x17, 16, PostIndex));
  __ Ldr(q1, MemOperand(x17, 16, PostIndex));

  __ St2(v0.V16B(), v1.V16B(), MemOperand(x18, x22, PostIndex));
  __ St2(v0.V8H(), v1.V8H(), MemOperand(x18, 32, PostIndex));
  __ St2(v0.V4S(), v1.V4S(), MemOperand(x18, x22, PostIndex));
  __ St2(v0.V2D(), v1.V2D(), MemOperand(x18));

  __ Mov(x19, src_base);
  __ Ldr(q0, MemOperand(x19, 16, PostIndex));
  __ Ldr(q1, MemOperand(x19, 16, PostIndex));
  __ Ldr(q2, MemOperand(x19, 16, PostIndex));
  __ Ldr(q3, MemOperand(x19, 16, PostIndex));
  __ Ldr(q4, MemOperand(x19, 16, PostIndex));

  END();

  RUN();

  ASSERT_EQUAL_128(0x1405041312030211, 0x1001000211011000, q0);
  ASSERT_EQUAL_128(0x1c0d0c1b1a0b0a19, 0x1809081716070615, q1);
  ASSERT_EQUAL_128(0x0504030201001003, 0x0201001f1e0f0e1d, q2);
  ASSERT_EQUAL_128(0x0d0c0b0a09081716, 0x1514131211100706, q3);
  ASSERT_EQUAL_128(0x4f4e4d4c4b4a1f1e, 0x1d1c1b1a19180f0e, q4);

  TEARDOWN();
}


TEST(neon_st3_d) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint8_t src[3 * 16];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x17, src_base);
  __ Mov(x18, src_base);
  __ Ldr(q0, MemOperand(x17, 16, PostIndex));
  __ Ldr(q1, MemOperand(x17, 16, PostIndex));
  __ Ldr(q2, MemOperand(x17, 16, PostIndex));

  __ St3(v0.V8B(), v1.V8B(), v2.V8B(), MemOperand(x18));
  __ Add(x18, x18, 3);
  __ St3(v0.V4H(), v1.V4H(), v2.V4H(), MemOperand(x18));
  __ Add(x18, x18, 2);
  __ St3(v0.V2S(), v1.V2S(), v2.V2S(), MemOperand(x18));


  __ Mov(x19, src_base);
  __ Ldr(q0, MemOperand(x19, 16, PostIndex));
  __ Ldr(q1, MemOperand(x19, 16, PostIndex));

  END();

  RUN();

  ASSERT_EQUAL_128(0x2221201312111003, 0x0201000100201000, q0);
  ASSERT_EQUAL_128(0x1f1e1d2726252417, 0x1615140706050423, q1);

  TEARDOWN();
}


TEST(neon_st3_d_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint8_t src[4 * 16];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x22, 5);
  __ Mov(x17, src_base);
  __ Mov(x18, src_base);
  __ Ldr(q0, MemOperand(x17, 16, PostIndex));
  __ Ldr(q1, MemOperand(x17, 16, PostIndex));
  __ Ldr(q2, MemOperand(x17, 16, PostIndex));

  __ St3(v0.V8B(), v1.V8B(), v2.V8B(), MemOperand(x18, x22, PostIndex));
  __ St3(v0.V4H(), v1.V4H(), v2.V4H(), MemOperand(x18, 24, PostIndex));
  __ St3(v0.V2S(), v1.V2S(), v2.V2S(), MemOperand(x18));


  __ Mov(x19, src_base);
  __ Ldr(q0, MemOperand(x19, 16, PostIndex));
  __ Ldr(q1, MemOperand(x19, 16, PostIndex));
  __ Ldr(q2, MemOperand(x19, 16, PostIndex));
  __ Ldr(q3, MemOperand(x19, 16, PostIndex));

  END();

  RUN();

  ASSERT_EQUAL_128(0x2213120302212011, 0x1001001101201000, q0);
  ASSERT_EQUAL_128(0x0201002726171607, 0x0625241514050423, q1);
  ASSERT_EQUAL_128(0x1615140706050423, 0x2221201312111003, q2);
  ASSERT_EQUAL_128(0x3f3e3d3c3b3a3938, 0x3736352726252417, q3);

  TEARDOWN();
}


TEST(neon_st3_q) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint8_t src[6 * 16];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x17, src_base);
  __ Mov(x18, src_base);
  __ Ldr(q0, MemOperand(x17, 16, PostIndex));
  __ Ldr(q1, MemOperand(x17, 16, PostIndex));
  __ Ldr(q2, MemOperand(x17, 16, PostIndex));

  __ St3(v0.V16B(), v1.V16B(), v2.V16B(), MemOperand(x18));
  __ Add(x18, x18, 5);
  __ St3(v0.V8H(), v1.V8H(), v2.V8H(), MemOperand(x18));
  __ Add(x18, x18, 12);
  __ St3(v0.V4S(), v1.V4S(), v2.V4S(), MemOperand(x18));
  __ Add(x18, x18, 22);
  __ St3(v0.V2D(), v1.V2D(), v2.V2D(), MemOperand(x18));

  __ Mov(x19, src_base);
  __ Ldr(q0, MemOperand(x19, 16, PostIndex));
  __ Ldr(q1, MemOperand(x19, 16, PostIndex));
  __ Ldr(q2, MemOperand(x19, 16, PostIndex));
  __ Ldr(q3, MemOperand(x19, 16, PostIndex));
  __ Ldr(q4, MemOperand(x19, 16, PostIndex));
  __ Ldr(q5, MemOperand(x19, 16, PostIndex));

  END();

  RUN();

  ASSERT_EQUAL_128(0x2213120302212011, 0x1001001101201000, q0);
  ASSERT_EQUAL_128(0x0605042322212013, 0x1211100302010023, q1);
  ASSERT_EQUAL_128(0x1007060504030201, 0x0025241716151407, q2);
  ASSERT_EQUAL_128(0x0827262524232221, 0x2017161514131211, q3);
  ASSERT_EQUAL_128(0x281f1e1d1c1b1a19, 0x180f0e0d0c0b0a09, q4);
  ASSERT_EQUAL_128(0x5f5e5d5c5b5a5958, 0x572f2e2d2c2b2a29, q5);

  TEARDOWN();
}


TEST(neon_st3_q_postindex)6637 TEST(neon_st3_q_postindex) {
6638   SETUP_WITH_FEATURES(CPUFeatures::kNEON);
6639 
6640   uint8_t src[7 * 16];
6641   for (unsigned i = 0; i < sizeof(src); i++) {
6642     src[i] = i;
6643   }
6644   uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
6645 
6646   START();
6647   __ Mov(x22, 5);
6648   __ Mov(x17, src_base);
6649   __ Mov(x18, src_base);
6650   __ Ldr(q0, MemOperand(x17, 16, PostIndex));
6651   __ Ldr(q1, MemOperand(x17, 16, PostIndex));
6652   __ Ldr(q2, MemOperand(x17, 16, PostIndex));
6653 
6654   __ St3(v0.V16B(), v1.V16B(), v2.V16B(), MemOperand(x18, x22, PostIndex));
6655   __ St3(v0.V8H(), v1.V8H(), v2.V8H(), MemOperand(x18, 48, PostIndex));
6656   __ St3(v0.V4S(), v1.V4S(), v2.V4S(), MemOperand(x18, x22, PostIndex));
6657   __ St3(v0.V2D(), v1.V2D(), v2.V2D(), MemOperand(x18));
6658 
6659   __ Mov(x19, src_base);
6660   __ Ldr(q0, MemOperand(x19, 16, PostIndex));
6661   __ Ldr(q1, MemOperand(x19, 16, PostIndex));
6662   __ Ldr(q2, MemOperand(x19, 16, PostIndex));
6663   __ Ldr(q3, MemOperand(x19, 16, PostIndex));
6664   __ Ldr(q4, MemOperand(x19, 16, PostIndex));
6665   __ Ldr(q5, MemOperand(x19, 16, PostIndex));
6666   __ Ldr(q6, MemOperand(x19, 16, PostIndex));
6667 
6668   END();
6669 
6670   RUN();
6671 
6672   ASSERT_EQUAL_128(0x2213120302212011, 0x1001001101201000, q0);
6673   ASSERT_EQUAL_128(0x1809082726171607, 0x0625241514050423, q1);
6674   ASSERT_EQUAL_128(0x0e2d2c1d1c0d0c2b, 0x2a1b1a0b0a292819, q2);
6675   ASSERT_EQUAL_128(0x0504030201001003, 0x0201002f2e1f1e0f, q3);
6676   ASSERT_EQUAL_128(0x2524232221201716, 0x1514131211100706, q4);
6677   ASSERT_EQUAL_128(0x1d1c1b1a19180f0e, 0x0d0c0b0a09082726, q5);
6678   ASSERT_EQUAL_128(0x6f6e6d6c6b6a2f2e, 0x2d2c2b2a29281f1e, q6);
6679 
6680   TEARDOWN();
6681 }
6682 
6683 
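// ST4 is the four-register version of the interleaving stores exercised
// above: elements from four source registers are interleaved into memory.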
TEST(neon_st4_d) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint8_t src[4 * 16];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x17, src_base);
  __ Mov(x18, src_base);
  __ Ldr(q0, MemOperand(x17, 16, PostIndex));
  __ Ldr(q1, MemOperand(x17, 16, PostIndex));
  __ Ldr(q2, MemOperand(x17, 16, PostIndex));
  __ Ldr(q3, MemOperand(x17, 16, PostIndex));

  __ St4(v0.V8B(), v1.V8B(), v2.V8B(), v3.V8B(), MemOperand(x18));
  __ Add(x18, x18, 12);
  __ St4(v0.V4H(), v1.V4H(), v2.V4H(), v3.V4H(), MemOperand(x18));
  __ Add(x18, x18, 15);
  __ St4(v0.V2S(), v1.V2S(), v2.V2S(), v3.V2S(), MemOperand(x18));


  __ Mov(x19, src_base);
  __ Ldr(q0, MemOperand(x19, 16, PostIndex));
  __ Ldr(q1, MemOperand(x19, 16, PostIndex));
  __ Ldr(q2, MemOperand(x19, 16, PostIndex));
  __ Ldr(q3, MemOperand(x19, 16, PostIndex));

  END();

  RUN();

  ASSERT_EQUAL_128(0x1110010032221202, 0x3121110130201000, q0);
  ASSERT_EQUAL_128(0x1003020100322322, 0x1312030231302120, q1);
  ASSERT_EQUAL_128(0x1407060504333231, 0x3023222120131211, q2);
  ASSERT_EQUAL_128(0x3f3e3d3c3b373635, 0x3427262524171615, q3);

  TEARDOWN();
}


TEST(neon_st4_d_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint8_t src[5 * 16];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x22, 5);
  __ Mov(x17, src_base);
  __ Mov(x18, src_base);
  __ Ldr(q0, MemOperand(x17, 16, PostIndex));
  __ Ldr(q1, MemOperand(x17, 16, PostIndex));
  __ Ldr(q2, MemOperand(x17, 16, PostIndex));
  __ Ldr(q3, MemOperand(x17, 16, PostIndex));

  __ St4(v0.V8B(),
         v1.V8B(),
         v2.V8B(),
         v3.V8B(),
         MemOperand(x18, x22, PostIndex));
  __ St4(v0.V4H(),
         v1.V4H(),
         v2.V4H(),
         v3.V4H(),
         MemOperand(x18, 32, PostIndex));
  __ St4(v0.V2S(), v1.V2S(), v2.V2S(), v3.V2S(), MemOperand(x18));


  __ Mov(x19, src_base);
  __ Ldr(q0, MemOperand(x19, 16, PostIndex));
  __ Ldr(q1, MemOperand(x19, 16, PostIndex));
  __ Ldr(q2, MemOperand(x19, 16, PostIndex));
  __ Ldr(q3, MemOperand(x19, 16, PostIndex));
  __ Ldr(q4, MemOperand(x19, 16, PostIndex));

  END();

  RUN();

  ASSERT_EQUAL_128(0x1203023130212011, 0x1001000130201000, q0);
  ASSERT_EQUAL_128(0x1607063534252415, 0x1405043332232213, q1);
  ASSERT_EQUAL_128(0x2221201312111003, 0x0201003736272617, q2);
  ASSERT_EQUAL_128(0x2625241716151407, 0x0605043332313023, q3);
  ASSERT_EQUAL_128(0x4f4e4d4c4b4a4948, 0x4746453736353427, q4);

  TEARDOWN();
}


TEST(neon_st4_q) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint8_t src[7 * 16];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x17, src_base);
  __ Mov(x18, src_base);
  __ Ldr(q0, MemOperand(x17, 16, PostIndex));
  __ Ldr(q1, MemOperand(x17, 16, PostIndex));
  __ Ldr(q2, MemOperand(x17, 16, PostIndex));
  __ Ldr(q3, MemOperand(x17, 16, PostIndex));

  __ St4(v0.V16B(), v1.V16B(), v2.V16B(), v3.V16B(), MemOperand(x18));
  __ Add(x18, x18, 5);
  __ St4(v0.V8H(), v1.V8H(), v2.V8H(), v3.V8H(), MemOperand(x18));
  __ Add(x18, x18, 12);
  __ St4(v0.V4S(), v1.V4S(), v2.V4S(), v3.V4S(), MemOperand(x18));
  __ Add(x18, x18, 22);
  __ St4(v0.V2D(), v1.V2D(), v2.V2D(), v3.V2D(), MemOperand(x18));
  __ Add(x18, x18, 10);

  __ Mov(x19, src_base);
  __ Ldr(q0, MemOperand(x19, 16, PostIndex));
  __ Ldr(q1, MemOperand(x19, 16, PostIndex));
  __ Ldr(q2, MemOperand(x19, 16, PostIndex));
  __ Ldr(q3, MemOperand(x19, 16, PostIndex));
  __ Ldr(q4, MemOperand(x19, 16, PostIndex));
  __ Ldr(q5, MemOperand(x19, 16, PostIndex));
  __ Ldr(q6, MemOperand(x19, 16, PostIndex));

  END();

  RUN();

  ASSERT_EQUAL_128(0x1203023130212011, 0x1001000130201000, q0);
  ASSERT_EQUAL_128(0x3231302322212013, 0x1211100302010013, q1);
  ASSERT_EQUAL_128(0x1007060504030201, 0x0015140706050433, q2);
  ASSERT_EQUAL_128(0x3027262524232221, 0x2017161514131211, q3);
  ASSERT_EQUAL_128(0x180f0e0d0c0b0a09, 0x0837363534333231, q4);
  ASSERT_EQUAL_128(0x382f2e2d2c2b2a29, 0x281f1e1d1c1b1a19, q5);
  ASSERT_EQUAL_128(0x6f6e6d6c6b6a6968, 0x673f3e3d3c3b3a39, q6);

  TEARDOWN();
}


TEST(neon_st4_q_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint8_t src[9 * 16];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x22, 5);
  __ Mov(x17, src_base);
  __ Mov(x18, src_base);
  __ Ldr(q0, MemOperand(x17, 16, PostIndex));
  __ Ldr(q1, MemOperand(x17, 16, PostIndex));
  __ Ldr(q2, MemOperand(x17, 16, PostIndex));
  __ Ldr(q3, MemOperand(x17, 16, PostIndex));

  __ St4(v0.V16B(),
         v1.V16B(),
         v2.V16B(),
         v3.V16B(),
         MemOperand(x18, x22, PostIndex));
  __ St4(v0.V8H(),
         v1.V8H(),
         v2.V8H(),
         v3.V8H(),
         MemOperand(x18, 64, PostIndex));
  __ St4(v0.V4S(),
         v1.V4S(),
         v2.V4S(),
         v3.V4S(),
         MemOperand(x18, x22, PostIndex));
  __ St4(v0.V2D(), v1.V2D(), v2.V2D(), v3.V2D(), MemOperand(x18));

  __ Mov(x19, src_base);
  __ Ldr(q0, MemOperand(x19, 16, PostIndex));
  __ Ldr(q1, MemOperand(x19, 16, PostIndex));
  __ Ldr(q2, MemOperand(x19, 16, PostIndex));
  __ Ldr(q3, MemOperand(x19, 16, PostIndex));
  __ Ldr(q4, MemOperand(x19, 16, PostIndex));
  __ Ldr(q5, MemOperand(x19, 16, PostIndex));
  __ Ldr(q6, MemOperand(x19, 16, PostIndex));
  __ Ldr(q7, MemOperand(x19, 16, PostIndex));
  __ Ldr(q8, MemOperand(x19, 16, PostIndex));

  END();

  RUN();

  ASSERT_EQUAL_128(0x1203023130212011, 0x1001000130201000, q0);
  ASSERT_EQUAL_128(0x1607063534252415, 0x1405043332232213, q1);
  ASSERT_EQUAL_128(0x1a0b0a3938292819, 0x1809083736272617, q2);
  ASSERT_EQUAL_128(0x1e0f0e3d3c2d2c1d, 0x1c0d0c3b3a2b2a1b, q3);
  ASSERT_EQUAL_128(0x0504030201001003, 0x0201003f3e2f2e1f, q4);
  ASSERT_EQUAL_128(0x2524232221201716, 0x1514131211100706, q5);
  ASSERT_EQUAL_128(0x0d0c0b0a09083736, 0x3534333231302726, q6);
  ASSERT_EQUAL_128(0x2d2c2b2a29281f1e, 0x1d1c1b1a19180f0e, q7);
  ASSERT_EQUAL_128(0x8f8e8d8c8b8a3f3e, 0x3d3c3b3a39382f2e, q8);

  TEARDOWN();
}

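// Check that pairwise min/max operations give the same result when the
// destination aliases one (or both) of the source registers.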
TEST(neon_destructive_minmaxp) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();
  __ Movi(v0.V2D(), 0, 0x2222222233333333);
  __ Movi(v1.V2D(), 0, 0x0000000011111111);

  __ Sminp(v16.V2S(), v0.V2S(), v1.V2S());
  __ Mov(v17, v0);
  __ Sminp(v17.V2S(), v17.V2S(), v1.V2S());
  __ Mov(v18, v1);
  __ Sminp(v18.V2S(), v0.V2S(), v18.V2S());
  __ Mov(v19, v0);
  __ Sminp(v19.V2S(), v19.V2S(), v19.V2S());

  __ Smaxp(v20.V2S(), v0.V2S(), v1.V2S());
  __ Mov(v21, v0);
  __ Smaxp(v21.V2S(), v21.V2S(), v1.V2S());
  __ Mov(v22, v1);
  __ Smaxp(v22.V2S(), v0.V2S(), v22.V2S());
  __ Mov(v23, v0);
  __ Smaxp(v23.V2S(), v23.V2S(), v23.V2S());

  __ Uminp(v24.V2S(), v0.V2S(), v1.V2S());
  __ Mov(v25, v0);
  __ Uminp(v25.V2S(), v25.V2S(), v1.V2S());
  __ Mov(v26, v1);
  __ Uminp(v26.V2S(), v0.V2S(), v26.V2S());
  __ Mov(v27, v0);
  __ Uminp(v27.V2S(), v27.V2S(), v27.V2S());

  __ Umaxp(v28.V2S(), v0.V2S(), v1.V2S());
  __ Mov(v29, v0);
  __ Umaxp(v29.V2S(), v29.V2S(), v1.V2S());
  __ Mov(v30, v1);
  __ Umaxp(v30.V2S(), v0.V2S(), v30.V2S());
  __ Mov(v31, v0);
  __ Umaxp(v31.V2S(), v31.V2S(), v31.V2S());
  END();

  RUN();

  ASSERT_EQUAL_128(0, 0x0000000022222222, q16);
  ASSERT_EQUAL_128(0, 0x0000000022222222, q17);
  ASSERT_EQUAL_128(0, 0x0000000022222222, q18);
  ASSERT_EQUAL_128(0, 0x2222222222222222, q19);

  ASSERT_EQUAL_128(0, 0x1111111133333333, q20);
  ASSERT_EQUAL_128(0, 0x1111111133333333, q21);
  ASSERT_EQUAL_128(0, 0x1111111133333333, q22);
  ASSERT_EQUAL_128(0, 0x3333333333333333, q23);

  ASSERT_EQUAL_128(0, 0x0000000022222222, q24);
  ASSERT_EQUAL_128(0, 0x0000000022222222, q25);
  ASSERT_EQUAL_128(0, 0x0000000022222222, q26);
  ASSERT_EQUAL_128(0, 0x2222222222222222, q27);

  ASSERT_EQUAL_128(0, 0x1111111133333333, q28);
  ASSERT_EQUAL_128(0, 0x1111111133333333, q29);
  ASSERT_EQUAL_128(0, 0x1111111133333333, q30);
  ASSERT_EQUAL_128(0, 0x3333333333333333, q31);

  TEARDOWN();
}

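// TBL looks up each index byte in a table formed from one to four registers;
// indices beyond the table produce zero. Check that the result is unaffected
// by the destination aliasing a table register or the index register.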
TEST(neon_destructive_tbl) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();
  __ Movi(v0.V2D(), 0x0041424334353627, 0x28291a1b1c0d0e0f);
  __ Movi(v1.V2D(), 0xafaeadacabaaa9a8, 0xa7a6a5a4a3a2a1a0);
  __ Movi(v2.V2D(), 0xbfbebdbcbbbab9b8, 0xb7b6b5b4b3b2b1b0);
  __ Movi(v3.V2D(), 0xcfcecdcccbcac9c8, 0xc7c6c5c4c3c2c1c0);
  __ Movi(v4.V2D(), 0xdfdedddcdbdad9d8, 0xd7d6d5d4d3d2d1d0);

  __ Movi(v16.V2D(), 0x5555555555555555, 0x5555555555555555);
  __ Tbl(v16.V16B(), v1.V16B(), v0.V16B());
  __ Mov(v17, v0);
  __ Tbl(v17.V16B(), v1.V16B(), v17.V16B());
  __ Mov(v18, v1);
  __ Tbl(v18.V16B(), v18.V16B(), v0.V16B());
  __ Mov(v19, v0);
  __ Tbl(v19.V16B(), v19.V16B(), v19.V16B());

  __ Movi(v20.V2D(), 0x5555555555555555, 0x5555555555555555);
  __ Tbl(v20.V16B(), v1.V16B(), v2.V16B(), v3.V16B(), v4.V16B(), v0.V16B());
  __ Mov(v21, v0);
  __ Tbl(v21.V16B(), v1.V16B(), v2.V16B(), v3.V16B(), v4.V16B(), v21.V16B());
  __ Mov(v22, v1);
  __ Mov(v23, v2);
  __ Mov(v24, v3);
  __ Mov(v25, v4);
  __ Tbl(v22.V16B(), v22.V16B(), v23.V16B(), v24.V16B(), v25.V16B(), v0.V16B());
  __ Mov(v26, v0);
  __ Mov(v27, v1);
  __ Mov(v28, v2);
  __ Mov(v29, v3);
  __ Tbl(v26.V16B(),
         v26.V16B(),
         v27.V16B(),
         v28.V16B(),
         v29.V16B(),
         v26.V16B());
  END();

  RUN();

  ASSERT_EQUAL_128(0xa000000000000000, 0x0000000000adaeaf, q16);
  ASSERT_EQUAL_128(0xa000000000000000, 0x0000000000adaeaf, q17);
  ASSERT_EQUAL_128(0xa000000000000000, 0x0000000000adaeaf, q18);
  ASSERT_EQUAL_128(0x0f00000000000000, 0x0000000000424100, q19);

  ASSERT_EQUAL_128(0xa0000000d4d5d6c7, 0xc8c9babbbcadaeaf, q20);
  ASSERT_EQUAL_128(0xa0000000d4d5d6c7, 0xc8c9babbbcadaeaf, q21);
  ASSERT_EQUAL_128(0xa0000000d4d5d6c7, 0xc8c9babbbcadaeaf, q22);
  ASSERT_EQUAL_128(0x0f000000c4c5c6b7, 0xb8b9aaabac424100, q26);

  TEARDOWN();
}

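// TBX is like TBL, except that out-of-range indices leave the corresponding
// destination byte unchanged instead of zeroing it.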
TEST(neon_destructive_tbx) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();
  __ Movi(v0.V2D(), 0x0041424334353627, 0x28291a1b1c0d0e0f);
  __ Movi(v1.V2D(), 0xafaeadacabaaa9a8, 0xa7a6a5a4a3a2a1a0);
  __ Movi(v2.V2D(), 0xbfbebdbcbbbab9b8, 0xb7b6b5b4b3b2b1b0);
  __ Movi(v3.V2D(), 0xcfcecdcccbcac9c8, 0xc7c6c5c4c3c2c1c0);
  __ Movi(v4.V2D(), 0xdfdedddcdbdad9d8, 0xd7d6d5d4d3d2d1d0);

  __ Movi(v16.V2D(), 0x5555555555555555, 0x5555555555555555);
  __ Tbx(v16.V16B(), v1.V16B(), v0.V16B());
  __ Mov(v17, v0);
  __ Tbx(v17.V16B(), v1.V16B(), v17.V16B());
  __ Mov(v18, v1);
  __ Tbx(v18.V16B(), v18.V16B(), v0.V16B());
  __ Mov(v19, v0);
  __ Tbx(v19.V16B(), v19.V16B(), v19.V16B());

  __ Movi(v20.V2D(), 0x5555555555555555, 0x5555555555555555);
  __ Tbx(v20.V16B(), v1.V16B(), v2.V16B(), v3.V16B(), v4.V16B(), v0.V16B());
  __ Mov(v21, v0);
  __ Tbx(v21.V16B(), v1.V16B(), v2.V16B(), v3.V16B(), v4.V16B(), v21.V16B());
  __ Mov(v22, v1);
  __ Mov(v23, v2);
  __ Mov(v24, v3);
  __ Mov(v25, v4);
  __ Tbx(v22.V16B(), v22.V16B(), v23.V16B(), v24.V16B(), v25.V16B(), v0.V16B());
  __ Mov(v26, v0);
  __ Mov(v27, v1);
  __ Mov(v28, v2);
  __ Mov(v29, v3);
  __ Tbx(v26.V16B(),
         v26.V16B(),
         v27.V16B(),
         v28.V16B(),
         v29.V16B(),
         v26.V16B());
  END();

  RUN();

  ASSERT_EQUAL_128(0xa055555555555555, 0x5555555555adaeaf, q16);
  ASSERT_EQUAL_128(0xa041424334353627, 0x28291a1b1cadaeaf, q17);
  ASSERT_EQUAL_128(0xa0aeadacabaaa9a8, 0xa7a6a5a4a3adaeaf, q18);
  ASSERT_EQUAL_128(0x0f41424334353627, 0x28291a1b1c424100, q19);

  ASSERT_EQUAL_128(0xa0555555d4d5d6c7, 0xc8c9babbbcadaeaf, q20);
  ASSERT_EQUAL_128(0xa0414243d4d5d6c7, 0xc8c9babbbcadaeaf, q21);
  ASSERT_EQUAL_128(0xa0aeadacd4d5d6c7, 0xc8c9babbbcadaeaf, q22);
  ASSERT_EQUAL_128(0x0f414243c4c5c6b7, 0xb8b9aaabac424100, q26);

  TEARDOWN();
}

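// FCVTL widens the elements in the lower half of the source to the next wider
// floating-point format; FCVTL2 does the same for the upper half. Check both
// with and without the destination aliasing the source.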
TEST(neon_destructive_fcvtl) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);

  START();
  __ Movi(v0.V2D(), 0x400000003f800000, 0xbf800000c0000000);
  __ Fcvtl(v16.V2D(), v0.V2S());
  __ Fcvtl2(v17.V2D(), v0.V4S());
  __ Mov(v18, v0);
  __ Mov(v19, v0);
  __ Fcvtl(v18.V2D(), v18.V2S());
  __ Fcvtl2(v19.V2D(), v19.V4S());

  __ Movi(v1.V2D(), 0x40003c003c004000, 0xc000bc00bc00c000);
  __ Fcvtl(v20.V4S(), v1.V4H());
  __ Fcvtl2(v21.V4S(), v1.V8H());
  __ Mov(v22, v1);
  __ Mov(v23, v1);
  __ Fcvtl(v22.V4S(), v22.V4H());
  __ Fcvtl2(v23.V4S(), v23.V8H());

  END();

  RUN();

  ASSERT_EQUAL_128(0xbff0000000000000, 0xc000000000000000, q16);
  ASSERT_EQUAL_128(0x4000000000000000, 0x3ff0000000000000, q17);
  ASSERT_EQUAL_128(0xbff0000000000000, 0xc000000000000000, q18);
  ASSERT_EQUAL_128(0x4000000000000000, 0x3ff0000000000000, q19);

  ASSERT_EQUAL_128(0xc0000000bf800000, 0xbf800000c0000000, q20);
  ASSERT_EQUAL_128(0x400000003f800000, 0x3f80000040000000, q21);
  ASSERT_EQUAL_128(0xc0000000bf800000, 0xbf800000c0000000, q22);
  ASSERT_EQUAL_128(0x400000003f800000, 0x3f80000040000000, q23);

  TEARDOWN();
}

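// The following tests cover load/store pair (LDP/STP) addressing modes:
// plain immediate offsets, and pre- and post-indexed forms with base-register
// writeback.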
TEST(ldp_stp_float) {
  SETUP_WITH_FEATURES(CPUFeatures::kFP);

  float src[2] = {1.0, 2.0};
  float dst[3] = {0.0, 0.0, 0.0};
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
  uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);

  START();
  __ Mov(x16, src_base);
  __ Mov(x17, dst_base);
  __ Ldp(s31, s0, MemOperand(x16, 2 * sizeof(src[0]), PostIndex));
  __ Stp(s0, s31, MemOperand(x17, sizeof(dst[1]), PreIndex));
  END();

  RUN();

  ASSERT_EQUAL_FP32(1.0, s31);
  ASSERT_EQUAL_FP32(2.0, s0);
  ASSERT_EQUAL_FP32(0.0, dst[0]);
  ASSERT_EQUAL_FP32(2.0, dst[1]);
  ASSERT_EQUAL_FP32(1.0, dst[2]);
  ASSERT_EQUAL_64(src_base + 2 * sizeof(src[0]), x16);
  ASSERT_EQUAL_64(dst_base + sizeof(dst[1]), x17);

  TEARDOWN();
}


TEST(ldp_stp_double) {
  SETUP_WITH_FEATURES(CPUFeatures::kFP);

  double src[2] = {1.0, 2.0};
  double dst[3] = {0.0, 0.0, 0.0};
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
  uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);

  START();
  __ Mov(x16, src_base);
  __ Mov(x17, dst_base);
  __ Ldp(d31, d0, MemOperand(x16, 2 * sizeof(src[0]), PostIndex));
  __ Stp(d0, d31, MemOperand(x17, sizeof(dst[1]), PreIndex));
  END();

  RUN();

  ASSERT_EQUAL_FP64(1.0, d31);
  ASSERT_EQUAL_FP64(2.0, d0);
  ASSERT_EQUAL_FP64(0.0, dst[0]);
  ASSERT_EQUAL_FP64(2.0, dst[1]);
  ASSERT_EQUAL_FP64(1.0, dst[2]);
  ASSERT_EQUAL_64(src_base + 2 * sizeof(src[0]), x16);
  ASSERT_EQUAL_64(dst_base + sizeof(dst[1]), x17);

  TEARDOWN();
}


TEST(ldp_stp_quad) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint64_t src[4] = {0x0123456789abcdef,
                     0xaaaaaaaa55555555,
                     0xfedcba9876543210,
                     0x55555555aaaaaaaa};
  uint64_t dst[6] = {0, 0, 0, 0, 0, 0};
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
  uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);

  START();
  __ Mov(x16, src_base);
  __ Mov(x17, dst_base);
  __ Ldp(q31, q0, MemOperand(x16, 4 * sizeof(src[0]), PostIndex));
  __ Stp(q0, q31, MemOperand(x17, 2 * sizeof(dst[1]), PreIndex));
  END();

  RUN();

  ASSERT_EQUAL_128(0xaaaaaaaa55555555, 0x0123456789abcdef, q31);
  ASSERT_EQUAL_128(0x55555555aaaaaaaa, 0xfedcba9876543210, q0);
  ASSERT_EQUAL_64(0, dst[0]);
  ASSERT_EQUAL_64(0, dst[1]);
  ASSERT_EQUAL_64(0xfedcba9876543210, dst[2]);
  ASSERT_EQUAL_64(0x55555555aaaaaaaa, dst[3]);
  ASSERT_EQUAL_64(0x0123456789abcdef, dst[4]);
  ASSERT_EQUAL_64(0xaaaaaaaa55555555, dst[5]);
  ASSERT_EQUAL_64(src_base + 4 * sizeof(src[0]), x16);
  ASSERT_EQUAL_64(dst_base + 2 * sizeof(dst[1]), x17);

  TEARDOWN();
}


TEST(ldp_stp_offset) {
  SETUP();

  uint64_t src[3] = {0x0011223344556677,
                     0x8899aabbccddeeff,
                     0xffeeddccbbaa9988};
  uint64_t dst[7] = {0, 0, 0, 0, 0, 0, 0};
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
  uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);

  START();
  __ Mov(x16, src_base);
  __ Mov(x17, dst_base);
  __ Mov(x18, src_base + 24);
  __ Mov(x19, dst_base + 56);
  __ Ldp(w0, w1, MemOperand(x16));
  __ Ldp(w2, w3, MemOperand(x16, 4));
  __ Ldp(x4, x5, MemOperand(x16, 8));
  __ Ldp(w6, w7, MemOperand(x18, -12));
  __ Ldp(x8, x9, MemOperand(x18, -16));
  __ Stp(w0, w1, MemOperand(x17));
  __ Stp(w2, w3, MemOperand(x17, 8));
  __ Stp(x4, x5, MemOperand(x17, 16));
  __ Stp(w6, w7, MemOperand(x19, -24));
  __ Stp(x8, x9, MemOperand(x19, -16));
  END();

  RUN();

  ASSERT_EQUAL_64(0x44556677, x0);
  ASSERT_EQUAL_64(0x00112233, x1);
  ASSERT_EQUAL_64(0x0011223344556677, dst[0]);
  ASSERT_EQUAL_64(0x00112233, x2);
  ASSERT_EQUAL_64(0xccddeeff, x3);
  ASSERT_EQUAL_64(0xccddeeff00112233, dst[1]);
  ASSERT_EQUAL_64(0x8899aabbccddeeff, x4);
  ASSERT_EQUAL_64(0x8899aabbccddeeff, dst[2]);
  ASSERT_EQUAL_64(0xffeeddccbbaa9988, x5);
  ASSERT_EQUAL_64(0xffeeddccbbaa9988, dst[3]);
  ASSERT_EQUAL_64(0x8899aabb, x6);
  ASSERT_EQUAL_64(0xbbaa9988, x7);
  ASSERT_EQUAL_64(0xbbaa99888899aabb, dst[4]);
  ASSERT_EQUAL_64(0x8899aabbccddeeff, x8);
  ASSERT_EQUAL_64(0x8899aabbccddeeff, dst[5]);
  ASSERT_EQUAL_64(0xffeeddccbbaa9988, x9);
  ASSERT_EQUAL_64(0xffeeddccbbaa9988, dst[6]);
  ASSERT_EQUAL_64(src_base, x16);
  ASSERT_EQUAL_64(dst_base, x17);
  ASSERT_EQUAL_64(src_base + 24, x18);
  ASSERT_EQUAL_64(dst_base + 56, x19);

  TEARDOWN();
}


TEST(ldp_stp_offset_wide) {
  SETUP();

  uint64_t src[3] = {0x0011223344556677,
                     0x8899aabbccddeeff,
                     0xffeeddccbbaa9988};
  uint64_t dst[7] = {0, 0, 0, 0, 0, 0, 0};
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
  uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);
  // Move base too far from the array to force multiple instructions
  // to be emitted.
  const int64_t base_offset = 1024;

  START();
  __ Mov(x20, src_base - base_offset);
  __ Mov(x21, dst_base - base_offset);
  __ Mov(x18, src_base + base_offset + 24);
  __ Mov(x19, dst_base + base_offset + 56);
  __ Ldp(w0, w1, MemOperand(x20, base_offset));
  __ Ldp(w2, w3, MemOperand(x20, base_offset + 4));
  __ Ldp(x4, x5, MemOperand(x20, base_offset + 8));
  __ Ldp(w6, w7, MemOperand(x18, -12 - base_offset));
  __ Ldp(x8, x9, MemOperand(x18, -16 - base_offset));
  __ Stp(w0, w1, MemOperand(x21, base_offset));
  __ Stp(w2, w3, MemOperand(x21, base_offset + 8));
  __ Stp(x4, x5, MemOperand(x21, base_offset + 16));
  __ Stp(w6, w7, MemOperand(x19, -24 - base_offset));
  __ Stp(x8, x9, MemOperand(x19, -16 - base_offset));
  END();

  RUN();

  ASSERT_EQUAL_64(0x44556677, x0);
  ASSERT_EQUAL_64(0x00112233, x1);
  ASSERT_EQUAL_64(0x0011223344556677, dst[0]);
  ASSERT_EQUAL_64(0x00112233, x2);
  ASSERT_EQUAL_64(0xccddeeff, x3);
  ASSERT_EQUAL_64(0xccddeeff00112233, dst[1]);
  ASSERT_EQUAL_64(0x8899aabbccddeeff, x4);
  ASSERT_EQUAL_64(0x8899aabbccddeeff, dst[2]);
  ASSERT_EQUAL_64(0xffeeddccbbaa9988, x5);
  ASSERT_EQUAL_64(0xffeeddccbbaa9988, dst[3]);
  ASSERT_EQUAL_64(0x8899aabb, x6);
  ASSERT_EQUAL_64(0xbbaa9988, x7);
  ASSERT_EQUAL_64(0xbbaa99888899aabb, dst[4]);
  ASSERT_EQUAL_64(0x8899aabbccddeeff, x8);
  ASSERT_EQUAL_64(0x8899aabbccddeeff, dst[5]);
  ASSERT_EQUAL_64(0xffeeddccbbaa9988, x9);
  ASSERT_EQUAL_64(0xffeeddccbbaa9988, dst[6]);
  ASSERT_EQUAL_64(src_base - base_offset, x20);
  ASSERT_EQUAL_64(dst_base - base_offset, x21);
  ASSERT_EQUAL_64(src_base + base_offset + 24, x18);
  ASSERT_EQUAL_64(dst_base + base_offset + 56, x19);

  TEARDOWN();
}

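// LDNP and STNP are the non-temporal (streaming) pair accesses; they only
// support the plain immediate-offset addressing mode, with no writeback.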
TEST(ldnp_stnp_offset) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint64_t src[4] = {0x0011223344556677,
                     0x8899aabbccddeeff,
                     0xffeeddccbbaa9988,
                     0x7766554433221100};
  uint64_t dst[16] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
  uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);

  START();
  __ Mov(x16, src_base);
  __ Mov(x17, dst_base);
  __ Mov(x18, src_base + 24);
  __ Mov(x19, dst_base + 64);
  __ Mov(x20, src_base + 32);

  // Ensure address set up has happened before executing non-temporal ops.
  __ Dmb(InnerShareable, BarrierAll);

  __ Ldnp(w0, w1, MemOperand(x16));
  __ Ldnp(w2, w3, MemOperand(x16, 4));
  __ Ldnp(x4, x5, MemOperand(x16, 8));
  __ Ldnp(w6, w7, MemOperand(x18, -12));
  __ Ldnp(x8, x9, MemOperand(x18, -16));
  __ Ldnp(q16, q17, MemOperand(x16));
  __ Ldnp(q19, q18, MemOperand(x20, -32));
  __ Stnp(w0, w1, MemOperand(x17));
  __ Stnp(w2, w3, MemOperand(x17, 8));
  __ Stnp(x4, x5, MemOperand(x17, 16));
  __ Stnp(w6, w7, MemOperand(x19, -32));
  __ Stnp(x8, x9, MemOperand(x19, -24));
  __ Stnp(q17, q16, MemOperand(x19));
  __ Stnp(q18, q19, MemOperand(x19, 32));
  END();

  RUN();

  ASSERT_EQUAL_64(0x44556677, x0);
  ASSERT_EQUAL_64(0x00112233, x1);
  ASSERT_EQUAL_64(0x0011223344556677, dst[0]);
  ASSERT_EQUAL_64(0x00112233, x2);
  ASSERT_EQUAL_64(0xccddeeff, x3);
  ASSERT_EQUAL_64(0xccddeeff00112233, dst[1]);
  ASSERT_EQUAL_64(0x8899aabbccddeeff, x4);
  ASSERT_EQUAL_64(0x8899aabbccddeeff, dst[2]);
  ASSERT_EQUAL_64(0xffeeddccbbaa9988, x5);
  ASSERT_EQUAL_64(0xffeeddccbbaa9988, dst[3]);
  ASSERT_EQUAL_64(0x8899aabb, x6);
  ASSERT_EQUAL_64(0xbbaa9988, x7);
  ASSERT_EQUAL_64(0xbbaa99888899aabb, dst[4]);
  ASSERT_EQUAL_64(0x8899aabbccddeeff, x8);
  ASSERT_EQUAL_64(0x8899aabbccddeeff, dst[5]);
  ASSERT_EQUAL_64(0xffeeddccbbaa9988, x9);
  ASSERT_EQUAL_64(0xffeeddccbbaa9988, dst[6]);
  ASSERT_EQUAL_128(0x8899aabbccddeeff, 0x0011223344556677, q16);
  ASSERT_EQUAL_128(0x7766554433221100, 0xffeeddccbbaa9988, q17);
  ASSERT_EQUAL_128(0x7766554433221100, 0xffeeddccbbaa9988, q18);
  ASSERT_EQUAL_128(0x8899aabbccddeeff, 0x0011223344556677, q19);
  ASSERT_EQUAL_64(0xffeeddccbbaa9988, dst[8]);
  ASSERT_EQUAL_64(0x7766554433221100, dst[9]);
  ASSERT_EQUAL_64(0x0011223344556677, dst[10]);
  ASSERT_EQUAL_64(0x8899aabbccddeeff, dst[11]);
  ASSERT_EQUAL_64(0xffeeddccbbaa9988, dst[12]);
  ASSERT_EQUAL_64(0x7766554433221100, dst[13]);
  ASSERT_EQUAL_64(0x0011223344556677, dst[14]);
  ASSERT_EQUAL_64(0x8899aabbccddeeff, dst[15]);
  ASSERT_EQUAL_64(src_base, x16);
  ASSERT_EQUAL_64(dst_base, x17);
  ASSERT_EQUAL_64(src_base + 24, x18);
  ASSERT_EQUAL_64(dst_base + 64, x19);
  ASSERT_EQUAL_64(src_base + 32, x20);

  TEARDOWN();
}


TEST(ldnp_stnp_offset_float) {
  SETUP_WITH_FEATURES(CPUFeatures::kFP);

  float src[3] = {1.2, 2.3, 3.4};
  float dst[6] = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0};
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
  uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);

  START();
  __ Mov(x16, src_base);
  __ Mov(x17, dst_base);
  __ Mov(x18, src_base + 12);
  __ Mov(x19, dst_base + 24);

  // Ensure address set up has happened before executing non-temporal ops.
  __ Dmb(InnerShareable, BarrierAll);

  __ Ldnp(s0, s1, MemOperand(x16));
  __ Ldnp(s2, s3, MemOperand(x16, 4));
  __ Ldnp(s5, s4, MemOperand(x18, -8));
  __ Stnp(s1, s0, MemOperand(x17));
  __ Stnp(s3, s2, MemOperand(x17, 8));
  __ Stnp(s4, s5, MemOperand(x19, -8));
  END();

  RUN();

  ASSERT_EQUAL_FP32(1.2, s0);
  ASSERT_EQUAL_FP32(2.3, s1);
  ASSERT_EQUAL_FP32(2.3, dst[0]);
  ASSERT_EQUAL_FP32(1.2, dst[1]);
  ASSERT_EQUAL_FP32(2.3, s2);
  ASSERT_EQUAL_FP32(3.4, s3);
  ASSERT_EQUAL_FP32(3.4, dst[2]);
  ASSERT_EQUAL_FP32(2.3, dst[3]);
  ASSERT_EQUAL_FP32(3.4, s4);
  ASSERT_EQUAL_FP32(2.3, s5);
  ASSERT_EQUAL_FP32(3.4, dst[4]);
  ASSERT_EQUAL_FP32(2.3, dst[5]);
  ASSERT_EQUAL_64(src_base, x16);
  ASSERT_EQUAL_64(dst_base, x17);
  ASSERT_EQUAL_64(src_base + 12, x18);
  ASSERT_EQUAL_64(dst_base + 24, x19);

  TEARDOWN();
}


TEST(ldnp_stnp_offset_double) {
  SETUP_WITH_FEATURES(CPUFeatures::kFP);

  double src[3] = {1.2, 2.3, 3.4};
  double dst[6] = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0};
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
  uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);

  START();
  __ Mov(x16, src_base);
  __ Mov(x17, dst_base);
  __ Mov(x18, src_base + 24);
  __ Mov(x19, dst_base + 48);

  // Ensure address set up has happened before executing non-temporal ops.
  __ Dmb(InnerShareable, BarrierAll);

  __ Ldnp(d0, d1, MemOperand(x16));
  __ Ldnp(d2, d3, MemOperand(x16, 8));
  __ Ldnp(d5, d4, MemOperand(x18, -16));
  __ Stnp(d1, d0, MemOperand(x17));
  __ Stnp(d3, d2, MemOperand(x17, 16));
  __ Stnp(d4, d5, MemOperand(x19, -16));
  END();

  RUN();

  ASSERT_EQUAL_FP64(1.2, d0);
  ASSERT_EQUAL_FP64(2.3, d1);
  ASSERT_EQUAL_FP64(2.3, dst[0]);
  ASSERT_EQUAL_FP64(1.2, dst[1]);
  ASSERT_EQUAL_FP64(2.3, d2);
  ASSERT_EQUAL_FP64(3.4, d3);
  ASSERT_EQUAL_FP64(3.4, dst[2]);
  ASSERT_EQUAL_FP64(2.3, dst[3]);
  ASSERT_EQUAL_FP64(3.4, d4);
  ASSERT_EQUAL_FP64(2.3, d5);
  ASSERT_EQUAL_FP64(3.4, dst[4]);
  ASSERT_EQUAL_FP64(2.3, dst[5]);
  ASSERT_EQUAL_64(src_base, x16);
  ASSERT_EQUAL_64(dst_base, x17);
  ASSERT_EQUAL_64(src_base + 24, x18);
  ASSERT_EQUAL_64(dst_base + 48, x19);

  TEARDOWN();
}

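// Pre-indexed addressing updates the base register before the access;
// post-indexed addressing updates it afterwards. The `_wide` variants use
// offsets that do not fit a single LDP/STP encoding, so the macro-assembler
// has to emit extra instructions to materialise the address.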
TEST(ldp_stp_preindex) {
  SETUP();

  uint64_t src[3] = {0x0011223344556677,
                     0x8899aabbccddeeff,
                     0xffeeddccbbaa9988};
  uint64_t dst[5] = {0, 0, 0, 0, 0};
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
  uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);

  START();
  __ Mov(x16, src_base);
  __ Mov(x17, dst_base);
  __ Mov(x18, dst_base + 16);
  __ Ldp(w0, w1, MemOperand(x16, 4, PreIndex));
  __ Mov(x19, x16);
  __ Ldp(w2, w3, MemOperand(x16, -4, PreIndex));
  __ Stp(w2, w3, MemOperand(x17, 4, PreIndex));
  __ Mov(x20, x17);
  __ Stp(w0, w1, MemOperand(x17, -4, PreIndex));
  __ Ldp(x4, x5, MemOperand(x16, 8, PreIndex));
  __ Mov(x21, x16);
  __ Ldp(x6, x7, MemOperand(x16, -8, PreIndex));
  __ Stp(x7, x6, MemOperand(x18, 8, PreIndex));
  __ Mov(x22, x18);
  __ Stp(x5, x4, MemOperand(x18, -8, PreIndex));
  END();

  RUN();

  ASSERT_EQUAL_64(0x00112233, x0);
  ASSERT_EQUAL_64(0xccddeeff, x1);
  ASSERT_EQUAL_64(0x44556677, x2);
  ASSERT_EQUAL_64(0x00112233, x3);
  ASSERT_EQUAL_64(0xccddeeff00112233, dst[0]);
  ASSERT_EQUAL_64(0x0000000000112233, dst[1]);
  ASSERT_EQUAL_64(0x8899aabbccddeeff, x4);
  ASSERT_EQUAL_64(0xffeeddccbbaa9988, x5);
  ASSERT_EQUAL_64(0x0011223344556677, x6);
  ASSERT_EQUAL_64(0x8899aabbccddeeff, x7);
  ASSERT_EQUAL_64(0xffeeddccbbaa9988, dst[2]);
  ASSERT_EQUAL_64(0x8899aabbccddeeff, dst[3]);
  ASSERT_EQUAL_64(0x0011223344556677, dst[4]);
  ASSERT_EQUAL_64(src_base, x16);
  ASSERT_EQUAL_64(dst_base, x17);
  ASSERT_EQUAL_64(dst_base + 16, x18);
  ASSERT_EQUAL_64(src_base + 4, x19);
  ASSERT_EQUAL_64(dst_base + 4, x20);
  ASSERT_EQUAL_64(src_base + 8, x21);
  ASSERT_EQUAL_64(dst_base + 24, x22);

  TEARDOWN();
}


TEST(ldp_stp_preindex_wide) {
  SETUP();

  uint64_t src[3] = {0x0011223344556677,
                     0x8899aabbccddeeff,
                     0xffeeddccbbaa9988};
  uint64_t dst[5] = {0, 0, 0, 0, 0};
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
  uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);
  // Move base too far from the array to force multiple instructions
  // to be emitted.
  const int64_t base_offset = 1024;

  START();
  __ Mov(x24, src_base - base_offset);
  __ Mov(x25, dst_base + base_offset);
  __ Mov(x18, dst_base + base_offset + 16);
  __ Ldp(w0, w1, MemOperand(x24, base_offset + 4, PreIndex));
  __ Mov(x19, x24);
  __ Mov(x24, src_base - base_offset + 4);
  __ Ldp(w2, w3, MemOperand(x24, base_offset - 4, PreIndex));
  __ Stp(w2, w3, MemOperand(x25, 4 - base_offset, PreIndex));
  __ Mov(x20, x25);
  __ Mov(x25, dst_base + base_offset + 4);
  __ Mov(x24, src_base - base_offset);
  __ Stp(w0, w1, MemOperand(x25, -4 - base_offset, PreIndex));
  __ Ldp(x4, x5, MemOperand(x24, base_offset + 8, PreIndex));
  __ Mov(x21, x24);
  __ Mov(x24, src_base - base_offset + 8);
  __ Ldp(x6, x7, MemOperand(x24, base_offset - 8, PreIndex));
  __ Stp(x7, x6, MemOperand(x18, 8 - base_offset, PreIndex));
  __ Mov(x22, x18);
  __ Mov(x18, dst_base + base_offset + 16 + 8);
  __ Stp(x5, x4, MemOperand(x18, -8 - base_offset, PreIndex));
  END();

  RUN();

  ASSERT_EQUAL_64(0x00112233, x0);
  ASSERT_EQUAL_64(0xccddeeff, x1);
  ASSERT_EQUAL_64(0x44556677, x2);
  ASSERT_EQUAL_64(0x00112233, x3);
  ASSERT_EQUAL_64(0xccddeeff00112233, dst[0]);
  ASSERT_EQUAL_64(0x0000000000112233, dst[1]);
  ASSERT_EQUAL_64(0x8899aabbccddeeff, x4);
  ASSERT_EQUAL_64(0xffeeddccbbaa9988, x5);
  ASSERT_EQUAL_64(0x0011223344556677, x6);
  ASSERT_EQUAL_64(0x8899aabbccddeeff, x7);
  ASSERT_EQUAL_64(0xffeeddccbbaa9988, dst[2]);
  ASSERT_EQUAL_64(0x8899aabbccddeeff, dst[3]);
  ASSERT_EQUAL_64(0x0011223344556677, dst[4]);
  ASSERT_EQUAL_64(src_base, x24);
  ASSERT_EQUAL_64(dst_base, x25);
  ASSERT_EQUAL_64(dst_base + 16, x18);
  ASSERT_EQUAL_64(src_base + 4, x19);
  ASSERT_EQUAL_64(dst_base + 4, x20);
  ASSERT_EQUAL_64(src_base + 8, x21);
  ASSERT_EQUAL_64(dst_base + 24, x22);

  TEARDOWN();
}


TEST(ldp_stp_postindex) {
  SETUP();

  uint64_t src[4] = {0x0011223344556677,
                     0x8899aabbccddeeff,
                     0xffeeddccbbaa9988,
                     0x7766554433221100};
  uint64_t dst[5] = {0, 0, 0, 0, 0};
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
  uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);

  START();
  __ Mov(x16, src_base);
  __ Mov(x17, dst_base);
  __ Mov(x18, dst_base + 16);
  __ Ldp(w0, w1, MemOperand(x16, 4, PostIndex));
  __ Mov(x19, x16);
  __ Ldp(w2, w3, MemOperand(x16, -4, PostIndex));
  __ Stp(w2, w3, MemOperand(x17, 4, PostIndex));
  __ Mov(x20, x17);
  __ Stp(w0, w1, MemOperand(x17, -4, PostIndex));
  __ Ldp(x4, x5, MemOperand(x16, 8, PostIndex));
  __ Mov(x21, x16);
  __ Ldp(x6, x7, MemOperand(x16, -8, PostIndex));
  __ Stp(x7, x6, MemOperand(x18, 8, PostIndex));
  __ Mov(x22, x18);
  __ Stp(x5, x4, MemOperand(x18, -8, PostIndex));
  END();

  RUN();

  ASSERT_EQUAL_64(0x44556677, x0);
  ASSERT_EQUAL_64(0x00112233, x1);
  ASSERT_EQUAL_64(0x00112233, x2);
  ASSERT_EQUAL_64(0xccddeeff, x3);
  ASSERT_EQUAL_64(0x4455667700112233, dst[0]);
  ASSERT_EQUAL_64(0x0000000000112233, dst[1]);
  ASSERT_EQUAL_64(0x0011223344556677, x4);
  ASSERT_EQUAL_64(0x8899aabbccddeeff, x5);
  ASSERT_EQUAL_64(0x8899aabbccddeeff, x6);
  ASSERT_EQUAL_64(0xffeeddccbbaa9988, x7);
  ASSERT_EQUAL_64(0xffeeddccbbaa9988, dst[2]);
  ASSERT_EQUAL_64(0x8899aabbccddeeff, dst[3]);
  ASSERT_EQUAL_64(0x0011223344556677, dst[4]);
  ASSERT_EQUAL_64(src_base, x16);
  ASSERT_EQUAL_64(dst_base, x17);
  ASSERT_EQUAL_64(dst_base + 16, x18);
  ASSERT_EQUAL_64(src_base + 4, x19);
  ASSERT_EQUAL_64(dst_base + 4, x20);
  ASSERT_EQUAL_64(src_base + 8, x21);
  ASSERT_EQUAL_64(dst_base + 24, x22);

  TEARDOWN();
}


TEST(ldp_stp_postindex_wide) {
  SETUP();

  uint64_t src[4] = {0x0011223344556677,
                     0x8899aabbccddeeff,
                     0xffeeddccbbaa9988,
                     0x7766554433221100};
  uint64_t dst[5] = {0, 0, 0, 0, 0};
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
  uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);
  // Move base too far from the array to force multiple instructions
  // to be emitted.
  const int64_t base_offset = 1024;

  START();
  __ Mov(x24, src_base);
  __ Mov(x25, dst_base);
  __ Mov(x18, dst_base + 16);
  __ Ldp(w0, w1, MemOperand(x24, base_offset + 4, PostIndex));
  __ Mov(x19, x24);
  __ Sub(x24, x24, base_offset);
  __ Ldp(w2, w3, MemOperand(x24, base_offset - 4, PostIndex));
  __ Stp(w2, w3, MemOperand(x25, 4 - base_offset, PostIndex));
  __ Mov(x20, x25);
  __ Sub(x24, x24, base_offset);
  __ Add(x25, x25, base_offset);
  __ Stp(w0, w1, MemOperand(x25, -4 - base_offset, PostIndex));
  __ Ldp(x4, x5, MemOperand(x24, base_offset + 8, PostIndex));
  __ Mov(x21, x24);
  __ Sub(x24, x24, base_offset);
  __ Ldp(x6, x7, MemOperand(x24, base_offset - 8, PostIndex));
  __ Stp(x7, x6, MemOperand(x18, 8 - base_offset, PostIndex));
  __ Mov(x22, x18);
  __ Add(x18, x18, base_offset);
  __ Stp(x5, x4, MemOperand(x18, -8 - base_offset, PostIndex));
  END();

  RUN();

  ASSERT_EQUAL_64(0x44556677, x0);
  ASSERT_EQUAL_64(0x00112233, x1);
  ASSERT_EQUAL_64(0x00112233, x2);
  ASSERT_EQUAL_64(0xccddeeff, x3);
  ASSERT_EQUAL_64(0x4455667700112233, dst[0]);
  ASSERT_EQUAL_64(0x0000000000112233, dst[1]);
  ASSERT_EQUAL_64(0x0011223344556677, x4);
  ASSERT_EQUAL_64(0x8899aabbccddeeff, x5);
  ASSERT_EQUAL_64(0x8899aabbccddeeff, x6);
  ASSERT_EQUAL_64(0xffeeddccbbaa9988, x7);
  ASSERT_EQUAL_64(0xffeeddccbbaa9988, dst[2]);
  ASSERT_EQUAL_64(0x8899aabbccddeeff, dst[3]);
  ASSERT_EQUAL_64(0x0011223344556677, dst[4]);
  ASSERT_EQUAL_64(src_base + base_offset, x24);
  ASSERT_EQUAL_64(dst_base - base_offset, x25);
  ASSERT_EQUAL_64(dst_base - base_offset + 16, x18);
  ASSERT_EQUAL_64(src_base + base_offset + 4, x19);
  ASSERT_EQUAL_64(dst_base - base_offset + 4, x20);
  ASSERT_EQUAL_64(src_base + base_offset + 8, x21);
  ASSERT_EQUAL_64(dst_base - base_offset + 24, x22);

  TEARDOWN();
}

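// LDPSW loads a pair of 32-bit words and sign-extends each to 64 bits.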
TEST(ldp_sign_extend) {
  SETUP();

  uint32_t src[2] = {0x80000000, 0x7fffffff};
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x24, src_base);
  __ Ldpsw(x0, x1, MemOperand(x24));
  END();

  RUN();

  ASSERT_EQUAL_64(0xffffffff80000000, x0);
  ASSERT_EQUAL_64(0x000000007fffffff, x1);

  TEARDOWN();
}

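// The offsets used below are not multiples of the access size, so they cannot
// be encoded as scaled immediates and the unscaled LDUR/STUR forms must be
// used instead.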
TEST(ldur_stur) {
  SETUP();

  int64_t src[2] = {0x0123456789abcdef, 0x0123456789abcdef};
  int64_t dst[5] = {0, 0, 0, 0, 0};
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
  uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);

  START();
  __ Mov(x17, src_base);
  __ Mov(x18, dst_base);
  __ Mov(x19, src_base + 16);
  __ Mov(x20, dst_base + 32);
  __ Mov(x21, dst_base + 40);
  __ Ldr(w0, MemOperand(x17, 1));
  __ Str(w0, MemOperand(x18, 2));
  __ Ldr(x1, MemOperand(x17, 3));
  __ Str(x1, MemOperand(x18, 9));
  __ Ldr(w2, MemOperand(x19, -9));
  __ Str(w2, MemOperand(x20, -5));
  __ Ldrb(w3, MemOperand(x19, -1));
  __ Strb(w3, MemOperand(x21, -1));
  END();

  RUN();

  ASSERT_EQUAL_64(0x6789abcd, x0);
  ASSERT_EQUAL_64(0x00006789abcd0000, dst[0]);
  ASSERT_EQUAL_64(0xabcdef0123456789, x1);
  ASSERT_EQUAL_64(0xcdef012345678900, dst[1]);
  ASSERT_EQUAL_64(0x000000ab, dst[2]);
  ASSERT_EQUAL_64(0xabcdef01, x2);
  ASSERT_EQUAL_64(0x00abcdef01000000, dst[3]);
  ASSERT_EQUAL_64(0x00000001, x3);
  ASSERT_EQUAL_64(0x0100000000000000, dst[4]);
  ASSERT_EQUAL_64(src_base, x17);
  ASSERT_EQUAL_64(dst_base, x18);
  ASSERT_EQUAL_64(src_base + 16, x19);
  ASSERT_EQUAL_64(dst_base + 32, x20);

  TEARDOWN();
}


TEST(ldur_stur_neon) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  int64_t src[3] = {0x0123456789abcdef, 0x0123456789abcdef, 0x0123456789abcdef};
  int64_t dst[5] = {0, 0, 0, 0, 0};
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
  uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);

  START();
  __ Mov(x17, src_base);
  __ Mov(x18, dst_base);
  __ Ldr(b0, MemOperand(x17));
  __ Str(b0, MemOperand(x18));
  __ Ldr(h1, MemOperand(x17, 1));
  __ Str(h1, MemOperand(x18, 1));
  __ Ldr(s2, MemOperand(x17, 2));
  __ Str(s2, MemOperand(x18, 3));
  __ Ldr(d3, MemOperand(x17, 3));
  __ Str(d3, MemOperand(x18, 7));
  __ Ldr(q4, MemOperand(x17, 4));
  __ Str(q4, MemOperand(x18, 15));
  END();

  RUN();

  ASSERT_EQUAL_128(0, 0xef, q0);
  ASSERT_EQUAL_128(0, 0xabcd, q1);
  ASSERT_EQUAL_128(0, 0x456789ab, q2);
  ASSERT_EQUAL_128(0, 0xabcdef0123456789, q3);
  ASSERT_EQUAL_128(0x89abcdef01234567, 0x89abcdef01234567, q4);
  ASSERT_EQUAL_64(0x89456789ababcdef, dst[0]);
  ASSERT_EQUAL_64(0x67abcdef01234567, dst[1]);
  ASSERT_EQUAL_64(0x6789abcdef012345, dst[2]);
  ASSERT_EQUAL_64(0x0089abcdef012345, dst[3]);

  TEARDOWN();
}

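// Loading an immediate value with Ldr places the value in a literal pool and
// emits a PC-relative load from it.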
TEST(ldr_literal) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();
  __ Ldr(x2, 0x1234567890abcdef);
  __ Ldr(w3, 0xfedcba09);
  __ Ldrsw(x4, 0x7fffffff);
  __ Ldrsw(x5, 0x80000000);
  __ Ldr(q11, 0x1234000056780000, 0xabcd0000ef000000);
  __ Ldr(d13, 1.234);
  __ Ldr(s25, 2.5);
  END();

  RUN();

  ASSERT_EQUAL_64(0x1234567890abcdef, x2);
  ASSERT_EQUAL_64(0xfedcba09, x3);
  ASSERT_EQUAL_64(0x7fffffff, x4);
  ASSERT_EQUAL_64(0xffffffff80000000, x5);
  ASSERT_EQUAL_128(0x1234000056780000, 0xabcd0000ef000000, q11);
  ASSERT_EQUAL_FP64(1.234, d13);
  ASSERT_EQUAL_FP32(2.5, s25);

  TEARDOWN();
}


TEST(ldr_literal_range) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();
  // Make sure the pool is empty.
  masm.EmitLiteralPool(LiteralPool::kBranchRequired);
  ASSERT_LITERAL_POOL_SIZE(0);

  // Create some literal pool entries.
  __ Ldr(x0, 0x1234567890abcdef);
  __ Ldr(w1, 0xfedcba09);
  __ Ldrsw(x2, 0x7fffffff);
  __ Ldrsw(x3, 0x80000000);
  __ Ldr(q2, 0x1234000056780000, 0xabcd0000ef000000);
  __ Ldr(d0, 1.234);
  __ Ldr(s1, 2.5);
  ASSERT_LITERAL_POOL_SIZE(48);

  // Emit more code than the maximum literal load range to ensure the pool
  // is emitted.
  const ptrdiff_t end = masm.GetCursorOffset() + 2 * kMaxLoadLiteralRange;
  while (masm.GetCursorOffset() < end) {
    __ Nop();
  }

  // The pool should have been emitted.
  ASSERT_LITERAL_POOL_SIZE(0);

  // These loads should be after the pool (and will require a new one).
  __ Ldr(x4, 0x34567890abcdef12);
  __ Ldr(w5, 0xdcba09fe);
  __ Ldrsw(x6, 0x7fffffff);
  __ Ldrsw(x7, 0x80000000);
  __ Ldr(q6, 0x1234000056780000, 0xabcd0000ef000000);
  __ Ldr(d4, 123.4);
  __ Ldr(s5, 250.0);
  ASSERT_LITERAL_POOL_SIZE(48);
  END();

  RUN();

  // Check that the literals loaded correctly.
  ASSERT_EQUAL_64(0x1234567890abcdef, x0);
  ASSERT_EQUAL_64(0xfedcba09, x1);
  ASSERT_EQUAL_64(0x7fffffff, x2);
  ASSERT_EQUAL_64(0xffffffff80000000, x3);
  ASSERT_EQUAL_128(0x1234000056780000, 0xabcd0000ef000000, q2);
  ASSERT_EQUAL_FP64(1.234, d0);
  ASSERT_EQUAL_FP32(2.5, s1);
  ASSERT_EQUAL_64(0x34567890abcdef12, x4);
  ASSERT_EQUAL_64(0xdcba09fe, x5);
  ASSERT_EQUAL_64(0x7fffffff, x6);
  ASSERT_EQUAL_64(0xffffffff80000000, x7);
  ASSERT_EQUAL_128(0x1234000056780000, 0xabcd0000ef000000, q6);
  ASSERT_EQUAL_FP64(123.4, d4);
  ASSERT_EQUAL_FP32(250.0, s5);

  TEARDOWN();
}


TEST(ldr_literal_values_q) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  static const uint64_t kHalfValues[] = {0x8000000000000000,
                                         0x7fffffffffffffff,
                                         0x0000000000000000,
                                         0xffffffffffffffff,
                                         0x00ff00ff00ff00ff,
                                         0x1234567890abcdef};
  const int card = sizeof(kHalfValues) / sizeof(kHalfValues[0]);
  const Register& ref_low64 = x1;
  const Register& ref_high64 = x2;
  const Register& loaded_low64 = x3;
  const Register& loaded_high64 = x4;
  const VRegister& tgt = q0;

  START();
  __ Mov(x0, 0);

  for (int i = 0; i < card; i++) {
    __ Mov(ref_low64, kHalfValues[i]);
    for (int j = 0; j < card; j++) {
      __ Mov(ref_high64, kHalfValues[j]);
      __ Ldr(tgt, kHalfValues[j], kHalfValues[i]);
      __ Mov(loaded_low64, tgt.V2D(), 0);
      __ Mov(loaded_high64, tgt.V2D(), 1);
      __ Cmp(loaded_low64, ref_low64);
      __ Ccmp(loaded_high64, ref_high64, NoFlag, eq);
      __ Cset(x0, ne);
    }
  }
  END();

  RUN();

  // If one of the values differs, the trace can be used to identify which one.
  ASSERT_EQUAL_64(0, x0);

  TEARDOWN();
}

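// Helpers for the ldr_literal_values_* tests below: each value is loaded via
// a literal and compared with the same value moved in directly; x0 records
// any mismatch.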
template <typename T>
void LoadIntValueHelper(T values[], int card) {
  SETUP();

  const bool is_32bit = (sizeof(T) == 4);
  Register tgt1 = is_32bit ? Register(w1) : Register(x1);
  Register tgt2 = is_32bit ? Register(w2) : Register(x2);

  START();
  __ Mov(x0, 0);

  // If one of the values differs, x0 will be set to one.
  for (int i = 0; i < card; ++i) {
    __ Mov(tgt1, values[i]);
    __ Ldr(tgt2, values[i]);
    __ Cmp(tgt1, tgt2);
    __ Cset(x0, ne);
  }
  END();

  RUN();

  // If one of the values differs, the trace can be used to identify which one.
  ASSERT_EQUAL_64(0, x0);

  TEARDOWN();
}


TEST(ldr_literal_values_x) {
  static const uint64_t kValues[] = {0x8000000000000000,
                                     0x7fffffffffffffff,
                                     0x0000000000000000,
                                     0xffffffffffffffff,
                                     0x00ff00ff00ff00ff,
                                     0x1234567890abcdef};

  LoadIntValueHelper(kValues, sizeof(kValues) / sizeof(kValues[0]));
}


TEST(ldr_literal_values_w) {
  static const uint32_t kValues[] = {0x80000000,
                                     0x7fffffff,
                                     0x00000000,
                                     0xffffffff,
                                     0x00ff00ff,
                                     0x12345678,
                                     0x90abcdef};

  LoadIntValueHelper(kValues, sizeof(kValues) / sizeof(kValues[0]));
}


template <typename T>
void LoadFPValueHelper(T values[], int card) {
  SETUP_WITH_FEATURES(CPUFeatures::kFP);

  const bool is_32bits = (sizeof(T) == 4);
  const FPRegister& fp_tgt = is_32bits ? s2 : d2;
  const Register& tgt1 = is_32bits ? Register(w1) : Register(x1);
  const Register& tgt2 = is_32bits ? Register(w2) : Register(x2);

  START();
  __ Mov(x0, 0);

  // If one of the values differs, x0 will be set to one.
  for (int i = 0; i < card; ++i) {
    __ Mov(tgt1,
           is_32bits ? FloatToRawbits(values[i]) : DoubleToRawbits(values[i]));
    __ Ldr(fp_tgt, values[i]);
    __ Fmov(tgt2, fp_tgt);
    __ Cmp(tgt1, tgt2);
    __ Cset(x0, ne);
  }
  END();

  RUN();

  // If one of the values differs, the trace can be used to identify which one.
  ASSERT_EQUAL_64(0, x0);

  TEARDOWN();
}

TEST(ldr_literal_values_d) {
  static const double kValues[] = {-0.0, 0.0, -1.0, 1.0, -1e10, 1e10};

  LoadFPValueHelper(kValues, sizeof(kValues) / sizeof(kValues[0]));
}


TEST(ldr_literal_values_s) {
  static const float kValues[] = {-0.0, 0.0, -1.0, 1.0, -1e10, 1e10};

  LoadFPValueHelper(kValues, sizeof(kValues) / sizeof(kValues[0]));
}

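// Place literal pools manually, both before and after the loads, so that the
// PC-relative references are tested in both directions.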
TEST(ldr_literal_custom) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  Label end_of_pool_before;
  Label end_of_pool_after;

  const size_t kSizeOfPoolInBytes = 44;

  Literal<uint64_t> before_x(0x1234567890abcdef);
  Literal<uint32_t> before_w(0xfedcba09);
  Literal<uint32_t> before_sx(0x80000000);
  Literal<uint64_t> before_q(0x1234000056780000, 0xabcd0000ef000000);
  Literal<double> before_d(1.234);
  Literal<float> before_s(2.5);

  Literal<uint64_t> after_x(0x1234567890abcdef);
  Literal<uint32_t> after_w(0xfedcba09);
  Literal<uint32_t> after_sx(0x80000000);
  Literal<uint64_t> after_q(0x1234000056780000, 0xabcd0000ef000000);
  Literal<double> after_d(1.234);
  Literal<float> after_s(2.5);

  START();

  // Manually generate a pool.
  __ B(&end_of_pool_before);
  {
    ExactAssemblyScope scope(&masm, kSizeOfPoolInBytes);
    __ place(&before_x);
    __ place(&before_w);
    __ place(&before_sx);
    __ place(&before_q);
    __ place(&before_d);
    __ place(&before_s);
  }
  __ Bind(&end_of_pool_before);

  {
    ExactAssemblyScope scope(&masm, 12 * kInstructionSize);
    __ ldr(x2, &before_x);
    __ ldr(w3, &before_w);
    __ ldrsw(x5, &before_sx);
    __ ldr(q11, &before_q);
    __ ldr(d13, &before_d);
    __ ldr(s25, &before_s);

    __ ldr(x6, &after_x);
    __ ldr(w7, &after_w);
    __ ldrsw(x8, &after_sx);
    __ ldr(q18, &after_q);
    __ ldr(d14, &after_d);
    __ ldr(s26, &after_s);
  }

  // Manually generate a pool.
  __ B(&end_of_pool_after);
  {
    ExactAssemblyScope scope(&masm, kSizeOfPoolInBytes);
    __ place(&after_x);
    __ place(&after_w);
    __ place(&after_sx);
    __ place(&after_q);
    __ place(&after_d);
    __ place(&after_s);
  }
  __ Bind(&end_of_pool_after);

  END();

  RUN();

  ASSERT_EQUAL_64(0x1234567890abcdef, x2);
  ASSERT_EQUAL_64(0xfedcba09, x3);
  ASSERT_EQUAL_64(0xffffffff80000000, x5);
  ASSERT_EQUAL_128(0x1234000056780000, 0xabcd0000ef000000, q11);
  ASSERT_EQUAL_FP64(1.234, d13);
  ASSERT_EQUAL_FP32(2.5, s25);

  ASSERT_EQUAL_64(0x1234567890abcdef, x6);
  ASSERT_EQUAL_64(0xfedcba09, x7);
  ASSERT_EQUAL_64(0xffffffff80000000, x8);
  ASSERT_EQUAL_128(0x1234000056780000, 0xabcd0000ef000000, q18);
  ASSERT_EQUAL_FP64(1.234, d14);
  ASSERT_EQUAL_FP32(2.5, s26);

  TEARDOWN();
}


TEST(ldr_literal_custom_shared) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  Label end_of_pool_before;
  Label end_of_pool_after;

  const size_t kSizeOfPoolInBytes = 40;

  Literal<uint64_t> before_x(0x1234567890abcdef);
  Literal<uint32_t> before_w(0xfedcba09);
  Literal<uint64_t> before_q(0x1234000056780000, 0xabcd0000ef000000);
  Literal<double> before_d(1.234);
  Literal<float> before_s(2.5);

  Literal<uint64_t> after_x(0x1234567890abcdef);
  Literal<uint32_t> after_w(0xfedcba09);
  Literal<uint64_t> after_q(0x1234000056780000, 0xabcd0000ef000000);
  Literal<double> after_d(1.234);
  Literal<float> after_s(2.5);

  START();

  // Manually generate a pool.
  __ B(&end_of_pool_before);
  {
    ExactAssemblyScope scope(&masm, kSizeOfPoolInBytes);
    __ place(&before_x);
    __ place(&before_w);
    __ place(&before_q);
    __ place(&before_d);
    __ place(&before_s);
  }
  __ Bind(&end_of_pool_before);

  // Load the entries several times to test that literals can be shared.
  for (int i = 0; i < 50; i++) {
    ExactAssemblyScope scope(&masm, 12 * kInstructionSize);
    __ ldr(x2, &before_x);
    __ ldr(w3, &before_w);
    __ ldrsw(x5, &before_w);  // Re-use before_w.
    __ ldr(q11, &before_q);
    __ ldr(d13, &before_d);
    __ ldr(s25, &before_s);

    __ ldr(x6, &after_x);
    __ ldr(w7, &after_w);
    __ ldrsw(x8, &after_w);  // Re-use after_w.
    __ ldr(q18, &after_q);
    __ ldr(d14, &after_d);
    __ ldr(s26, &after_s);
  }

  // Manually generate a pool.
  __ B(&end_of_pool_after);
  {
    ExactAssemblyScope scope(&masm, kSizeOfPoolInBytes);
    __ place(&after_x);
    __ place(&after_w);
    __ place(&after_q);
    __ place(&after_d);
    __ place(&after_s);
  }
  __ Bind(&end_of_pool_after);

  END();

  RUN();

  ASSERT_EQUAL_64(0x1234567890abcdef, x2);
  ASSERT_EQUAL_64(0xfedcba09, x3);
  ASSERT_EQUAL_64(0xfffffffffedcba09, x5);
  ASSERT_EQUAL_128(0x1234000056780000, 0xabcd0000ef000000, q11);
  ASSERT_EQUAL_FP64(1.234, d13);
  ASSERT_EQUAL_FP32(2.5, s25);

  ASSERT_EQUAL_64(0x1234567890abcdef, x6);
  ASSERT_EQUAL_64(0xfedcba09, x7);
  ASSERT_EQUAL_64(0xfffffffffedcba09, x8);
  ASSERT_EQUAL_128(0x1234000056780000, 0xabcd0000ef000000, q18);
  ASSERT_EQUAL_FP64(1.234, d14);
  ASSERT_EQUAL_FP32(2.5, s26);

  TEARDOWN();
}

TEST(prfm_offset)8235 TEST(prfm_offset) {
8236   SETUP();
8237 
8238   START();
8239   // The address used in prfm doesn't have to be valid.
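  // (Prefetch is only a hint to the memory system: it cannot generate a
  // fault, so an arbitrary bit pattern is safe to use as the base address.)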
  __ Mov(x0, 0x0123456789abcdef);

  for (int i = 0; i < (1 << ImmPrefetchOperation_width); i++) {
    // Unallocated prefetch operations are ignored, so test all of them.
    PrefetchOperation op = static_cast<PrefetchOperation>(i);

    __ Prfm(op, MemOperand(x0));
    __ Prfm(op, MemOperand(x0, 8));
    __ Prfm(op, MemOperand(x0, 32760));
    __ Prfm(op, MemOperand(x0, 32768));

    __ Prfm(op, MemOperand(x0, 1));
    __ Prfm(op, MemOperand(x0, 9));
    __ Prfm(op, MemOperand(x0, 255));
    __ Prfm(op, MemOperand(x0, 257));
    __ Prfm(op, MemOperand(x0, -1));
    __ Prfm(op, MemOperand(x0, -9));
    __ Prfm(op, MemOperand(x0, -255));
    __ Prfm(op, MemOperand(x0, -257));

    __ Prfm(op, MemOperand(x0, 0xfedcba9876543210));
  }

  END();
  RUN();
  TEARDOWN();
}


TEST(prfm_regoffset) {
  SETUP();

  START();
  // The address used in prfm doesn't have to be valid.
  __ Mov(x0, 0x0123456789abcdef);

  CPURegList inputs(CPURegister::kRegister, kXRegSize, 10, 18);
  __ Mov(x10, 0);
  __ Mov(x11, 1);
  __ Mov(x12, 8);
  __ Mov(x13, 255);
  __ Mov(x14, -0);
  __ Mov(x15, -1);
  __ Mov(x16, -8);
  __ Mov(x17, -255);
  __ Mov(x18, 0xfedcba9876543210);

  for (int i = 0; i < (1 << ImmPrefetchOperation_width); i++) {
    // Unallocated prefetch operations are ignored, so test all of them.
    PrefetchOperation op = static_cast<PrefetchOperation>(i);

    CPURegList loop = inputs;
    while (!loop.IsEmpty()) {
      Register input(loop.PopLowestIndex());
      __ Prfm(op, MemOperand(x0, input));
      __ Prfm(op, MemOperand(x0, input, UXTW));
      __ Prfm(op, MemOperand(x0, input, UXTW, 3));
      __ Prfm(op, MemOperand(x0, input, LSL));
      __ Prfm(op, MemOperand(x0, input, LSL, 3));
      __ Prfm(op, MemOperand(x0, input, SXTW));
      __ Prfm(op, MemOperand(x0, input, SXTW, 3));
      __ Prfm(op, MemOperand(x0, input, SXTX));
      __ Prfm(op, MemOperand(x0, input, SXTX, 3));
    }
  }

  END();
  RUN();
  TEARDOWN();
}


TEST(prfm_literal_imm19) {
  SETUP();
  START();

  for (int i = 0; i < (1 << ImmPrefetchOperation_width); i++) {
    // Unallocated prefetch operations are ignored, so test all of them.
    PrefetchOperation op = static_cast<PrefetchOperation>(i);

    ExactAssemblyScope scope(&masm, 7 * kInstructionSize);
    // The address used in prfm doesn't have to be valid.
    __ prfm(op, INT64_C(0));
    __ prfm(op, 1);
    __ prfm(op, -1);
    __ prfm(op, 1000);
    __ prfm(op, -1000);
    __ prfm(op, 0x3ffff);
    __ prfm(op, -0x40000);
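    // 0x3ffff and -0x40000 are the maximum and minimum values representable
    // in the signed 19-bit literal offset field.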
  }

  END();
  RUN();
  TEARDOWN();
}


TEST(prfm_literal) {
  SETUP();

  Label end_of_pool_before;
  Label end_of_pool_after;
  Literal<uint64_t> before(0);
  Literal<uint64_t> after(0);

  START();

  // Manually generate a pool.
  __ B(&end_of_pool_before);
  {
    ExactAssemblyScope scope(&masm, before.GetSize());
    __ place(&before);
  }
  __ Bind(&end_of_pool_before);

  for (int i = 0; i < (1 << ImmPrefetchOperation_width); i++) {
    // Unallocated prefetch operations are ignored, so test all of them.
    PrefetchOperation op = static_cast<PrefetchOperation>(i);

    ExactAssemblyScope guard(&masm, 2 * kInstructionSize);
    __ prfm(op, &before);
    __ prfm(op, &after);
  }

  // Manually generate a pool.
  __ B(&end_of_pool_after);
  {
    ExactAssemblyScope scope(&masm, after.GetSize());
    __ place(&after);
  }
  __ Bind(&end_of_pool_after);

  END();
  RUN();
  TEARDOWN();
}


TEST(prfm_wide) {
  SETUP();

  START();
  // The address used in prfm doesn't have to be valid.
  __ Mov(x0, 0x0123456789abcdef);

  for (int i = 0; i < (1 << ImmPrefetchOperation_width); i++) {
    // Unallocated prefetch operations are ignored, so test all of them.
    PrefetchOperation op = static_cast<PrefetchOperation>(i);

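    // None of these offsets fits a directly-encodable PRFM addressing mode,
    // so the MacroAssembler presumably has to synthesize the address
    // (typically via a scratch register) before prefetching.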
    __ Prfm(op, MemOperand(x0, 0x40000));
    __ Prfm(op, MemOperand(x0, -0x40001));
    __ Prfm(op, MemOperand(x0, UINT64_C(0x5555555555555555)));
    __ Prfm(op, MemOperand(x0, UINT64_C(0xfedcba9876543210)));
  }

  END();
  RUN();
  TEARDOWN();
}


TEST(load_prfm_literal) {
  // Test literals shared between both prfm and ldr.
  SETUP_WITH_FEATURES(CPUFeatures::kFP);

  Label end_of_pool_before;
  Label end_of_pool_after;

  const size_t kSizeOfPoolInBytes = 28;

  Literal<uint64_t> before_x(0x1234567890abcdef);
  Literal<uint32_t> before_w(0xfedcba09);
  Literal<uint32_t> before_sx(0x80000000);
  Literal<double> before_d(1.234);
  Literal<float> before_s(2.5);
  Literal<uint64_t> after_x(0x1234567890abcdef);
  Literal<uint32_t> after_w(0xfedcba09);
  Literal<uint32_t> after_sx(0x80000000);
  Literal<double> after_d(1.234);
  Literal<float> after_s(2.5);

  START();

  // Manually generate a pool.
  __ B(&end_of_pool_before);
  {
    ExactAssemblyScope scope(&masm, kSizeOfPoolInBytes);
    __ place(&before_x);
    __ place(&before_w);
    __ place(&before_sx);
    __ place(&before_d);
    __ place(&before_s);
  }
  __ Bind(&end_of_pool_before);

  for (int i = 0; i < (1 << ImmPrefetchOperation_width); i++) {
    // Unallocated prefetch operations are ignored, so test all of them.
    PrefetchOperation op = static_cast<PrefetchOperation>(i);
    ExactAssemblyScope scope(&masm, 10 * kInstructionSize);

    __ prfm(op, &before_x);
    __ prfm(op, &before_w);
    __ prfm(op, &before_sx);
    __ prfm(op, &before_d);
    __ prfm(op, &before_s);

    __ prfm(op, &after_x);
    __ prfm(op, &after_w);
    __ prfm(op, &after_sx);
    __ prfm(op, &after_d);
    __ prfm(op, &after_s);
  }

  {
    ExactAssemblyScope scope(&masm, 10 * kInstructionSize);
    __ ldr(x2, &before_x);
    __ ldr(w3, &before_w);
    __ ldrsw(x5, &before_sx);
    __ ldr(d13, &before_d);
    __ ldr(s25, &before_s);

    __ ldr(x6, &after_x);
    __ ldr(w7, &after_w);
    __ ldrsw(x8, &after_sx);
    __ ldr(d14, &after_d);
    __ ldr(s26, &after_s);
  }

  // Manually generate a pool.
  __ B(&end_of_pool_after);
  {
    ExactAssemblyScope scope(&masm, kSizeOfPoolInBytes);
    __ place(&after_x);
    __ place(&after_w);
    __ place(&after_sx);
    __ place(&after_d);
    __ place(&after_s);
  }
  __ Bind(&end_of_pool_after);

  END();

  RUN();

  ASSERT_EQUAL_64(0x1234567890abcdef, x2);
  ASSERT_EQUAL_64(0xfedcba09, x3);
  ASSERT_EQUAL_64(0xffffffff80000000, x5);
  ASSERT_EQUAL_FP64(1.234, d13);
  ASSERT_EQUAL_FP32(2.5, s25);

  ASSERT_EQUAL_64(0x1234567890abcdef, x6);
  ASSERT_EQUAL_64(0xfedcba09, x7);
  ASSERT_EQUAL_64(0xffffffff80000000, x8);
  ASSERT_EQUAL_FP64(1.234, d14);
  ASSERT_EQUAL_FP32(2.5, s26);

  TEARDOWN();
}


TEST(add_sub_imm) {
  SETUP();

  START();
  __ Mov(x0, 0x0);
  __ Mov(x1, 0x1111);
  __ Mov(x2, 0xffffffffffffffff);
  __ Mov(x3, 0x8000000000000000);

  __ Add(x10, x0, Operand(0x123));
  __ Add(x11, x1, Operand(0x122000));
  __ Add(x12, x0, Operand(0xabc << 12));
  __ Add(x13, x2, Operand(1));

  __ Add(w14, w0, Operand(0x123));
  __ Add(w15, w1, Operand(0x122000));
  __ Add(w16, w0, Operand(0xabc << 12));
  __ Add(w17, w2, Operand(1));

  __ Sub(x20, x0, Operand(0x1));
  __ Sub(x21, x1, Operand(0x111));
  __ Sub(x22, x1, Operand(0x1 << 12));
  __ Sub(x23, x3, Operand(1));

  __ Sub(w24, w0, Operand(0x1));
  __ Sub(w25, w1, Operand(0x111));
  __ Sub(w26, w1, Operand(0x1 << 12));
  __ Sub(w27, w3, Operand(1));
  END();

  RUN();

  ASSERT_EQUAL_64(0x123, x10);
  ASSERT_EQUAL_64(0x123111, x11);
  ASSERT_EQUAL_64(0xabc000, x12);
  ASSERT_EQUAL_64(0x0, x13);

  ASSERT_EQUAL_32(0x123, w14);
  ASSERT_EQUAL_32(0x123111, w15);
  ASSERT_EQUAL_32(0xabc000, w16);
  ASSERT_EQUAL_32(0x0, w17);

  ASSERT_EQUAL_64(0xffffffffffffffff, x20);
  ASSERT_EQUAL_64(0x1000, x21);
  ASSERT_EQUAL_64(0x111, x22);
  ASSERT_EQUAL_64(0x7fffffffffffffff, x23);

  ASSERT_EQUAL_32(0xffffffff, w24);
  ASSERT_EQUAL_32(0x1000, w25);
  ASSERT_EQUAL_32(0x111, w26);
  ASSERT_EQUAL_32(0xffffffff, w27);

  TEARDOWN();
}


TEST(add_sub_wide_imm) {
  SETUP();

  START();
  __ Mov(x0, 0x0);
  __ Mov(x1, 0x1);

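  // These immediates do not fit the 12-bit (optionally shifted) add/sub
  // immediate encoding, so the MacroAssembler presumably materialises them
  // in a scratch register first.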
  __ Add(x10, x0, Operand(0x1234567890abcdef));
  __ Add(x11, x1, Operand(0xffffffff));

  __ Add(w12, w0, Operand(0x12345678));
  __ Add(w13, w1, Operand(0xffffffff));

  __ Add(w18, w0, Operand(kWMinInt));
  __ Sub(w19, w0, Operand(kWMinInt));

  __ Sub(x20, x0, Operand(0x1234567890abcdef));
  __ Sub(w21, w0, Operand(0x12345678));

  END();

  RUN();

  ASSERT_EQUAL_64(0x1234567890abcdef, x10);
  ASSERT_EQUAL_64(0x100000000, x11);

  ASSERT_EQUAL_32(0x12345678, w12);
  ASSERT_EQUAL_64(0x0, x13);

  ASSERT_EQUAL_32(kWMinInt, w18);
  ASSERT_EQUAL_32(kWMinInt, w19);

  ASSERT_EQUAL_64(-0x1234567890abcdef, x20);
  ASSERT_EQUAL_32(-0x12345678, w21);

  TEARDOWN();
}


TEST(add_sub_shifted) {
  SETUP();

  START();
  __ Mov(x0, 0);
  __ Mov(x1, 0x0123456789abcdef);
  __ Mov(x2, 0xfedcba9876543210);
  __ Mov(x3, 0xffffffffffffffff);

  __ Add(x10, x1, Operand(x2));
  __ Add(x11, x0, Operand(x1, LSL, 8));
  __ Add(x12, x0, Operand(x1, LSR, 8));
  __ Add(x13, x0, Operand(x1, ASR, 8));
  __ Add(x14, x0, Operand(x2, ASR, 8));
  __ Add(w15, w0, Operand(w1, ASR, 8));
  __ Add(w18, w3, Operand(w1, ROR, 8));
  __ Add(x19, x3, Operand(x1, ROR, 8));

  __ Sub(x20, x3, Operand(x2));
  __ Sub(x21, x3, Operand(x1, LSL, 8));
  __ Sub(x22, x3, Operand(x1, LSR, 8));
  __ Sub(x23, x3, Operand(x1, ASR, 8));
  __ Sub(x24, x3, Operand(x2, ASR, 8));
  __ Sub(w25, w3, Operand(w1, ASR, 8));
  __ Sub(w26, w3, Operand(w1, ROR, 8));
  __ Sub(x27, x3, Operand(x1, ROR, 8));
  END();

  RUN();

  ASSERT_EQUAL_64(0xffffffffffffffff, x10);
  ASSERT_EQUAL_64(0x23456789abcdef00, x11);
  ASSERT_EQUAL_64(0x000123456789abcd, x12);
  ASSERT_EQUAL_64(0x000123456789abcd, x13);
  ASSERT_EQUAL_64(0xfffedcba98765432, x14);
  ASSERT_EQUAL_64(0xff89abcd, x15);
  ASSERT_EQUAL_64(0xef89abcc, x18);
  ASSERT_EQUAL_64(0xef0123456789abcc, x19);

  ASSERT_EQUAL_64(0x0123456789abcdef, x20);
  ASSERT_EQUAL_64(0xdcba9876543210ff, x21);
  ASSERT_EQUAL_64(0xfffedcba98765432, x22);
  ASSERT_EQUAL_64(0xfffedcba98765432, x23);
  ASSERT_EQUAL_64(0x000123456789abcd, x24);
  ASSERT_EQUAL_64(0x00765432, x25);
  ASSERT_EQUAL_64(0x10765432, x26);
  ASSERT_EQUAL_64(0x10fedcba98765432, x27);

  TEARDOWN();
}


TEST(add_sub_extended) {
  SETUP();

  START();
  __ Mov(x0, 0);
  __ Mov(x1, 0x0123456789abcdef);
  __ Mov(x2, 0xfedcba9876543210);
  __ Mov(w3, 0x80);

  __ Add(x10, x0, Operand(x1, UXTB, 0));
  __ Add(x11, x0, Operand(x1, UXTB, 1));
  __ Add(x12, x0, Operand(x1, UXTH, 2));
  __ Add(x13, x0, Operand(x1, UXTW, 4));

  __ Add(x14, x0, Operand(x1, SXTB, 0));
  __ Add(x15, x0, Operand(x1, SXTB, 1));
  __ Add(x16, x0, Operand(x1, SXTH, 2));
  __ Add(x17, x0, Operand(x1, SXTW, 3));
  __ Add(x18, x0, Operand(x2, SXTB, 0));
  __ Add(x19, x0, Operand(x2, SXTB, 1));
  __ Add(x20, x0, Operand(x2, SXTH, 2));
  __ Add(x21, x0, Operand(x2, SXTW, 3));

  __ Add(x22, x1, Operand(x2, SXTB, 1));
  __ Sub(x23, x1, Operand(x2, SXTB, 1));

  __ Add(w24, w1, Operand(w2, UXTB, 2));
  __ Add(w25, w0, Operand(w1, SXTB, 0));
  __ Add(w26, w0, Operand(w1, SXTB, 1));
  __ Add(w27, w2, Operand(w1, SXTW, 3));

  __ Add(w28, w0, Operand(w1, SXTW, 3));
  __ Add(x29, x0, Operand(w1, SXTW, 3));

  __ Sub(x30, x0, Operand(w3, SXTB, 1));
  END();

  RUN();

  ASSERT_EQUAL_64(0xef, x10);
  ASSERT_EQUAL_64(0x1de, x11);
  ASSERT_EQUAL_64(0x337bc, x12);
  ASSERT_EQUAL_64(0x89abcdef0, x13);

  ASSERT_EQUAL_64(0xffffffffffffffef, x14);
  ASSERT_EQUAL_64(0xffffffffffffffde, x15);
  ASSERT_EQUAL_64(0xffffffffffff37bc, x16);
  ASSERT_EQUAL_64(0xfffffffc4d5e6f78, x17);
  ASSERT_EQUAL_64(0x10, x18);
  ASSERT_EQUAL_64(0x20, x19);
  ASSERT_EQUAL_64(0xc840, x20);
  ASSERT_EQUAL_64(0x3b2a19080, x21);

  ASSERT_EQUAL_64(0x0123456789abce0f, x22);
  ASSERT_EQUAL_64(0x0123456789abcdcf, x23);

  ASSERT_EQUAL_32(0x89abce2f, w24);
  ASSERT_EQUAL_32(0xffffffef, w25);
  ASSERT_EQUAL_32(0xffffffde, w26);
  ASSERT_EQUAL_32(0xc3b2a188, w27);

  ASSERT_EQUAL_32(0x4d5e6f78, w28);
  ASSERT_EQUAL_64(0xfffffffc4d5e6f78, x29);

  ASSERT_EQUAL_64(256, x30);

  TEARDOWN();
}


TEST(add_sub_negative) {
  SETUP();

  START();
  __ Mov(x0, 0);
  __ Mov(x1, 4687);
  __ Mov(x2, 0x1122334455667788);
  __ Mov(w3, 0x11223344);
  __ Mov(w4, 400000);

  __ Add(x10, x0, -42);
  __ Add(x11, x1, -687);
  __ Add(x12, x2, -0x88);

  __ Sub(x13, x0, -600);
  __ Sub(x14, x1, -313);
  __ Sub(x15, x2, -0x555);

  __ Add(w19, w3, -0x344);
  __ Add(w20, w4, -2000);

  __ Sub(w21, w3, -0xbc);
  __ Sub(w22, w4, -2000);
  END();

  RUN();

  ASSERT_EQUAL_64(-42, x10);
  ASSERT_EQUAL_64(4000, x11);
  ASSERT_EQUAL_64(0x1122334455667700, x12);

  ASSERT_EQUAL_64(600, x13);
  ASSERT_EQUAL_64(5000, x14);
  ASSERT_EQUAL_64(0x1122334455667cdd, x15);

  ASSERT_EQUAL_32(0x11223000, w19);
  ASSERT_EQUAL_32(398000, w20);

  ASSERT_EQUAL_32(0x11223400, w21);
  ASSERT_EQUAL_32(402000, w22);

  TEARDOWN();
}


TEST(add_sub_zero) {
  SETUP();

  START();
  __ Mov(x0, 0);
  __ Mov(x1, 0);
  __ Mov(x2, 0);

  Label blob1;
  __ Bind(&blob1);
  __ Add(x0, x0, 0);
  __ Sub(x1, x1, 0);
  __ Sub(x2, x2, xzr);
  VIXL_CHECK(__ GetSizeOfCodeGeneratedSince(&blob1) == 0);

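  // Writing a W register zeroes the upper 32 bits of the corresponding X
  // register, so the 32-bit forms below have an architectural effect and
  // cannot be elided.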
  Label blob2;
  __ Bind(&blob2);
  __ Add(w3, w3, 0);
  VIXL_CHECK(__ GetSizeOfCodeGeneratedSince(&blob2) != 0);

  Label blob3;
  __ Bind(&blob3);
  __ Sub(w3, w3, wzr);
  VIXL_CHECK(__ GetSizeOfCodeGeneratedSince(&blob3) != 0);

  END();

  RUN();

  ASSERT_EQUAL_64(0, x0);
  ASSERT_EQUAL_64(0, x1);
  ASSERT_EQUAL_64(0, x2);

  TEARDOWN();
}


TEST(claim_drop_zero) {
  SETUP();

  START();

  Label start;
  __ Bind(&start);
  __ Claim(Operand(0));
  __ Drop(Operand(0));
  __ Claim(Operand(xzr));
  __ Drop(Operand(xzr));
  VIXL_CHECK(__ GetSizeOfCodeGeneratedSince(&start) == 0);

  END();

  RUN();

  TEARDOWN();
}


TEST(neg) {
  SETUP();

  START();
  __ Mov(x0, 0xf123456789abcdef);

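  // Neg(rd, operand) is a subtraction from the zero register, so it accepts
  // the same immediate, shifted-register and extended-register operands as
  // Sub.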
  // Immediate.
  __ Neg(x1, 0x123);
  __ Neg(w2, 0x123);

  // Shifted.
  __ Neg(x3, Operand(x0, LSL, 1));
  __ Neg(w4, Operand(w0, LSL, 2));
  __ Neg(x5, Operand(x0, LSR, 3));
  __ Neg(w6, Operand(w0, LSR, 4));
  __ Neg(x7, Operand(x0, ASR, 5));
  __ Neg(w8, Operand(w0, ASR, 6));

  // Extended.
  __ Neg(w9, Operand(w0, UXTB));
  __ Neg(x10, Operand(x0, SXTB, 1));
  __ Neg(w11, Operand(w0, UXTH, 2));
  __ Neg(x12, Operand(x0, SXTH, 3));
  __ Neg(w13, Operand(w0, UXTW, 4));
  __ Neg(x14, Operand(x0, SXTW, 4));
  END();

  RUN();

  ASSERT_EQUAL_64(0xfffffffffffffedd, x1);
  ASSERT_EQUAL_64(0xfffffedd, x2);
  ASSERT_EQUAL_64(0x1db97530eca86422, x3);
  ASSERT_EQUAL_64(0xd950c844, x4);
  ASSERT_EQUAL_64(0xe1db97530eca8643, x5);
  ASSERT_EQUAL_64(0xf7654322, x6);
  ASSERT_EQUAL_64(0x0076e5d4c3b2a191, x7);
  ASSERT_EQUAL_64(0x01d950c9, x8);
  ASSERT_EQUAL_64(0xffffff11, x9);
  ASSERT_EQUAL_64(0x0000000000000022, x10);
  ASSERT_EQUAL_64(0xfffcc844, x11);
  ASSERT_EQUAL_64(0x0000000000019088, x12);
  ASSERT_EQUAL_64(0x65432110, x13);
  ASSERT_EQUAL_64(0x0000000765432110, x14);

  TEARDOWN();
}


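// Helper for the Adcs/Sbcs tests below. It seeds the C flag by writing NZCV
// directly, applies the operation, and then checks both the result and the
// flags it produced. Architecturally, ADC computes left + right + C and SBC
// computes left + ~right + C, so SBC with the carry set is a plain
// subtraction.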
template <typename T, typename Op>
static void AdcsSbcsHelper(
    Op op, T left, T right, int carry, T expected, StatusFlags expected_flags) {
  int reg_size = sizeof(T) * 8;
  Register left_reg(0, reg_size);
  Register right_reg(1, reg_size);
  Register result_reg(2, reg_size);

  SETUP();
  START();

  __ Mov(left_reg, left);
  __ Mov(right_reg, right);
  __ Mov(x10, (carry ? CFlag : NoFlag));

  __ Msr(NZCV, x10);
  (masm.*op)(result_reg, left_reg, right_reg);

  END();
  RUN();

  ASSERT_EQUAL_64(left, left_reg.X());
  ASSERT_EQUAL_64(right, right_reg.X());
  ASSERT_EQUAL_64(expected, result_reg.X());
  ASSERT_EQUAL_NZCV(expected_flags);

  TEARDOWN();
}


TEST(adcs_sbcs_x) {
  uint64_t inputs[] = {
      0x0000000000000000,
      0x0000000000000001,
      0x7ffffffffffffffe,
      0x7fffffffffffffff,
      0x8000000000000000,
      0x8000000000000001,
      0xfffffffffffffffe,
      0xffffffffffffffff,
  };
  static const size_t input_count = sizeof(inputs) / sizeof(inputs[0]);

  struct Expected {
    uint64_t carry0_result;
    StatusFlags carry0_flags;
    uint64_t carry1_result;
    StatusFlags carry1_flags;
  };

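  // The tables are indexed as expected_*[left][right]; each entry gives the
  // result and flags for a carry-in of 0 and for a carry-in of 1.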
  static const Expected expected_adcs_x[input_count][input_count] =
      {{{0x0000000000000000, ZFlag, 0x0000000000000001, NoFlag},
        {0x0000000000000001, NoFlag, 0x0000000000000002, NoFlag},
        {0x7ffffffffffffffe, NoFlag, 0x7fffffffffffffff, NoFlag},
        {0x7fffffffffffffff, NoFlag, 0x8000000000000000, NVFlag},
        {0x8000000000000000, NFlag, 0x8000000000000001, NFlag},
        {0x8000000000000001, NFlag, 0x8000000000000002, NFlag},
        {0xfffffffffffffffe, NFlag, 0xffffffffffffffff, NFlag},
        {0xffffffffffffffff, NFlag, 0x0000000000000000, ZCFlag}},
       {{0x0000000000000001, NoFlag, 0x0000000000000002, NoFlag},
        {0x0000000000000002, NoFlag, 0x0000000000000003, NoFlag},
        {0x7fffffffffffffff, NoFlag, 0x8000000000000000, NVFlag},
        {0x8000000000000000, NVFlag, 0x8000000000000001, NVFlag},
        {0x8000000000000001, NFlag, 0x8000000000000002, NFlag},
        {0x8000000000000002, NFlag, 0x8000000000000003, NFlag},
        {0xffffffffffffffff, NFlag, 0x0000000000000000, ZCFlag},
        {0x0000000000000000, ZCFlag, 0x0000000000000001, CFlag}},
       {{0x7ffffffffffffffe, NoFlag, 0x7fffffffffffffff, NoFlag},
        {0x7fffffffffffffff, NoFlag, 0x8000000000000000, NVFlag},
        {0xfffffffffffffffc, NVFlag, 0xfffffffffffffffd, NVFlag},
        {0xfffffffffffffffd, NVFlag, 0xfffffffffffffffe, NVFlag},
        {0xfffffffffffffffe, NFlag, 0xffffffffffffffff, NFlag},
        {0xffffffffffffffff, NFlag, 0x0000000000000000, ZCFlag},
        {0x7ffffffffffffffc, CFlag, 0x7ffffffffffffffd, CFlag},
        {0x7ffffffffffffffd, CFlag, 0x7ffffffffffffffe, CFlag}},
       {{0x7fffffffffffffff, NoFlag, 0x8000000000000000, NVFlag},
        {0x8000000000000000, NVFlag, 0x8000000000000001, NVFlag},
        {0xfffffffffffffffd, NVFlag, 0xfffffffffffffffe, NVFlag},
        {0xfffffffffffffffe, NVFlag, 0xffffffffffffffff, NVFlag},
        {0xffffffffffffffff, NFlag, 0x0000000000000000, ZCFlag},
        {0x0000000000000000, ZCFlag, 0x0000000000000001, CFlag},
        {0x7ffffffffffffffd, CFlag, 0x7ffffffffffffffe, CFlag},
        {0x7ffffffffffffffe, CFlag, 0x7fffffffffffffff, CFlag}},
       {{0x8000000000000000, NFlag, 0x8000000000000001, NFlag},
        {0x8000000000000001, NFlag, 0x8000000000000002, NFlag},
        {0xfffffffffffffffe, NFlag, 0xffffffffffffffff, NFlag},
        {0xffffffffffffffff, NFlag, 0x0000000000000000, ZCFlag},
        {0x0000000000000000, ZCVFlag, 0x0000000000000001, CVFlag},
        {0x0000000000000001, CVFlag, 0x0000000000000002, CVFlag},
        {0x7ffffffffffffffe, CVFlag, 0x7fffffffffffffff, CVFlag},
        {0x7fffffffffffffff, CVFlag, 0x8000000000000000, NCFlag}},
       {{0x8000000000000001, NFlag, 0x8000000000000002, NFlag},
        {0x8000000000000002, NFlag, 0x8000000000000003, NFlag},
        {0xffffffffffffffff, NFlag, 0x0000000000000000, ZCFlag},
        {0x0000000000000000, ZCFlag, 0x0000000000000001, CFlag},
        {0x0000000000000001, CVFlag, 0x0000000000000002, CVFlag},
        {0x0000000000000002, CVFlag, 0x0000000000000003, CVFlag},
        {0x7fffffffffffffff, CVFlag, 0x8000000000000000, NCFlag},
        {0x8000000000000000, NCFlag, 0x8000000000000001, NCFlag}},
       {{0xfffffffffffffffe, NFlag, 0xffffffffffffffff, NFlag},
        {0xffffffffffffffff, NFlag, 0x0000000000000000, ZCFlag},
        {0x7ffffffffffffffc, CFlag, 0x7ffffffffffffffd, CFlag},
        {0x7ffffffffffffffd, CFlag, 0x7ffffffffffffffe, CFlag},
        {0x7ffffffffffffffe, CVFlag, 0x7fffffffffffffff, CVFlag},
        {0x7fffffffffffffff, CVFlag, 0x8000000000000000, NCFlag},
        {0xfffffffffffffffc, NCFlag, 0xfffffffffffffffd, NCFlag},
        {0xfffffffffffffffd, NCFlag, 0xfffffffffffffffe, NCFlag}},
       {{0xffffffffffffffff, NFlag, 0x0000000000000000, ZCFlag},
        {0x0000000000000000, ZCFlag, 0x0000000000000001, CFlag},
        {0x7ffffffffffffffd, CFlag, 0x7ffffffffffffffe, CFlag},
        {0x7ffffffffffffffe, CFlag, 0x7fffffffffffffff, CFlag},
        {0x7fffffffffffffff, CVFlag, 0x8000000000000000, NCFlag},
        {0x8000000000000000, NCFlag, 0x8000000000000001, NCFlag},
        {0xfffffffffffffffd, NCFlag, 0xfffffffffffffffe, NCFlag},
        {0xfffffffffffffffe, NCFlag, 0xffffffffffffffff, NCFlag}}};

  static const Expected expected_sbcs_x[input_count][input_count] =
      {{{0xffffffffffffffff, NFlag, 0x0000000000000000, ZCFlag},
        {0xfffffffffffffffe, NFlag, 0xffffffffffffffff, NFlag},
        {0x8000000000000001, NFlag, 0x8000000000000002, NFlag},
        {0x8000000000000000, NFlag, 0x8000000000000001, NFlag},
        {0x7fffffffffffffff, NoFlag, 0x8000000000000000, NVFlag},
        {0x7ffffffffffffffe, NoFlag, 0x7fffffffffffffff, NoFlag},
        {0x0000000000000001, NoFlag, 0x0000000000000002, NoFlag},
        {0x0000000000000000, ZFlag, 0x0000000000000001, NoFlag}},
       {{0x0000000000000000, ZCFlag, 0x0000000000000001, CFlag},
        {0xffffffffffffffff, NFlag, 0x0000000000000000, ZCFlag},
        {0x8000000000000002, NFlag, 0x8000000000000003, NFlag},
        {0x8000000000000001, NFlag, 0x8000000000000002, NFlag},
        {0x8000000000000000, NVFlag, 0x8000000000000001, NVFlag},
        {0x7fffffffffffffff, NoFlag, 0x8000000000000000, NVFlag},
        {0x0000000000000002, NoFlag, 0x0000000000000003, NoFlag},
        {0x0000000000000001, NoFlag, 0x0000000000000002, NoFlag}},
       {{0x7ffffffffffffffd, CFlag, 0x7ffffffffffffffe, CFlag},
        {0x7ffffffffffffffc, CFlag, 0x7ffffffffffffffd, CFlag},
        {0xffffffffffffffff, NFlag, 0x0000000000000000, ZCFlag},
        {0xfffffffffffffffe, NFlag, 0xffffffffffffffff, NFlag},
        {0xfffffffffffffffd, NVFlag, 0xfffffffffffffffe, NVFlag},
        {0xfffffffffffffffc, NVFlag, 0xfffffffffffffffd, NVFlag},
        {0x7fffffffffffffff, NoFlag, 0x8000000000000000, NVFlag},
        {0x7ffffffffffffffe, NoFlag, 0x7fffffffffffffff, NoFlag}},
       {{0x7ffffffffffffffe, CFlag, 0x7fffffffffffffff, CFlag},
        {0x7ffffffffffffffd, CFlag, 0x7ffffffffffffffe, CFlag},
        {0x0000000000000000, ZCFlag, 0x0000000000000001, CFlag},
        {0xffffffffffffffff, NFlag, 0x0000000000000000, ZCFlag},
        {0xfffffffffffffffe, NVFlag, 0xffffffffffffffff, NVFlag},
        {0xfffffffffffffffd, NVFlag, 0xfffffffffffffffe, NVFlag},
        {0x8000000000000000, NVFlag, 0x8000000000000001, NVFlag},
        {0x7fffffffffffffff, NoFlag, 0x8000000000000000, NVFlag}},
       {{0x7fffffffffffffff, CVFlag, 0x8000000000000000, NCFlag},
        {0x7ffffffffffffffe, CVFlag, 0x7fffffffffffffff, CVFlag},
        {0x0000000000000001, CVFlag, 0x0000000000000002, CVFlag},
        {0x0000000000000000, ZCVFlag, 0x0000000000000001, CVFlag},
        {0xffffffffffffffff, NFlag, 0x0000000000000000, ZCFlag},
        {0xfffffffffffffffe, NFlag, 0xffffffffffffffff, NFlag},
        {0x8000000000000001, NFlag, 0x8000000000000002, NFlag},
        {0x8000000000000000, NFlag, 0x8000000000000001, NFlag}},
       {{0x8000000000000000, NCFlag, 0x8000000000000001, NCFlag},
        {0x7fffffffffffffff, CVFlag, 0x8000000000000000, NCFlag},
        {0x0000000000000002, CVFlag, 0x0000000000000003, CVFlag},
        {0x0000000000000001, CVFlag, 0x0000000000000002, CVFlag},
        {0x0000000000000000, ZCFlag, 0x0000000000000001, CFlag},
        {0xffffffffffffffff, NFlag, 0x0000000000000000, ZCFlag},
        {0x8000000000000002, NFlag, 0x8000000000000003, NFlag},
        {0x8000000000000001, NFlag, 0x8000000000000002, NFlag}},
       {{0xfffffffffffffffd, NCFlag, 0xfffffffffffffffe, NCFlag},
        {0xfffffffffffffffc, NCFlag, 0xfffffffffffffffd, NCFlag},
        {0x7fffffffffffffff, CVFlag, 0x8000000000000000, NCFlag},
        {0x7ffffffffffffffe, CVFlag, 0x7fffffffffffffff, CVFlag},
        {0x7ffffffffffffffd, CFlag, 0x7ffffffffffffffe, CFlag},
        {0x7ffffffffffffffc, CFlag, 0x7ffffffffffffffd, CFlag},
        {0xffffffffffffffff, NFlag, 0x0000000000000000, ZCFlag},
        {0xfffffffffffffffe, NFlag, 0xffffffffffffffff, NFlag}},
       {{0xfffffffffffffffe, NCFlag, 0xffffffffffffffff, NCFlag},
        {0xfffffffffffffffd, NCFlag, 0xfffffffffffffffe, NCFlag},
        {0x8000000000000000, NCFlag, 0x8000000000000001, NCFlag},
        {0x7fffffffffffffff, CVFlag, 0x8000000000000000, NCFlag},
        {0x7ffffffffffffffe, CFlag, 0x7fffffffffffffff, CFlag},
        {0x7ffffffffffffffd, CFlag, 0x7ffffffffffffffe, CFlag},
        {0x0000000000000000, ZCFlag, 0x0000000000000001, CFlag},
        {0xffffffffffffffff, NFlag, 0x0000000000000000, ZCFlag}}};

  for (size_t left = 0; left < input_count; left++) {
    for (size_t right = 0; right < input_count; right++) {
      const Expected& expected = expected_adcs_x[left][right];
      AdcsSbcsHelper(&MacroAssembler::Adcs,
                     inputs[left],
                     inputs[right],
                     0,
                     expected.carry0_result,
                     expected.carry0_flags);
      AdcsSbcsHelper(&MacroAssembler::Adcs,
                     inputs[left],
                     inputs[right],
                     1,
                     expected.carry1_result,
                     expected.carry1_flags);
    }
  }

  for (size_t left = 0; left < input_count; left++) {
    for (size_t right = 0; right < input_count; right++) {
      const Expected& expected = expected_sbcs_x[left][right];
      AdcsSbcsHelper(&MacroAssembler::Sbcs,
                     inputs[left],
                     inputs[right],
                     0,
                     expected.carry0_result,
                     expected.carry0_flags);
      AdcsSbcsHelper(&MacroAssembler::Sbcs,
                     inputs[left],
                     inputs[right],
                     1,
                     expected.carry1_result,
                     expected.carry1_flags);
    }
  }
}


TEST(adcs_sbcs_w) {
  uint32_t inputs[] = {
      0x00000000,
      0x00000001,
      0x7ffffffe,
      0x7fffffff,
      0x80000000,
      0x80000001,
      0xfffffffe,
      0xffffffff,
  };
  static const size_t input_count = sizeof(inputs) / sizeof(inputs[0]);

  struct Expected {
    uint32_t carry0_result;
    StatusFlags carry0_flags;
    uint32_t carry1_result;
    StatusFlags carry1_flags;
  };

  static const Expected expected_adcs_w[input_count][input_count] =
      {{{0x00000000, ZFlag, 0x00000001, NoFlag},
        {0x00000001, NoFlag, 0x00000002, NoFlag},
        {0x7ffffffe, NoFlag, 0x7fffffff, NoFlag},
        {0x7fffffff, NoFlag, 0x80000000, NVFlag},
        {0x80000000, NFlag, 0x80000001, NFlag},
        {0x80000001, NFlag, 0x80000002, NFlag},
        {0xfffffffe, NFlag, 0xffffffff, NFlag},
        {0xffffffff, NFlag, 0x00000000, ZCFlag}},
       {{0x00000001, NoFlag, 0x00000002, NoFlag},
        {0x00000002, NoFlag, 0x00000003, NoFlag},
        {0x7fffffff, NoFlag, 0x80000000, NVFlag},
        {0x80000000, NVFlag, 0x80000001, NVFlag},
        {0x80000001, NFlag, 0x80000002, NFlag},
        {0x80000002, NFlag, 0x80000003, NFlag},
        {0xffffffff, NFlag, 0x00000000, ZCFlag},
        {0x00000000, ZCFlag, 0x00000001, CFlag}},
       {{0x7ffffffe, NoFlag, 0x7fffffff, NoFlag},
        {0x7fffffff, NoFlag, 0x80000000, NVFlag},
        {0xfffffffc, NVFlag, 0xfffffffd, NVFlag},
        {0xfffffffd, NVFlag, 0xfffffffe, NVFlag},
        {0xfffffffe, NFlag, 0xffffffff, NFlag},
        {0xffffffff, NFlag, 0x00000000, ZCFlag},
        {0x7ffffffc, CFlag, 0x7ffffffd, CFlag},
        {0x7ffffffd, CFlag, 0x7ffffffe, CFlag}},
       {{0x7fffffff, NoFlag, 0x80000000, NVFlag},
        {0x80000000, NVFlag, 0x80000001, NVFlag},
        {0xfffffffd, NVFlag, 0xfffffffe, NVFlag},
        {0xfffffffe, NVFlag, 0xffffffff, NVFlag},
        {0xffffffff, NFlag, 0x00000000, ZCFlag},
        {0x00000000, ZCFlag, 0x00000001, CFlag},
        {0x7ffffffd, CFlag, 0x7ffffffe, CFlag},
        {0x7ffffffe, CFlag, 0x7fffffff, CFlag}},
       {{0x80000000, NFlag, 0x80000001, NFlag},
        {0x80000001, NFlag, 0x80000002, NFlag},
        {0xfffffffe, NFlag, 0xffffffff, NFlag},
        {0xffffffff, NFlag, 0x00000000, ZCFlag},
        {0x00000000, ZCVFlag, 0x00000001, CVFlag},
        {0x00000001, CVFlag, 0x00000002, CVFlag},
        {0x7ffffffe, CVFlag, 0x7fffffff, CVFlag},
        {0x7fffffff, CVFlag, 0x80000000, NCFlag}},
       {{0x80000001, NFlag, 0x80000002, NFlag},
        {0x80000002, NFlag, 0x80000003, NFlag},
        {0xffffffff, NFlag, 0x00000000, ZCFlag},
        {0x00000000, ZCFlag, 0x00000001, CFlag},
        {0x00000001, CVFlag, 0x00000002, CVFlag},
        {0x00000002, CVFlag, 0x00000003, CVFlag},
        {0x7fffffff, CVFlag, 0x80000000, NCFlag},
        {0x80000000, NCFlag, 0x80000001, NCFlag}},
       {{0xfffffffe, NFlag, 0xffffffff, NFlag},
        {0xffffffff, NFlag, 0x00000000, ZCFlag},
        {0x7ffffffc, CFlag, 0x7ffffffd, CFlag},
        {0x7ffffffd, CFlag, 0x7ffffffe, CFlag},
        {0x7ffffffe, CVFlag, 0x7fffffff, CVFlag},
        {0x7fffffff, CVFlag, 0x80000000, NCFlag},
        {0xfffffffc, NCFlag, 0xfffffffd, NCFlag},
        {0xfffffffd, NCFlag, 0xfffffffe, NCFlag}},
       {{0xffffffff, NFlag, 0x00000000, ZCFlag},
        {0x00000000, ZCFlag, 0x00000001, CFlag},
        {0x7ffffffd, CFlag, 0x7ffffffe, CFlag},
        {0x7ffffffe, CFlag, 0x7fffffff, CFlag},
        {0x7fffffff, CVFlag, 0x80000000, NCFlag},
        {0x80000000, NCFlag, 0x80000001, NCFlag},
        {0xfffffffd, NCFlag, 0xfffffffe, NCFlag},
        {0xfffffffe, NCFlag, 0xffffffff, NCFlag}}};

  static const Expected expected_sbcs_w[input_count][input_count] =
      {{{0xffffffff, NFlag, 0x00000000, ZCFlag},
        {0xfffffffe, NFlag, 0xffffffff, NFlag},
        {0x80000001, NFlag, 0x80000002, NFlag},
        {0x80000000, NFlag, 0x80000001, NFlag},
        {0x7fffffff, NoFlag, 0x80000000, NVFlag},
        {0x7ffffffe, NoFlag, 0x7fffffff, NoFlag},
        {0x00000001, NoFlag, 0x00000002, NoFlag},
        {0x00000000, ZFlag, 0x00000001, NoFlag}},
       {{0x00000000, ZCFlag, 0x00000001, CFlag},
        {0xffffffff, NFlag, 0x00000000, ZCFlag},
        {0x80000002, NFlag, 0x80000003, NFlag},
        {0x80000001, NFlag, 0x80000002, NFlag},
        {0x80000000, NVFlag, 0x80000001, NVFlag},
        {0x7fffffff, NoFlag, 0x80000000, NVFlag},
        {0x00000002, NoFlag, 0x00000003, NoFlag},
        {0x00000001, NoFlag, 0x00000002, NoFlag}},
       {{0x7ffffffd, CFlag, 0x7ffffffe, CFlag},
        {0x7ffffffc, CFlag, 0x7ffffffd, CFlag},
        {0xffffffff, NFlag, 0x00000000, ZCFlag},
        {0xfffffffe, NFlag, 0xffffffff, NFlag},
        {0xfffffffd, NVFlag, 0xfffffffe, NVFlag},
        {0xfffffffc, NVFlag, 0xfffffffd, NVFlag},
        {0x7fffffff, NoFlag, 0x80000000, NVFlag},
        {0x7ffffffe, NoFlag, 0x7fffffff, NoFlag}},
       {{0x7ffffffe, CFlag, 0x7fffffff, CFlag},
        {0x7ffffffd, CFlag, 0x7ffffffe, CFlag},
        {0x00000000, ZCFlag, 0x00000001, CFlag},
        {0xffffffff, NFlag, 0x00000000, ZCFlag},
        {0xfffffffe, NVFlag, 0xffffffff, NVFlag},
        {0xfffffffd, NVFlag, 0xfffffffe, NVFlag},
        {0x80000000, NVFlag, 0x80000001, NVFlag},
        {0x7fffffff, NoFlag, 0x80000000, NVFlag}},
       {{0x7fffffff, CVFlag, 0x80000000, NCFlag},
        {0x7ffffffe, CVFlag, 0x7fffffff, CVFlag},
        {0x00000001, CVFlag, 0x00000002, CVFlag},
        {0x00000000, ZCVFlag, 0x00000001, CVFlag},
        {0xffffffff, NFlag, 0x00000000, ZCFlag},
        {0xfffffffe, NFlag, 0xffffffff, NFlag},
        {0x80000001, NFlag, 0x80000002, NFlag},
        {0x80000000, NFlag, 0x80000001, NFlag}},
       {{0x80000000, NCFlag, 0x80000001, NCFlag},
        {0x7fffffff, CVFlag, 0x80000000, NCFlag},
        {0x00000002, CVFlag, 0x00000003, CVFlag},
        {0x00000001, CVFlag, 0x00000002, CVFlag},
        {0x00000000, ZCFlag, 0x00000001, CFlag},
        {0xffffffff, NFlag, 0x00000000, ZCFlag},
        {0x80000002, NFlag, 0x80000003, NFlag},
        {0x80000001, NFlag, 0x80000002, NFlag}},
       {{0xfffffffd, NCFlag, 0xfffffffe, NCFlag},
        {0xfffffffc, NCFlag, 0xfffffffd, NCFlag},
        {0x7fffffff, CVFlag, 0x80000000, NCFlag},
        {0x7ffffffe, CVFlag, 0x7fffffff, CVFlag},
        {0x7ffffffd, CFlag, 0x7ffffffe, CFlag},
        {0x7ffffffc, CFlag, 0x7ffffffd, CFlag},
        {0xffffffff, NFlag, 0x00000000, ZCFlag},
        {0xfffffffe, NFlag, 0xffffffff, NFlag}},
       {{0xfffffffe, NCFlag, 0xffffffff, NCFlag},
        {0xfffffffd, NCFlag, 0xfffffffe, NCFlag},
        {0x80000000, NCFlag, 0x80000001, NCFlag},
        {0x7fffffff, CVFlag, 0x80000000, NCFlag},
        {0x7ffffffe, CFlag, 0x7fffffff, CFlag},
        {0x7ffffffd, CFlag, 0x7ffffffe, CFlag},
        {0x00000000, ZCFlag, 0x00000001, CFlag},
        {0xffffffff, NFlag, 0x00000000, ZCFlag}}};

  for (size_t left = 0; left < input_count; left++) {
    for (size_t right = 0; right < input_count; right++) {
      const Expected& expected = expected_adcs_w[left][right];
      AdcsSbcsHelper(&MacroAssembler::Adcs,
                     inputs[left],
                     inputs[right],
                     0,
                     expected.carry0_result,
                     expected.carry0_flags);
      AdcsSbcsHelper(&MacroAssembler::Adcs,
                     inputs[left],
                     inputs[right],
                     1,
                     expected.carry1_result,
                     expected.carry1_flags);
    }
  }

  for (size_t left = 0; left < input_count; left++) {
    for (size_t right = 0; right < input_count; right++) {
      const Expected& expected = expected_sbcs_w[left][right];
      AdcsSbcsHelper(&MacroAssembler::Sbcs,
                     inputs[left],
                     inputs[right],
                     0,
                     expected.carry0_result,
                     expected.carry0_flags);
      AdcsSbcsHelper(&MacroAssembler::Sbcs,
                     inputs[left],
                     inputs[right],
                     1,
                     expected.carry1_result,
                     expected.carry1_flags);
    }
  }
}


TEST(adc_sbc_shift) {
  SETUP();

  START();
  __ Mov(x0, 0);
  __ Mov(x1, 1);
  __ Mov(x2, 0x0123456789abcdef);
  __ Mov(x3, 0xfedcba9876543210);
  __ Mov(x4, 0xffffffffffffffff);

  // Clear the C flag.
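  // (Adding zero produces no carry-out, so this reliably leaves C clear; the
  // Cmp of a register against itself further down sets it.)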
  __ Adds(x0, x0, Operand(0));

  __ Adc(x5, x2, Operand(x3));
  __ Adc(x6, x0, Operand(x1, LSL, 60));
  __ Sbc(x7, x4, Operand(x3, LSR, 4));
  __ Adc(x8, x2, Operand(x3, ASR, 4));
  __ Adc(x9, x2, Operand(x3, ROR, 8));

  __ Adc(w10, w2, Operand(w3));
  __ Adc(w11, w0, Operand(w1, LSL, 30));
  __ Sbc(w12, w4, Operand(w3, LSR, 4));
  __ Adc(w13, w2, Operand(w3, ASR, 4));
  __ Adc(w14, w2, Operand(w3, ROR, 8));

  // Set the C flag.
  __ Cmp(w0, Operand(w0));

  __ Adc(x18, x2, Operand(x3));
  __ Adc(x19, x0, Operand(x1, LSL, 60));
  __ Sbc(x20, x4, Operand(x3, LSR, 4));
  __ Adc(x21, x2, Operand(x3, ASR, 4));
  __ Adc(x22, x2, Operand(x3, ROR, 8));

  __ Adc(w23, w2, Operand(w3));
  __ Adc(w24, w0, Operand(w1, LSL, 30));
  __ Sbc(w25, w4, Operand(w3, LSR, 4));
  __ Adc(w26, w2, Operand(w3, ASR, 4));
  __ Adc(w27, w2, Operand(w3, ROR, 8));
  END();

  RUN();

  ASSERT_EQUAL_64(0xffffffffffffffff, x5);
  ASSERT_EQUAL_64(INT64_C(1) << 60, x6);
  ASSERT_EQUAL_64(0xf0123456789abcdd, x7);
  ASSERT_EQUAL_64(0x0111111111111110, x8);
  ASSERT_EQUAL_64(0x1222222222222221, x9);

  ASSERT_EQUAL_32(0xffffffff, w10);
  ASSERT_EQUAL_32(INT32_C(1) << 30, w11);
  ASSERT_EQUAL_32(0xf89abcdd, w12);
  ASSERT_EQUAL_32(0x91111110, w13);
  ASSERT_EQUAL_32(0x9a222221, w14);

  ASSERT_EQUAL_64(0xffffffffffffffff + 1, x18);
  ASSERT_EQUAL_64((INT64_C(1) << 60) + 1, x19);
  ASSERT_EQUAL_64(0xf0123456789abcdd + 1, x20);
  ASSERT_EQUAL_64(0x0111111111111110 + 1, x21);
  ASSERT_EQUAL_64(0x1222222222222221 + 1, x22);

  ASSERT_EQUAL_32(0xffffffff + 1, w23);
  ASSERT_EQUAL_32((INT32_C(1) << 30) + 1, w24);
  ASSERT_EQUAL_32(0xf89abcdd + 1, w25);
  ASSERT_EQUAL_32(0x91111110 + 1, w26);
  ASSERT_EQUAL_32(0x9a222221 + 1, w27);

  TEARDOWN();
}


TEST(adc_sbc_extend) {
  SETUP();

  START();
  // Clear the C flag.
  __ Adds(x0, x0, Operand(0));

  __ Mov(x0, 0);
  __ Mov(x1, 1);
  __ Mov(x2, 0x0123456789abcdef);

  __ Adc(x10, x1, Operand(w2, UXTB, 1));
  __ Adc(x11, x1, Operand(x2, SXTH, 2));
  __ Sbc(x12, x1, Operand(w2, UXTW, 4));
  __ Adc(x13, x1, Operand(x2, UXTX, 4));

  __ Adc(w14, w1, Operand(w2, UXTB, 1));
  __ Adc(w15, w1, Operand(w2, SXTH, 2));
  __ Adc(w9, w1, Operand(w2, UXTW, 4));

  // Set the C flag.
  __ Cmp(w0, Operand(w0));

  __ Adc(x20, x1, Operand(w2, UXTB, 1));
  __ Adc(x21, x1, Operand(x2, SXTH, 2));
  __ Sbc(x22, x1, Operand(w2, UXTW, 4));
  __ Adc(x23, x1, Operand(x2, UXTX, 4));

  __ Adc(w24, w1, Operand(w2, UXTB, 1));
  __ Adc(w25, w1, Operand(w2, SXTH, 2));
  __ Adc(w26, w1, Operand(w2, UXTW, 4));
  END();

  RUN();

  ASSERT_EQUAL_64(0x1df, x10);
  ASSERT_EQUAL_64(0xffffffffffff37bd, x11);
  ASSERT_EQUAL_64(0xfffffff765432110, x12);
  ASSERT_EQUAL_64(0x123456789abcdef1, x13);

  ASSERT_EQUAL_32(0x1df, w14);
  ASSERT_EQUAL_32(0xffff37bd, w15);
  ASSERT_EQUAL_32(0x9abcdef1, w9);

  ASSERT_EQUAL_64(0x1df + 1, x20);
  ASSERT_EQUAL_64(0xffffffffffff37bd + 1, x21);
  ASSERT_EQUAL_64(0xfffffff765432110 + 1, x22);
  ASSERT_EQUAL_64(0x123456789abcdef1 + 1, x23);

  ASSERT_EQUAL_32(0x1df + 1, w24);
  ASSERT_EQUAL_32(0xffff37bd + 1, w25);
  ASSERT_EQUAL_32(0x9abcdef1 + 1, w26);

  // Check that adc correctly sets the condition flags.
  START();
  __ Mov(x0, 0xff);
  __ Mov(x1, 0xffffffffffffffff);
  // Clear the C flag.
  __ Adds(x0, x0, Operand(0));
  __ Adcs(x10, x0, Operand(x1, SXTX, 1));
  END();

  RUN();

  ASSERT_EQUAL_NZCV(CFlag);

  START();
  __ Mov(x0, 0x7fffffffffffffff);
  __ Mov(x1, 1);
  // Clear the C flag.
  __ Adds(x0, x0, Operand(0));
  __ Adcs(x10, x0, Operand(x1, UXTB, 2));
  END();

  RUN();

  ASSERT_EQUAL_NZCV(NVFlag);

  START();
  __ Mov(x0, 0x7fffffffffffffff);
  // Clear the C flag.
  __ Adds(x0, x0, Operand(0));
  __ Adcs(x10, x0, Operand(1));
  END();

  RUN();

  ASSERT_EQUAL_NZCV(NVFlag);

  TEARDOWN();
}


TEST(adc_sbc_wide_imm) {
  SETUP();

  START();
  __ Mov(x0, 0);

  // Clear the C flag.
  __ Adds(x0, x0, Operand(0));

  __ Adc(x7, x0, Operand(0x1234567890abcdef));
  __ Adc(w8, w0, Operand(0xffffffff));
  __ Sbc(x9, x0, Operand(0x1234567890abcdef));
  __ Sbc(w10, w0, Operand(0xffffffff));
  __ Ngc(x11, Operand(0xffffffff00000000));
  __ Ngc(w12, Operand(0xffff0000));

  // Set the C flag.
  __ Cmp(w0, Operand(w0));

  __ Adc(x18, x0, Operand(0x1234567890abcdef));
  __ Adc(w19, w0, Operand(0xffffffff));
  __ Sbc(x20, x0, Operand(0x1234567890abcdef));
  __ Sbc(w21, w0, Operand(0xffffffff));
  __ Ngc(x22, Operand(0xffffffff00000000));
  __ Ngc(w23, Operand(0xffff0000));
  END();

  RUN();

  ASSERT_EQUAL_64(0x1234567890abcdef, x7);
  ASSERT_EQUAL_64(0xffffffff, x8);
  ASSERT_EQUAL_64(0xedcba9876f543210, x9);
  ASSERT_EQUAL_64(0, x10);
  ASSERT_EQUAL_64(0xffffffff, x11);
  ASSERT_EQUAL_64(0xffff, x12);

  ASSERT_EQUAL_64(0x1234567890abcdef + 1, x18);
  ASSERT_EQUAL_64(0, x19);
  ASSERT_EQUAL_64(0xedcba9876f543211, x20);
  ASSERT_EQUAL_64(1, x21);
  ASSERT_EQUAL_64(0x0000000100000000, x22);
  ASSERT_EQUAL_64(0x0000000000010000, x23);

  TEARDOWN();
}

TEST(flags) {
  SETUP();

  START();
  __ Mov(x0, 0);
  __ Mov(x1, 0x1111111111111111);
  __ Neg(x10, Operand(x0));
  __ Neg(x11, Operand(x1));
  __ Neg(w12, Operand(w1));
  // Clear the C flag.
  __ Adds(x0, x0, Operand(0));
  __ Ngc(x13, Operand(x0));
  // Set the C flag.
  __ Cmp(x0, Operand(x0));
  __ Ngc(w14, Operand(w0));
  END();

  RUN();

  ASSERT_EQUAL_64(0, x10);
  ASSERT_EQUAL_64(-0x1111111111111111, x11);
  ASSERT_EQUAL_32(-0x11111111, w12);
  ASSERT_EQUAL_64(-1, x13);
  ASSERT_EQUAL_32(0, w14);

  START();
  __ Mov(x0, 0);
  __ Cmp(x0, Operand(x0));
  END();

  RUN();

  ASSERT_EQUAL_NZCV(ZCFlag);

  START();
  __ Mov(w0, 0);
  __ Cmp(w0, Operand(w0));
  END();

  RUN();

  ASSERT_EQUAL_NZCV(ZCFlag);

  START();
  __ Mov(x0, 0);
  __ Mov(x1, 0x1111111111111111);
  __ Cmp(x0, Operand(x1));
  END();

  RUN();

  ASSERT_EQUAL_NZCV(NFlag);

  START();
  __ Mov(w0, 0);
  __ Mov(w1, 0x11111111);
  __ Cmp(w0, Operand(w1));
  END();

  RUN();

  ASSERT_EQUAL_NZCV(NFlag);

  START();
  __ Mov(x1, 0x1111111111111111);
  __ Cmp(x1, Operand(0));
  END();

  RUN();

  ASSERT_EQUAL_NZCV(CFlag);

  START();
  __ Mov(w1, 0x11111111);
  __ Cmp(w1, Operand(0));
  END();

  RUN();

  ASSERT_EQUAL_NZCV(CFlag);

  START();
  __ Mov(x0, 1);
  __ Mov(x1, 0x7fffffffffffffff);
  __ Cmn(x1, Operand(x0));
  END();

  RUN();

  ASSERT_EQUAL_NZCV(NVFlag);

  START();
  __ Mov(w0, 1);
  __ Mov(w1, 0x7fffffff);
  __ Cmn(w1, Operand(w0));
  END();

  RUN();

  ASSERT_EQUAL_NZCV(NVFlag);

  START();
  __ Mov(x0, 1);
  __ Mov(x1, 0xffffffffffffffff);
  __ Cmn(x1, Operand(x0));
  END();

  RUN();

  ASSERT_EQUAL_NZCV(ZCFlag);

  START();
  __ Mov(w0, 1);
  __ Mov(w1, 0xffffffff);
  __ Cmn(w1, Operand(w0));
  END();

  RUN();

  ASSERT_EQUAL_NZCV(ZCFlag);

  START();
  __ Mov(w0, 0);
  __ Mov(w1, 1);
  // Clear the C flag.
  __ Adds(w0, w0, Operand(0));
  __ Ngcs(w0, Operand(w1));
  END();

  RUN();

  ASSERT_EQUAL_NZCV(NFlag);

  START();
  __ Mov(w0, 0);
  __ Mov(w1, 0);
  // Set the C flag.
  __ Cmp(w0, Operand(w0));
  __ Ngcs(w0, Operand(w1));
  END();

  RUN();

  ASSERT_EQUAL_NZCV(ZCFlag);

  TEARDOWN();
}


TEST(cmp_shift) {
  SETUP();

  START();
  __ Mov(x18, 0xf0000000);
  __ Mov(x19, 0xf000000010000000);
  __ Mov(x20, 0xf0000000f0000000);
  __ Mov(x21, 0x7800000078000000);
  __ Mov(x22, 0x3c0000003c000000);
  __ Mov(x23, 0x8000000780000000);
  __ Mov(x24, 0x0000000f00000000);
  __ Mov(x25, 0x00000003c0000000);
  __ Mov(x26, 0x8000000780000000);
  __ Mov(x27, 0xc0000003);

  __ Cmp(w20, Operand(w21, LSL, 1));
  __ Mrs(x0, NZCV);

  __ Cmp(x20, Operand(x22, LSL, 2));
  __ Mrs(x1, NZCV);

  __ Cmp(w19, Operand(w23, LSR, 3));
  __ Mrs(x2, NZCV);

  __ Cmp(x18, Operand(x24, LSR, 4));
  __ Mrs(x3, NZCV);

  __ Cmp(w20, Operand(w25, ASR, 2));
  __ Mrs(x4, NZCV);

  __ Cmp(x20, Operand(x26, ASR, 3));
  __ Mrs(x5, NZCV);

  __ Cmp(w27, Operand(w22, ROR, 28));
  __ Mrs(x6, NZCV);

  __ Cmp(x20, Operand(x21, ROR, 31));
  __ Mrs(x7, NZCV);
  END();

  RUN();

  ASSERT_EQUAL_32(ZCFlag, w0);
  ASSERT_EQUAL_32(ZCFlag, w1);
  ASSERT_EQUAL_32(ZCFlag, w2);
  ASSERT_EQUAL_32(ZCFlag, w3);
  ASSERT_EQUAL_32(ZCFlag, w4);
  ASSERT_EQUAL_32(ZCFlag, w5);
  ASSERT_EQUAL_32(ZCFlag, w6);
  ASSERT_EQUAL_32(ZCFlag, w7);

  TEARDOWN();
}


TEST(cmp_extend) {
  SETUP();

  START();
  __ Mov(w20, 0x2);
  __ Mov(w21, 0x1);
  __ Mov(x22, 0xffffffffffffffff);
  __ Mov(x23, 0xff);
  __ Mov(x24, 0xfffffffffffffffe);
  __ Mov(x25, 0xffff);
  __ Mov(x26, 0xffffffff);

  __ Cmp(w20, Operand(w21, LSL, 1));
  __ Mrs(x0, NZCV);

  __ Cmp(x22, Operand(x23, SXTB, 0));
  __ Mrs(x1, NZCV);

  __ Cmp(x24, Operand(x23, SXTB, 1));
  __ Mrs(x2, NZCV);

  __ Cmp(x24, Operand(x23, UXTB, 1));
  __ Mrs(x3, NZCV);

  __ Cmp(w22, Operand(w25, UXTH));
  __ Mrs(x4, NZCV);

  __ Cmp(x22, Operand(x25, SXTH));
  __ Mrs(x5, NZCV);

  __ Cmp(x22, Operand(x26, UXTW));
  __ Mrs(x6, NZCV);

  __ Cmp(x24, Operand(x26, SXTW, 1));
  __ Mrs(x7, NZCV);
  END();

  RUN();

  ASSERT_EQUAL_32(ZCFlag, w0);
  ASSERT_EQUAL_32(ZCFlag, w1);
  ASSERT_EQUAL_32(ZCFlag, w2);
  ASSERT_EQUAL_32(NCFlag, w3);
  ASSERT_EQUAL_32(NCFlag, w4);
  ASSERT_EQUAL_32(ZCFlag, w5);
  ASSERT_EQUAL_32(NCFlag, w6);
  ASSERT_EQUAL_32(ZCFlag, w7);

  TEARDOWN();
}


TEST(ccmp) {
  SETUP();

  START();
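  // Ccmp/Ccmn perform the comparison only if the condition holds; otherwise
  // the flags are set directly to the supplied NZCV immediate.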
  __ Mov(w16, 0);
  __ Mov(w17, 1);
  __ Cmp(w16, w16);
  __ Ccmp(w16, w17, NCFlag, eq);
  __ Mrs(x0, NZCV);

  __ Cmp(w16, w16);
  __ Ccmp(w16, w17, NCFlag, ne);
  __ Mrs(x1, NZCV);

  __ Cmp(x16, x16);
  __ Ccmn(x16, 2, NZCVFlag, eq);
  __ Mrs(x2, NZCV);

  __ Cmp(x16, x16);
  __ Ccmn(x16, 2, NZCVFlag, ne);
  __ Mrs(x3, NZCV);

  // The MacroAssembler does not allow al as a condition.
  {
    ExactAssemblyScope scope(&masm, kInstructionSize);
    __ ccmp(x16, x16, NZCVFlag, al);
  }
  __ Mrs(x4, NZCV);

  // The MacroAssembler does not allow nv as a condition.
  {
    ExactAssemblyScope scope(&masm, kInstructionSize);
    __ ccmp(x16, x16, NZCVFlag, nv);
  }
  __ Mrs(x5, NZCV);

  END();

  RUN();

  ASSERT_EQUAL_32(NFlag, w0);
  ASSERT_EQUAL_32(NCFlag, w1);
  ASSERT_EQUAL_32(NoFlag, w2);
  ASSERT_EQUAL_32(NZCVFlag, w3);
  ASSERT_EQUAL_32(ZCFlag, w4);
  ASSERT_EQUAL_32(ZCFlag, w5);

  TEARDOWN();
}


TEST(ccmp_wide_imm) {
  SETUP();

  START();
  __ Mov(w20, 0);

  __ Cmp(w20, Operand(w20));
  __ Ccmp(w20, Operand(0x12345678), NZCVFlag, eq);
  __ Mrs(x0, NZCV);

  __ Cmp(w20, Operand(w20));
  __ Ccmp(x20, Operand(0xffffffffffffffff), NZCVFlag, eq);
  __ Mrs(x1, NZCV);
  END();

  RUN();

  ASSERT_EQUAL_32(NFlag, w0);
  ASSERT_EQUAL_32(NoFlag, w1);

  TEARDOWN();
}


TEST(ccmp_shift_extend) {
  SETUP();

  START();
  __ Mov(w20, 0x2);
  __ Mov(w21, 0x1);
  __ Mov(x22, 0xffffffffffffffff);
  __ Mov(x23, 0xff);
  __ Mov(x24, 0xfffffffffffffffe);

  __ Cmp(w20, Operand(w20));
  __ Ccmp(w20, Operand(w21, LSL, 1), NZCVFlag, eq);
  __ Mrs(x0, NZCV);

  __ Cmp(w20, Operand(w20));
  __ Ccmp(x22, Operand(x23, SXTB, 0), NZCVFlag, eq);
  __ Mrs(x1, NZCV);

  __ Cmp(w20, Operand(w20));
  __ Ccmp(x24, Operand(x23, SXTB, 1), NZCVFlag, eq);
  __ Mrs(x2, NZCV);

  __ Cmp(w20, Operand(w20));
  __ Ccmp(x24, Operand(x23, UXTB, 1), NZCVFlag, eq);
  __ Mrs(x3, NZCV);

  __ Cmp(w20, Operand(w20));
  __ Ccmp(x24, Operand(x23, UXTB, 1), NZCVFlag, ne);
  __ Mrs(x4, NZCV);
  END();

  RUN();

  ASSERT_EQUAL_32(ZCFlag, w0);
  ASSERT_EQUAL_32(ZCFlag, w1);
  ASSERT_EQUAL_32(ZCFlag, w2);
  ASSERT_EQUAL_32(NCFlag, w3);
  ASSERT_EQUAL_32(NZCVFlag, w4);

  TEARDOWN();
}


TEST(csel_reg) {
  SETUP();

  START();
  __ Mov(x16, 0);
  __ Mov(x24, 0x0000000f0000000f);
  __ Mov(x25, 0x0000001f0000001f);

  __ Cmp(w16, Operand(0));
  __ Csel(w0, w24, w25, eq);
  __ Csel(w1, w24, w25, ne);
  __ Csinc(w2, w24, w25, mi);
  __ Csinc(w3, w24, w25, pl);

  // The MacroAssembler does not allow al or nv as a condition.
  {
    ExactAssemblyScope scope(&masm, 2 * kInstructionSize);
    __ csel(w13, w24, w25, al);
    __ csel(x14, x24, x25, nv);
  }
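  // (At the instruction level, al and nv both evaluate as "always", so these
  // simply select the first operand.)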
9882 
9883   __ Cmp(x16, Operand(1));
9884   __ Csinv(x4, x24, x25, gt);
9885   __ Csinv(x5, x24, x25, le);
9886   __ Csneg(x6, x24, x25, hs);
9887   __ Csneg(x7, x24, x25, lo);
9888 
9889   __ Cset(w8, ne);
9890   __ Csetm(w9, ne);
9891   __ Cinc(x10, x25, ne);
9892   __ Cinv(x11, x24, ne);
9893   __ Cneg(x12, x24, ne);
9894 
9895   // The MacroAssembler does not allow al or nv as a condition.
9896   {
9897     ExactAssemblyScope scope(&masm, 2 * kInstructionSize);
9898     __ csel(w15, w24, w25, al);
9899     __ csel(x17, x24, x25, nv);
9900   }
9901 
9902   END();
9903 
9904   RUN();
9905 
9906   ASSERT_EQUAL_64(0x0000000f, x0);
9907   ASSERT_EQUAL_64(0x0000001f, x1);
9908   ASSERT_EQUAL_64(0x00000020, x2);
9909   ASSERT_EQUAL_64(0x0000000f, x3);
9910   ASSERT_EQUAL_64(0xffffffe0ffffffe0, x4);
9911   ASSERT_EQUAL_64(0x0000000f0000000f, x5);
9912   ASSERT_EQUAL_64(0xffffffe0ffffffe1, x6);
9913   ASSERT_EQUAL_64(0x0000000f0000000f, x7);
9914   ASSERT_EQUAL_64(0x00000001, x8);
9915   ASSERT_EQUAL_64(0xffffffff, x9);
9916   ASSERT_EQUAL_64(0x0000001f00000020, x10);
9917   ASSERT_EQUAL_64(0xfffffff0fffffff0, x11);
9918   ASSERT_EQUAL_64(0xfffffff0fffffff1, x12);
9919   ASSERT_EQUAL_64(0x0000000f, x13);
9920   ASSERT_EQUAL_64(0x0000000f0000000f, x14);
9921   ASSERT_EQUAL_64(0x0000000f, x15);
9922   ASSERT_EQUAL_64(0x0000000f0000000f, x17);
9923 
9924   TEARDOWN();
9925 }
9926 
9927 
TEST(csel_imm) {
  SETUP();

  int values[] = {-123, -2, -1, 0, 1, 2, 123};
  int n_values = sizeof(values) / sizeof(values[0]);

  for (int i = 0; i < n_values; i++) {
    for (int j = 0; j < n_values; j++) {
      int left = values[i];
      int right = values[j];

      START();
      __ Mov(x10, 0);
      __ Cmp(x10, 0);
      __ Csel(w0, left, right, eq);
      __ Csel(w1, left, right, ne);
      __ Csel(x2, left, right, eq);
      __ Csel(x3, left, right, ne);

      END();

      RUN();

      ASSERT_EQUAL_32(left, w0);
      ASSERT_EQUAL_32(right, w1);
      ASSERT_EQUAL_64(left, x2);
      ASSERT_EQUAL_64(right, x3);
    }
  }

  TEARDOWN();
}


TEST(csel_mixed) {
  SETUP();

  START();
  __ Mov(x18, 0);
  __ Mov(x19, 0x80000000);
  __ Mov(x20, 0x8000000000000000);

  __ Cmp(x18, Operand(0));
  __ Csel(w0, w19, -2, ne);
  __ Csel(w1, w19, -1, ne);
  __ Csel(w2, w19, 0, ne);
  __ Csel(w3, w19, 1, ne);
  __ Csel(w4, w19, 2, ne);
  __ Csel(w5, w19, Operand(w19, ASR, 31), ne);
  __ Csel(w6, w19, Operand(w19, ROR, 1), ne);
  __ Csel(w7, w19, 3, eq);

  __ Csel(x8, x20, -2, ne);
  __ Csel(x9, x20, -1, ne);
  __ Csel(x10, x20, 0, ne);
  __ Csel(x11, x20, 1, ne);
  __ Csel(x12, x20, 2, ne);
  __ Csel(x13, x20, Operand(x20, ASR, 63), ne);
  __ Csel(x14, x20, Operand(x20, ROR, 1), ne);
  __ Csel(x15, x20, 3, eq);

  END();

  RUN();

  ASSERT_EQUAL_32(-2, w0);
  ASSERT_EQUAL_32(-1, w1);
  ASSERT_EQUAL_32(0, w2);
  ASSERT_EQUAL_32(1, w3);
  ASSERT_EQUAL_32(2, w4);
  ASSERT_EQUAL_32(-1, w5);
  ASSERT_EQUAL_32(0x40000000, w6);
  ASSERT_EQUAL_32(0x80000000, w7);

  ASSERT_EQUAL_64(-2, x8);
  ASSERT_EQUAL_64(-1, x9);
  ASSERT_EQUAL_64(0, x10);
  ASSERT_EQUAL_64(1, x11);
  ASSERT_EQUAL_64(2, x12);
  ASSERT_EQUAL_64(-1, x13);
  ASSERT_EQUAL_64(0x4000000000000000, x14);
  ASSERT_EQUAL_64(0x8000000000000000, x15);

  TEARDOWN();
}


TEST(lslv) {
  SETUP();

  uint64_t value = 0x0123456789abcdef;
  int shift[] = {1, 3, 5, 9, 17, 33};

  START();
  __ Mov(x0, value);
  __ Mov(w1, shift[0]);
  __ Mov(w2, shift[1]);
  __ Mov(w3, shift[2]);
  __ Mov(w4, shift[3]);
  __ Mov(w5, shift[4]);
  __ Mov(w6, shift[5]);

  // The MacroAssembler does not allow zr as an argument.
  {
    ExactAssemblyScope scope(&masm, kInstructionSize);
    __ lslv(x0, x0, xzr);
  }
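  // xzr reads as zero, so the shift above is by zero and x0 is unchanged; the
  // first ASSERT below relies on this.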

  __ Lsl(x16, x0, x1);
  __ Lsl(x17, x0, x2);
  __ Lsl(x18, x0, x3);
  __ Lsl(x19, x0, x4);
  __ Lsl(x20, x0, x5);
  __ Lsl(x21, x0, x6);

  __ Lsl(w22, w0, w1);
  __ Lsl(w23, w0, w2);
  __ Lsl(w24, w0, w3);
  __ Lsl(w25, w0, w4);
  __ Lsl(w26, w0, w5);
  __ Lsl(w27, w0, w6);
  END();

  RUN();

  ASSERT_EQUAL_64(value, x0);
  ASSERT_EQUAL_64(value << (shift[0] & 63), x16);
  ASSERT_EQUAL_64(value << (shift[1] & 63), x17);
  ASSERT_EQUAL_64(value << (shift[2] & 63), x18);
  ASSERT_EQUAL_64(value << (shift[3] & 63), x19);
  ASSERT_EQUAL_64(value << (shift[4] & 63), x20);
  ASSERT_EQUAL_64(value << (shift[5] & 63), x21);
  ASSERT_EQUAL_32(value << (shift[0] & 31), w22);
  ASSERT_EQUAL_32(value << (shift[1] & 31), w23);
  ASSERT_EQUAL_32(value << (shift[2] & 31), w24);
  ASSERT_EQUAL_32(value << (shift[3] & 31), w25);
  ASSERT_EQUAL_32(value << (shift[4] & 31), w26);
  ASSERT_EQUAL_32(value << (shift[5] & 31), w27);

  TEARDOWN();
}


TEST(lsrv) {
  SETUP();

  uint64_t value = 0x0123456789abcdef;
  int shift[] = {1, 3, 5, 9, 17, 33};

  START();
  __ Mov(x0, value);
  __ Mov(w1, shift[0]);
  __ Mov(w2, shift[1]);
  __ Mov(w3, shift[2]);
  __ Mov(w4, shift[3]);
  __ Mov(w5, shift[4]);
  __ Mov(w6, shift[5]);

  // The MacroAssembler does not allow zr as an argument.
  {
    ExactAssemblyScope scope(&masm, kInstructionSize);
    __ lsrv(x0, x0, xzr);
  }

  __ Lsr(x16, x0, x1);
  __ Lsr(x17, x0, x2);
  __ Lsr(x18, x0, x3);
  __ Lsr(x19, x0, x4);
  __ Lsr(x20, x0, x5);
  __ Lsr(x21, x0, x6);

  __ Lsr(w22, w0, w1);
  __ Lsr(w23, w0, w2);
  __ Lsr(w24, w0, w3);
  __ Lsr(w25, w0, w4);
  __ Lsr(w26, w0, w5);
  __ Lsr(w27, w0, w6);
  END();

  RUN();

  ASSERT_EQUAL_64(value, x0);
  ASSERT_EQUAL_64(value >> (shift[0] & 63), x16);
  ASSERT_EQUAL_64(value >> (shift[1] & 63), x17);
  ASSERT_EQUAL_64(value >> (shift[2] & 63), x18);
  ASSERT_EQUAL_64(value >> (shift[3] & 63), x19);
  ASSERT_EQUAL_64(value >> (shift[4] & 63), x20);
  ASSERT_EQUAL_64(value >> (shift[5] & 63), x21);

  value &= 0xffffffff;
  ASSERT_EQUAL_32(value >> (shift[0] & 31), w22);
  ASSERT_EQUAL_32(value >> (shift[1] & 31), w23);
  ASSERT_EQUAL_32(value >> (shift[2] & 31), w24);
  ASSERT_EQUAL_32(value >> (shift[3] & 31), w25);
  ASSERT_EQUAL_32(value >> (shift[4] & 31), w26);
  ASSERT_EQUAL_32(value >> (shift[5] & 31), w27);

  TEARDOWN();
}


TEST(asrv) {
  SETUP();

  int64_t value = 0xfedcba98fedcba98;
  int shift[] = {1, 3, 5, 9, 17, 33};

  START();
  __ Mov(x0, value);
  __ Mov(w1, shift[0]);
  __ Mov(w2, shift[1]);
  __ Mov(w3, shift[2]);
  __ Mov(w4, shift[3]);
  __ Mov(w5, shift[4]);
  __ Mov(w6, shift[5]);

  // The MacroAssembler does not allow zr as an argument.
  {
    ExactAssemblyScope scope(&masm, kInstructionSize);
    __ asrv(x0, x0, xzr);
  }

  __ Asr(x16, x0, x1);
  __ Asr(x17, x0, x2);
  __ Asr(x18, x0, x3);
  __ Asr(x19, x0, x4);
  __ Asr(x20, x0, x5);
  __ Asr(x21, x0, x6);

  __ Asr(w22, w0, w1);
  __ Asr(w23, w0, w2);
  __ Asr(w24, w0, w3);
  __ Asr(w25, w0, w4);
  __ Asr(w26, w0, w5);
  __ Asr(w27, w0, w6);
  END();

  RUN();

  ASSERT_EQUAL_64(value, x0);
  ASSERT_EQUAL_64(value >> (shift[0] & 63), x16);
  ASSERT_EQUAL_64(value >> (shift[1] & 63), x17);
  ASSERT_EQUAL_64(value >> (shift[2] & 63), x18);
  ASSERT_EQUAL_64(value >> (shift[3] & 63), x19);
  ASSERT_EQUAL_64(value >> (shift[4] & 63), x20);
  ASSERT_EQUAL_64(value >> (shift[5] & 63), x21);

  int32_t value32 = static_cast<int32_t>(value & 0xffffffff);
  ASSERT_EQUAL_32(value32 >> (shift[0] & 31), w22);
  ASSERT_EQUAL_32(value32 >> (shift[1] & 31), w23);
  ASSERT_EQUAL_32(value32 >> (shift[2] & 31), w24);
  ASSERT_EQUAL_32(value32 >> (shift[3] & 31), w25);
  ASSERT_EQUAL_32(value32 >> (shift[4] & 31), w26);
  ASSERT_EQUAL_32(value32 >> (shift[5] & 31), w27);

  TEARDOWN();
}


TEST(rorv) {
  SETUP();

  uint64_t value = 0x0123456789abcdef;
  int shift[] = {4, 8, 12, 16, 24, 36};

  START();
  __ Mov(x0, value);
  __ Mov(w1, shift[0]);
  __ Mov(w2, shift[1]);
  __ Mov(w3, shift[2]);
  __ Mov(w4, shift[3]);
  __ Mov(w5, shift[4]);
  __ Mov(w6, shift[5]);

  // The MacroAssembler does not allow zr as an argument.
  {
    ExactAssemblyScope scope(&masm, kInstructionSize);
    __ rorv(x0, x0, xzr);
  }

  __ Ror(x16, x0, x1);
  __ Ror(x17, x0, x2);
  __ Ror(x18, x0, x3);
  __ Ror(x19, x0, x4);
  __ Ror(x20, x0, x5);
  __ Ror(x21, x0, x6);

  __ Ror(w22, w0, w1);
  __ Ror(w23, w0, w2);
  __ Ror(w24, w0, w3);
  __ Ror(w25, w0, w4);
  __ Ror(w26, w0, w5);
  __ Ror(w27, w0, w6);
  END();

  RUN();

  ASSERT_EQUAL_64(value, x0);
  ASSERT_EQUAL_64(0xf0123456789abcde, x16);
  ASSERT_EQUAL_64(0xef0123456789abcd, x17);
  ASSERT_EQUAL_64(0xdef0123456789abc, x18);
  ASSERT_EQUAL_64(0xcdef0123456789ab, x19);
  ASSERT_EQUAL_64(0xabcdef0123456789, x20);
  ASSERT_EQUAL_64(0x789abcdef0123456, x21);
  ASSERT_EQUAL_32(0xf89abcde, w22);
  ASSERT_EQUAL_32(0xef89abcd, w23);
  ASSERT_EQUAL_32(0xdef89abc, w24);
  ASSERT_EQUAL_32(0xcdef89ab, w25);
  ASSERT_EQUAL_32(0xabcdef89, w26);
  ASSERT_EQUAL_32(0xf89abcde, w27);

  TEARDOWN();
}


TEST(bfm) {
  SETUP();

  START();
  __ Mov(x1, 0x0123456789abcdef);

  __ Mov(x10, 0x8888888888888888);
  __ Mov(x11, 0x8888888888888888);
  __ Mov(x12, 0x8888888888888888);
  __ Mov(x13, 0x8888888888888888);
  __ Mov(x14, 0xffffffffffffffff);
  __ Mov(w20, 0x88888888);
  __ Mov(w21, 0x88888888);

  __ Bfm(x10, x1, 16, 31);
  __ Bfm(x11, x1, 32, 15);

  __ Bfm(w20, w1, 16, 23);
  __ Bfm(w21, w1, 24, 15);

  // Aliases.
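  // Bfi inserts the low <width> bits of the source at <lsb>, Bfxil copies the
  // <width>-bit field at <lsb> down to bit 0, and Bfc clears the field. All
  // other destination bits are preserved.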
  __ Bfi(x12, x1, 16, 8);
  __ Bfxil(x13, x1, 16, 8);
  __ Bfc(x14, 16, 8);
  END();

  RUN();


  ASSERT_EQUAL_64(0x88888888888889ab, x10);
  ASSERT_EQUAL_64(0x8888cdef88888888, x11);

  ASSERT_EQUAL_32(0x888888ab, w20);
  ASSERT_EQUAL_32(0x88cdef88, w21);

  ASSERT_EQUAL_64(0x8888888888ef8888, x12);
  ASSERT_EQUAL_64(0x88888888888888ab, x13);
  ASSERT_EQUAL_64(0xffffffffff00ffff, x14);

  TEARDOWN();
}


TEST(sbfm) {
  SETUP();

  START();
  __ Mov(x1, 0x0123456789abcdef);
  __ Mov(x2, 0xfedcba9876543210);

  __ Sbfm(x10, x1, 16, 31);
  __ Sbfm(x11, x1, 32, 15);
  __ Sbfm(x12, x1, 32, 47);
  __ Sbfm(x13, x1, 48, 35);

  __ Sbfm(w14, w1, 16, 23);
  __ Sbfm(w15, w1, 24, 15);
  __ Sbfm(w16, w2, 16, 23);
  __ Sbfm(w17, w2, 24, 15);

  // Aliases.
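  // All of these are Sbfm encodings: Asr with an immediate shift, Sbfiz
  // (insert at <lsb>, then sign-extend), Sbfx (extract <width> bits at <lsb>,
  // then sign-extend), and Sxtb/Sxth/Sxtw (sign-extend 8, 16 or 32 bits).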
  __ Asr(x18, x1, 32);
  __ Asr(x19, x2, 32);
  __ Sbfiz(x20, x1, 8, 16);
  __ Sbfiz(x21, x2, 8, 16);
  __ Sbfx(x22, x1, 8, 16);
  __ Sbfx(x23, x2, 8, 16);
  __ Sxtb(x24, w1);
  __ Sxtb(x25, x2);
  __ Sxth(x26, w1);
  __ Sxth(x27, x2);
  __ Sxtw(x28, w1);
  __ Sxtw(x29, x2);
  END();

  RUN();


  ASSERT_EQUAL_64(0xffffffffffff89ab, x10);
  ASSERT_EQUAL_64(0xffffcdef00000000, x11);
  ASSERT_EQUAL_64(0x0000000000004567, x12);
  ASSERT_EQUAL_64(0x000789abcdef0000, x13);

  ASSERT_EQUAL_32(0xffffffab, w14);
  ASSERT_EQUAL_32(0xffcdef00, w15);
  ASSERT_EQUAL_32(0x00000054, w16);
  ASSERT_EQUAL_32(0x00321000, w17);

  ASSERT_EQUAL_64(0x0000000001234567, x18);
  ASSERT_EQUAL_64(0xfffffffffedcba98, x19);
  ASSERT_EQUAL_64(0xffffffffffcdef00, x20);
  ASSERT_EQUAL_64(0x0000000000321000, x21);
  ASSERT_EQUAL_64(0xffffffffffffabcd, x22);
  ASSERT_EQUAL_64(0x0000000000005432, x23);
  ASSERT_EQUAL_64(0xffffffffffffffef, x24);
  ASSERT_EQUAL_64(0x0000000000000010, x25);
  ASSERT_EQUAL_64(0xffffffffffffcdef, x26);
  ASSERT_EQUAL_64(0x0000000000003210, x27);
  ASSERT_EQUAL_64(0xffffffff89abcdef, x28);
  ASSERT_EQUAL_64(0x0000000076543210, x29);

  TEARDOWN();
}


TEST(ubfm) {
  SETUP();

  START();
  __ Mov(x1, 0x0123456789abcdef);
  __ Mov(x2, 0xfedcba9876543210);

  __ Mov(x10, 0x8888888888888888);
  __ Mov(x11, 0x8888888888888888);

  __ Ubfm(x10, x1, 16, 31);
  __ Ubfm(x11, x1, 32, 15);
  __ Ubfm(x12, x1, 32, 47);
  __ Ubfm(x13, x1, 48, 35);

  __ Ubfm(w25, w1, 16, 23);
  __ Ubfm(w26, w1, 24, 15);
  __ Ubfm(w27, w2, 16, 23);
  __ Ubfm(w28, w2, 24, 15);

  // Aliases.
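  // All of these are Ubfm encodings: immediate Lsl/Lsr, Ubfiz (insert at
  // <lsb>, zero-extended), Ubfx (extract <width> bits at <lsb>, zero-extended)
  // and Uxtb/Uxth/Uxtw (zero-extend 8, 16 or 32 bits).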
  __ Lsl(x15, x1, 63);
  __ Lsl(x16, x1, 0);
  __ Lsr(x17, x1, 32);
  __ Ubfiz(x18, x1, 8, 16);
  __ Ubfx(x19, x1, 8, 16);
  __ Uxtb(x20, x1);
  __ Uxth(x21, x1);
  __ Uxtw(x22, x1);
  END();

  RUN();

  ASSERT_EQUAL_64(0x00000000000089ab, x10);
  ASSERT_EQUAL_64(0x0000cdef00000000, x11);
  ASSERT_EQUAL_64(0x0000000000004567, x12);
  ASSERT_EQUAL_64(0x000789abcdef0000, x13);

  ASSERT_EQUAL_32(0x000000ab, w25);
  ASSERT_EQUAL_32(0x00cdef00, w26);
  ASSERT_EQUAL_32(0x00000054, w27);
  ASSERT_EQUAL_32(0x00321000, w28);

  ASSERT_EQUAL_64(0x8000000000000000, x15);
  ASSERT_EQUAL_64(0x0123456789abcdef, x16);
  ASSERT_EQUAL_64(0x0000000001234567, x17);
  ASSERT_EQUAL_64(0x0000000000cdef00, x18);
  ASSERT_EQUAL_64(0x000000000000abcd, x19);
  ASSERT_EQUAL_64(0x00000000000000ef, x20);
  ASSERT_EQUAL_64(0x000000000000cdef, x21);
  ASSERT_EQUAL_64(0x0000000089abcdef, x22);

  TEARDOWN();
}


TEST(extr) {
  SETUP();

  START();
  __ Mov(x1, 0x0123456789abcdef);
  __ Mov(x2, 0xfedcba9876543210);

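  // Extr(rd, rn, rm, lsb) extracts a register-width field from the
  // concatenation rn:rm, starting at bit <lsb> of rm. Ror with an immediate
  // rotation is the alias Extr(rd, rn, rn, shift).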
  __ Extr(w10, w1, w2, 0);
  __ Extr(w11, w1, w2, 1);
  __ Extr(x12, x2, x1, 2);

  __ Ror(w13, w1, 0);
  __ Ror(w14, w2, 17);
  __ Ror(w15, w1, 31);
  __ Ror(x18, x2, 0);
  __ Ror(x19, x2, 1);
  __ Ror(x20, x1, 63);
  END();

  RUN();

  ASSERT_EQUAL_64(0x76543210, x10);
  ASSERT_EQUAL_64(0xbb2a1908, x11);
  ASSERT_EQUAL_64(0x0048d159e26af37b, x12);
  ASSERT_EQUAL_64(0x89abcdef, x13);
  ASSERT_EQUAL_64(0x19083b2a, x14);
  ASSERT_EQUAL_64(0x13579bdf, x15);
  ASSERT_EQUAL_64(0xfedcba9876543210, x18);
  ASSERT_EQUAL_64(0x7f6e5d4c3b2a1908, x19);
  ASSERT_EQUAL_64(0x02468acf13579bde, x20);

  TEARDOWN();
}


TEST(fmov_imm) {
  SETUP_WITH_FEATURES(CPUFeatures::kFP, CPUFeatures::kFPHalf);

  START();
  __ Fmov(s1, 255.0);
  __ Fmov(d2, 12.34567);
  __ Fmov(s3, 0.0);
  __ Fmov(d4, 0.0);
  __ Fmov(s5, kFP32PositiveInfinity);
  __ Fmov(d6, kFP64NegativeInfinity);
  __ Fmov(h7, RawbitsToFloat16(0x6400U));
  __ Fmov(h8, kFP16PositiveInfinity);
  __ Fmov(s11, 1.0);
  __ Fmov(h12, RawbitsToFloat16(0x7BFF));
  __ Fmov(h13, RawbitsToFloat16(0x57F2));
  __ Fmov(d22, -13.0);
  __ Fmov(h23, RawbitsToFloat16(0xC500U));
  __ Fmov(h24, Float16(-5.0));
  __ Fmov(h25, Float16(2049.0));
  __ Fmov(h21, RawbitsToFloat16(0x6404U));
  __ Fmov(h26, RawbitsToFloat16(0x0U));
  __ Fmov(h27, RawbitsToFloat16(0x7e00U));
  END();
#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
  RUN();

  ASSERT_EQUAL_FP32(255.0, s1);
  ASSERT_EQUAL_FP64(12.34567, d2);
  ASSERT_EQUAL_FP32(0.0, s3);
  ASSERT_EQUAL_FP64(0.0, d4);
  ASSERT_EQUAL_FP32(kFP32PositiveInfinity, s5);
  ASSERT_EQUAL_FP64(kFP64NegativeInfinity, d6);
  ASSERT_EQUAL_FP16(RawbitsToFloat16(0x6400U), h7);
  ASSERT_EQUAL_FP16(kFP16PositiveInfinity, h8);
  ASSERT_EQUAL_FP32(1.0, s11);
  ASSERT_EQUAL_FP16(RawbitsToFloat16(0x7BFF), h12);
  ASSERT_EQUAL_FP16(RawbitsToFloat16(0x57F2U), h13);
  ASSERT_EQUAL_FP16(RawbitsToFloat16(0x6404), h21);
  ASSERT_EQUAL_FP64(-13.0, d22);
  ASSERT_EQUAL_FP16(Float16(-5.0), h23);
  ASSERT_EQUAL_FP16(RawbitsToFloat16(0xC500), h24);
  // 2049 is unrepresentable in FP16; it rounds (ties-to-even) to 2048 (0x6800).
  ASSERT_EQUAL_FP16(RawbitsToFloat16(0x6800), h25);
  ASSERT_EQUAL_FP16(kFP16PositiveZero, h26);
  // NaN check.
  ASSERT_EQUAL_FP16(RawbitsToFloat16(0x7e00), h27);
#endif

  TEARDOWN();
}


TEST(fmov_vec_imm) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();

  __ Fmov(v0.V2S(), 20.0);
  __ Fmov(v1.V4S(), 1024.0);

  __ Fmov(v2.V4H(), RawbitsToFloat16(0xC500U));
  __ Fmov(v3.V8H(), RawbitsToFloat16(0x4A80U));

  END();
#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
  RUN();

  ASSERT_EQUAL_64(0x41A0000041A00000, d0);
  ASSERT_EQUAL_128(0x4480000044800000, 0x4480000044800000, q1);
  ASSERT_EQUAL_64(0xC500C500C500C500, d2);
  ASSERT_EQUAL_128(0x4A804A804A804A80, 0x4A804A804A804A80, q3);
#endif

  TEARDOWN();
}


TEST(fmov_reg) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kFPHalf);

  START();

  __ Fmov(h3, RawbitsToFloat16(0xCA80U));
  __ Fmov(h7, h3);
  __ Fmov(h8, -5.0);
  __ Fmov(w3, h8);
  __ Fmov(h9, w3);
  __ Fmov(h8, Float16(1024.0));
  __ Fmov(x4, h8);
  __ Fmov(h10, x4);
  __ Fmov(s20, 1.0);
  __ Fmov(w10, s20);
  __ Fmov(s30, w10);
  __ Fmov(s5, s20);
  __ Fmov(d1, -13.0);
  __ Fmov(x1, d1);
  __ Fmov(d2, x1);
  __ Fmov(d4, d1);
  __ Fmov(d6, RawbitsToDouble(0x0123456789abcdef));
  __ Fmov(s6, s6);
  __ Fmov(d0, 0.0);
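  // Write x1 (the bits of -13.0) into lane 1 of v0; lane 0 keeps the 0.0 just
  // written through d0. Reading lane 1 back should reproduce x1.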
  __ Fmov(v0.D(), 1, x1);
  __ Fmov(x2, v0.D(), 1);

  END();
#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
  RUN();

  ASSERT_EQUAL_FP16(RawbitsToFloat16(0xCA80U), h7);
  ASSERT_EQUAL_FP16(RawbitsToFloat16(0xC500U), h9);
  ASSERT_EQUAL_32(0x0000C500, w3);
  ASSERT_EQUAL_64(0x0000000000006400, x4);
  ASSERT_EQUAL_FP16(RawbitsToFloat16(0x6400), h10);
  ASSERT_EQUAL_32(FloatToRawbits(1.0), w10);
  ASSERT_EQUAL_FP32(1.0, s30);
  ASSERT_EQUAL_FP32(1.0, s5);
  ASSERT_EQUAL_64(DoubleToRawbits(-13.0), x1);
  ASSERT_EQUAL_FP64(-13.0, d2);
  ASSERT_EQUAL_FP64(-13.0, d4);
  ASSERT_EQUAL_FP32(RawbitsToFloat(0x89abcdef), s6);
  ASSERT_EQUAL_128(DoubleToRawbits(-13.0), 0x0000000000000000, q0);
  ASSERT_EQUAL_64(DoubleToRawbits(-13.0), x2);
#endif

  TEARDOWN();
}


TEST(fadd) {
  SETUP_WITH_FEATURES(CPUFeatures::kFP);

  START();
  __ Fmov(s14, -0.0f);
  __ Fmov(s15, kFP32PositiveInfinity);
  __ Fmov(s16, kFP32NegativeInfinity);
  __ Fmov(s17, 3.25f);
  __ Fmov(s18, 1.0f);
  __ Fmov(s19, 0.0f);

  __ Fmov(d26, -0.0);
  __ Fmov(d27, kFP64PositiveInfinity);
  __ Fmov(d28, kFP64NegativeInfinity);
  __ Fmov(d29, 0.0);
  __ Fmov(d30, -2.0);
  __ Fmov(d31, 2.25);

  __ Fadd(s0, s17, s18);
  __ Fadd(s1, s18, s19);
  __ Fadd(s2, s14, s18);
  __ Fadd(s3, s15, s18);
  __ Fadd(s4, s16, s18);
  __ Fadd(s5, s15, s16);
  __ Fadd(s6, s16, s15);

  __ Fadd(d7, d30, d31);
  __ Fadd(d8, d29, d31);
  __ Fadd(d9, d26, d31);
  __ Fadd(d10, d27, d31);
  __ Fadd(d11, d28, d31);
  __ Fadd(d12, d27, d28);
  __ Fadd(d13, d28, d27);
  END();

  RUN();

  ASSERT_EQUAL_FP32(4.25, s0);
  ASSERT_EQUAL_FP32(1.0, s1);
  ASSERT_EQUAL_FP32(1.0, s2);
  ASSERT_EQUAL_FP32(kFP32PositiveInfinity, s3);
  ASSERT_EQUAL_FP32(kFP32NegativeInfinity, s4);
  ASSERT_EQUAL_FP32(kFP32DefaultNaN, s5);
  ASSERT_EQUAL_FP32(kFP32DefaultNaN, s6);
  ASSERT_EQUAL_FP64(0.25, d7);
  ASSERT_EQUAL_FP64(2.25, d8);
  ASSERT_EQUAL_FP64(2.25, d9);
  ASSERT_EQUAL_FP64(kFP64PositiveInfinity, d10);
  ASSERT_EQUAL_FP64(kFP64NegativeInfinity, d11);
  ASSERT_EQUAL_FP64(kFP64DefaultNaN, d12);
  ASSERT_EQUAL_FP64(kFP64DefaultNaN, d13);

  TEARDOWN();
}


TEST(fadd_h) {
  SETUP_WITH_FEATURES(CPUFeatures::kFP, CPUFeatures::kFPHalf);

  START();
  __ Fmov(h14, -0.0f);
  __ Fmov(h15, kFP16PositiveInfinity);
  __ Fmov(h16, kFP16NegativeInfinity);
  __ Fmov(h17, 3.25f);
  __ Fmov(h18, 1.0);
  __ Fmov(h19, 0.0f);
  __ Fmov(h20, 5.0f);

  __ Fadd(h0, h17, h18);
  __ Fadd(h1, h18, h19);
  __ Fadd(h2, h14, h18);
  __ Fadd(h3, h15, h18);
  __ Fadd(h4, h16, h18);
  __ Fadd(h5, h15, h16);
  __ Fadd(h6, h16, h15);
  __ Fadd(h7, h20, h20);
  END();

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
  RUN();

  ASSERT_EQUAL_FP16(Float16(4.25), h0);
  ASSERT_EQUAL_FP16(Float16(1.0), h1);
  ASSERT_EQUAL_FP16(Float16(1.0), h2);
  ASSERT_EQUAL_FP16(kFP16PositiveInfinity, h3);
  ASSERT_EQUAL_FP16(kFP16NegativeInfinity, h4);
  ASSERT_EQUAL_FP16(kFP16DefaultNaN, h5);
  ASSERT_EQUAL_FP16(kFP16DefaultNaN, h6);
  ASSERT_EQUAL_FP16(Float16(10.0), h7);
#endif

  TEARDOWN();
}


TEST(fadd_h_neon) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();
  __ Fmov(v0.V4H(), 24.0);
  __ Fmov(v1.V4H(), 1024.0);
  __ Fmov(v2.V8H(), 5.5);
  __ Fmov(v3.V8H(), 2048.0);
  __ Fmov(v4.V8H(), kFP16PositiveInfinity);
  __ Fmov(v5.V8H(), kFP16NegativeInfinity);
  __ Fmov(v6.V4H(), RawbitsToFloat16(0x7c2f));
  __ Fmov(v7.V8H(), RawbitsToFloat16(0xfe0f));

  __ Fadd(v8.V4H(), v1.V4H(), v0.V4H());
  __ Fadd(v9.V8H(), v3.V8H(), v2.V8H());
  __ Fadd(v10.V4H(), v4.V4H(), v3.V4H());

  __ Fadd(v11.V4H(), v6.V4H(), v1.V4H());
  __ Fadd(v12.V4H(), v7.V4H(), v7.V4H());

  END();

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
  RUN();

  ASSERT_EQUAL_128(0x0000000000000000, 0x6418641864186418, q8);
  // 2053.5 is unrepresentable in FP16; it rounds to 2054 (0x6803).
  ASSERT_EQUAL_128(0x6803680368036803, 0x6803680368036803, q9);

  // Note: we test NaNs here as vectors aren't covered by process_nans_half
  // and we don't have traces for half-precision-enabled hardware.
  // inf + 2048 = inf (0x7c00 encodes infinity, not a NaN).
  ASSERT_EQUAL_128(0x0000000000000000, 0x7c007c007c007c00, q10);
  // Quiet NaN from signalling.
  ASSERT_EQUAL_128(0x0000000000000000, 0x7e2f7e2f7e2f7e2f, q11);
  // Quiet NaN.
  ASSERT_EQUAL_128(0x0000000000000000, 0xfe0ffe0ffe0ffe0f, q12);
#endif

  TEARDOWN();
}


TEST(fsub) {
  SETUP_WITH_FEATURES(CPUFeatures::kFP);

  START();
  __ Fmov(s14, -0.0f);
  __ Fmov(s15, kFP32PositiveInfinity);
  __ Fmov(s16, kFP32NegativeInfinity);
  __ Fmov(s17, 3.25f);
  __ Fmov(s18, 1.0f);
  __ Fmov(s19, 0.0f);

  __ Fmov(d26, -0.0);
  __ Fmov(d27, kFP64PositiveInfinity);
  __ Fmov(d28, kFP64NegativeInfinity);
  __ Fmov(d29, 0.0);
  __ Fmov(d30, -2.0);
  __ Fmov(d31, 2.25);

  __ Fsub(s0, s17, s18);
  __ Fsub(s1, s18, s19);
  __ Fsub(s2, s14, s18);
  __ Fsub(s3, s18, s15);
  __ Fsub(s4, s18, s16);
  __ Fsub(s5, s15, s15);
  __ Fsub(s6, s16, s16);

  __ Fsub(d7, d30, d31);
  __ Fsub(d8, d29, d31);
  __ Fsub(d9, d26, d31);
  __ Fsub(d10, d31, d27);
  __ Fsub(d11, d31, d28);
  __ Fsub(d12, d27, d27);
  __ Fsub(d13, d28, d28);
  END();

  RUN();

  ASSERT_EQUAL_FP32(2.25, s0);
  ASSERT_EQUAL_FP32(1.0, s1);
  ASSERT_EQUAL_FP32(-1.0, s2);
  ASSERT_EQUAL_FP32(kFP32NegativeInfinity, s3);
  ASSERT_EQUAL_FP32(kFP32PositiveInfinity, s4);
  ASSERT_EQUAL_FP32(kFP32DefaultNaN, s5);
  ASSERT_EQUAL_FP32(kFP32DefaultNaN, s6);
  ASSERT_EQUAL_FP64(-4.25, d7);
  ASSERT_EQUAL_FP64(-2.25, d8);
  ASSERT_EQUAL_FP64(-2.25, d9);
  ASSERT_EQUAL_FP64(kFP64NegativeInfinity, d10);
  ASSERT_EQUAL_FP64(kFP64PositiveInfinity, d11);
  ASSERT_EQUAL_FP64(kFP64DefaultNaN, d12);
  ASSERT_EQUAL_FP64(kFP64DefaultNaN, d13);

  TEARDOWN();
}


TEST(fsub_h) {
  SETUP_WITH_FEATURES(CPUFeatures::kFP, CPUFeatures::kFPHalf);

  START();
  __ Fmov(h14, -0.0f);
  __ Fmov(h15, kFP16PositiveInfinity);
  __ Fmov(h16, kFP16NegativeInfinity);
  __ Fmov(h17, 3.25f);
  __ Fmov(h18, 1.0f);
  __ Fmov(h19, 0.0f);

  __ Fsub(h0, h17, h18);
  __ Fsub(h1, h18, h19);
  __ Fsub(h2, h14, h18);
  __ Fsub(h3, h18, h15);
  __ Fsub(h4, h18, h16);
  __ Fsub(h5, h15, h15);
  __ Fsub(h6, h16, h16);
  END();

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
  RUN();

  ASSERT_EQUAL_FP16(Float16(2.25), h0);
  ASSERT_EQUAL_FP16(Float16(1.0), h1);
  ASSERT_EQUAL_FP16(Float16(-1.0), h2);
  ASSERT_EQUAL_FP16(kFP16NegativeInfinity, h3);
  ASSERT_EQUAL_FP16(kFP16PositiveInfinity, h4);
  ASSERT_EQUAL_FP16(kFP16DefaultNaN, h5);
  ASSERT_EQUAL_FP16(kFP16DefaultNaN, h6);
#endif

  TEARDOWN();
}


TEST(fsub_h_neon) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();
  __ Fmov(v0.V4H(), 24.0);
  __ Fmov(v1.V4H(), 1024.0);
  __ Fmov(v2.V8H(), 5.5);
  __ Fmov(v3.V8H(), 2048.0);
  __ Fmov(v4.V4H(), kFP16PositiveInfinity);
  __ Fmov(v5.V4H(), kFP16NegativeInfinity);
  __ Fmov(v6.V4H(), RawbitsToFloat16(0x7c22));
  __ Fmov(v7.V8H(), RawbitsToFloat16(0xfe02));

  __ Fsub(v0.V4H(), v1.V4H(), v0.V4H());
  __ Fsub(v8.V8H(), v3.V8H(), v2.V8H());
  __ Fsub(v9.V4H(), v4.V4H(), v3.V4H());
  __ Fsub(v10.V4H(), v0.V4H(), v1.V4H());

  __ Fsub(v11.V4H(), v6.V4H(), v2.V4H());
  __ Fsub(v12.V4H(), v7.V4H(), v7.V4H());
  END();

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
  RUN();

  ASSERT_EQUAL_128(0x0000000000000000, 0x63d063d063d063d0, q0);
  // 2042.5 is unrepresentable in FP16; it rounds (ties-to-even) to 2042 (0x67fa):
  ASSERT_EQUAL_128(0x67fa67fa67fa67fa, 0x67fa67fa67fa67fa, q8);

  // Note: we test NaNs here as vectors aren't covered by process_nans_half
  // and we don't have traces for half-precision-enabled hardware.
  // inf - 2048 = inf (0x7c00 encodes infinity, not a NaN).
  ASSERT_EQUAL_128(0x0000000000000000, 0x7c007c007c007c00, q9);
  ASSERT_EQUAL_128(0x0000000000000000, 0xce00ce00ce00ce00, q10);
  // Quiet NaN from signalling.
  ASSERT_EQUAL_128(0x0000000000000000, 0x7e227e227e227e22, q11);
  // Quiet NaN.
  ASSERT_EQUAL_128(0x0000000000000000, 0xfe02fe02fe02fe02, q12);

#endif

  TEARDOWN();
}


TEST(fmul) {
  SETUP_WITH_FEATURES(CPUFeatures::kFP);

  START();
  __ Fmov(s14, -0.0f);
  __ Fmov(s15, kFP32PositiveInfinity);
  __ Fmov(s16, kFP32NegativeInfinity);
  __ Fmov(s17, 3.25f);
  __ Fmov(s18, 2.0f);
  __ Fmov(s19, 0.0f);
  __ Fmov(s20, -2.0f);

  __ Fmov(d26, -0.0);
  __ Fmov(d27, kFP64PositiveInfinity);
  __ Fmov(d28, kFP64NegativeInfinity);
  __ Fmov(d29, 0.0);
  __ Fmov(d30, -2.0);
  __ Fmov(d31, 2.25);

  __ Fmul(s0, s17, s18);
  __ Fmul(s1, s18, s19);
  __ Fmul(s2, s14, s14);
  __ Fmul(s3, s15, s20);
  __ Fmul(s4, s16, s20);
  __ Fmul(s5, s15, s19);
  __ Fmul(s6, s19, s16);

  __ Fmul(d7, d30, d31);
  __ Fmul(d8, d29, d31);
  __ Fmul(d9, d26, d26);
  __ Fmul(d10, d27, d30);
  __ Fmul(d11, d28, d30);
  __ Fmul(d12, d27, d29);
  __ Fmul(d13, d29, d28);
  END();

  RUN();

  ASSERT_EQUAL_FP32(6.5, s0);
  ASSERT_EQUAL_FP32(0.0, s1);
  ASSERT_EQUAL_FP32(0.0, s2);
  ASSERT_EQUAL_FP32(kFP32NegativeInfinity, s3);
  ASSERT_EQUAL_FP32(kFP32PositiveInfinity, s4);
  ASSERT_EQUAL_FP32(kFP32DefaultNaN, s5);
  ASSERT_EQUAL_FP32(kFP32DefaultNaN, s6);
  ASSERT_EQUAL_FP64(-4.5, d7);
  ASSERT_EQUAL_FP64(0.0, d8);
  ASSERT_EQUAL_FP64(0.0, d9);
  ASSERT_EQUAL_FP64(kFP64NegativeInfinity, d10);
  ASSERT_EQUAL_FP64(kFP64PositiveInfinity, d11);
  ASSERT_EQUAL_FP64(kFP64DefaultNaN, d12);
  ASSERT_EQUAL_FP64(kFP64DefaultNaN, d13);

  TEARDOWN();
}


TEST(fmul_h) {
  SETUP_WITH_FEATURES(CPUFeatures::kFP, CPUFeatures::kFPHalf);

  START();
  __ Fmov(h14, -0.0f);
  __ Fmov(h15, kFP16PositiveInfinity);
  __ Fmov(h16, kFP16NegativeInfinity);
  __ Fmov(h17, 3.25f);
  __ Fmov(h18, 2.0f);
  __ Fmov(h19, 0.0f);
  __ Fmov(h20, -2.0f);

  __ Fmul(h0, h17, h18);
  __ Fmul(h1, h18, h19);
  __ Fmul(h2, h14, h14);
  __ Fmul(h3, h15, h20);
  __ Fmul(h4, h16, h20);
  __ Fmul(h5, h15, h19);
  __ Fmul(h6, h19, h16);
  END();

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
  RUN();

  ASSERT_EQUAL_FP16(Float16(6.5), h0);
  ASSERT_EQUAL_FP16(Float16(0.0), h1);
  ASSERT_EQUAL_FP16(Float16(0.0), h2);
  ASSERT_EQUAL_FP16(kFP16NegativeInfinity, h3);
  ASSERT_EQUAL_FP16(kFP16PositiveInfinity, h4);
  ASSERT_EQUAL_FP16(kFP16DefaultNaN, h5);
  ASSERT_EQUAL_FP16(kFP16DefaultNaN, h6);
#endif  // VIXL_INCLUDE_SIMULATOR_AARCH64

  TEARDOWN();
}


TEST(fmul_h_neon) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();
  __ Fmov(v0.V4H(), 24.0);
  __ Fmov(v1.V4H(), -2.0);
  __ Fmov(v2.V8H(), 5.5);
  __ Fmov(v3.V8H(), 0.5);
  __ Fmov(v4.V4H(), kFP16PositiveInfinity);
  __ Fmov(v5.V4H(), kFP16NegativeInfinity);

  __ Fmul(v6.V4H(), v1.V4H(), v0.V4H());
  __ Fmul(v7.V8H(), v3.V8H(), v2.V8H());
  __ Fmul(v8.V4H(), v4.V4H(), v3.V4H());
  __ Fmul(v9.V4H(), v0.V4H(), v1.V4H());
  __ Fmul(v10.V4H(), v5.V4H(), v0.V4H());
  END();

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
  RUN();

  ASSERT_EQUAL_128(0x0000000000000000, 0xd200d200d200d200, q6);
  ASSERT_EQUAL_128(0x4180418041804180, 0x4180418041804180, q7);
  ASSERT_EQUAL_128(0x0000000000000000, 0x7c007c007c007c00, q8);
  ASSERT_EQUAL_128(0x0000000000000000, 0xd200d200d200d200, q9);
  ASSERT_EQUAL_128(0x0000000000000000, 0xfc00fc00fc00fc00, q10);
#endif  // VIXL_INCLUDE_SIMULATOR_AARCH64

  TEARDOWN();
}


TEST(fnmul_h) {
  SETUP_WITH_FEATURES(CPUFeatures::kFP, CPUFeatures::kFPHalf);

  START();
  __ Fmov(h14, -0.0f);
  __ Fmov(h15, kFP16PositiveInfinity);
  __ Fmov(h16, kFP16NegativeInfinity);
  __ Fmov(h17, 3.25f);
  __ Fmov(h18, 2.0f);
  __ Fmov(h19, 0.0f);
  __ Fmov(h20, -2.0f);

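  // Fnmul negates the product: -(n * m). Note that inf * 0 generates the
  // default NaN, whose sign bit is then flipped (0x7e00 becomes 0xfe00).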
  __ Fnmul(h0, h17, h18);
  __ Fnmul(h1, h18, h19);
  __ Fnmul(h2, h14, h14);
  __ Fnmul(h3, h15, h20);
  __ Fnmul(h4, h16, h20);
  __ Fnmul(h5, h15, h19);
  __ Fnmul(h6, h19, h16);
  END();

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
  RUN();

  ASSERT_EQUAL_FP16(Float16(-6.5), h0);
  ASSERT_EQUAL_FP16(Float16(-0.0), h1);
  ASSERT_EQUAL_FP16(Float16(-0.0), h2);
  ASSERT_EQUAL_FP16(kFP16PositiveInfinity, h3);
  ASSERT_EQUAL_FP16(kFP16NegativeInfinity, h4);
  ASSERT_EQUAL_FP16(RawbitsToFloat16(0xfe00), h5);
  ASSERT_EQUAL_FP16(RawbitsToFloat16(0xfe00), h6);
#endif  // VIXL_INCLUDE_SIMULATOR_AARCH64

  TEARDOWN();
}


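// Checks the four fused multiply-add variants against caller-supplied
// expectations. For reference, in terms of C99 fma():
//   fmadd  = fma(n, m, a)
//   fmsub  = fma(-n, m, a)
//   fnmadd = fma(-n, m, -a)
//   fnmsub = fma(n, m, -a)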
static void FmaddFmsubHelper(double n,
                             double m,
                             double a,
                             double fmadd,
                             double fmsub,
                             double fnmadd,
                             double fnmsub) {
  SETUP_WITH_FEATURES(CPUFeatures::kFP);

  START();

  __ Fmov(d0, n);
  __ Fmov(d1, m);
  __ Fmov(d2, a);
  __ Fmadd(d28, d0, d1, d2);
  __ Fmsub(d29, d0, d1, d2);
  __ Fnmadd(d30, d0, d1, d2);
  __ Fnmsub(d31, d0, d1, d2);

  END();
  RUN();

  ASSERT_EQUAL_FP64(fmadd, d28);
  ASSERT_EQUAL_FP64(fmsub, d29);
  ASSERT_EQUAL_FP64(fnmadd, d30);
  ASSERT_EQUAL_FP64(fnmsub, d31);

  TEARDOWN();
}


TEST(fmadd_fmsub_double) {
  // It's hard to check the result of fused operations because the only way to
  // calculate the result is to use fma, which is what the Simulator uses
  // anyway.

  // Basic operation.
  FmaddFmsubHelper(1.0, 2.0, 3.0, 5.0, 1.0, -5.0, -1.0);
  FmaddFmsubHelper(-1.0, 2.0, 3.0, 1.0, 5.0, -1.0, -5.0);

  // Check the sign of exact zeroes.
  //               n     m     a     fmadd  fmsub  fnmadd fnmsub
  FmaddFmsubHelper(-0.0, +0.0, -0.0, -0.0, +0.0, +0.0, +0.0);
  FmaddFmsubHelper(+0.0, +0.0, -0.0, +0.0, -0.0, +0.0, +0.0);
  FmaddFmsubHelper(+0.0, +0.0, +0.0, +0.0, +0.0, -0.0, +0.0);
  FmaddFmsubHelper(-0.0, +0.0, +0.0, +0.0, +0.0, +0.0, -0.0);
  FmaddFmsubHelper(+0.0, -0.0, -0.0, -0.0, +0.0, +0.0, +0.0);
  FmaddFmsubHelper(-0.0, -0.0, -0.0, +0.0, -0.0, +0.0, +0.0);
  FmaddFmsubHelper(-0.0, -0.0, +0.0, +0.0, +0.0, -0.0, +0.0);
  FmaddFmsubHelper(+0.0, -0.0, +0.0, +0.0, +0.0, +0.0, -0.0);

  // Check NaN generation.
  FmaddFmsubHelper(kFP64PositiveInfinity,
                   0.0,
                   42.0,
                   kFP64DefaultNaN,
                   kFP64DefaultNaN,
                   kFP64DefaultNaN,
                   kFP64DefaultNaN);
  FmaddFmsubHelper(0.0,
                   kFP64PositiveInfinity,
                   42.0,
                   kFP64DefaultNaN,
                   kFP64DefaultNaN,
                   kFP64DefaultNaN,
                   kFP64DefaultNaN);
  FmaddFmsubHelper(kFP64PositiveInfinity,
                   1.0,
                   kFP64PositiveInfinity,
                   kFP64PositiveInfinity,  //  inf + ( inf * 1) = inf
                   kFP64DefaultNaN,        //  inf + (-inf * 1) = NaN
                   kFP64NegativeInfinity,  // -inf + (-inf * 1) = -inf
                   kFP64DefaultNaN);       // -inf + ( inf * 1) = NaN
  FmaddFmsubHelper(kFP64NegativeInfinity,
                   1.0,
                   kFP64PositiveInfinity,
                   kFP64DefaultNaN,         //  inf + (-inf * 1) = NaN
                   kFP64PositiveInfinity,   //  inf + ( inf * 1) = inf
                   kFP64DefaultNaN,         // -inf + ( inf * 1) = NaN
                   kFP64NegativeInfinity);  // -inf + (-inf * 1) = -inf
}


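// Single-precision overload of the helper above.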
static void FmaddFmsubHelper(float n,
                             float m,
                             float a,
                             float fmadd,
                             float fmsub,
                             float fnmadd,
                             float fnmsub) {
  SETUP_WITH_FEATURES(CPUFeatures::kFP);

  START();

  __ Fmov(s0, n);
  __ Fmov(s1, m);
  __ Fmov(s2, a);
  __ Fmadd(s28, s0, s1, s2);
  __ Fmsub(s29, s0, s1, s2);
  __ Fnmadd(s30, s0, s1, s2);
  __ Fnmsub(s31, s0, s1, s2);

  END();
  RUN();

  ASSERT_EQUAL_FP32(fmadd, s28);
  ASSERT_EQUAL_FP32(fmsub, s29);
  ASSERT_EQUAL_FP32(fnmadd, s30);
  ASSERT_EQUAL_FP32(fnmsub, s31);

  TEARDOWN();
}


TEST(fmadd_fmsub_float) {
  // It's hard to check the result of fused operations because the only way to
  // calculate the result is to use fma, which is what the simulator uses
  // anyway.

  // Basic operation.
  FmaddFmsubHelper(1.0f, 2.0f, 3.0f, 5.0f, 1.0f, -5.0f, -1.0f);
  FmaddFmsubHelper(-1.0f, 2.0f, 3.0f, 1.0f, 5.0f, -1.0f, -5.0f);

  // Check the sign of exact zeroes.
  //               n      m      a      fmadd  fmsub  fnmadd fnmsub
  FmaddFmsubHelper(-0.0f, +0.0f, -0.0f, -0.0f, +0.0f, +0.0f, +0.0f);
  FmaddFmsubHelper(+0.0f, +0.0f, -0.0f, +0.0f, -0.0f, +0.0f, +0.0f);
  FmaddFmsubHelper(+0.0f, +0.0f, +0.0f, +0.0f, +0.0f, -0.0f, +0.0f);
  FmaddFmsubHelper(-0.0f, +0.0f, +0.0f, +0.0f, +0.0f, +0.0f, -0.0f);
  FmaddFmsubHelper(+0.0f, -0.0f, -0.0f, -0.0f, +0.0f, +0.0f, +0.0f);
  FmaddFmsubHelper(-0.0f, -0.0f, -0.0f, +0.0f, -0.0f, +0.0f, +0.0f);
  FmaddFmsubHelper(-0.0f, -0.0f, +0.0f, +0.0f, +0.0f, -0.0f, +0.0f);
  FmaddFmsubHelper(+0.0f, -0.0f, +0.0f, +0.0f, +0.0f, +0.0f, -0.0f);

  // Check NaN generation.
  FmaddFmsubHelper(kFP32PositiveInfinity,
                   0.0f,
                   42.0f,
                   kFP32DefaultNaN,
                   kFP32DefaultNaN,
                   kFP32DefaultNaN,
                   kFP32DefaultNaN);
  FmaddFmsubHelper(0.0f,
                   kFP32PositiveInfinity,
                   42.0f,
                   kFP32DefaultNaN,
                   kFP32DefaultNaN,
                   kFP32DefaultNaN,
                   kFP32DefaultNaN);
  FmaddFmsubHelper(kFP32PositiveInfinity,
                   1.0f,
                   kFP32PositiveInfinity,
                   kFP32PositiveInfinity,  //  inf + ( inf * 1) = inf
                   kFP32DefaultNaN,        //  inf + (-inf * 1) = NaN
                   kFP32NegativeInfinity,  // -inf + (-inf * 1) = -inf
                   kFP32DefaultNaN);       // -inf + ( inf * 1) = NaN
  FmaddFmsubHelper(kFP32NegativeInfinity,
                   1.0f,
                   kFP32PositiveInfinity,
                   kFP32DefaultNaN,         //  inf + (-inf * 1) = NaN
                   kFP32PositiveInfinity,   //  inf + ( inf * 1) = inf
                   kFP32DefaultNaN,         // -inf + ( inf * 1) = NaN
                   kFP32NegativeInfinity);  // -inf + (-inf * 1) = -inf
}


TEST(fmadd_fmsub_double_nans) {
  // Make sure that NaN propagation works correctly.
  double s1 = RawbitsToDouble(0x7ff5555511111111);
  double s2 = RawbitsToDouble(0x7ff5555522222222);
  double sa = RawbitsToDouble(0x7ff55555aaaaaaaa);
  double q1 = RawbitsToDouble(0x7ffaaaaa11111111);
  double q2 = RawbitsToDouble(0x7ffaaaaa22222222);
  double qa = RawbitsToDouble(0x7ffaaaaaaaaaaaaa);
  VIXL_ASSERT(IsSignallingNaN(s1));
  VIXL_ASSERT(IsSignallingNaN(s2));
  VIXL_ASSERT(IsSignallingNaN(sa));
  VIXL_ASSERT(IsQuietNaN(q1));
  VIXL_ASSERT(IsQuietNaN(q2));
  VIXL_ASSERT(IsQuietNaN(qa));

  // The input NaNs after passing through ProcessNaN.
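  // (Quieting a signalling NaN sets the top fraction bit while preserving the
  // payload: 0x0008000000000000 for doubles, 0x00400000 for floats.)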
  double s1_proc = RawbitsToDouble(0x7ffd555511111111);
  double s2_proc = RawbitsToDouble(0x7ffd555522222222);
  double sa_proc = RawbitsToDouble(0x7ffd5555aaaaaaaa);
  double q1_proc = q1;
  double q2_proc = q2;
  double qa_proc = qa;
  VIXL_ASSERT(IsQuietNaN(s1_proc));
  VIXL_ASSERT(IsQuietNaN(s2_proc));
  VIXL_ASSERT(IsQuietNaN(sa_proc));
  VIXL_ASSERT(IsQuietNaN(q1_proc));
  VIXL_ASSERT(IsQuietNaN(q2_proc));
  VIXL_ASSERT(IsQuietNaN(qa_proc));

  // Negated NaNs, as they would be produced on ARMv8 hardware.
  double s1_proc_neg = RawbitsToDouble(0xfffd555511111111);
  double sa_proc_neg = RawbitsToDouble(0xfffd5555aaaaaaaa);
  double q1_proc_neg = RawbitsToDouble(0xfffaaaaa11111111);
  double qa_proc_neg = RawbitsToDouble(0xfffaaaaaaaaaaaaa);
  VIXL_ASSERT(IsQuietNaN(s1_proc_neg));
  VIXL_ASSERT(IsQuietNaN(sa_proc_neg));
  VIXL_ASSERT(IsQuietNaN(q1_proc_neg));
  VIXL_ASSERT(IsQuietNaN(qa_proc_neg));

  // Quiet NaNs are propagated.
  FmaddFmsubHelper(q1, 0, 0, q1_proc, q1_proc_neg, q1_proc_neg, q1_proc);
  FmaddFmsubHelper(0, q2, 0, q2_proc, q2_proc, q2_proc, q2_proc);
  FmaddFmsubHelper(0, 0, qa, qa_proc, qa_proc, qa_proc_neg, qa_proc_neg);
  FmaddFmsubHelper(q1, q2, 0, q1_proc, q1_proc_neg, q1_proc_neg, q1_proc);
  FmaddFmsubHelper(0, q2, qa, qa_proc, qa_proc, qa_proc_neg, qa_proc_neg);
  FmaddFmsubHelper(q1, 0, qa, qa_proc, qa_proc, qa_proc_neg, qa_proc_neg);
  FmaddFmsubHelper(q1, q2, qa, qa_proc, qa_proc, qa_proc_neg, qa_proc_neg);

  // Signalling NaNs are propagated, and made quiet.
  FmaddFmsubHelper(s1, 0, 0, s1_proc, s1_proc_neg, s1_proc_neg, s1_proc);
  FmaddFmsubHelper(0, s2, 0, s2_proc, s2_proc, s2_proc, s2_proc);
  FmaddFmsubHelper(0, 0, sa, sa_proc, sa_proc, sa_proc_neg, sa_proc_neg);
  FmaddFmsubHelper(s1, s2, 0, s1_proc, s1_proc_neg, s1_proc_neg, s1_proc);
  FmaddFmsubHelper(0, s2, sa, sa_proc, sa_proc, sa_proc_neg, sa_proc_neg);
  FmaddFmsubHelper(s1, 0, sa, sa_proc, sa_proc, sa_proc_neg, sa_proc_neg);
  FmaddFmsubHelper(s1, s2, sa, sa_proc, sa_proc, sa_proc_neg, sa_proc_neg);

  // Signalling NaNs take precedence over quiet NaNs.
  FmaddFmsubHelper(s1, q2, qa, s1_proc, s1_proc_neg, s1_proc_neg, s1_proc);
  FmaddFmsubHelper(q1, s2, qa, s2_proc, s2_proc, s2_proc, s2_proc);
  FmaddFmsubHelper(q1, q2, sa, sa_proc, sa_proc, sa_proc_neg, sa_proc_neg);
  FmaddFmsubHelper(s1, s2, qa, s1_proc, s1_proc_neg, s1_proc_neg, s1_proc);
  FmaddFmsubHelper(q1, s2, sa, sa_proc, sa_proc, sa_proc_neg, sa_proc_neg);
  FmaddFmsubHelper(s1, q2, sa, sa_proc, sa_proc, sa_proc_neg, sa_proc_neg);
  FmaddFmsubHelper(s1, s2, sa, sa_proc, sa_proc, sa_proc_neg, sa_proc_neg);

  // A NaN generated by the intermediate op1 * op2 overrides a quiet NaN in a.
  FmaddFmsubHelper(0,
                   kFP64PositiveInfinity,
                   qa,
                   kFP64DefaultNaN,
                   kFP64DefaultNaN,
                   kFP64DefaultNaN,
                   kFP64DefaultNaN);
  FmaddFmsubHelper(kFP64PositiveInfinity,
                   0,
                   qa,
                   kFP64DefaultNaN,
                   kFP64DefaultNaN,
                   kFP64DefaultNaN,
                   kFP64DefaultNaN);
  FmaddFmsubHelper(0,
                   kFP64NegativeInfinity,
                   qa,
                   kFP64DefaultNaN,
                   kFP64DefaultNaN,
                   kFP64DefaultNaN,
                   kFP64DefaultNaN);
  FmaddFmsubHelper(kFP64NegativeInfinity,
                   0,
                   qa,
                   kFP64DefaultNaN,
                   kFP64DefaultNaN,
                   kFP64DefaultNaN,
                   kFP64DefaultNaN);
}


TEST(fmadd_fmsub_float_nans) {
  // Make sure that NaN propagation works correctly.
  float s1 = RawbitsToFloat(0x7f951111);
  float s2 = RawbitsToFloat(0x7f952222);
  float sa = RawbitsToFloat(0x7f95aaaa);
  float q1 = RawbitsToFloat(0x7fea1111);
  float q2 = RawbitsToFloat(0x7fea2222);
  float qa = RawbitsToFloat(0x7feaaaaa);
  VIXL_ASSERT(IsSignallingNaN(s1));
  VIXL_ASSERT(IsSignallingNaN(s2));
  VIXL_ASSERT(IsSignallingNaN(sa));
  VIXL_ASSERT(IsQuietNaN(q1));
  VIXL_ASSERT(IsQuietNaN(q2));
  VIXL_ASSERT(IsQuietNaN(qa));

  // The input NaNs after passing through ProcessNaN.
  float s1_proc = RawbitsToFloat(0x7fd51111);
  float s2_proc = RawbitsToFloat(0x7fd52222);
  float sa_proc = RawbitsToFloat(0x7fd5aaaa);
  float q1_proc = q1;
  float q2_proc = q2;
  float qa_proc = qa;
  VIXL_ASSERT(IsQuietNaN(s1_proc));
  VIXL_ASSERT(IsQuietNaN(s2_proc));
  VIXL_ASSERT(IsQuietNaN(sa_proc));
  VIXL_ASSERT(IsQuietNaN(q1_proc));
  VIXL_ASSERT(IsQuietNaN(q2_proc));
  VIXL_ASSERT(IsQuietNaN(qa_proc));

  // Negated NaNs, as they would be produced on ARMv8 hardware.
  float s1_proc_neg = RawbitsToFloat(0xffd51111);
  float sa_proc_neg = RawbitsToFloat(0xffd5aaaa);
  float q1_proc_neg = RawbitsToFloat(0xffea1111);
  float qa_proc_neg = RawbitsToFloat(0xffeaaaaa);
  VIXL_ASSERT(IsQuietNaN(s1_proc_neg));
  VIXL_ASSERT(IsQuietNaN(sa_proc_neg));
  VIXL_ASSERT(IsQuietNaN(q1_proc_neg));
  VIXL_ASSERT(IsQuietNaN(qa_proc_neg));

  // Quiet NaNs are propagated.
  FmaddFmsubHelper(q1, 0, 0, q1_proc, q1_proc_neg, q1_proc_neg, q1_proc);
  FmaddFmsubHelper(0, q2, 0, q2_proc, q2_proc, q2_proc, q2_proc);
  FmaddFmsubHelper(0, 0, qa, qa_proc, qa_proc, qa_proc_neg, qa_proc_neg);
  FmaddFmsubHelper(q1, q2, 0, q1_proc, q1_proc_neg, q1_proc_neg, q1_proc);
  FmaddFmsubHelper(0, q2, qa, qa_proc, qa_proc, qa_proc_neg, qa_proc_neg);
  FmaddFmsubHelper(q1, 0, qa, qa_proc, qa_proc, qa_proc_neg, qa_proc_neg);
  FmaddFmsubHelper(q1, q2, qa, qa_proc, qa_proc, qa_proc_neg, qa_proc_neg);

  // Signalling NaNs are propagated, and made quiet.
  FmaddFmsubHelper(s1, 0, 0, s1_proc, s1_proc_neg, s1_proc_neg, s1_proc);
  FmaddFmsubHelper(0, s2, 0, s2_proc, s2_proc, s2_proc, s2_proc);
  FmaddFmsubHelper(0, 0, sa, sa_proc, sa_proc, sa_proc_neg, sa_proc_neg);
  FmaddFmsubHelper(s1, s2, 0, s1_proc, s1_proc_neg, s1_proc_neg, s1_proc);
  FmaddFmsubHelper(0, s2, sa, sa_proc, sa_proc, sa_proc_neg, sa_proc_neg);
  FmaddFmsubHelper(s1, 0, sa, sa_proc, sa_proc, sa_proc_neg, sa_proc_neg);
  FmaddFmsubHelper(s1, s2, sa, sa_proc, sa_proc, sa_proc_neg, sa_proc_neg);

  // Signalling NaNs take precedence over quiet NaNs.
  FmaddFmsubHelper(s1, q2, qa, s1_proc, s1_proc_neg, s1_proc_neg, s1_proc);
  FmaddFmsubHelper(q1, s2, qa, s2_proc, s2_proc, s2_proc, s2_proc);
  FmaddFmsubHelper(q1, q2, sa, sa_proc, sa_proc, sa_proc_neg, sa_proc_neg);
  FmaddFmsubHelper(s1, s2, qa, s1_proc, s1_proc_neg, s1_proc_neg, s1_proc);
  FmaddFmsubHelper(q1, s2, sa, sa_proc, sa_proc, sa_proc_neg, sa_proc_neg);
  FmaddFmsubHelper(s1, q2, sa, sa_proc, sa_proc, sa_proc_neg, sa_proc_neg);
  FmaddFmsubHelper(s1, s2, sa, sa_proc, sa_proc, sa_proc_neg, sa_proc_neg);

  // A NaN generated by the intermediate op1 * op2 overrides a quiet NaN in a.
  FmaddFmsubHelper(0,
                   kFP32PositiveInfinity,
                   qa,
                   kFP32DefaultNaN,
                   kFP32DefaultNaN,
                   kFP32DefaultNaN,
                   kFP32DefaultNaN);
  FmaddFmsubHelper(kFP32PositiveInfinity,
                   0,
                   qa,
                   kFP32DefaultNaN,
                   kFP32DefaultNaN,
                   kFP32DefaultNaN,
                   kFP32DefaultNaN);
  FmaddFmsubHelper(0,
                   kFP32NegativeInfinity,
                   qa,
                   kFP32DefaultNaN,
                   kFP32DefaultNaN,
                   kFP32DefaultNaN,
                   kFP32DefaultNaN);
  FmaddFmsubHelper(kFP32NegativeInfinity,
                   0,
                   qa,
                   kFP32DefaultNaN,
                   kFP32DefaultNaN,
                   kFP32DefaultNaN,
                   kFP32DefaultNaN);
}


TEST(fdiv) {
  SETUP_WITH_FEATURES(CPUFeatures::kFP);

  START();
  __ Fmov(s14, -0.0f);
  __ Fmov(s15, kFP32PositiveInfinity);
  __ Fmov(s16, kFP32NegativeInfinity);
  __ Fmov(s17, 3.25f);
  __ Fmov(s18, 2.0f);
  __ Fmov(s19, 2.0f);
  __ Fmov(s20, -2.0f);

  __ Fmov(d26, -0.0);
  __ Fmov(d27, kFP64PositiveInfinity);
  __ Fmov(d28, kFP64NegativeInfinity);
  __ Fmov(d29, 0.0);
  __ Fmov(d30, -2.0);
  __ Fmov(d31, 2.25);

  __ Fdiv(s0, s17, s18);
  __ Fdiv(s1, s18, s19);
  __ Fdiv(s2, s14, s18);
  __ Fdiv(s3, s18, s15);
  __ Fdiv(s4, s18, s16);
  __ Fdiv(s5, s15, s16);
  __ Fdiv(s6, s14, s14);

  __ Fdiv(d7, d31, d30);
  __ Fdiv(d8, d29, d31);
  __ Fdiv(d9, d26, d31);
  __ Fdiv(d10, d31, d27);
  __ Fdiv(d11, d31, d28);
  __ Fdiv(d12, d28, d27);
  __ Fdiv(d13, d29, d29);
  END();

  RUN();

  ASSERT_EQUAL_FP32(1.625f, s0);
  ASSERT_EQUAL_FP32(1.0f, s1);
  ASSERT_EQUAL_FP32(-0.0f, s2);
  ASSERT_EQUAL_FP32(0.0f, s3);
  ASSERT_EQUAL_FP32(-0.0f, s4);
  ASSERT_EQUAL_FP32(kFP32DefaultNaN, s5);
  ASSERT_EQUAL_FP32(kFP32DefaultNaN, s6);
  ASSERT_EQUAL_FP64(-1.125, d7);
  ASSERT_EQUAL_FP64(0.0, d8);
  ASSERT_EQUAL_FP64(-0.0, d9);
  ASSERT_EQUAL_FP64(0.0, d10);
  ASSERT_EQUAL_FP64(-0.0, d11);
  ASSERT_EQUAL_FP64(kFP64DefaultNaN, d12);
  ASSERT_EQUAL_FP64(kFP64DefaultNaN, d13);

  TEARDOWN();
}


TEST(fdiv_h) {
  SETUP_WITH_FEATURES(CPUFeatures::kFP, CPUFeatures::kFPHalf);

  START();
  __ Fmov(h14, -0.0f);
  __ Fmov(h15, kFP16PositiveInfinity);
  __ Fmov(h16, kFP16NegativeInfinity);
  __ Fmov(h17, 3.25f);
  __ Fmov(h18, 2.0f);
  __ Fmov(h19, 2.0f);
  __ Fmov(h20, -2.0f);

  __ Fdiv(h0, h17, h18);
  __ Fdiv(h1, h18, h19);
  __ Fdiv(h2, h14, h18);
  __ Fdiv(h3, h18, h15);
  __ Fdiv(h4, h18, h16);
  __ Fdiv(h5, h15, h16);
  __ Fdiv(h6, h14, h14);
  END();

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
  RUN();

  ASSERT_EQUAL_FP16(Float16(1.625f), h0);
  ASSERT_EQUAL_FP16(Float16(1.0f), h1);
  ASSERT_EQUAL_FP16(Float16(-0.0f), h2);
  ASSERT_EQUAL_FP16(Float16(0.0f), h3);
  ASSERT_EQUAL_FP16(Float16(-0.0f), h4);
  ASSERT_EQUAL_FP16(kFP16DefaultNaN, h5);
  ASSERT_EQUAL_FP16(kFP16DefaultNaN, h6);
#endif  // VIXL_INCLUDE_SIMULATOR_AARCH64

  TEARDOWN();
}


TEST(fdiv_h_neon) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();
  __ Fmov(v0.V4H(), 24.0);
  __ Fmov(v1.V4H(), -2.0);
  __ Fmov(v2.V8H(), 5.5);
  __ Fmov(v3.V8H(), 0.5);
  __ Fmov(v4.V4H(), kFP16PositiveInfinity);
  __ Fmov(v5.V4H(), kFP16NegativeInfinity);

  __ Fdiv(v6.V4H(), v0.V4H(), v1.V4H());
  __ Fdiv(v7.V8H(), v2.V8H(), v3.V8H());
  __ Fdiv(v8.V4H(), v4.V4H(), v3.V4H());
  __ Fdiv(v9.V4H(), v1.V4H(), v0.V4H());
  __ Fdiv(v10.V4H(), v5.V4H(), v0.V4H());
  END();

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
  RUN();

  ASSERT_EQUAL_128(0x0000000000000000, 0xca00ca00ca00ca00, q6);
  ASSERT_EQUAL_128(0x4980498049804980, 0x4980498049804980, q7);
  ASSERT_EQUAL_128(0x0000000000000000, 0x7c007c007c007c00, q8);
  // -0.083333... is unrepresentable in FP16:
  ASSERT_EQUAL_128(0x0000000000000000, 0xad55ad55ad55ad55, q9);
  ASSERT_EQUAL_128(0x0000000000000000, 0xfc00fc00fc00fc00, q10);
#endif  // VIXL_INCLUDE_SIMULATOR_AARCH64

  TEARDOWN();
}


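// Reference model of the Fmin/Fmax NaN rules: signalling NaNs are quieted and
// returned; otherwise a quiet NaN propagates, unless quiet_nan_substitute is
// non-zero (modelling Fminnm/Fmaxnm), in which case a lone quiet NaN is
// replaced by the substitute. Zeros of opposite sign resolve to -0.0 for min
// and +0.0 for max.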
static Float16 MinMaxHelper(Float16 n,
                            Float16 m,
                            bool min,
                            Float16 quiet_nan_substitute = Float16(0.0)) {
  const uint64_t kFP16QuietNaNMask = 0x0200;
  uint16_t raw_n = Float16ToRawbits(n);
  uint16_t raw_m = Float16ToRawbits(m);

  if (IsSignallingNaN(n)) {
    // n is signalling NaN.
    return RawbitsToFloat16(raw_n | kFP16QuietNaNMask);
  } else if (IsSignallingNaN(m)) {
    // m is signalling NaN.
    return RawbitsToFloat16(raw_m | kFP16QuietNaNMask);
  } else if (IsZero(quiet_nan_substitute)) {
    if (IsNaN(n)) {
      // n is quiet NaN.
      return n;
    } else if (IsNaN(m)) {
      // m is quiet NaN.
      return m;
    }
  } else {
    // Substitute n or m if one is quiet, but not both.
    if (IsNaN(n) && !IsNaN(m)) {
      // n is quiet NaN: replace with substitute.
      n = quiet_nan_substitute;
    } else if (!IsNaN(n) && IsNaN(m)) {
      // m is quiet NaN: replace with substitute.
      m = quiet_nan_substitute;
    }
  }

  uint16_t sign_mask = 0x8000;
  if (IsZero(n) && IsZero(m) && ((raw_n & sign_mask) != (raw_m & sign_mask))) {
    return min ? Float16(-0.0) : Float16(0.0);
  }

  if (FPToDouble(n, kIgnoreDefaultNaN) < FPToDouble(m, kIgnoreDefaultNaN)) {
    return min ? n : m;
  }
  return min ? m : n;
}


static float MinMaxHelper(float n,
                          float m,
                          bool min,
                          float quiet_nan_substitute = 0.0) {
  const uint64_t kFP32QuietNaNMask = 0x00400000;
  uint32_t raw_n = FloatToRawbits(n);
  uint32_t raw_m = FloatToRawbits(m);

  if (IsNaN(n) && ((raw_n & kFP32QuietNaNMask) == 0)) {
    // n is signalling NaN.
    return RawbitsToFloat(raw_n | kFP32QuietNaNMask);
  } else if (IsNaN(m) && ((raw_m & kFP32QuietNaNMask) == 0)) {
    // m is signalling NaN.
    return RawbitsToFloat(raw_m | kFP32QuietNaNMask);
  } else if (quiet_nan_substitute == 0.0) {
    if (IsNaN(n)) {
      // n is quiet NaN.
      return n;
    } else if (IsNaN(m)) {
      // m is quiet NaN.
      return m;
    }
  } else {
    // Substitute n or m if one is quiet, but not both.
    if (IsNaN(n) && !IsNaN(m)) {
      // n is quiet NaN: replace with substitute.
      n = quiet_nan_substitute;
    } else if (!IsNaN(n) && IsNaN(m)) {
      // m is quiet NaN: replace with substitute.
      m = quiet_nan_substitute;
    }
  }

  if ((n == 0.0) && (m == 0.0) && (copysign(1.0, n) != copysign(1.0, m))) {
    return min ? -0.0 : 0.0;
  }

  return min ? fminf(n, m) : fmaxf(n, m);
}


MinMaxHelper(double n,double m,bool min,double quiet_nan_substitute=0.0)11586 static double MinMaxHelper(double n,
11587                            double m,
11588                            bool min,
11589                            double quiet_nan_substitute = 0.0) {
11590   const uint64_t kFP64QuietNaNMask = 0x0008000000000000;
11591   uint64_t raw_n = DoubleToRawbits(n);
11592   uint64_t raw_m = DoubleToRawbits(m);
11593 
11594   if (IsNaN(n) && ((raw_n & kFP64QuietNaNMask) == 0)) {
11595     // n is signalling NaN.
11596     return RawbitsToDouble(raw_n | kFP64QuietNaNMask);
11597   } else if (IsNaN(m) && ((raw_m & kFP64QuietNaNMask) == 0)) {
11598     // m is signalling NaN.
11599     return RawbitsToDouble(raw_m | kFP64QuietNaNMask);
11600   } else if (quiet_nan_substitute == 0.0) {
11601     if (IsNaN(n)) {
11602       // n is quiet NaN.
11603       return n;
11604     } else if (IsNaN(m)) {
11605       // m is quiet NaN.
11606       return m;
11607     }
11608   } else {
11609     // Substitute n or m if one is quiet, but not both.
11610     if (IsNaN(n) && !IsNaN(m)) {
11611       // n is quiet NaN: replace with substitute.
11612       n = quiet_nan_substitute;
11613     } else if (!IsNaN(n) && IsNaN(m)) {
11614       // m is quiet NaN: replace with substitute.
11615       m = quiet_nan_substitute;
11616     }
11617   }
11618 
11619   if ((n == 0.0) && (m == 0.0) && (copysign(1.0, n) != copysign(1.0, m))) {
11620     return min ? -0.0 : 0.0;
11621   }
11622 
11623   return min ? fmin(n, m) : fmax(n, m);
11624 }
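
// A minimal sketch of the contract shared by the three MinMaxHelper
// overloads above (values chosen for illustration, not taken from the test
// vectors):
//
//   MinMaxHelper(1.0, kFP64QuietNaN, true);
//       // -> kFP64QuietNaN: with no substitute, quiet NaNs propagate,
//       //    modelling Fmin/Fmax.
//   MinMaxHelper(1.0, kFP64QuietNaN, true, kFP64PositiveInfinity);
//       // -> 1.0: the quiet NaN is first replaced by the substitute,
//       //    modelling Fminnm/Fmaxnm.
//
// A signalling NaN input is always quietened and returned, whichever
// variant is being modelled.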


static void FminFmaxDoubleHelper(
    double n, double m, double min, double max, double minnm, double maxnm) {
  SETUP_WITH_FEATURES(CPUFeatures::kFP);

  START();
  __ Fmov(d0, n);
  __ Fmov(d1, m);
  __ Fmin(d28, d0, d1);
  __ Fmax(d29, d0, d1);
  __ Fminnm(d30, d0, d1);
  __ Fmaxnm(d31, d0, d1);
  END();

  RUN();

  ASSERT_EQUAL_FP64(min, d28);
  ASSERT_EQUAL_FP64(max, d29);
  ASSERT_EQUAL_FP64(minnm, d30);
  ASSERT_EQUAL_FP64(maxnm, d31);

  TEARDOWN();
}


TEST(fmax_fmin_d) {
  // Use non-standard NaNs to check that the payload bits are preserved.
  double snan = RawbitsToDouble(0x7ff5555512345678);
  double qnan = RawbitsToDouble(0x7ffaaaaa87654321);

  double snan_processed = RawbitsToDouble(0x7ffd555512345678);
  double qnan_processed = qnan;

  VIXL_ASSERT(IsSignallingNaN(snan));
  VIXL_ASSERT(IsQuietNaN(qnan));
  VIXL_ASSERT(IsQuietNaN(snan_processed));
  VIXL_ASSERT(IsQuietNaN(qnan_processed));

  // Bootstrap tests.
  FminFmaxDoubleHelper(0, 0, 0, 0, 0, 0);
  FminFmaxDoubleHelper(0, 1, 0, 1, 0, 1);
  FminFmaxDoubleHelper(kFP64PositiveInfinity,
                       kFP64NegativeInfinity,
                       kFP64NegativeInfinity,
                       kFP64PositiveInfinity,
                       kFP64NegativeInfinity,
                       kFP64PositiveInfinity);
  FminFmaxDoubleHelper(snan,
                       0,
                       snan_processed,
                       snan_processed,
                       snan_processed,
                       snan_processed);
  FminFmaxDoubleHelper(0,
                       snan,
                       snan_processed,
                       snan_processed,
                       snan_processed,
                       snan_processed);
  FminFmaxDoubleHelper(qnan, 0, qnan_processed, qnan_processed, 0, 0);
  FminFmaxDoubleHelper(0, qnan, qnan_processed, qnan_processed, 0, 0);
  FminFmaxDoubleHelper(qnan,
                       snan,
                       snan_processed,
                       snan_processed,
                       snan_processed,
                       snan_processed);
  FminFmaxDoubleHelper(snan,
                       qnan,
                       snan_processed,
                       snan_processed,
                       snan_processed,
                       snan_processed);

  // Iterate over all combinations of inputs.
  double inputs[] = {DBL_MAX,
                     DBL_MIN,
                     1.0,
                     0.0,
                     -DBL_MAX,
                     -DBL_MIN,
                     -1.0,
                     -0.0,
                     kFP64PositiveInfinity,
                     kFP64NegativeInfinity,
                     kFP64QuietNaN,
                     kFP64SignallingNaN};

  const int count = sizeof(inputs) / sizeof(inputs[0]);

  for (int in = 0; in < count; in++) {
    double n = inputs[in];
    for (int im = 0; im < count; im++) {
      double m = inputs[im];
      FminFmaxDoubleHelper(n,
                           m,
                           MinMaxHelper(n, m, true),
                           MinMaxHelper(n, m, false),
                           MinMaxHelper(n, m, true, kFP64PositiveInfinity),
                           MinMaxHelper(n, m, false, kFP64NegativeInfinity));
    }
  }
}


static void FminFmaxFloatHelper(
    float n, float m, float min, float max, float minnm, float maxnm) {
  SETUP_WITH_FEATURES(CPUFeatures::kFP);

  START();
  __ Fmov(s0, n);
  __ Fmov(s1, m);
  __ Fmin(s28, s0, s1);
  __ Fmax(s29, s0, s1);
  __ Fminnm(s30, s0, s1);
  __ Fmaxnm(s31, s0, s1);
  END();

  RUN();

  ASSERT_EQUAL_FP32(min, s28);
  ASSERT_EQUAL_FP32(max, s29);
  ASSERT_EQUAL_FP32(minnm, s30);
  ASSERT_EQUAL_FP32(maxnm, s31);

  TEARDOWN();
}


TEST(fmax_fmin_s) {
  // Use non-standard NaNs to check that the payload bits are preserved.
  float snan = RawbitsToFloat(0x7f951234);
  float qnan = RawbitsToFloat(0x7fea8765);

  float snan_processed = RawbitsToFloat(0x7fd51234);
  float qnan_processed = qnan;

  VIXL_ASSERT(IsSignallingNaN(snan));
  VIXL_ASSERT(IsQuietNaN(qnan));
  VIXL_ASSERT(IsQuietNaN(snan_processed));
  VIXL_ASSERT(IsQuietNaN(qnan_processed));

  // Bootstrap tests.
  FminFmaxFloatHelper(0, 0, 0, 0, 0, 0);
  FminFmaxFloatHelper(0, 1, 0, 1, 0, 1);
  FminFmaxFloatHelper(kFP32PositiveInfinity,
                      kFP32NegativeInfinity,
                      kFP32NegativeInfinity,
                      kFP32PositiveInfinity,
                      kFP32NegativeInfinity,
                      kFP32PositiveInfinity);
  FminFmaxFloatHelper(snan,
                      0,
                      snan_processed,
                      snan_processed,
                      snan_processed,
                      snan_processed);
  FminFmaxFloatHelper(0,
                      snan,
                      snan_processed,
                      snan_processed,
                      snan_processed,
                      snan_processed);
  FminFmaxFloatHelper(qnan, 0, qnan_processed, qnan_processed, 0, 0);
  FminFmaxFloatHelper(0, qnan, qnan_processed, qnan_processed, 0, 0);
  FminFmaxFloatHelper(qnan,
                      snan,
                      snan_processed,
                      snan_processed,
                      snan_processed,
                      snan_processed);
  FminFmaxFloatHelper(snan,
                      qnan,
                      snan_processed,
                      snan_processed,
                      snan_processed,
                      snan_processed);

  // Iterate over all combinations of inputs.
  float inputs[] = {FLT_MAX,
                    FLT_MIN,
                    1.0,
                    0.0,
                    -FLT_MAX,
                    -FLT_MIN,
                    -1.0,
                    -0.0,
                    kFP32PositiveInfinity,
                    kFP32NegativeInfinity,
                    kFP32QuietNaN,
                    kFP32SignallingNaN};

  const int count = sizeof(inputs) / sizeof(inputs[0]);

  for (int in = 0; in < count; in++) {
    float n = inputs[in];
    for (int im = 0; im < count; im++) {
      float m = inputs[im];
      FminFmaxFloatHelper(n,
                          m,
                          MinMaxHelper(n, m, true),
                          MinMaxHelper(n, m, false),
                          MinMaxHelper(n, m, true, kFP32PositiveInfinity),
                          MinMaxHelper(n, m, false, kFP32NegativeInfinity));
    }
  }
}


static uint64_t Float16ToV4H(Float16 f) {
  uint64_t bits = static_cast<uint64_t>(Float16ToRawbits(f));
  return (bits << 48) | (bits << 32) | (bits << 16) | bits;
}
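
// For example, Float16(1.0) has raw encoding 0x3c00, so
// Float16ToV4H(Float16(1.0)) returns 0x3c003c003c003c00: the half-precision
// value replicated across the four 16-bit lanes of a D register.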


static void FminFmaxFloat16Helper(Float16 n,
                                  Float16 m,
                                  Float16 min,
                                  Float16 max,
                                  Float16 minnm,
                                  Float16 maxnm) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf,
                      CPUFeatures::kFPHalf);

  START();
  __ Fmov(h0, n);
  __ Fmov(h1, m);
  __ Fmov(v0.V8H(), n);
  __ Fmov(v1.V8H(), m);
  __ Fmin(h28, h0, h1);
  __ Fmin(v2.V4H(), v0.V4H(), v1.V4H());
  __ Fmin(v3.V8H(), v0.V8H(), v1.V8H());
  __ Fmax(h29, h0, h1);
  __ Fmax(v4.V4H(), v0.V4H(), v1.V4H());
  __ Fmax(v5.V8H(), v0.V8H(), v1.V8H());
  __ Fminnm(h30, h0, h1);
  __ Fminnm(v6.V4H(), v0.V4H(), v1.V4H());
  __ Fminnm(v7.V8H(), v0.V8H(), v1.V8H());
  __ Fmaxnm(h31, h0, h1);
  __ Fmaxnm(v8.V4H(), v0.V4H(), v1.V4H());
  __ Fmaxnm(v9.V8H(), v0.V8H(), v1.V8H());
  END();

  uint64_t min_vec = Float16ToV4H(min);
  uint64_t max_vec = Float16ToV4H(max);
  uint64_t minnm_vec = Float16ToV4H(minnm);
  uint64_t maxnm_vec = Float16ToV4H(maxnm);

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
  RUN();

  ASSERT_EQUAL_FP16(min, h28);
  ASSERT_EQUAL_FP16(max, h29);
  ASSERT_EQUAL_FP16(minnm, h30);
  ASSERT_EQUAL_FP16(maxnm, h31);

  ASSERT_EQUAL_128(0, min_vec, v2);
  ASSERT_EQUAL_128(min_vec, min_vec, v3);
  ASSERT_EQUAL_128(0, max_vec, v4);
  ASSERT_EQUAL_128(max_vec, max_vec, v5);
  ASSERT_EQUAL_128(0, minnm_vec, v6);
  ASSERT_EQUAL_128(minnm_vec, minnm_vec, v7);
  ASSERT_EQUAL_128(0, maxnm_vec, v8);
  ASSERT_EQUAL_128(maxnm_vec, maxnm_vec, v9);
#else
  USE(min_vec, max_vec, minnm_vec, maxnm_vec);
#endif  // VIXL_INCLUDE_SIMULATOR_AARCH64

  TEARDOWN();
}
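
// Note on the vector expectations above: the V4H forms write only the lower
// 64 bits of the destination and, like any AArch64 SIMD instruction using a
// 64-bit vector form, zero bits [127:64]; hence ASSERT_EQUAL_128(0, ...) for
// v2, v4, v6 and v8, while the V8H forms fill all 128 bits.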


TEST(fmax_fmin_h) {
  // Use non-standard NaNs to check that the payload bits are preserved.
  Float16 snan = RawbitsToFloat16(0x7c12);
  Float16 qnan = RawbitsToFloat16(0x7e34);

  Float16 snan_processed = RawbitsToFloat16(0x7e12);
  Float16 qnan_processed = qnan;

  VIXL_ASSERT(IsSignallingNaN(snan));
  VIXL_ASSERT(IsQuietNaN(qnan));
  VIXL_ASSERT(IsQuietNaN(snan_processed));
  VIXL_ASSERT(IsQuietNaN(qnan_processed));

  // Bootstrap tests.
  FminFmaxFloat16Helper(Float16(0),
                        Float16(0),
                        Float16(0),
                        Float16(0),
                        Float16(0),
                        Float16(0));
  FminFmaxFloat16Helper(Float16(0),
                        Float16(1),
                        Float16(0),
                        Float16(1),
                        Float16(0),
                        Float16(1));
  FminFmaxFloat16Helper(kFP16PositiveInfinity,
                        kFP16NegativeInfinity,
                        kFP16NegativeInfinity,
                        kFP16PositiveInfinity,
                        kFP16NegativeInfinity,
                        kFP16PositiveInfinity);
  FminFmaxFloat16Helper(snan,
                        Float16(0),
                        snan_processed,
                        snan_processed,
                        snan_processed,
                        snan_processed);
  FminFmaxFloat16Helper(Float16(0),
                        snan,
                        snan_processed,
                        snan_processed,
                        snan_processed,
                        snan_processed);
  FminFmaxFloat16Helper(qnan,
                        Float16(0),
                        qnan_processed,
                        qnan_processed,
                        Float16(0),
                        Float16(0));
  FminFmaxFloat16Helper(Float16(0),
                        qnan,
                        qnan_processed,
                        qnan_processed,
                        Float16(0),
                        Float16(0));
  FminFmaxFloat16Helper(qnan,
                        snan,
                        snan_processed,
                        snan_processed,
                        snan_processed,
                        snan_processed);
  FminFmaxFloat16Helper(snan,
                        qnan,
                        snan_processed,
                        snan_processed,
                        snan_processed,
                        snan_processed);

  // Iterate over all combinations of inputs.
  Float16 inputs[] = {RawbitsToFloat16(0x7bff),
                      RawbitsToFloat16(0x0400),
                      Float16(1.0),
                      Float16(0.0),
                      RawbitsToFloat16(0xfbff),
                      RawbitsToFloat16(0x8400),
                      Float16(-1.0),
                      Float16(-0.0),
                      kFP16PositiveInfinity,
                      kFP16NegativeInfinity,
                      kFP16QuietNaN,
                      kFP16SignallingNaN};

  const int count = sizeof(inputs) / sizeof(inputs[0]);

  for (int in = 0; in < count; in++) {
    Float16 n = inputs[in];
    for (int im = 0; im < count; im++) {
      Float16 m = inputs[im];
      FminFmaxFloat16Helper(n,
                            m,
                            MinMaxHelper(n, m, true),
                            MinMaxHelper(n, m, false),
                            MinMaxHelper(n, m, true, kFP16PositiveInfinity),
                            MinMaxHelper(n, m, false, kFP16NegativeInfinity));
    }
  }
}


TEST(fccmp) {
  SETUP_WITH_FEATURES(CPUFeatures::kFP);

  START();
  __ Fmov(s16, 0.0);
  __ Fmov(s17, 0.5);
  __ Fmov(d18, -0.5);
  __ Fmov(d19, -1.0);
  __ Mov(x20, 0);
  __ Mov(x21, 0x7ff0000000000001);  // Double precision NaN.
  __ Fmov(d21, x21);
  __ Mov(w22, 0x7f800001);  // Single precision NaN.
  __ Fmov(s22, w22);

  __ Cmp(x20, 0);
  __ Fccmp(s16, s16, NoFlag, eq);
  __ Mrs(x0, NZCV);

  __ Cmp(x20, 0);
  __ Fccmp(s16, s16, VFlag, ne);
  __ Mrs(x1, NZCV);

  __ Cmp(x20, 0);
  __ Fccmp(s16, s17, CFlag, ge);
  __ Mrs(x2, NZCV);

  __ Cmp(x20, 0);
  __ Fccmp(s16, s17, CVFlag, lt);
  __ Mrs(x3, NZCV);

  __ Cmp(x20, 0);
  __ Fccmp(d18, d18, ZFlag, le);
  __ Mrs(x4, NZCV);

  __ Cmp(x20, 0);
  __ Fccmp(d18, d18, ZVFlag, gt);
  __ Mrs(x5, NZCV);

  __ Cmp(x20, 0);
  __ Fccmp(d18, d19, ZCVFlag, ls);
  __ Mrs(x6, NZCV);

  __ Cmp(x20, 0);
  __ Fccmp(d18, d19, NFlag, hi);
  __ Mrs(x7, NZCV);

  // The Macro Assembler does not allow al or nv as condition.
  {
    ExactAssemblyScope scope(&masm, kInstructionSize);
    __ fccmp(s16, s16, NFlag, al);
  }
  __ Mrs(x8, NZCV);

  {
    ExactAssemblyScope scope(&masm, kInstructionSize);
    __ fccmp(d18, d18, NFlag, nv);
  }
  __ Mrs(x9, NZCV);

  __ Cmp(x20, 0);
  __ Fccmpe(s16, s16, NoFlag, eq);
  __ Mrs(x10, NZCV);

  __ Cmp(x20, 0);
  __ Fccmpe(d18, d19, ZCVFlag, ls);
  __ Mrs(x11, NZCV);

  __ Cmp(x20, 0);
  __ Fccmpe(d21, d21, NoFlag, eq);
  __ Mrs(x12, NZCV);

  __ Cmp(x20, 0);
  __ Fccmpe(s22, s22, NoFlag, eq);
  __ Mrs(x13, NZCV);
  END();

  RUN();

  ASSERT_EQUAL_32(ZCFlag, w0);
  ASSERT_EQUAL_32(VFlag, w1);
  ASSERT_EQUAL_32(NFlag, w2);
  ASSERT_EQUAL_32(CVFlag, w3);
  ASSERT_EQUAL_32(ZCFlag, w4);
  ASSERT_EQUAL_32(ZVFlag, w5);
  ASSERT_EQUAL_32(CFlag, w6);
  ASSERT_EQUAL_32(NFlag, w7);
  ASSERT_EQUAL_32(ZCFlag, w8);
  ASSERT_EQUAL_32(ZCFlag, w9);
  ASSERT_EQUAL_32(ZCFlag, w10);
  ASSERT_EQUAL_32(CFlag, w11);
  ASSERT_EQUAL_32(CVFlag, w12);
  ASSERT_EQUAL_32(CVFlag, w13);

  TEARDOWN();
}
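
// How to read the expectations above: when the condition holds, Fccmp sets
// NZCV from the floating-point comparison; otherwise it installs the
// immediate flag value. For example, w0 is ZCFlag because eq holds after
// Cmp(x20, 0) and comparing s16 with itself yields ZC, whereas w1 is VFlag
// because ne fails and the literal VFlag is used. Fccmpe differs from Fccmp
// only in also raising Invalid Operation for quiet NaN operands; the
// resulting flags are the same (CV for unordered), which is all this test
// observes.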


TEST(fccmp_h) {
  SETUP_WITH_FEATURES(CPUFeatures::kFP, CPUFeatures::kFPHalf);

  START();
  __ Fmov(h16, Float16(0.0));
  __ Fmov(h17, Float16(0.5));
  __ Mov(x20, 0);
  __ Fmov(h21, kFP16DefaultNaN);

  __ Cmp(x20, 0);
  __ Fccmp(h16, h16, NoFlag, eq);
  __ Mrs(x0, NZCV);

  __ Cmp(x20, 0);
  __ Fccmp(h16, h16, VFlag, ne);
  __ Mrs(x1, NZCV);

  __ Cmp(x20, 0);
  __ Fccmp(h16, h17, CFlag, ge);
  __ Mrs(x2, NZCV);

  __ Cmp(x20, 0);
  __ Fccmp(h16, h17, CVFlag, lt);
  __ Mrs(x3, NZCV);

  // The Macro Assembler does not allow al or nv as condition.
  {
    ExactAssemblyScope scope(&masm, kInstructionSize);
    __ fccmp(h16, h16, NFlag, al);
  }
  __ Mrs(x4, NZCV);
  {
    ExactAssemblyScope scope(&masm, kInstructionSize);
    __ fccmp(h16, h16, NFlag, nv);
  }
  __ Mrs(x5, NZCV);

  __ Cmp(x20, 0);
  __ Fccmpe(h16, h16, NoFlag, eq);
  __ Mrs(x6, NZCV);

  __ Cmp(x20, 0);
  __ Fccmpe(h16, h21, NoFlag, eq);
  __ Mrs(x7, NZCV);

  __ Cmp(x20, 0);
  __ Fccmpe(h21, h16, NoFlag, eq);
  __ Mrs(x8, NZCV);

  __ Cmp(x20, 0);
  __ Fccmpe(h21, h21, NoFlag, eq);
  __ Mrs(x9, NZCV);
  END();

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
  RUN();
  ASSERT_EQUAL_32(ZCFlag, w0);
  ASSERT_EQUAL_32(VFlag, w1);
  ASSERT_EQUAL_32(NFlag, w2);
  ASSERT_EQUAL_32(CVFlag, w3);
  ASSERT_EQUAL_32(ZCFlag, w4);
  ASSERT_EQUAL_32(ZCFlag, w5);
  ASSERT_EQUAL_32(ZCFlag, w6);
  ASSERT_EQUAL_32(CVFlag, w7);
  ASSERT_EQUAL_32(CVFlag, w8);
  ASSERT_EQUAL_32(CVFlag, w9);
#endif

  TEARDOWN();
}


TEST(fcmp) {
  SETUP_WITH_FEATURES(CPUFeatures::kFP);

  START();

  // Some of these tests require a floating-point scratch register assigned to
  // the macro assembler, but most do not.
  {
    UseScratchRegisterScope temps(&masm);
    temps.ExcludeAll();
    temps.Include(ip0, ip1);

    __ Fmov(s8, 0.0);
    __ Fmov(s9, 0.5);
    __ Mov(w18, 0x7f800001);  // Single precision NaN.
    __ Fmov(s18, w18);

    __ Fcmp(s8, s8);
    __ Mrs(x0, NZCV);
    __ Fcmp(s8, s9);
    __ Mrs(x1, NZCV);
    __ Fcmp(s9, s8);
    __ Mrs(x2, NZCV);
    __ Fcmp(s8, s18);
    __ Mrs(x3, NZCV);
    __ Fcmp(s18, s18);
    __ Mrs(x4, NZCV);
    __ Fcmp(s8, 0.0);
    __ Mrs(x5, NZCV);
    temps.Include(d0);
    __ Fcmp(s8, 255.0);
    temps.Exclude(d0);
    __ Mrs(x6, NZCV);

    __ Fmov(d19, 0.0);
    __ Fmov(d20, 0.5);
    __ Mov(x21, 0x7ff0000000000001);  // Double precision NaN.
    __ Fmov(d21, x21);

    __ Fcmp(d19, d19);
    __ Mrs(x10, NZCV);
    __ Fcmp(d19, d20);
    __ Mrs(x11, NZCV);
    __ Fcmp(d20, d19);
    __ Mrs(x12, NZCV);
    __ Fcmp(d19, d21);
    __ Mrs(x13, NZCV);
    __ Fcmp(d21, d21);
    __ Mrs(x14, NZCV);
    __ Fcmp(d19, 0.0);
    __ Mrs(x15, NZCV);
    temps.Include(d0);
    __ Fcmp(d19, 12.3456);
    temps.Exclude(d0);
    __ Mrs(x16, NZCV);

    __ Fcmpe(s8, s8);
    __ Mrs(x22, NZCV);
    __ Fcmpe(s8, 0.0);
    __ Mrs(x23, NZCV);
    __ Fcmpe(d19, d19);
    __ Mrs(x24, NZCV);
    __ Fcmpe(d19, 0.0);
    __ Mrs(x25, NZCV);
    __ Fcmpe(s18, s18);
    __ Mrs(x26, NZCV);
    __ Fcmpe(d21, d21);
    __ Mrs(x27, NZCV);
  }

  END();

  RUN();

  ASSERT_EQUAL_32(ZCFlag, w0);
  ASSERT_EQUAL_32(NFlag, w1);
  ASSERT_EQUAL_32(CFlag, w2);
  ASSERT_EQUAL_32(CVFlag, w3);
  ASSERT_EQUAL_32(CVFlag, w4);
  ASSERT_EQUAL_32(ZCFlag, w5);
  ASSERT_EQUAL_32(NFlag, w6);
  ASSERT_EQUAL_32(ZCFlag, w10);
  ASSERT_EQUAL_32(NFlag, w11);
  ASSERT_EQUAL_32(CFlag, w12);
  ASSERT_EQUAL_32(CVFlag, w13);
  ASSERT_EQUAL_32(CVFlag, w14);
  ASSERT_EQUAL_32(ZCFlag, w15);
  ASSERT_EQUAL_32(NFlag, w16);
  ASSERT_EQUAL_32(ZCFlag, w22);
  ASSERT_EQUAL_32(ZCFlag, w23);
  ASSERT_EQUAL_32(ZCFlag, w24);
  ASSERT_EQUAL_32(ZCFlag, w25);
  ASSERT_EQUAL_32(CVFlag, w26);
  ASSERT_EQUAL_32(CVFlag, w27);

  TEARDOWN();
}


TEST(fcmp_h) {
  SETUP_WITH_FEATURES(CPUFeatures::kFP, CPUFeatures::kFPHalf);

  START();

  // Some of these tests require a floating-point scratch register assigned to
  // the macro assembler, but most do not.
  {
    UseScratchRegisterScope temps(&masm);
    temps.ExcludeAll();
    temps.Include(ip0, ip1);

    __ Fmov(h8, Float16(0.0));
    __ Fmov(h9, Float16(0.5));
    __ Fmov(h18, kFP16DefaultNaN);

    __ Fcmp(h8, h8);
    __ Mrs(x0, NZCV);
    __ Fcmp(h8, h9);
    __ Mrs(x1, NZCV);
    __ Fcmp(h9, h8);
    __ Mrs(x2, NZCV);
    __ Fcmp(h8, h18);
    __ Mrs(x3, NZCV);
    __ Fcmp(h18, h18);
    __ Mrs(x4, NZCV);
    __ Fcmp(h8, 0.0);
    __ Mrs(x5, NZCV);
    temps.Include(d0);
    __ Fcmp(h8, 255.0);
    temps.Exclude(d0);
    __ Mrs(x6, NZCV);

    __ Fcmpe(h8, h8);
    __ Mrs(x22, NZCV);
    __ Fcmpe(h8, 0.0);
    __ Mrs(x23, NZCV);
    __ Fcmpe(h8, h18);
    __ Mrs(x24, NZCV);
    __ Fcmpe(h18, h8);
    __ Mrs(x25, NZCV);
    __ Fcmpe(h18, h18);
    __ Mrs(x26, NZCV);
  }

  END();

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
  RUN();
  ASSERT_EQUAL_32(ZCFlag, w0);
  ASSERT_EQUAL_32(NFlag, w1);
  ASSERT_EQUAL_32(CFlag, w2);
  ASSERT_EQUAL_32(CVFlag, w3);
  ASSERT_EQUAL_32(CVFlag, w4);
  ASSERT_EQUAL_32(ZCFlag, w5);
  ASSERT_EQUAL_32(NFlag, w6);
  ASSERT_EQUAL_32(ZCFlag, w22);
  ASSERT_EQUAL_32(ZCFlag, w23);
  ASSERT_EQUAL_32(CVFlag, w24);
  ASSERT_EQUAL_32(CVFlag, w25);
  ASSERT_EQUAL_32(CVFlag, w26);
#endif

  TEARDOWN();
}


TEST(fcsel) {
  SETUP_WITH_FEATURES(CPUFeatures::kFP);

  START();
  __ Mov(x16, 0);
  __ Fmov(s16, 1.0);
  __ Fmov(s17, 2.0);
  __ Fmov(d18, 3.0);
  __ Fmov(d19, 4.0);

  __ Cmp(x16, 0);
  __ Fcsel(s0, s16, s17, eq);
  __ Fcsel(s1, s16, s17, ne);
  __ Fcsel(d2, d18, d19, eq);
  __ Fcsel(d3, d18, d19, ne);
  // The Macro Assembler does not allow al or nv as condition.
  {
    ExactAssemblyScope scope(&masm, 2 * kInstructionSize);
    __ fcsel(s4, s16, s17, al);
    __ fcsel(d5, d18, d19, nv);
  }
  END();

  RUN();

  ASSERT_EQUAL_FP32(1.0, s0);
  ASSERT_EQUAL_FP32(2.0, s1);
  ASSERT_EQUAL_FP64(3.0, d2);
  ASSERT_EQUAL_FP64(4.0, d3);
  ASSERT_EQUAL_FP32(1.0, s4);
  ASSERT_EQUAL_FP64(3.0, d5);

  TEARDOWN();
}
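
// Note: in the A64 encoding both al and nv behave as "always", so the fcsel
// instructions in the ExactAssemblyScope above select their first source
// register; this is why s4 and d5 receive 1.0 and 3.0.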


TEST(fcsel_h) {
  SETUP_WITH_FEATURES(CPUFeatures::kFP, CPUFeatures::kFPHalf);

  START();
  __ Mov(x16, 0);
  __ Fmov(h16, Float16(1.0));
  __ Fmov(h17, Float16(2.0));

  __ Cmp(x16, 0);
  __ Fcsel(h0, h16, h17, eq);
  __ Fcsel(h1, h16, h17, ne);
  // The Macro Assembler does not allow al or nv as condition.
  {
    ExactAssemblyScope scope(&masm, 2 * kInstructionSize);
    __ fcsel(h4, h16, h17, al);
    __ fcsel(h5, h16, h17, nv);
  }
  END();

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
  RUN();
  ASSERT_EQUAL_FP16(Float16(1.0), h0);
  ASSERT_EQUAL_FP16(Float16(2.0), h1);
  ASSERT_EQUAL_FP16(Float16(1.0), h4);
  ASSERT_EQUAL_FP16(Float16(1.0), h5);
#endif

  TEARDOWN();
}


TEST(fneg) {
  SETUP_WITH_FEATURES(CPUFeatures::kFP);

  START();
  __ Fmov(s16, 1.0);
  __ Fmov(s17, 0.0);
  __ Fmov(s18, kFP32PositiveInfinity);
  __ Fmov(d19, 1.0);
  __ Fmov(d20, 0.0);
  __ Fmov(d21, kFP64PositiveInfinity);

  __ Fneg(s0, s16);
  __ Fneg(s1, s0);
  __ Fneg(s2, s17);
  __ Fneg(s3, s2);
  __ Fneg(s4, s18);
  __ Fneg(s5, s4);
  __ Fneg(d6, d19);
  __ Fneg(d7, d6);
  __ Fneg(d8, d20);
  __ Fneg(d9, d8);
  __ Fneg(d10, d21);
  __ Fneg(d11, d10);
  END();

  RUN();

  ASSERT_EQUAL_FP32(-1.0, s0);
  ASSERT_EQUAL_FP32(1.0, s1);
  ASSERT_EQUAL_FP32(-0.0, s2);
  ASSERT_EQUAL_FP32(0.0, s3);
  ASSERT_EQUAL_FP32(kFP32NegativeInfinity, s4);
  ASSERT_EQUAL_FP32(kFP32PositiveInfinity, s5);
  ASSERT_EQUAL_FP64(-1.0, d6);
  ASSERT_EQUAL_FP64(1.0, d7);
  ASSERT_EQUAL_FP64(-0.0, d8);
  ASSERT_EQUAL_FP64(0.0, d9);
  ASSERT_EQUAL_FP64(kFP64NegativeInfinity, d10);
  ASSERT_EQUAL_FP64(kFP64PositiveInfinity, d11);

  TEARDOWN();
}


TEST(fabs) {
  SETUP_WITH_FEATURES(CPUFeatures::kFP);

  START();
  __ Fmov(s16, -1.0);
  __ Fmov(s17, -0.0);
  __ Fmov(s18, kFP32NegativeInfinity);
  __ Fmov(d19, -1.0);
  __ Fmov(d20, -0.0);
  __ Fmov(d21, kFP64NegativeInfinity);

  __ Fabs(s0, s16);
  __ Fabs(s1, s0);
  __ Fabs(s2, s17);
  __ Fabs(s3, s18);
  __ Fabs(d4, d19);
  __ Fabs(d5, d4);
  __ Fabs(d6, d20);
  __ Fabs(d7, d21);
  END();

  RUN();

  ASSERT_EQUAL_FP32(1.0, s0);
  ASSERT_EQUAL_FP32(1.0, s1);
  ASSERT_EQUAL_FP32(0.0, s2);
  ASSERT_EQUAL_FP32(kFP32PositiveInfinity, s3);
  ASSERT_EQUAL_FP64(1.0, d4);
  ASSERT_EQUAL_FP64(1.0, d5);
  ASSERT_EQUAL_FP64(0.0, d6);
  ASSERT_EQUAL_FP64(kFP64PositiveInfinity, d7);

  TEARDOWN();
}


TEST(fsqrt) {
  SETUP_WITH_FEATURES(CPUFeatures::kFP);

  START();
  __ Fmov(s16, 0.0);
  __ Fmov(s17, 1.0);
  __ Fmov(s18, 0.25);
  __ Fmov(s19, 65536.0);
  __ Fmov(s20, -0.0);
  __ Fmov(s21, kFP32PositiveInfinity);
  __ Fmov(s22, -1.0);
  __ Fmov(d23, 0.0);
  __ Fmov(d24, 1.0);
  __ Fmov(d25, 0.25);
  __ Fmov(d26, 4294967296.0);
  __ Fmov(d27, -0.0);
  __ Fmov(d28, kFP64PositiveInfinity);
  __ Fmov(d29, -1.0);

  __ Fsqrt(s0, s16);
  __ Fsqrt(s1, s17);
  __ Fsqrt(s2, s18);
  __ Fsqrt(s3, s19);
  __ Fsqrt(s4, s20);
  __ Fsqrt(s5, s21);
  __ Fsqrt(s6, s22);
  __ Fsqrt(d7, d23);
  __ Fsqrt(d8, d24);
  __ Fsqrt(d9, d25);
  __ Fsqrt(d10, d26);
  __ Fsqrt(d11, d27);
  __ Fsqrt(d12, d28);
  __ Fsqrt(d13, d29);
  END();

  RUN();

  ASSERT_EQUAL_FP32(0.0, s0);
  ASSERT_EQUAL_FP32(1.0, s1);
  ASSERT_EQUAL_FP32(0.5, s2);
  ASSERT_EQUAL_FP32(256.0, s3);
  ASSERT_EQUAL_FP32(-0.0, s4);
  ASSERT_EQUAL_FP32(kFP32PositiveInfinity, s5);
  ASSERT_EQUAL_FP32(kFP32DefaultNaN, s6);
  ASSERT_EQUAL_FP64(0.0, d7);
  ASSERT_EQUAL_FP64(1.0, d8);
  ASSERT_EQUAL_FP64(0.5, d9);
  ASSERT_EQUAL_FP64(65536.0, d10);
  ASSERT_EQUAL_FP64(-0.0, d11);
  ASSERT_EQUAL_FP64(kFP64PositiveInfinity, d12);
  ASSERT_EQUAL_FP64(kFP64DefaultNaN, d13);

  TEARDOWN();
}
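
// The special cases above follow IEEE 754: sqrt(-0.0) is -0.0 (s4 and d11),
// while the square root of any other negative number is an Invalid Operation
// and produces the default NaN (s6 and d13).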


TEST(frinta) {
  SETUP_WITH_FEATURES(CPUFeatures::kFP);

  START();
  __ Fmov(s16, 1.0);
  __ Fmov(s17, 1.1);
  __ Fmov(s18, 1.5);
  __ Fmov(s19, 1.9);
  __ Fmov(s20, 2.5);
  __ Fmov(s21, -1.5);
  __ Fmov(s22, -2.5);
  __ Fmov(s23, kFP32PositiveInfinity);
  __ Fmov(s24, kFP32NegativeInfinity);
  __ Fmov(s25, 0.0);
  __ Fmov(s26, -0.0);
  __ Fmov(s27, -0.2);

  __ Frinta(s0, s16);
  __ Frinta(s1, s17);
  __ Frinta(s2, s18);
  __ Frinta(s3, s19);
  __ Frinta(s4, s20);
  __ Frinta(s5, s21);
  __ Frinta(s6, s22);
  __ Frinta(s7, s23);
  __ Frinta(s8, s24);
  __ Frinta(s9, s25);
  __ Frinta(s10, s26);
  __ Frinta(s11, s27);

  __ Fmov(d16, 1.0);
  __ Fmov(d17, 1.1);
  __ Fmov(d18, 1.5);
  __ Fmov(d19, 1.9);
  __ Fmov(d20, 2.5);
  __ Fmov(d21, -1.5);
  __ Fmov(d22, -2.5);
  __ Fmov(d23, kFP32PositiveInfinity);
  __ Fmov(d24, kFP32NegativeInfinity);
  __ Fmov(d25, 0.0);
  __ Fmov(d26, -0.0);
  __ Fmov(d27, -0.2);

  __ Frinta(d12, d16);
  __ Frinta(d13, d17);
  __ Frinta(d14, d18);
  __ Frinta(d15, d19);
  __ Frinta(d16, d20);
  __ Frinta(d17, d21);
  __ Frinta(d18, d22);
  __ Frinta(d19, d23);
  __ Frinta(d20, d24);
  __ Frinta(d21, d25);
  __ Frinta(d22, d26);
  __ Frinta(d23, d27);
  END();

  RUN();

  ASSERT_EQUAL_FP32(1.0, s0);
  ASSERT_EQUAL_FP32(1.0, s1);
  ASSERT_EQUAL_FP32(2.0, s2);
  ASSERT_EQUAL_FP32(2.0, s3);
  ASSERT_EQUAL_FP32(3.0, s4);
  ASSERT_EQUAL_FP32(-2.0, s5);
  ASSERT_EQUAL_FP32(-3.0, s6);
  ASSERT_EQUAL_FP32(kFP32PositiveInfinity, s7);
  ASSERT_EQUAL_FP32(kFP32NegativeInfinity, s8);
  ASSERT_EQUAL_FP32(0.0, s9);
  ASSERT_EQUAL_FP32(-0.0, s10);
  ASSERT_EQUAL_FP32(-0.0, s11);
  ASSERT_EQUAL_FP64(1.0, d12);
  ASSERT_EQUAL_FP64(1.0, d13);
  ASSERT_EQUAL_FP64(2.0, d14);
  ASSERT_EQUAL_FP64(2.0, d15);
  ASSERT_EQUAL_FP64(3.0, d16);
  ASSERT_EQUAL_FP64(-2.0, d17);
  ASSERT_EQUAL_FP64(-3.0, d18);
  ASSERT_EQUAL_FP64(kFP64PositiveInfinity, d19);
  ASSERT_EQUAL_FP64(kFP64NegativeInfinity, d20);
  ASSERT_EQUAL_FP64(0.0, d21);
  ASSERT_EQUAL_FP64(-0.0, d22);
  ASSERT_EQUAL_FP64(-0.0, d23);

  TEARDOWN();
}
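
// The frinta test above and the frint* tests below differ only in rounding
// mode, which is easiest to see in the 2.5 and -2.5 expectations: frinta
// rounds ties away from zero (2.5 -> 3.0, -2.5 -> -3.0), frintn rounds ties
// to even (2.5 -> 2.0, -2.5 -> -2.0), frintm rounds toward minus infinity,
// frintp toward plus infinity, and frintz toward zero. frinti and frintx
// follow the FPCR rounding mode, which VIXL fixes at round-to-nearest.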


TEST(frinti) {
  // VIXL only supports the round-to-nearest FPCR mode, so this test has the
  // same results as frintn.
  SETUP_WITH_FEATURES(CPUFeatures::kFP);

  START();
  __ Fmov(s16, 1.0);
  __ Fmov(s17, 1.1);
  __ Fmov(s18, 1.5);
  __ Fmov(s19, 1.9);
  __ Fmov(s20, 2.5);
  __ Fmov(s21, -1.5);
  __ Fmov(s22, -2.5);
  __ Fmov(s23, kFP32PositiveInfinity);
  __ Fmov(s24, kFP32NegativeInfinity);
  __ Fmov(s25, 0.0);
  __ Fmov(s26, -0.0);
  __ Fmov(s27, -0.2);

  __ Frinti(s0, s16);
  __ Frinti(s1, s17);
  __ Frinti(s2, s18);
  __ Frinti(s3, s19);
  __ Frinti(s4, s20);
  __ Frinti(s5, s21);
  __ Frinti(s6, s22);
  __ Frinti(s7, s23);
  __ Frinti(s8, s24);
  __ Frinti(s9, s25);
  __ Frinti(s10, s26);
  __ Frinti(s11, s27);

  __ Fmov(d16, 1.0);
  __ Fmov(d17, 1.1);
  __ Fmov(d18, 1.5);
  __ Fmov(d19, 1.9);
  __ Fmov(d20, 2.5);
  __ Fmov(d21, -1.5);
  __ Fmov(d22, -2.5);
  __ Fmov(d23, kFP32PositiveInfinity);
  __ Fmov(d24, kFP32NegativeInfinity);
  __ Fmov(d25, 0.0);
  __ Fmov(d26, -0.0);
  __ Fmov(d27, -0.2);

  __ Frinti(d12, d16);
  __ Frinti(d13, d17);
  __ Frinti(d14, d18);
  __ Frinti(d15, d19);
  __ Frinti(d16, d20);
  __ Frinti(d17, d21);
  __ Frinti(d18, d22);
  __ Frinti(d19, d23);
  __ Frinti(d20, d24);
  __ Frinti(d21, d25);
  __ Frinti(d22, d26);
  __ Frinti(d23, d27);
  END();

  RUN();

  ASSERT_EQUAL_FP32(1.0, s0);
  ASSERT_EQUAL_FP32(1.0, s1);
  ASSERT_EQUAL_FP32(2.0, s2);
  ASSERT_EQUAL_FP32(2.0, s3);
  ASSERT_EQUAL_FP32(2.0, s4);
  ASSERT_EQUAL_FP32(-2.0, s5);
  ASSERT_EQUAL_FP32(-2.0, s6);
  ASSERT_EQUAL_FP32(kFP32PositiveInfinity, s7);
  ASSERT_EQUAL_FP32(kFP32NegativeInfinity, s8);
  ASSERT_EQUAL_FP32(0.0, s9);
  ASSERT_EQUAL_FP32(-0.0, s10);
  ASSERT_EQUAL_FP32(-0.0, s11);
  ASSERT_EQUAL_FP64(1.0, d12);
  ASSERT_EQUAL_FP64(1.0, d13);
  ASSERT_EQUAL_FP64(2.0, d14);
  ASSERT_EQUAL_FP64(2.0, d15);
  ASSERT_EQUAL_FP64(2.0, d16);
  ASSERT_EQUAL_FP64(-2.0, d17);
  ASSERT_EQUAL_FP64(-2.0, d18);
  ASSERT_EQUAL_FP64(kFP64PositiveInfinity, d19);
  ASSERT_EQUAL_FP64(kFP64NegativeInfinity, d20);
  ASSERT_EQUAL_FP64(0.0, d21);
  ASSERT_EQUAL_FP64(-0.0, d22);
  ASSERT_EQUAL_FP64(-0.0, d23);

  TEARDOWN();
}


TEST(frintm) {
  SETUP_WITH_FEATURES(CPUFeatures::kFP);

  START();
  __ Fmov(s16, 1.0);
  __ Fmov(s17, 1.1);
  __ Fmov(s18, 1.5);
  __ Fmov(s19, 1.9);
  __ Fmov(s20, 2.5);
  __ Fmov(s21, -1.5);
  __ Fmov(s22, -2.5);
  __ Fmov(s23, kFP32PositiveInfinity);
  __ Fmov(s24, kFP32NegativeInfinity);
  __ Fmov(s25, 0.0);
  __ Fmov(s26, -0.0);
  __ Fmov(s27, -0.2);

  __ Frintm(s0, s16);
  __ Frintm(s1, s17);
  __ Frintm(s2, s18);
  __ Frintm(s3, s19);
  __ Frintm(s4, s20);
  __ Frintm(s5, s21);
  __ Frintm(s6, s22);
  __ Frintm(s7, s23);
  __ Frintm(s8, s24);
  __ Frintm(s9, s25);
  __ Frintm(s10, s26);
  __ Frintm(s11, s27);

  __ Fmov(d16, 1.0);
  __ Fmov(d17, 1.1);
  __ Fmov(d18, 1.5);
  __ Fmov(d19, 1.9);
  __ Fmov(d20, 2.5);
  __ Fmov(d21, -1.5);
  __ Fmov(d22, -2.5);
  __ Fmov(d23, kFP32PositiveInfinity);
  __ Fmov(d24, kFP32NegativeInfinity);
  __ Fmov(d25, 0.0);
  __ Fmov(d26, -0.0);
  __ Fmov(d27, -0.2);

  __ Frintm(d12, d16);
  __ Frintm(d13, d17);
  __ Frintm(d14, d18);
  __ Frintm(d15, d19);
  __ Frintm(d16, d20);
  __ Frintm(d17, d21);
  __ Frintm(d18, d22);
  __ Frintm(d19, d23);
  __ Frintm(d20, d24);
  __ Frintm(d21, d25);
  __ Frintm(d22, d26);
  __ Frintm(d23, d27);
  END();

  RUN();

  ASSERT_EQUAL_FP32(1.0, s0);
  ASSERT_EQUAL_FP32(1.0, s1);
  ASSERT_EQUAL_FP32(1.0, s2);
  ASSERT_EQUAL_FP32(1.0, s3);
  ASSERT_EQUAL_FP32(2.0, s4);
  ASSERT_EQUAL_FP32(-2.0, s5);
  ASSERT_EQUAL_FP32(-3.0, s6);
  ASSERT_EQUAL_FP32(kFP32PositiveInfinity, s7);
  ASSERT_EQUAL_FP32(kFP32NegativeInfinity, s8);
  ASSERT_EQUAL_FP32(0.0, s9);
  ASSERT_EQUAL_FP32(-0.0, s10);
  ASSERT_EQUAL_FP32(-1.0, s11);
  ASSERT_EQUAL_FP64(1.0, d12);
  ASSERT_EQUAL_FP64(1.0, d13);
  ASSERT_EQUAL_FP64(1.0, d14);
  ASSERT_EQUAL_FP64(1.0, d15);
  ASSERT_EQUAL_FP64(2.0, d16);
  ASSERT_EQUAL_FP64(-2.0, d17);
  ASSERT_EQUAL_FP64(-3.0, d18);
  ASSERT_EQUAL_FP64(kFP64PositiveInfinity, d19);
  ASSERT_EQUAL_FP64(kFP64NegativeInfinity, d20);
  ASSERT_EQUAL_FP64(0.0, d21);
  ASSERT_EQUAL_FP64(-0.0, d22);
  ASSERT_EQUAL_FP64(-1.0, d23);

  TEARDOWN();
}


TEST(frintn) {
  SETUP_WITH_FEATURES(CPUFeatures::kFP);

  START();
  __ Fmov(s16, 1.0);
  __ Fmov(s17, 1.1);
  __ Fmov(s18, 1.5);
  __ Fmov(s19, 1.9);
  __ Fmov(s20, 2.5);
  __ Fmov(s21, -1.5);
  __ Fmov(s22, -2.5);
  __ Fmov(s23, kFP32PositiveInfinity);
  __ Fmov(s24, kFP32NegativeInfinity);
  __ Fmov(s25, 0.0);
  __ Fmov(s26, -0.0);
  __ Fmov(s27, -0.2);

  __ Frintn(s0, s16);
  __ Frintn(s1, s17);
  __ Frintn(s2, s18);
  __ Frintn(s3, s19);
  __ Frintn(s4, s20);
  __ Frintn(s5, s21);
  __ Frintn(s6, s22);
  __ Frintn(s7, s23);
  __ Frintn(s8, s24);
  __ Frintn(s9, s25);
  __ Frintn(s10, s26);
  __ Frintn(s11, s27);

  __ Fmov(d16, 1.0);
  __ Fmov(d17, 1.1);
  __ Fmov(d18, 1.5);
  __ Fmov(d19, 1.9);
  __ Fmov(d20, 2.5);
  __ Fmov(d21, -1.5);
  __ Fmov(d22, -2.5);
  __ Fmov(d23, kFP32PositiveInfinity);
  __ Fmov(d24, kFP32NegativeInfinity);
  __ Fmov(d25, 0.0);
  __ Fmov(d26, -0.0);
  __ Fmov(d27, -0.2);

  __ Frintn(d12, d16);
  __ Frintn(d13, d17);
  __ Frintn(d14, d18);
  __ Frintn(d15, d19);
  __ Frintn(d16, d20);
  __ Frintn(d17, d21);
  __ Frintn(d18, d22);
  __ Frintn(d19, d23);
  __ Frintn(d20, d24);
  __ Frintn(d21, d25);
  __ Frintn(d22, d26);
  __ Frintn(d23, d27);
  END();

  RUN();

  ASSERT_EQUAL_FP32(1.0, s0);
  ASSERT_EQUAL_FP32(1.0, s1);
  ASSERT_EQUAL_FP32(2.0, s2);
  ASSERT_EQUAL_FP32(2.0, s3);
  ASSERT_EQUAL_FP32(2.0, s4);
  ASSERT_EQUAL_FP32(-2.0, s5);
  ASSERT_EQUAL_FP32(-2.0, s6);
  ASSERT_EQUAL_FP32(kFP32PositiveInfinity, s7);
  ASSERT_EQUAL_FP32(kFP32NegativeInfinity, s8);
  ASSERT_EQUAL_FP32(0.0, s9);
  ASSERT_EQUAL_FP32(-0.0, s10);
  ASSERT_EQUAL_FP32(-0.0, s11);
  ASSERT_EQUAL_FP64(1.0, d12);
  ASSERT_EQUAL_FP64(1.0, d13);
  ASSERT_EQUAL_FP64(2.0, d14);
  ASSERT_EQUAL_FP64(2.0, d15);
  ASSERT_EQUAL_FP64(2.0, d16);
  ASSERT_EQUAL_FP64(-2.0, d17);
  ASSERT_EQUAL_FP64(-2.0, d18);
  ASSERT_EQUAL_FP64(kFP64PositiveInfinity, d19);
  ASSERT_EQUAL_FP64(kFP64NegativeInfinity, d20);
  ASSERT_EQUAL_FP64(0.0, d21);
  ASSERT_EQUAL_FP64(-0.0, d22);
  ASSERT_EQUAL_FP64(-0.0, d23);

  TEARDOWN();
}


TEST(frintp) {
  SETUP_WITH_FEATURES(CPUFeatures::kFP);

  START();
  __ Fmov(s16, 1.0);
  __ Fmov(s17, 1.1);
  __ Fmov(s18, 1.5);
  __ Fmov(s19, 1.9);
  __ Fmov(s20, 2.5);
  __ Fmov(s21, -1.5);
  __ Fmov(s22, -2.5);
  __ Fmov(s23, kFP32PositiveInfinity);
  __ Fmov(s24, kFP32NegativeInfinity);
  __ Fmov(s25, 0.0);
  __ Fmov(s26, -0.0);
  __ Fmov(s27, -0.2);

  __ Frintp(s0, s16);
  __ Frintp(s1, s17);
  __ Frintp(s2, s18);
  __ Frintp(s3, s19);
  __ Frintp(s4, s20);
  __ Frintp(s5, s21);
  __ Frintp(s6, s22);
  __ Frintp(s7, s23);
  __ Frintp(s8, s24);
  __ Frintp(s9, s25);
  __ Frintp(s10, s26);
  __ Frintp(s11, s27);

  __ Fmov(d16, 1.0);
  __ Fmov(d17, 1.1);
  __ Fmov(d18, 1.5);
  __ Fmov(d19, 1.9);
  __ Fmov(d20, 2.5);
  __ Fmov(d21, -1.5);
  __ Fmov(d22, -2.5);
  __ Fmov(d23, kFP32PositiveInfinity);
  __ Fmov(d24, kFP32NegativeInfinity);
  __ Fmov(d25, 0.0);
  __ Fmov(d26, -0.0);
  __ Fmov(d27, -0.2);

  __ Frintp(d12, d16);
  __ Frintp(d13, d17);
  __ Frintp(d14, d18);
  __ Frintp(d15, d19);
  __ Frintp(d16, d20);
  __ Frintp(d17, d21);
  __ Frintp(d18, d22);
  __ Frintp(d19, d23);
  __ Frintp(d20, d24);
  __ Frintp(d21, d25);
  __ Frintp(d22, d26);
  __ Frintp(d23, d27);
  END();

  RUN();

  ASSERT_EQUAL_FP32(1.0, s0);
  ASSERT_EQUAL_FP32(2.0, s1);
  ASSERT_EQUAL_FP32(2.0, s2);
  ASSERT_EQUAL_FP32(2.0, s3);
  ASSERT_EQUAL_FP32(3.0, s4);
  ASSERT_EQUAL_FP32(-1.0, s5);
  ASSERT_EQUAL_FP32(-2.0, s6);
  ASSERT_EQUAL_FP32(kFP32PositiveInfinity, s7);
  ASSERT_EQUAL_FP32(kFP32NegativeInfinity, s8);
  ASSERT_EQUAL_FP32(0.0, s9);
  ASSERT_EQUAL_FP32(-0.0, s10);
  ASSERT_EQUAL_FP32(-0.0, s11);
  ASSERT_EQUAL_FP64(1.0, d12);
  ASSERT_EQUAL_FP64(2.0, d13);
  ASSERT_EQUAL_FP64(2.0, d14);
  ASSERT_EQUAL_FP64(2.0, d15);
  ASSERT_EQUAL_FP64(3.0, d16);
  ASSERT_EQUAL_FP64(-1.0, d17);
  ASSERT_EQUAL_FP64(-2.0, d18);
  ASSERT_EQUAL_FP64(kFP64PositiveInfinity, d19);
  ASSERT_EQUAL_FP64(kFP64NegativeInfinity, d20);
  ASSERT_EQUAL_FP64(0.0, d21);
  ASSERT_EQUAL_FP64(-0.0, d22);
  ASSERT_EQUAL_FP64(-0.0, d23);

  TEARDOWN();
}


TEST(frintx) {
  // VIXL only supports the round-to-nearest FPCR mode, and it doesn't support
  // FP exceptions, so this test has the same results as frintn (and frinti).
  SETUP_WITH_FEATURES(CPUFeatures::kFP);

  START();
  __ Fmov(s16, 1.0);
  __ Fmov(s17, 1.1);
  __ Fmov(s18, 1.5);
  __ Fmov(s19, 1.9);
  __ Fmov(s20, 2.5);
  __ Fmov(s21, -1.5);
  __ Fmov(s22, -2.5);
  __ Fmov(s23, kFP32PositiveInfinity);
  __ Fmov(s24, kFP32NegativeInfinity);
  __ Fmov(s25, 0.0);
  __ Fmov(s26, -0.0);
  __ Fmov(s27, -0.2);

  __ Frintx(s0, s16);
  __ Frintx(s1, s17);
  __ Frintx(s2, s18);
  __ Frintx(s3, s19);
  __ Frintx(s4, s20);
  __ Frintx(s5, s21);
  __ Frintx(s6, s22);
  __ Frintx(s7, s23);
  __ Frintx(s8, s24);
  __ Frintx(s9, s25);
  __ Frintx(s10, s26);
  __ Frintx(s11, s27);

  __ Fmov(d16, 1.0);
  __ Fmov(d17, 1.1);
  __ Fmov(d18, 1.5);
  __ Fmov(d19, 1.9);
  __ Fmov(d20, 2.5);
  __ Fmov(d21, -1.5);
  __ Fmov(d22, -2.5);
  __ Fmov(d23, kFP32PositiveInfinity);
  __ Fmov(d24, kFP32NegativeInfinity);
  __ Fmov(d25, 0.0);
  __ Fmov(d26, -0.0);
  __ Fmov(d27, -0.2);

  __ Frintx(d12, d16);
  __ Frintx(d13, d17);
  __ Frintx(d14, d18);
  __ Frintx(d15, d19);
  __ Frintx(d16, d20);
  __ Frintx(d17, d21);
  __ Frintx(d18, d22);
  __ Frintx(d19, d23);
  __ Frintx(d20, d24);
  __ Frintx(d21, d25);
  __ Frintx(d22, d26);
  __ Frintx(d23, d27);
  END();

  RUN();

  ASSERT_EQUAL_FP32(1.0, s0);
  ASSERT_EQUAL_FP32(1.0, s1);
  ASSERT_EQUAL_FP32(2.0, s2);
  ASSERT_EQUAL_FP32(2.0, s3);
  ASSERT_EQUAL_FP32(2.0, s4);
  ASSERT_EQUAL_FP32(-2.0, s5);
  ASSERT_EQUAL_FP32(-2.0, s6);
  ASSERT_EQUAL_FP32(kFP32PositiveInfinity, s7);
  ASSERT_EQUAL_FP32(kFP32NegativeInfinity, s8);
  ASSERT_EQUAL_FP32(0.0, s9);
  ASSERT_EQUAL_FP32(-0.0, s10);
  ASSERT_EQUAL_FP32(-0.0, s11);
  ASSERT_EQUAL_FP64(1.0, d12);
  ASSERT_EQUAL_FP64(1.0, d13);
  ASSERT_EQUAL_FP64(2.0, d14);
  ASSERT_EQUAL_FP64(2.0, d15);
  ASSERT_EQUAL_FP64(2.0, d16);
  ASSERT_EQUAL_FP64(-2.0, d17);
  ASSERT_EQUAL_FP64(-2.0, d18);
  ASSERT_EQUAL_FP64(kFP64PositiveInfinity, d19);
  ASSERT_EQUAL_FP64(kFP64NegativeInfinity, d20);
  ASSERT_EQUAL_FP64(0.0, d21);
  ASSERT_EQUAL_FP64(-0.0, d22);
  ASSERT_EQUAL_FP64(-0.0, d23);

  TEARDOWN();
}


TEST(frintz) {
  SETUP_WITH_FEATURES(CPUFeatures::kFP);

  START();
  __ Fmov(s16, 1.0);
  __ Fmov(s17, 1.1);
  __ Fmov(s18, 1.5);
  __ Fmov(s19, 1.9);
  __ Fmov(s20, 2.5);
  __ Fmov(s21, -1.5);
  __ Fmov(s22, -2.5);
  __ Fmov(s23, kFP32PositiveInfinity);
  __ Fmov(s24, kFP32NegativeInfinity);
  __ Fmov(s25, 0.0);
  __ Fmov(s26, -0.0);

  __ Frintz(s0, s16);
  __ Frintz(s1, s17);
  __ Frintz(s2, s18);
  __ Frintz(s3, s19);
  __ Frintz(s4, s20);
  __ Frintz(s5, s21);
  __ Frintz(s6, s22);
  __ Frintz(s7, s23);
  __ Frintz(s8, s24);
  __ Frintz(s9, s25);
  __ Frintz(s10, s26);

  __ Fmov(d16, 1.0);
  __ Fmov(d17, 1.1);
  __ Fmov(d18, 1.5);
  __ Fmov(d19, 1.9);
  __ Fmov(d20, 2.5);
  __ Fmov(d21, -1.5);
  __ Fmov(d22, -2.5);
  __ Fmov(d23, kFP32PositiveInfinity);
  __ Fmov(d24, kFP32NegativeInfinity);
  __ Fmov(d25, 0.0);
  __ Fmov(d26, -0.0);

  __ Frintz(d11, d16);
  __ Frintz(d12, d17);
  __ Frintz(d13, d18);
  __ Frintz(d14, d19);
  __ Frintz(d15, d20);
  __ Frintz(d16, d21);
  __ Frintz(d17, d22);
  __ Frintz(d18, d23);
  __ Frintz(d19, d24);
  __ Frintz(d20, d25);
  __ Frintz(d21, d26);
  END();

  RUN();

  ASSERT_EQUAL_FP32(1.0, s0);
  ASSERT_EQUAL_FP32(1.0, s1);
  ASSERT_EQUAL_FP32(1.0, s2);
  ASSERT_EQUAL_FP32(1.0, s3);
  ASSERT_EQUAL_FP32(2.0, s4);
  ASSERT_EQUAL_FP32(-1.0, s5);
  ASSERT_EQUAL_FP32(-2.0, s6);
  ASSERT_EQUAL_FP32(kFP32PositiveInfinity, s7);
  ASSERT_EQUAL_FP32(kFP32NegativeInfinity, s8);
  ASSERT_EQUAL_FP32(0.0, s9);
  ASSERT_EQUAL_FP32(-0.0, s10);
  ASSERT_EQUAL_FP64(1.0, d11);
  ASSERT_EQUAL_FP64(1.0, d12);
  ASSERT_EQUAL_FP64(1.0, d13);
  ASSERT_EQUAL_FP64(1.0, d14);
  ASSERT_EQUAL_FP64(2.0, d15);
  ASSERT_EQUAL_FP64(-1.0, d16);
  ASSERT_EQUAL_FP64(-2.0, d17);
  ASSERT_EQUAL_FP64(kFP64PositiveInfinity, d18);
  ASSERT_EQUAL_FP64(kFP64NegativeInfinity, d19);
  ASSERT_EQUAL_FP64(0.0, d20);
  ASSERT_EQUAL_FP64(-0.0, d21);

  TEARDOWN();
}


TEST(fcvt_ds) {
  SETUP_WITH_FEATURES(CPUFeatures::kFP);

  START();
  __ Fmov(s16, 1.0);
  __ Fmov(s17, 1.1);
  __ Fmov(s18, 1.5);
  __ Fmov(s19, 1.9);
  __ Fmov(s20, 2.5);
  __ Fmov(s21, -1.5);
  __ Fmov(s22, -2.5);
  __ Fmov(s23, kFP32PositiveInfinity);
  __ Fmov(s24, kFP32NegativeInfinity);
  __ Fmov(s25, 0.0);
  __ Fmov(s26, -0.0);
  __ Fmov(s27, FLT_MAX);
  __ Fmov(s28, FLT_MIN);
  __ Fmov(s29, RawbitsToFloat(0x7fc12345));  // Quiet NaN.
  __ Fmov(s30, RawbitsToFloat(0x7f812345));  // Signalling NaN.

  __ Fcvt(d0, s16);
  __ Fcvt(d1, s17);
  __ Fcvt(d2, s18);
  __ Fcvt(d3, s19);
  __ Fcvt(d4, s20);
  __ Fcvt(d5, s21);
  __ Fcvt(d6, s22);
  __ Fcvt(d7, s23);
  __ Fcvt(d8, s24);
  __ Fcvt(d9, s25);
  __ Fcvt(d10, s26);
  __ Fcvt(d11, s27);
  __ Fcvt(d12, s28);
  __ Fcvt(d13, s29);
  __ Fcvt(d14, s30);
  END();

  RUN();

  ASSERT_EQUAL_FP64(1.0f, d0);
  ASSERT_EQUAL_FP64(1.1f, d1);
  ASSERT_EQUAL_FP64(1.5f, d2);
  ASSERT_EQUAL_FP64(1.9f, d3);
  ASSERT_EQUAL_FP64(2.5f, d4);
  ASSERT_EQUAL_FP64(-1.5f, d5);
  ASSERT_EQUAL_FP64(-2.5f, d6);
  ASSERT_EQUAL_FP64(kFP64PositiveInfinity, d7);
  ASSERT_EQUAL_FP64(kFP64NegativeInfinity, d8);
  ASSERT_EQUAL_FP64(0.0f, d9);
  ASSERT_EQUAL_FP64(-0.0f, d10);
  ASSERT_EQUAL_FP64(FLT_MAX, d11);
  ASSERT_EQUAL_FP64(FLT_MIN, d12);

  // Check that the NaN payload is preserved according to AArch64 conversion
  // rules:
  //  - The sign bit is preserved.
  //  - The top bit of the mantissa is forced to 1 (making it a quiet NaN).
  //  - The remaining mantissa bits are copied until they run out.
  //  - The low-order bits that haven't already been assigned are set to 0.
  ASSERT_EQUAL_FP64(RawbitsToDouble(0x7ff82468a0000000), d13);
  ASSERT_EQUAL_FP64(RawbitsToDouble(0x7ff82468a0000000), d14);

  TEARDOWN();
}
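
// Worked example of the payload rule above: the signalling NaN 0x7f812345
// has 23-bit mantissa 0x012345 (quiet bit clear). Widening keeps the sign,
// forces the quiet bit, and left-aligns the remaining 22 payload bits in the
// 51 low mantissa bits of the double: 0x12345 << 29 == 0x2468a0000000,
// giving the 0x7ff82468a0000000 asserted for d13 and d14.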


TEST(fcvt_sd) {
  // Test simple conversions here. Complex behaviour (such as rounding
  // specifics) is tested in the simulator tests.

  SETUP_WITH_FEATURES(CPUFeatures::kFP);

  START();
  __ Fmov(d16, 1.0);
  __ Fmov(d17, 1.1);
  __ Fmov(d18, 1.5);
  __ Fmov(d19, 1.9);
  __ Fmov(d20, 2.5);
  __ Fmov(d21, -1.5);
  __ Fmov(d22, -2.5);
  __ Fmov(d23, kFP32PositiveInfinity);
  __ Fmov(d24, kFP32NegativeInfinity);
  __ Fmov(d25, 0.0);
  __ Fmov(d26, -0.0);
  __ Fmov(d27, FLT_MAX);
  __ Fmov(d28, FLT_MIN);
  __ Fmov(d29, RawbitsToDouble(0x7ff82468a0000000));  // Quiet NaN.
  __ Fmov(d30, RawbitsToDouble(0x7ff02468a0000000));  // Signalling NaN.

  __ Fcvt(s0, d16);
  __ Fcvt(s1, d17);
  __ Fcvt(s2, d18);
  __ Fcvt(s3, d19);
  __ Fcvt(s4, d20);
  __ Fcvt(s5, d21);
  __ Fcvt(s6, d22);
  __ Fcvt(s7, d23);
  __ Fcvt(s8, d24);
  __ Fcvt(s9, d25);
  __ Fcvt(s10, d26);
  __ Fcvt(s11, d27);
  __ Fcvt(s12, d28);
  __ Fcvt(s13, d29);
  __ Fcvt(s14, d30);
  END();

  RUN();

  ASSERT_EQUAL_FP32(1.0f, s0);
  ASSERT_EQUAL_FP32(1.1f, s1);
  ASSERT_EQUAL_FP32(1.5f, s2);
  ASSERT_EQUAL_FP32(1.9f, s3);
  ASSERT_EQUAL_FP32(2.5f, s4);
  ASSERT_EQUAL_FP32(-1.5f, s5);
  ASSERT_EQUAL_FP32(-2.5f, s6);
  ASSERT_EQUAL_FP32(kFP32PositiveInfinity, s7);
  ASSERT_EQUAL_FP32(kFP32NegativeInfinity, s8);
  ASSERT_EQUAL_FP32(0.0f, s9);
  ASSERT_EQUAL_FP32(-0.0f, s10);
  ASSERT_EQUAL_FP32(FLT_MAX, s11);
  ASSERT_EQUAL_FP32(FLT_MIN, s12);

  // Check that the NaN payload is preserved according to AArch64 conversion
  // rules:
  //  - The sign bit is preserved.
  //  - The top bit of the mantissa is forced to 1 (making it a quiet NaN).
  //  - The remaining mantissa bits are copied until they run out.
  //  - The low-order bits that haven't already been assigned are set to 0.
  ASSERT_EQUAL_FP32(RawbitsToFloat(0x7fc12345), s13);
  ASSERT_EQUAL_FP32(RawbitsToFloat(0x7fc12345), s14);

  TEARDOWN();
}


TEST(fcvt_half) {
  SETUP_WITH_FEATURES(CPUFeatures::kFP);

  START();
  Label done;
  {
    // Check all exact conversions from half to float and back.
    Label ok, fail;
    __ Mov(w0, 0);
    for (int i = 0; i < 0xffff; i += 3) {
      if ((i & 0x7c00) == 0x7c00) continue;
      __ Mov(w1, i);
      __ Fmov(s1, w1);
      __ Fcvt(s2, h1);
      __ Fcvt(h2, s2);
      __ Fmov(w2, s2);
      __ Cmp(w1, w2);
      __ B(&fail, ne);
    }
    __ B(&ok);
    __ Bind(&fail);
    __ Mov(w0, 1);
    __ B(&done);
    __ Bind(&ok);
  }
  {
    // Check all exact conversions from half to double and back.
    Label ok, fail;
    for (int i = 0; i < 0xffff; i += 3) {
      if ((i & 0x7c00) == 0x7c00) continue;
      __ Mov(w1, i);
      __ Fmov(s1, w1);
      __ Fcvt(d2, h1);
      __ Fcvt(h2, d2);
      __ Fmov(w2, s2);
      __ Cmp(w1, w2);
      __ B(&fail, ne);
    }
    __ B(&ok);
    __ Bind(&fail);
    __ Mov(w0, 2);
    __ Bind(&ok);
  }
  __ Bind(&done);

  // Check some other interesting values.
  __ Fmov(s0, kFP32PositiveInfinity);
  __ Fmov(s1, kFP32NegativeInfinity);
  __ Fmov(s2, 65504);       // Max half precision.
  __ Fmov(s3, 6.10352e-5);  // Min positive normal.
  __ Fmov(s4, 6.09756e-5);  // Max subnormal.
  __ Fmov(s5, 5.96046e-8);  // Min positive subnormal.
  __ Fmov(s6, 5e-9);        // Not representable -> zero.
  __ Fmov(s7, -0.0);
  __ Fcvt(h0, s0);
  __ Fcvt(h1, s1);
  __ Fcvt(h2, s2);
  __ Fcvt(h3, s3);
  __ Fcvt(h4, s4);
  __ Fcvt(h5, s5);
  __ Fcvt(h6, s6);
  __ Fcvt(h7, s7);

  __ Fmov(d20, kFP64PositiveInfinity);
  __ Fmov(d21, kFP64NegativeInfinity);
  __ Fmov(d22, 65504);       // Max half precision.
  __ Fmov(d23, 6.10352e-5);  // Min positive normal.
  __ Fmov(d24, 6.09756e-5);  // Max subnormal.
  __ Fmov(d25, 5.96046e-8);  // Min positive subnormal.
  __ Fmov(d26, 5e-9);        // Not representable -> zero.
  __ Fmov(d27, -0.0);
  __ Fcvt(h20, d20);
  __ Fcvt(h21, d21);
  __ Fcvt(h22, d22);
  __ Fcvt(h23, d23);
  __ Fcvt(h24, d24);
  __ Fcvt(h25, d25);
  __ Fcvt(h26, d26);
  __ Fcvt(h27, d27);
  END();

  RUN();

  ASSERT_EQUAL_32(0, w0);  // 1 => float failed, 2 => double failed.
  ASSERT_EQUAL_128(0, Float16ToRawbits(kFP16PositiveInfinity), q0);
  ASSERT_EQUAL_128(0, Float16ToRawbits(kFP16NegativeInfinity), q1);
  ASSERT_EQUAL_128(0, 0x7bff, q2);
  ASSERT_EQUAL_128(0, 0x0400, q3);
  ASSERT_EQUAL_128(0, 0x03ff, q4);
  ASSERT_EQUAL_128(0, 0x0001, q5);
  ASSERT_EQUAL_128(0, 0, q6);
  ASSERT_EQUAL_128(0, 0x8000, q7);
  ASSERT_EQUAL_128(0, Float16ToRawbits(kFP16PositiveInfinity), q20);
  ASSERT_EQUAL_128(0, Float16ToRawbits(kFP16NegativeInfinity), q21);
  ASSERT_EQUAL_128(0, 0x7bff, q22);
  ASSERT_EQUAL_128(0, 0x0400, q23);
  ASSERT_EQUAL_128(0, 0x03ff, q24);
  ASSERT_EQUAL_128(0, 0x0001, q25);
  ASSERT_EQUAL_128(0, 0, q26);
  ASSERT_EQUAL_128(0, 0x8000, q27);
  TEARDOWN();
}
13390 
13391 
13392 TEST(fcvtas) {
13393   SETUP_WITH_FEATURES(CPUFeatures::kFP);
13394 
13395   START();
13396   __ Fmov(s0, 1.0);
13397   __ Fmov(s1, 1.1);
13398   __ Fmov(s2, 2.5);
13399   __ Fmov(s3, -2.5);
13400   __ Fmov(s4, kFP32PositiveInfinity);
13401   __ Fmov(s5, kFP32NegativeInfinity);
13402   __ Fmov(s6, 0x7fffff80);  // Largest float < INT32_MAX.
13403   __ Fneg(s7, s6);          // Smallest float > INT32_MIN.
13404   __ Fmov(d8, 1.0);
13405   __ Fmov(d9, 1.1);
13406   __ Fmov(d10, 2.5);
13407   __ Fmov(d11, -2.5);
13408   __ Fmov(d12, kFP64PositiveInfinity);
13409   __ Fmov(d13, kFP64NegativeInfinity);
13410   __ Fmov(d14, kWMaxInt - 1);
13411   __ Fmov(d15, kWMinInt + 1);
13412   __ Fmov(s17, 1.1);
13413   __ Fmov(s18, 2.5);
13414   __ Fmov(s19, -2.5);
13415   __ Fmov(s20, kFP32PositiveInfinity);
13416   __ Fmov(s21, kFP32NegativeInfinity);
13417   __ Fmov(s22, 0x7fffff8000000000);  // Largest float < INT64_MAX.
13418   __ Fneg(s23, s22);                 // Smallest float > INT64_MIN.
13419   __ Fmov(d24, 1.1);
13420   __ Fmov(d25, 2.5);
13421   __ Fmov(d26, -2.5);
13422   __ Fmov(d27, kFP64PositiveInfinity);
13423   __ Fmov(d28, kFP64NegativeInfinity);
13424   __ Fmov(d29, 0x7ffffffffffffc00);  // Largest double < INT64_MAX.
13425   __ Fneg(d30, d29);                 // Smallest double > INT64_MIN.
13426 
13427   __ Fcvtas(w0, s0);
13428   __ Fcvtas(w1, s1);
13429   __ Fcvtas(w2, s2);
13430   __ Fcvtas(w3, s3);
13431   __ Fcvtas(w4, s4);
13432   __ Fcvtas(w5, s5);
13433   __ Fcvtas(w6, s6);
13434   __ Fcvtas(w7, s7);
13435   __ Fcvtas(w8, d8);
13436   __ Fcvtas(w9, d9);
13437   __ Fcvtas(w10, d10);
13438   __ Fcvtas(w11, d11);
13439   __ Fcvtas(w12, d12);
13440   __ Fcvtas(w13, d13);
13441   __ Fcvtas(w14, d14);
13442   __ Fcvtas(w15, d15);
13443   __ Fcvtas(x17, s17);
13444   __ Fcvtas(x18, s18);
13445   __ Fcvtas(x19, s19);
13446   __ Fcvtas(x20, s20);
13447   __ Fcvtas(x21, s21);
13448   __ Fcvtas(x22, s22);
13449   __ Fcvtas(x23, s23);
13450   __ Fcvtas(x24, d24);
13451   __ Fcvtas(x25, d25);
13452   __ Fcvtas(x26, d26);
13453   __ Fcvtas(x27, d27);
13454   __ Fcvtas(x28, d28);
13455   __ Fcvtas(x29, d29);
13456   __ Fcvtas(x30, d30);
13457   END();
13458 
13459   RUN();
13460 
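  // FCVTAS rounds to nearest with ties away from zero, so 2.5 -> 3 and
  // -2.5 -> -3 (0xfffffffd when truncated to 32 bits).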
13461   ASSERT_EQUAL_64(1, x0);
13462   ASSERT_EQUAL_64(1, x1);
13463   ASSERT_EQUAL_64(3, x2);
13464   ASSERT_EQUAL_64(0xfffffffd, x3);
13465   ASSERT_EQUAL_64(0x7fffffff, x4);
13466   ASSERT_EQUAL_64(0x80000000, x5);
13467   ASSERT_EQUAL_64(0x7fffff80, x6);
13468   ASSERT_EQUAL_64(0x80000080, x7);
13469   ASSERT_EQUAL_64(1, x8);
13470   ASSERT_EQUAL_64(1, x9);
13471   ASSERT_EQUAL_64(3, x10);
13472   ASSERT_EQUAL_64(0xfffffffd, x11);
13473   ASSERT_EQUAL_64(0x7fffffff, x12);
13474   ASSERT_EQUAL_64(0x80000000, x13);
13475   ASSERT_EQUAL_64(0x7ffffffe, x14);
13476   ASSERT_EQUAL_64(0x80000001, x15);
13477   ASSERT_EQUAL_64(1, x17);
13478   ASSERT_EQUAL_64(3, x18);
13479   ASSERT_EQUAL_64(0xfffffffffffffffd, x19);
13480   ASSERT_EQUAL_64(0x7fffffffffffffff, x20);
13481   ASSERT_EQUAL_64(0x8000000000000000, x21);
13482   ASSERT_EQUAL_64(0x7fffff8000000000, x22);
13483   ASSERT_EQUAL_64(0x8000008000000000, x23);
13484   ASSERT_EQUAL_64(1, x24);
13485   ASSERT_EQUAL_64(3, x25);
13486   ASSERT_EQUAL_64(0xfffffffffffffffd, x26);
13487   ASSERT_EQUAL_64(0x7fffffffffffffff, x27);
13488   ASSERT_EQUAL_64(0x8000000000000000, x28);
13489   ASSERT_EQUAL_64(0x7ffffffffffffc00, x29);
13490   ASSERT_EQUAL_64(0x8000000000000400, x30);
13491 
13492   TEARDOWN();
13493 }
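
// A host-side sanity sketch (not run by the test) of the boundary constant
// used above, assuming VIXL_ASSERT from the VIXL headers is usable here:
// floats in [2^30, 2^31) have a ulp of 128, so the largest float below
// INT32_MAX is 2^31 - 128 = 0x7fffff80.
static void CheckFcvtBoundaryConstant() {
  float largest_below_2_31 = std::nextafterf(2147483648.0f, 0.0f);
  VIXL_ASSERT(largest_below_2_31 == 2147483520.0f);  // 0x7fffff80.
}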
13494 
13495 
13496 TEST(fcvtau) {
13497   SETUP_WITH_FEATURES(CPUFeatures::kFP);
13498 
13499   START();
13500   __ Fmov(s0, 1.0);
13501   __ Fmov(s1, 1.1);
13502   __ Fmov(s2, 2.5);
13503   __ Fmov(s3, -2.5);
13504   __ Fmov(s4, kFP32PositiveInfinity);
13505   __ Fmov(s5, kFP32NegativeInfinity);
13506   __ Fmov(s6, 0xffffff00);  // Largest float < UINT32_MAX.
13507   __ Fmov(d8, 1.0);
13508   __ Fmov(d9, 1.1);
13509   __ Fmov(d10, 2.5);
13510   __ Fmov(d11, -2.5);
13511   __ Fmov(d12, kFP64PositiveInfinity);
13512   __ Fmov(d13, kFP64NegativeInfinity);
13513   __ Fmov(d14, 0xfffffffe);
13514   __ Fmov(s16, 1.0);
13515   __ Fmov(s17, 1.1);
13516   __ Fmov(s18, 2.5);
13517   __ Fmov(s19, -2.5);
13518   __ Fmov(s20, kFP32PositiveInfinity);
13519   __ Fmov(s21, kFP32NegativeInfinity);
13520   __ Fmov(s22, 0xffffff0000000000);  // Largest float < UINT64_MAX.
13521   __ Fmov(d24, 1.1);
13522   __ Fmov(d25, 2.5);
13523   __ Fmov(d26, -2.5);
13524   __ Fmov(d27, kFP64PositiveInfinity);
13525   __ Fmov(d28, kFP64NegativeInfinity);
13526   __ Fmov(d29, 0xfffffffffffff800);  // Largest double < UINT64_MAX.
13527   __ Fmov(s30, 0x100000000);
13528 
13529   __ Fcvtau(w0, s0);
13530   __ Fcvtau(w1, s1);
13531   __ Fcvtau(w2, s2);
13532   __ Fcvtau(w3, s3);
13533   __ Fcvtau(w4, s4);
13534   __ Fcvtau(w5, s5);
13535   __ Fcvtau(w6, s6);
13536   __ Fcvtau(w8, d8);
13537   __ Fcvtau(w9, d9);
13538   __ Fcvtau(w10, d10);
13539   __ Fcvtau(w11, d11);
13540   __ Fcvtau(w12, d12);
13541   __ Fcvtau(w13, d13);
13542   __ Fcvtau(w14, d14);
13543   __ Fcvtau(w15, d15);
13544   __ Fcvtau(x16, s16);
13545   __ Fcvtau(x17, s17);
13546   __ Fcvtau(x18, s18);
13547   __ Fcvtau(x19, s19);
13548   __ Fcvtau(x20, s20);
13549   __ Fcvtau(x21, s21);
13550   __ Fcvtau(x22, s22);
13551   __ Fcvtau(x24, d24);
13552   __ Fcvtau(x25, d25);
13553   __ Fcvtau(x26, d26);
13554   __ Fcvtau(x27, d27);
13555   __ Fcvtau(x28, d28);
13556   __ Fcvtau(x29, d29);
13557   __ Fcvtau(w30, s30);
13558   END();
13559 
13560   RUN();
13561 
13562   ASSERT_EQUAL_64(1, x0);
13563   ASSERT_EQUAL_64(1, x1);
13564   ASSERT_EQUAL_64(3, x2);
13565   ASSERT_EQUAL_64(0, x3);
13566   ASSERT_EQUAL_64(0xffffffff, x4);
13567   ASSERT_EQUAL_64(0, x5);
13568   ASSERT_EQUAL_64(0xffffff00, x6);
13569   ASSERT_EQUAL_64(1, x8);
13570   ASSERT_EQUAL_64(1, x9);
13571   ASSERT_EQUAL_64(3, x10);
13572   ASSERT_EQUAL_64(0, x11);
13573   ASSERT_EQUAL_64(0xffffffff, x12);
13574   ASSERT_EQUAL_64(0, x13);
13575   ASSERT_EQUAL_64(0xfffffffe, x14);
13576   ASSERT_EQUAL_64(1, x16);
13577   ASSERT_EQUAL_64(1, x17);
13578   ASSERT_EQUAL_64(3, x18);
13579   ASSERT_EQUAL_64(0, x19);
13580   ASSERT_EQUAL_64(0xffffffffffffffff, x20);
13581   ASSERT_EQUAL_64(0, x21);
13582   ASSERT_EQUAL_64(0xffffff0000000000, x22);
13583   ASSERT_EQUAL_64(1, x24);
13584   ASSERT_EQUAL_64(3, x25);
13585   ASSERT_EQUAL_64(0, x26);
13586   ASSERT_EQUAL_64(0xffffffffffffffff, x27);
13587   ASSERT_EQUAL_64(0, x28);
13588   ASSERT_EQUAL_64(0xfffffffffffff800, x29);
13589   ASSERT_EQUAL_64(0xffffffff, x30);
13590 
13591   TEARDOWN();
13592 }
13593 
13594 
13595 TEST(fcvtms) {
13596   SETUP_WITH_FEATURES(CPUFeatures::kFP);
13597 
13598   START();
13599   __ Fmov(s0, 1.0);
13600   __ Fmov(s1, 1.1);
13601   __ Fmov(s2, 1.5);
13602   __ Fmov(s3, -1.5);
13603   __ Fmov(s4, kFP32PositiveInfinity);
13604   __ Fmov(s5, kFP32NegativeInfinity);
13605   __ Fmov(s6, 0x7fffff80);  // Largest float < INT32_MAX.
13606   __ Fneg(s7, s6);          // Smallest float > INT32_MIN.
13607   __ Fmov(d8, 1.0);
13608   __ Fmov(d9, 1.1);
13609   __ Fmov(d10, 1.5);
13610   __ Fmov(d11, -1.5);
13611   __ Fmov(d12, kFP64PositiveInfinity);
13612   __ Fmov(d13, kFP64NegativeInfinity);
13613   __ Fmov(d14, kWMaxInt - 1);
13614   __ Fmov(d15, kWMinInt + 1);
13615   __ Fmov(s17, 1.1);
13616   __ Fmov(s18, 1.5);
13617   __ Fmov(s19, -1.5);
13618   __ Fmov(s20, kFP32PositiveInfinity);
13619   __ Fmov(s21, kFP32NegativeInfinity);
13620   __ Fmov(s22, 0x7fffff8000000000);  // Largest float < INT64_MAX.
13621   __ Fneg(s23, s22);                 // Smallest float > INT64_MIN.
13622   __ Fmov(d24, 1.1);
13623   __ Fmov(d25, 1.5);
13624   __ Fmov(d26, -1.5);
13625   __ Fmov(d27, kFP64PositiveInfinity);
13626   __ Fmov(d28, kFP64NegativeInfinity);
13627   __ Fmov(d29, 0x7ffffffffffffc00);  // Largest double < INT64_MAX.
13628   __ Fneg(d30, d29);                 // Smallest double > INT64_MIN.
13629 
13630   __ Fcvtms(w0, s0);
13631   __ Fcvtms(w1, s1);
13632   __ Fcvtms(w2, s2);
13633   __ Fcvtms(w3, s3);
13634   __ Fcvtms(w4, s4);
13635   __ Fcvtms(w5, s5);
13636   __ Fcvtms(w6, s6);
13637   __ Fcvtms(w7, s7);
13638   __ Fcvtms(w8, d8);
13639   __ Fcvtms(w9, d9);
13640   __ Fcvtms(w10, d10);
13641   __ Fcvtms(w11, d11);
13642   __ Fcvtms(w12, d12);
13643   __ Fcvtms(w13, d13);
13644   __ Fcvtms(w14, d14);
13645   __ Fcvtms(w15, d15);
13646   __ Fcvtms(x17, s17);
13647   __ Fcvtms(x18, s18);
13648   __ Fcvtms(x19, s19);
13649   __ Fcvtms(x20, s20);
13650   __ Fcvtms(x21, s21);
13651   __ Fcvtms(x22, s22);
13652   __ Fcvtms(x23, s23);
13653   __ Fcvtms(x24, d24);
13654   __ Fcvtms(x25, d25);
13655   __ Fcvtms(x26, d26);
13656   __ Fcvtms(x27, d27);
13657   __ Fcvtms(x28, d28);
13658   __ Fcvtms(x29, d29);
13659   __ Fcvtms(x30, d30);
13660   END();
13661 
13662   RUN();
13663 
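  // FCVTMS rounds towards minus infinity, so 1.5 -> 1 and -1.5 -> -2
  // (0xfffffffe).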
13664   ASSERT_EQUAL_64(1, x0);
13665   ASSERT_EQUAL_64(1, x1);
13666   ASSERT_EQUAL_64(1, x2);
13667   ASSERT_EQUAL_64(0xfffffffe, x3);
13668   ASSERT_EQUAL_64(0x7fffffff, x4);
13669   ASSERT_EQUAL_64(0x80000000, x5);
13670   ASSERT_EQUAL_64(0x7fffff80, x6);
13671   ASSERT_EQUAL_64(0x80000080, x7);
13672   ASSERT_EQUAL_64(1, x8);
13673   ASSERT_EQUAL_64(1, x9);
13674   ASSERT_EQUAL_64(1, x10);
13675   ASSERT_EQUAL_64(0xfffffffe, x11);
13676   ASSERT_EQUAL_64(0x7fffffff, x12);
13677   ASSERT_EQUAL_64(0x80000000, x13);
13678   ASSERT_EQUAL_64(0x7ffffffe, x14);
13679   ASSERT_EQUAL_64(0x80000001, x15);
13680   ASSERT_EQUAL_64(1, x17);
13681   ASSERT_EQUAL_64(1, x18);
13682   ASSERT_EQUAL_64(0xfffffffffffffffe, x19);
13683   ASSERT_EQUAL_64(0x7fffffffffffffff, x20);
13684   ASSERT_EQUAL_64(0x8000000000000000, x21);
13685   ASSERT_EQUAL_64(0x7fffff8000000000, x22);
13686   ASSERT_EQUAL_64(0x8000008000000000, x23);
13687   ASSERT_EQUAL_64(1, x24);
13688   ASSERT_EQUAL_64(1, x25);
13689   ASSERT_EQUAL_64(0xfffffffffffffffe, x26);
13690   ASSERT_EQUAL_64(0x7fffffffffffffff, x27);
13691   ASSERT_EQUAL_64(0x8000000000000000, x28);
13692   ASSERT_EQUAL_64(0x7ffffffffffffc00, x29);
13693   ASSERT_EQUAL_64(0x8000000000000400, x30);
13694 
13695   TEARDOWN();
13696 }
13697 
13698 
13699 TEST(fcvtmu) {
13700   SETUP_WITH_FEATURES(CPUFeatures::kFP);
13701 
13702   START();
13703   __ Fmov(s0, 1.0);
13704   __ Fmov(s1, 1.1);
13705   __ Fmov(s2, 1.5);
13706   __ Fmov(s3, -1.5);
13707   __ Fmov(s4, kFP32PositiveInfinity);
13708   __ Fmov(s5, kFP32NegativeInfinity);
13709   __ Fmov(s6, 0x7fffff80);  // Largest float < INT32_MAX.
13710   __ Fneg(s7, s6);          // Smallest float > INT32_MIN.
13711   __ Fmov(d8, 1.0);
13712   __ Fmov(d9, 1.1);
13713   __ Fmov(d10, 1.5);
13714   __ Fmov(d11, -1.5);
13715   __ Fmov(d12, kFP64PositiveInfinity);
13716   __ Fmov(d13, kFP64NegativeInfinity);
13717   __ Fmov(d14, kWMaxInt - 1);
13718   __ Fmov(d15, kWMinInt + 1);
13719   __ Fmov(s17, 1.1);
13720   __ Fmov(s18, 1.5);
13721   __ Fmov(s19, -1.5);
13722   __ Fmov(s20, kFP32PositiveInfinity);
13723   __ Fmov(s21, kFP32NegativeInfinity);
13724   __ Fmov(s22, 0x7fffff8000000000);  // Largest float < INT64_MAX.
13725   __ Fneg(s23, s22);                 // Smallest float > INT64_MIN.
13726   __ Fmov(d24, 1.1);
13727   __ Fmov(d25, 1.5);
13728   __ Fmov(d26, -1.5);
13729   __ Fmov(d27, kFP64PositiveInfinity);
13730   __ Fmov(d28, kFP64NegativeInfinity);
13731   __ Fmov(d29, 0x7ffffffffffffc00);  // Largest double < INT64_MAX.
13732   __ Fneg(d30, d29);                 // Smallest double > INT64_MIN.
13733 
13734   __ Fcvtmu(w0, s0);
13735   __ Fcvtmu(w1, s1);
13736   __ Fcvtmu(w2, s2);
13737   __ Fcvtmu(w3, s3);
13738   __ Fcvtmu(w4, s4);
13739   __ Fcvtmu(w5, s5);
13740   __ Fcvtmu(w6, s6);
13741   __ Fcvtmu(w7, s7);
13742   __ Fcvtmu(w8, d8);
13743   __ Fcvtmu(w9, d9);
13744   __ Fcvtmu(w10, d10);
13745   __ Fcvtmu(w11, d11);
13746   __ Fcvtmu(w12, d12);
13747   __ Fcvtmu(w13, d13);
13748   __ Fcvtmu(w14, d14);
13749   __ Fcvtmu(x17, s17);
13750   __ Fcvtmu(x18, s18);
13751   __ Fcvtmu(x19, s19);
13752   __ Fcvtmu(x20, s20);
13753   __ Fcvtmu(x21, s21);
13754   __ Fcvtmu(x22, s22);
13755   __ Fcvtmu(x23, s23);
13756   __ Fcvtmu(x24, d24);
13757   __ Fcvtmu(x25, d25);
13758   __ Fcvtmu(x26, d26);
13759   __ Fcvtmu(x27, d27);
13760   __ Fcvtmu(x28, d28);
13761   __ Fcvtmu(x29, d29);
13762   __ Fcvtmu(x30, d30);
13763   END();
13764 
13765   RUN();
13766 
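  // FCVTMU rounds towards minus infinity and, being an unsigned conversion,
  // saturates negative results to zero.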
13767   ASSERT_EQUAL_64(1, x0);
13768   ASSERT_EQUAL_64(1, x1);
13769   ASSERT_EQUAL_64(1, x2);
13770   ASSERT_EQUAL_64(0, x3);
13771   ASSERT_EQUAL_64(0xffffffff, x4);
13772   ASSERT_EQUAL_64(0, x5);
13773   ASSERT_EQUAL_64(0x7fffff80, x6);
13774   ASSERT_EQUAL_64(0, x7);
13775   ASSERT_EQUAL_64(1, x8);
13776   ASSERT_EQUAL_64(1, x9);
13777   ASSERT_EQUAL_64(1, x10);
13778   ASSERT_EQUAL_64(0, x11);
13779   ASSERT_EQUAL_64(0xffffffff, x12);
13780   ASSERT_EQUAL_64(0, x13);
13781   ASSERT_EQUAL_64(0x7ffffffe, x14);
13782   ASSERT_EQUAL_64(1, x17);
13783   ASSERT_EQUAL_64(1, x18);
13784   ASSERT_EQUAL_64(0, x19);
13785   ASSERT_EQUAL_64(0xffffffffffffffff, x20);
13786   ASSERT_EQUAL_64(0, x21);
13787   ASSERT_EQUAL_64(0x7fffff8000000000, x22);
13788   ASSERT_EQUAL_64(0, x23);
13789   ASSERT_EQUAL_64(1, x24);
13790   ASSERT_EQUAL_64(1, x25);
13791   ASSERT_EQUAL_64(0, x26);
13792   ASSERT_EQUAL_64(0xffffffffffffffff, x27);
13793   ASSERT_EQUAL_64(0, x28);
13794   ASSERT_EQUAL_64(0x7ffffffffffffc00, x29);
13795   ASSERT_EQUAL_64(0, x30);
13796 
13797   TEARDOWN();
13798 }
13799 
13800 
13801 TEST(fcvtns) {
13802   SETUP_WITH_FEATURES(CPUFeatures::kFP);
13803 
13804   START();
13805   __ Fmov(s0, 1.0);
13806   __ Fmov(s1, 1.1);
13807   __ Fmov(s2, 1.5);
13808   __ Fmov(s3, -1.5);
13809   __ Fmov(s4, kFP32PositiveInfinity);
13810   __ Fmov(s5, kFP32NegativeInfinity);
13811   __ Fmov(s6, 0x7fffff80);  // Largest float < INT32_MAX.
13812   __ Fneg(s7, s6);          // Smallest float > INT32_MIN.
13813   __ Fmov(d8, 1.0);
13814   __ Fmov(d9, 1.1);
13815   __ Fmov(d10, 1.5);
13816   __ Fmov(d11, -1.5);
13817   __ Fmov(d12, kFP64PositiveInfinity);
13818   __ Fmov(d13, kFP64NegativeInfinity);
13819   __ Fmov(d14, kWMaxInt - 1);
13820   __ Fmov(d15, kWMinInt + 1);
13821   __ Fmov(s17, 1.1);
13822   __ Fmov(s18, 1.5);
13823   __ Fmov(s19, -1.5);
13824   __ Fmov(s20, kFP32PositiveInfinity);
13825   __ Fmov(s21, kFP32NegativeInfinity);
13826   __ Fmov(s22, 0x7fffff8000000000);  // Largest float < INT64_MAX.
13827   __ Fneg(s23, s22);                 // Smallest float > INT64_MIN.
13828   __ Fmov(d24, 1.1);
13829   __ Fmov(d25, 1.5);
13830   __ Fmov(d26, -1.5);
13831   __ Fmov(d27, kFP64PositiveInfinity);
13832   __ Fmov(d28, kFP64NegativeInfinity);
13833   __ Fmov(d29, 0x7ffffffffffffc00);  // Largest double < INT64_MAX.
13834   __ Fneg(d30, d29);                 // Smallest double > INT64_MIN.
13835 
13836   __ Fcvtns(w0, s0);
13837   __ Fcvtns(w1, s1);
13838   __ Fcvtns(w2, s2);
13839   __ Fcvtns(w3, s3);
13840   __ Fcvtns(w4, s4);
13841   __ Fcvtns(w5, s5);
13842   __ Fcvtns(w6, s6);
13843   __ Fcvtns(w7, s7);
13844   __ Fcvtns(w8, d8);
13845   __ Fcvtns(w9, d9);
13846   __ Fcvtns(w10, d10);
13847   __ Fcvtns(w11, d11);
13848   __ Fcvtns(w12, d12);
13849   __ Fcvtns(w13, d13);
13850   __ Fcvtns(w14, d14);
13851   __ Fcvtns(w15, d15);
13852   __ Fcvtns(x17, s17);
13853   __ Fcvtns(x18, s18);
13854   __ Fcvtns(x19, s19);
13855   __ Fcvtns(x20, s20);
13856   __ Fcvtns(x21, s21);
13857   __ Fcvtns(x22, s22);
13858   __ Fcvtns(x23, s23);
13859   __ Fcvtns(x24, d24);
13860   __ Fcvtns(x25, d25);
13861   __ Fcvtns(x26, d26);
13862   __ Fcvtns(x27, d27);
13863   __ Fcvtns(x28, d28);
13864   __ Fcvtns(x29, d29);
13865   __ Fcvtns(x30, d30);
13866   END();
13867 
13868   RUN();
13869 
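  // FCVTNS rounds to nearest with ties to even, so 1.5 -> 2 and -1.5 -> -2.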
13870   ASSERT_EQUAL_64(1, x0);
13871   ASSERT_EQUAL_64(1, x1);
13872   ASSERT_EQUAL_64(2, x2);
13873   ASSERT_EQUAL_64(0xfffffffe, x3);
13874   ASSERT_EQUAL_64(0x7fffffff, x4);
13875   ASSERT_EQUAL_64(0x80000000, x5);
13876   ASSERT_EQUAL_64(0x7fffff80, x6);
13877   ASSERT_EQUAL_64(0x80000080, x7);
13878   ASSERT_EQUAL_64(1, x8);
13879   ASSERT_EQUAL_64(1, x9);
13880   ASSERT_EQUAL_64(2, x10);
13881   ASSERT_EQUAL_64(0xfffffffe, x11);
13882   ASSERT_EQUAL_64(0x7fffffff, x12);
13883   ASSERT_EQUAL_64(0x80000000, x13);
13884   ASSERT_EQUAL_64(0x7ffffffe, x14);
13885   ASSERT_EQUAL_64(0x80000001, x15);
13886   ASSERT_EQUAL_64(1, x17);
13887   ASSERT_EQUAL_64(2, x18);
13888   ASSERT_EQUAL_64(0xfffffffffffffffe, x19);
13889   ASSERT_EQUAL_64(0x7fffffffffffffff, x20);
13890   ASSERT_EQUAL_64(0x8000000000000000, x21);
13891   ASSERT_EQUAL_64(0x7fffff8000000000, x22);
13892   ASSERT_EQUAL_64(0x8000008000000000, x23);
13893   ASSERT_EQUAL_64(1, x24);
13894   ASSERT_EQUAL_64(2, x25);
13895   ASSERT_EQUAL_64(0xfffffffffffffffe, x26);
13896   ASSERT_EQUAL_64(0x7fffffffffffffff, x27);
13897   ASSERT_EQUAL_64(0x8000000000000000, x28);
13898   ASSERT_EQUAL_64(0x7ffffffffffffc00, x29);
13899   ASSERT_EQUAL_64(0x8000000000000400, x30);
13900 
13901   TEARDOWN();
13902 }
13903 
13904 
13905 TEST(fcvtnu) {
13906   SETUP_WITH_FEATURES(CPUFeatures::kFP);
13907 
13908   START();
13909   __ Fmov(s0, 1.0);
13910   __ Fmov(s1, 1.1);
13911   __ Fmov(s2, 1.5);
13912   __ Fmov(s3, -1.5);
13913   __ Fmov(s4, kFP32PositiveInfinity);
13914   __ Fmov(s5, kFP32NegativeInfinity);
13915   __ Fmov(s6, 0xffffff00);  // Largest float < UINT32_MAX.
13916   __ Fmov(d8, 1.0);
13917   __ Fmov(d9, 1.1);
13918   __ Fmov(d10, 1.5);
13919   __ Fmov(d11, -1.5);
13920   __ Fmov(d12, kFP64PositiveInfinity);
13921   __ Fmov(d13, kFP64NegativeInfinity);
13922   __ Fmov(d14, 0xfffffffe);
13923   __ Fmov(s16, 1.0);
13924   __ Fmov(s17, 1.1);
13925   __ Fmov(s18, 1.5);
13926   __ Fmov(s19, -1.5);
13927   __ Fmov(s20, kFP32PositiveInfinity);
13928   __ Fmov(s21, kFP32NegativeInfinity);
13929   __ Fmov(s22, 0xffffff0000000000);  // Largest float < UINT64_MAX.
13930   __ Fmov(d24, 1.1);
13931   __ Fmov(d25, 1.5);
13932   __ Fmov(d26, -1.5);
13933   __ Fmov(d27, kFP64PositiveInfinity);
13934   __ Fmov(d28, kFP64NegativeInfinity);
13935   __ Fmov(d29, 0xfffffffffffff800);  // Largest double < UINT64_MAX.
13936   __ Fmov(s30, 0x100000000);
13937 
13938   __ Fcvtnu(w0, s0);
13939   __ Fcvtnu(w1, s1);
13940   __ Fcvtnu(w2, s2);
13941   __ Fcvtnu(w3, s3);
13942   __ Fcvtnu(w4, s4);
13943   __ Fcvtnu(w5, s5);
13944   __ Fcvtnu(w6, s6);
13945   __ Fcvtnu(w8, d8);
13946   __ Fcvtnu(w9, d9);
13947   __ Fcvtnu(w10, d10);
13948   __ Fcvtnu(w11, d11);
13949   __ Fcvtnu(w12, d12);
13950   __ Fcvtnu(w13, d13);
13951   __ Fcvtnu(w14, d14);
13952   __ Fcvtnu(w15, d15);
13953   __ Fcvtnu(x16, s16);
13954   __ Fcvtnu(x17, s17);
13955   __ Fcvtnu(x18, s18);
13956   __ Fcvtnu(x19, s19);
13957   __ Fcvtnu(x20, s20);
13958   __ Fcvtnu(x21, s21);
13959   __ Fcvtnu(x22, s22);
13960   __ Fcvtnu(x24, d24);
13961   __ Fcvtnu(x25, d25);
13962   __ Fcvtnu(x26, d26);
13963   __ Fcvtnu(x27, d27);
13964   __ Fcvtnu(x28, d28);
13965   __ Fcvtnu(x29, d29);
13966   __ Fcvtnu(w30, s30);
13967   END();
13968 
13969   RUN();
13970 
13971   ASSERT_EQUAL_64(1, x0);
13972   ASSERT_EQUAL_64(1, x1);
13973   ASSERT_EQUAL_64(2, x2);
13974   ASSERT_EQUAL_64(0, x3);
13975   ASSERT_EQUAL_64(0xffffffff, x4);
13976   ASSERT_EQUAL_64(0, x5);
13977   ASSERT_EQUAL_64(0xffffff00, x6);
13978   ASSERT_EQUAL_64(1, x8);
13979   ASSERT_EQUAL_64(1, x9);
13980   ASSERT_EQUAL_64(2, x10);
13981   ASSERT_EQUAL_64(0, x11);
13982   ASSERT_EQUAL_64(0xffffffff, x12);
13983   ASSERT_EQUAL_64(0, x13);
13984   ASSERT_EQUAL_64(0xfffffffe, x14);
13985   ASSERT_EQUAL_64(1, x16);
13986   ASSERT_EQUAL_64(1, x17);
13987   ASSERT_EQUAL_64(2, x18);
13988   ASSERT_EQUAL_64(0, x19);
13989   ASSERT_EQUAL_64(0xffffffffffffffff, x20);
13990   ASSERT_EQUAL_64(0, x21);
13991   ASSERT_EQUAL_64(0xffffff0000000000, x22);
13992   ASSERT_EQUAL_64(1, x24);
13993   ASSERT_EQUAL_64(2, x25);
13994   ASSERT_EQUAL_64(0, x26);
13995   ASSERT_EQUAL_64(0xffffffffffffffff, x27);
13996   ASSERT_EQUAL_64(0, x28);
13997   ASSERT_EQUAL_64(0xfffffffffffff800, x29);
13998   ASSERT_EQUAL_64(0xffffffff, x30);
13999 
14000   TEARDOWN();
14001 }
14002 
14003 
14004 TEST(fcvtzs) {
14005   SETUP_WITH_FEATURES(CPUFeatures::kFP);
14006 
14007   START();
14008   __ Fmov(s0, 1.0);
14009   __ Fmov(s1, 1.1);
14010   __ Fmov(s2, 1.5);
14011   __ Fmov(s3, -1.5);
14012   __ Fmov(s4, kFP32PositiveInfinity);
14013   __ Fmov(s5, kFP32NegativeInfinity);
14014   __ Fmov(s6, 0x7fffff80);  // Largest float < INT32_MAX.
14015   __ Fneg(s7, s6);          // Smallest float > INT32_MIN.
14016   __ Fmov(d8, 1.0);
14017   __ Fmov(d9, 1.1);
14018   __ Fmov(d10, 1.5);
14019   __ Fmov(d11, -1.5);
14020   __ Fmov(d12, kFP64PositiveInfinity);
14021   __ Fmov(d13, kFP64NegativeInfinity);
14022   __ Fmov(d14, kWMaxInt - 1);
14023   __ Fmov(d15, kWMinInt + 1);
14024   __ Fmov(s17, 1.1);
14025   __ Fmov(s18, 1.5);
14026   __ Fmov(s19, -1.5);
14027   __ Fmov(s20, kFP32PositiveInfinity);
14028   __ Fmov(s21, kFP32NegativeInfinity);
14029   __ Fmov(s22, 0x7fffff8000000000);  // Largest float < INT64_MAX.
14030   __ Fneg(s23, s22);                 // Smallest float > INT64_MIN.
14031   __ Fmov(d24, 1.1);
14032   __ Fmov(d25, 1.5);
14033   __ Fmov(d26, -1.5);
14034   __ Fmov(d27, kFP64PositiveInfinity);
14035   __ Fmov(d28, kFP64NegativeInfinity);
14036   __ Fmov(d29, 0x7ffffffffffffc00);  // Largest double < INT64_MAX.
14037   __ Fneg(d30, d29);                 // Smallest double > INT64_MIN.
14038 
14039   __ Fcvtzs(w0, s0);
14040   __ Fcvtzs(w1, s1);
14041   __ Fcvtzs(w2, s2);
14042   __ Fcvtzs(w3, s3);
14043   __ Fcvtzs(w4, s4);
14044   __ Fcvtzs(w5, s5);
14045   __ Fcvtzs(w6, s6);
14046   __ Fcvtzs(w7, s7);
14047   __ Fcvtzs(w8, d8);
14048   __ Fcvtzs(w9, d9);
14049   __ Fcvtzs(w10, d10);
14050   __ Fcvtzs(w11, d11);
14051   __ Fcvtzs(w12, d12);
14052   __ Fcvtzs(w13, d13);
14053   __ Fcvtzs(w14, d14);
14054   __ Fcvtzs(w15, d15);
14055   __ Fcvtzs(x17, s17);
14056   __ Fcvtzs(x18, s18);
14057   __ Fcvtzs(x19, s19);
14058   __ Fcvtzs(x20, s20);
14059   __ Fcvtzs(x21, s21);
14060   __ Fcvtzs(x22, s22);
14061   __ Fcvtzs(x23, s23);
14062   __ Fcvtzs(x24, d24);
14063   __ Fcvtzs(x25, d25);
14064   __ Fcvtzs(x26, d26);
14065   __ Fcvtzs(x27, d27);
14066   __ Fcvtzs(x28, d28);
14067   __ Fcvtzs(x29, d29);
14068   __ Fcvtzs(x30, d30);
14069   END();
14070 
14071   RUN();
14072 
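  // FCVTZS rounds towards zero, so 1.5 -> 1 and -1.5 -> -1 (0xffffffff).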
14073   ASSERT_EQUAL_64(1, x0);
14074   ASSERT_EQUAL_64(1, x1);
14075   ASSERT_EQUAL_64(1, x2);
14076   ASSERT_EQUAL_64(0xffffffff, x3);
14077   ASSERT_EQUAL_64(0x7fffffff, x4);
14078   ASSERT_EQUAL_64(0x80000000, x5);
14079   ASSERT_EQUAL_64(0x7fffff80, x6);
14080   ASSERT_EQUAL_64(0x80000080, x7);
14081   ASSERT_EQUAL_64(1, x8);
14082   ASSERT_EQUAL_64(1, x9);
14083   ASSERT_EQUAL_64(1, x10);
14084   ASSERT_EQUAL_64(0xffffffff, x11);
14085   ASSERT_EQUAL_64(0x7fffffff, x12);
14086   ASSERT_EQUAL_64(0x80000000, x13);
14087   ASSERT_EQUAL_64(0x7ffffffe, x14);
14088   ASSERT_EQUAL_64(0x80000001, x15);
14089   ASSERT_EQUAL_64(1, x17);
14090   ASSERT_EQUAL_64(1, x18);
14091   ASSERT_EQUAL_64(0xffffffffffffffff, x19);
14092   ASSERT_EQUAL_64(0x7fffffffffffffff, x20);
14093   ASSERT_EQUAL_64(0x8000000000000000, x21);
14094   ASSERT_EQUAL_64(0x7fffff8000000000, x22);
14095   ASSERT_EQUAL_64(0x8000008000000000, x23);
14096   ASSERT_EQUAL_64(1, x24);
14097   ASSERT_EQUAL_64(1, x25);
14098   ASSERT_EQUAL_64(0xffffffffffffffff, x26);
14099   ASSERT_EQUAL_64(0x7fffffffffffffff, x27);
14100   ASSERT_EQUAL_64(0x8000000000000000, x28);
14101   ASSERT_EQUAL_64(0x7ffffffffffffc00, x29);
14102   ASSERT_EQUAL_64(0x8000000000000400, x30);
14103 
14104   TEARDOWN();
14105 }
14106 
14107 void FjcvtzsHelper(uint64_t value, uint64_t expected, uint32_t expected_z) {
14108   SETUP_WITH_FEATURES(CPUFeatures::kFP, CPUFeatures::kJSCVT);
14109   START();
14110   __ Fmov(d0, RawbitsToDouble(value));
14111   __ Fjcvtzs(w0, d0);
14112   __ Mrs(x1, NZCV);
14113   END();
14114 
14115 #ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
14116   RUN();
14117   ASSERT_EQUAL_64(expected, x0);
14118   ASSERT_EQUAL_32(expected_z, w1);
14119 #else
14120   USE(expected);
14121   USE(expected_z);
14122 #endif
14123 
14124   TEARDOWN();
14125 }
14126 
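// FJCVTZS sets the Z flag (and clears N, C and V) only when the conversion
// is exact and in range; every other input, including infinities and NaNs,
// produces NoFlag.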
14127 TEST(fjcvtzs) {
14128   /* Simple values. */
14129   FjcvtzsHelper(0x0000000000000000, 0, ZFlag);   // 0.0
14130   FjcvtzsHelper(0x0010000000000000, 0, NoFlag);  // The smallest normal value.
14131   FjcvtzsHelper(0x3fdfffffffffffff, 0, NoFlag);  // The value just below 0.5.
14132   FjcvtzsHelper(0x3fe0000000000000, 0, NoFlag);  // 0.5
14133   FjcvtzsHelper(0x3fe0000000000001, 0, NoFlag);  // The value just above 0.5.
14134   FjcvtzsHelper(0x3fefffffffffffff, 0, NoFlag);  // The value just below 1.0.
14135   FjcvtzsHelper(0x3ff0000000000000, 1, ZFlag);   // 1.0
14136   FjcvtzsHelper(0x3ff0000000000001, 1, NoFlag);  // The value just above 1.0.
14137   FjcvtzsHelper(0x3ff8000000000000, 1, NoFlag);  // 1.5
14138   FjcvtzsHelper(0x4024000000000000, 10, ZFlag);  // 10
14139   FjcvtzsHelper(0x7fefffffffffffff, 0, NoFlag);  // The largest finite value.
14140 
14141   /* Infinity. */
14142   FjcvtzsHelper(0x7ff0000000000000, 0, NoFlag);
14143 
14144   /* NaNs. */
14145   /*  - Quiet NaNs */
14146   FjcvtzsHelper(0x7ff923456789abcd, 0, NoFlag);
14147   FjcvtzsHelper(0x7ff8000000000000, 0, NoFlag);
14148   /*  - Signalling NaNs */
14149   FjcvtzsHelper(0x7ff123456789abcd, 0, NoFlag);
14150   FjcvtzsHelper(0x7ff0000000000001, 0, NoFlag);
14151 
14152   /* Subnormals. */
14153   /*  - A recognisable bit pattern. */
14154   FjcvtzsHelper(0x000123456789abcd, 0, NoFlag);
14155   /*  - The largest subnormal value. */
14156   FjcvtzsHelper(0x000fffffffffffff, 0, NoFlag);
14157   /*  - The smallest subnormal value. */
14158   FjcvtzsHelper(0x0000000000000001, 0, NoFlag);
14159 
14160   /* The same values again, but negated. */
14161   FjcvtzsHelper(0x8000000000000000, 0, NoFlag);
14162   FjcvtzsHelper(0x8010000000000000, 0, NoFlag);
14163   FjcvtzsHelper(0xbfdfffffffffffff, 0, NoFlag);
14164   FjcvtzsHelper(0xbfe0000000000000, 0, NoFlag);
14165   FjcvtzsHelper(0xbfe0000000000001, 0, NoFlag);
14166   FjcvtzsHelper(0xbfefffffffffffff, 0, NoFlag);
14167   FjcvtzsHelper(0xbff0000000000000, 0xffffffff, ZFlag);
14168   FjcvtzsHelper(0xbff0000000000001, 0xffffffff, NoFlag);
14169   FjcvtzsHelper(0xbff8000000000000, 0xffffffff, NoFlag);
14170   FjcvtzsHelper(0xc024000000000000, 0xfffffff6, ZFlag);
14171   FjcvtzsHelper(0xffefffffffffffff, 0, NoFlag);
14172   FjcvtzsHelper(0xfff0000000000000, 0, NoFlag);
14173   FjcvtzsHelper(0xfff923456789abcd, 0, NoFlag);
14174   FjcvtzsHelper(0xfff8000000000000, 0, NoFlag);
14175   FjcvtzsHelper(0xfff123456789abcd, 0, NoFlag);
14176   FjcvtzsHelper(0xfff0000000000001, 0, NoFlag);
14177   FjcvtzsHelper(0x800123456789abcd, 0, NoFlag);
14178   FjcvtzsHelper(0x800fffffffffffff, 0, NoFlag);
14179   FjcvtzsHelper(0x8000000000000001, 0, NoFlag);
14180 
14181   // Test floating-point numbers of every possible exponent. Most of the
14182   // expected values are zero, but there is a range of exponents where the
14183   // results are shifted parts of this mantissa.
14184   uint64_t mantissa = 0x0001234567890abc;
14185 
14186   // Between an exponent of 0 and 52, only some of the top bits of the
14187   // mantissa lie above the binary point, so the mantissa is shifted right
14188   // until just those top bits remain. Above 52, all of the mantissa bits
14189   // are shifted left of the binary point, until at an exponent of
14190   // 52 + 64 every bit has been shifted out of the range of 64-bit
14191   // integers.
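  // For example, with e == 20 the loop below expects
  // (UINT64_C(1) << 20) | (mantissa >> 32): the implicit integer bit
  // followed by the top 20 bits of the mantissa.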
14192   int first_exp_boundary = 52;
14193   int second_exp_boundary = first_exp_boundary + 64;
14194   for (int exponent = 0; exponent < 2048; exponent++) {
14195     int e = exponent - 1023;
14196 
14197     uint64_t expected = 0;
14198     if (e < 0) {
14199       expected = 0;
14200     } else if (e <= first_exp_boundary) {
14201       expected = (UINT64_C(1) << e) | (mantissa >> (52 - e));
14202       expected &= 0xffffffff;
14203     } else if (e < second_exp_boundary) {
14204       expected = (mantissa << (e - 52)) & 0xffffffff;
14205     } else {
14206       expected = 0;
14207     }
14208 
14209     uint64_t value = (static_cast<uint64_t>(exponent) << 52) | mantissa;
14210     FjcvtzsHelper(value, expected, NoFlag);
14211     FjcvtzsHelper(value | kDSignMask, (-expected) & 0xffffffff, NoFlag);
14212   }
14213 }
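
// A rough host-side model (an illustrative assumption, not the VIXL
// implementation) of the result values checked above: FJCVTZS behaves like
// JavaScript's ToInt32(), truncating towards zero and keeping the low 32
// bits of the result. Uses std::trunc, std::fmod and std::isfinite from
// <cmath>, which this file already includes.
static uint32_t JsToInt32Reference(double value) {
  if (!std::isfinite(value)) return 0;  // Infinities and NaNs become zero.
  double truncated = std::trunc(value);
  // fmod is exact for doubles and keeps the sign of its first operand, so
  // this reduces the value modulo 2^32 before the wrapping cast.
  double reduced = std::fmod(truncated, 4294967296.0);
  return static_cast<uint32_t>(static_cast<int64_t>(reduced));
}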
14214 
14215 TEST(fcvtzu) {
14216   SETUP_WITH_FEATURES(CPUFeatures::kFP);
14217 
14218   START();
14219   __ Fmov(s0, 1.0);
14220   __ Fmov(s1, 1.1);
14221   __ Fmov(s2, 1.5);
14222   __ Fmov(s3, -1.5);
14223   __ Fmov(s4, kFP32PositiveInfinity);
14224   __ Fmov(s5, kFP32NegativeInfinity);
14225   __ Fmov(s6, 0x7fffff80);  // Largest float < INT32_MAX.
14226   __ Fneg(s7, s6);          // Smallest float > INT32_MIN.
14227   __ Fmov(d8, 1.0);
14228   __ Fmov(d9, 1.1);
14229   __ Fmov(d10, 1.5);
14230   __ Fmov(d11, -1.5);
14231   __ Fmov(d12, kFP64PositiveInfinity);
14232   __ Fmov(d13, kFP64NegativeInfinity);
14233   __ Fmov(d14, kWMaxInt - 1);
14234   __ Fmov(d15, kWMinInt + 1);
14235   __ Fmov(s17, 1.1);
14236   __ Fmov(s18, 1.5);
14237   __ Fmov(s19, -1.5);
14238   __ Fmov(s20, kFP32PositiveInfinity);
14239   __ Fmov(s21, kFP32NegativeInfinity);
14240   __ Fmov(s22, 0x7fffff8000000000);  // Largest float < INT64_MAX.
14241   __ Fneg(s23, s22);                 // Smallest float > INT64_MIN.
14242   __ Fmov(d24, 1.1);
14243   __ Fmov(d25, 1.5);
14244   __ Fmov(d26, -1.5);
14245   __ Fmov(d27, kFP64PositiveInfinity);
14246   __ Fmov(d28, kFP64NegativeInfinity);
14247   __ Fmov(d29, 0x7ffffffffffffc00);  // Largest double < INT64_MAX.
14248   __ Fneg(d30, d29);                 // Smallest double > INT64_MIN.
14249 
14250   __ Fcvtzu(w0, s0);
14251   __ Fcvtzu(w1, s1);
14252   __ Fcvtzu(w2, s2);
14253   __ Fcvtzu(w3, s3);
14254   __ Fcvtzu(w4, s4);
14255   __ Fcvtzu(w5, s5);
14256   __ Fcvtzu(w6, s6);
14257   __ Fcvtzu(w7, s7);
14258   __ Fcvtzu(w8, d8);
14259   __ Fcvtzu(w9, d9);
14260   __ Fcvtzu(w10, d10);
14261   __ Fcvtzu(w11, d11);
14262   __ Fcvtzu(w12, d12);
14263   __ Fcvtzu(w13, d13);
14264   __ Fcvtzu(w14, d14);
14265   __ Fcvtzu(x17, s17);
14266   __ Fcvtzu(x18, s18);
14267   __ Fcvtzu(x19, s19);
14268   __ Fcvtzu(x20, s20);
14269   __ Fcvtzu(x21, s21);
14270   __ Fcvtzu(x22, s22);
14271   __ Fcvtzu(x23, s23);
14272   __ Fcvtzu(x24, d24);
14273   __ Fcvtzu(x25, d25);
14274   __ Fcvtzu(x26, d26);
14275   __ Fcvtzu(x27, d27);
14276   __ Fcvtzu(x28, d28);
14277   __ Fcvtzu(x29, d29);
14278   __ Fcvtzu(x30, d30);
14279   END();
14280 
14281   RUN();
14282 
14283   ASSERT_EQUAL_64(1, x0);
14284   ASSERT_EQUAL_64(1, x1);
14285   ASSERT_EQUAL_64(1, x2);
14286   ASSERT_EQUAL_64(0, x3);
14287   ASSERT_EQUAL_64(0xffffffff, x4);
14288   ASSERT_EQUAL_64(0, x5);
14289   ASSERT_EQUAL_64(0x7fffff80, x6);
14290   ASSERT_EQUAL_64(0, x7);
14291   ASSERT_EQUAL_64(1, x8);
14292   ASSERT_EQUAL_64(1, x9);
14293   ASSERT_EQUAL_64(1, x10);
14294   ASSERT_EQUAL_64(0, x11);
14295   ASSERT_EQUAL_64(0xffffffff, x12);
14296   ASSERT_EQUAL_64(0, x13);
14297   ASSERT_EQUAL_64(0x7ffffffe, x14);
14298   ASSERT_EQUAL_64(1, x17);
14299   ASSERT_EQUAL_64(1, x18);
14300   ASSERT_EQUAL_64(0, x19);
14301   ASSERT_EQUAL_64(0xffffffffffffffff, x20);
14302   ASSERT_EQUAL_64(0, x21);
14303   ASSERT_EQUAL_64(0x7fffff8000000000, x22);
14304   ASSERT_EQUAL_64(0, x23);
14305   ASSERT_EQUAL_64(1, x24);
14306   ASSERT_EQUAL_64(1, x25);
14307   ASSERT_EQUAL_64(0, x26);
14308   ASSERT_EQUAL_64(0xffffffffffffffff, x27);
14309   ASSERT_EQUAL_64(0, x28);
14310   ASSERT_EQUAL_64(0x7ffffffffffffc00, x29);
14311   ASSERT_EQUAL_64(0, x30);
14312 
14313   TEARDOWN();
14314 }
14315 
14316 
14317 TEST(neon_fcvtl) {
14318   SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);
14319 
14320   START();
14321 
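  // Fcvtl widens the low half of the source vector; Fcvtl2 widens the high
  // half.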
14322   __ Movi(v0.V2D(), 0x000080007efffeff, 0x3100b1007c00fc00);
14323   __ Movi(v1.V2D(), 0x03ff83ff00038003, 0x000180017c01fc01);
14324   __ Movi(v2.V2D(), 0x3e200000be200000, 0x7f800000ff800000);
14325   __ Movi(v3.V2D(), 0x0000000080000000, 0x7f8fffffff8fffff);
14326   __ Movi(v4.V2D(), 0x7fcfffffffcfffff, 0x0000000180000001);
14327   __ Fcvtl(v16.V4S(), v0.V4H());
14328   __ Fcvtl2(v17.V4S(), v0.V8H());
14329   __ Fcvtl(v18.V4S(), v1.V4H());
14330   __ Fcvtl2(v19.V4S(), v1.V8H());
14331 
14332   __ Fcvtl(v20.V2D(), v2.V2S());
14333   __ Fcvtl2(v21.V2D(), v2.V4S());
14334   __ Fcvtl(v22.V2D(), v3.V2S());
14335   __ Fcvtl2(v23.V2D(), v3.V4S());
14336   __ Fcvtl(v24.V2D(), v4.V2S());
14337   __ Fcvtl2(v25.V2D(), v4.V4S());
14338 
14339   END();
14340 
14341   RUN();
14342   ASSERT_EQUAL_128(0x3e200000be200000, 0x7f800000ff800000, q16);
14343   ASSERT_EQUAL_128(0x0000000080000000, 0x7fdfe000ffdfe000, q17);
14344   ASSERT_EQUAL_128(0x33800000b3800000, 0x7fc02000ffc02000, q18);
14345   ASSERT_EQUAL_128(0x387fc000b87fc000, 0x34400000b4400000, q19);
14346   ASSERT_EQUAL_128(0x7ff0000000000000, 0xfff0000000000000, q20);
14347   ASSERT_EQUAL_128(0x3fc4000000000000, 0xbfc4000000000000, q21);
14348   ASSERT_EQUAL_128(0x7ff9ffffe0000000, 0xfff9ffffe0000000, q22);
14349   ASSERT_EQUAL_128(0x0000000000000000, 0x8000000000000000, q23);
14350   ASSERT_EQUAL_128(0x36a0000000000000, 0xb6a0000000000000, q24);
14351   ASSERT_EQUAL_128(0x7ff9ffffe0000000, 0xfff9ffffe0000000, q25);
14352   TEARDOWN();
14353 }
14354 
14355 
14356 TEST(neon_fcvtn) {
14357   SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);
14358 
14359   START();
14360 
14361   __ Movi(v0.V2D(), 0x3e200000be200000, 0x7f800000ff800000);
14362   __ Movi(v1.V2D(), 0x0000000080000000, 0x7f8fffffff8fffff);
14363   __ Movi(v2.V2D(), 0x7fcfffffffcfffff, 0x0000000180000001);
14364   __ Movi(v3.V2D(), 0x3fc4000000000000, 0xbfc4000000000000);
14365   __ Movi(v4.V2D(), 0x7ff0000000000000, 0xfff0000000000000);
14366   __ Movi(v5.V2D(), 0x0000000000000000, 0x8000000000000000);
14367   __ Movi(v6.V2D(), 0x7ff0ffffffffffff, 0xfff0ffffffffffff);
14368   __ Movi(v7.V2D(), 0x7ff8ffffffffffff, 0xfff8ffffffffffff);
14369   __ Movi(v8.V2D(), 0x0000000000000001, 0x8000000000000001);
14370 
14371   __ Fcvtn(v16.V4H(), v0.V4S());
14372   __ Fcvtn2(v16.V8H(), v1.V4S());
14373   __ Fcvtn(v17.V4H(), v2.V4S());
14374   __ Fcvtn(v18.V2S(), v3.V2D());
14375   __ Fcvtn2(v18.V4S(), v4.V2D());
14376   __ Fcvtn(v19.V2S(), v5.V2D());
14377   __ Fcvtn2(v19.V4S(), v6.V2D());
14378   __ Fcvtn(v20.V2S(), v7.V2D());
14379   __ Fcvtn2(v20.V4S(), v8.V2D());
14380   END();
14381 
14382   RUN();
14383   ASSERT_EQUAL_128(0x000080007e7ffe7f, 0x3100b1007c00fc00, q16);
14384   ASSERT_EQUAL_64(0x7e7ffe7f00008000, d17);
14385   ASSERT_EQUAL_128(0x7f800000ff800000, 0x3e200000be200000, q18);
14386   ASSERT_EQUAL_128(0x7fc7ffffffc7ffff, 0x0000000080000000, q19);
14387   ASSERT_EQUAL_128(0x0000000080000000, 0x7fc7ffffffc7ffff, q20);
14388   TEARDOWN();
14389 }
14390 
14391 
14392 TEST(neon_fcvtxn) {
14393   SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);
14394 
14395   START();
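  // FCVTXN narrows with the "round to odd" mode: any inexact result has its
  // lowest mantissa bit forced to 1, which avoids double rounding if the
  // single-precision result is rounded again later.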
14396   __ Movi(v0.V2D(), 0x3e200000be200000, 0x7f800000ff800000);
14397   __ Movi(v1.V2D(), 0x0000000080000000, 0x7f8fffffff8fffff);
14398   __ Movi(v2.V2D(), 0x7fcfffffffcfffff, 0x0000000180000001);
14399   __ Movi(v3.V2D(), 0x3fc4000000000000, 0xbfc4000000000000);
14400   __ Movi(v4.V2D(), 0x7ff0000000000000, 0xfff0000000000000);
14401   __ Movi(v5.V2D(), 0x0000000000000000, 0x8000000000000000);
14402   __ Movi(v6.V2D(), 0x7ff0ffffffffffff, 0xfff0ffffffffffff);
14403   __ Movi(v7.V2D(), 0x7ff8ffffffffffff, 0xfff8ffffffffffff);
14404   __ Movi(v8.V2D(), 0x0000000000000001, 0x8000000000000001);
14405   __ Movi(v9.V2D(), 0x41ed000000000000, 0x41efffffffefffff);
14406   __ Fcvtxn(v16.V2S(), v0.V2D());
14407   __ Fcvtxn2(v16.V4S(), v1.V2D());
14408   __ Fcvtxn(v17.V2S(), v2.V2D());
14409   __ Fcvtxn2(v17.V4S(), v3.V2D());
14410   __ Fcvtxn(v18.V2S(), v4.V2D());
14411   __ Fcvtxn2(v18.V4S(), v5.V2D());
14412   __ Fcvtxn(v19.V2S(), v6.V2D());
14413   __ Fcvtxn2(v19.V4S(), v7.V2D());
14414   __ Fcvtxn(v20.V2S(), v8.V2D());
14415   __ Fcvtxn2(v20.V4S(), v9.V2D());
14416   __ Fcvtxn(s21, d0);
14417   END();
14418 
14419   RUN();
14420   ASSERT_EQUAL_128(0x000000017f7fffff, 0x310000057f7fffff, q16);
14421   ASSERT_EQUAL_128(0x3e200000be200000, 0x7f7fffff00000001, q17);
14422   ASSERT_EQUAL_128(0x0000000080000000, 0x7f800000ff800000, q18);
14423   ASSERT_EQUAL_128(0x7fc7ffffffc7ffff, 0x7fc7ffffffc7ffff, q19);
14424   ASSERT_EQUAL_128(0x4f6800004f7fffff, 0x0000000180000001, q20);
14425   ASSERT_EQUAL_128(0, 0x7f7fffff, q21);
14426   TEARDOWN();
14427 }
14428 
14429 
14430 // Test that scvtf and ucvtf can convert the 64-bit input into the expected
14431 // value. All possible values of 'fbits' are tested. The expected value is
14432 // modified accordingly in each case.
14433 //
14434 // The expected value is specified as the bit encoding of the expected double
14435 // produced by scvtf (expected_scvtf_bits) as well as ucvtf
14436 // (expected_ucvtf_bits).
14437 //
14438 // Where the input value is representable by int32_t or uint32_t, conversions
14439 // from W registers will also be tested.
14440 static void TestUScvtfHelper(uint64_t in,
14441                              uint64_t expected_scvtf_bits,
14442                              uint64_t expected_ucvtf_bits) {
14443   uint64_t u64 = in;
14444   uint32_t u32 = u64 & 0xffffffff;
14445   int64_t s64 = static_cast<int64_t>(in);
14446   int32_t s32 = s64 & 0x7fffffff;
14447 
14448   bool cvtf_s32 = (s64 == s32);
14449   bool cvtf_u32 = (u64 == u32);
14450 
14451   double results_scvtf_x[65];
14452   double results_ucvtf_x[65];
14453   double results_scvtf_w[33];
14454   double results_ucvtf_w[33];
14455 
14456   SETUP_WITH_FEATURES(CPUFeatures::kFP);
14457 
14458   START();
14459 
14460   __ Mov(x0, reinterpret_cast<uintptr_t>(results_scvtf_x));
14461   __ Mov(x1, reinterpret_cast<uintptr_t>(results_ucvtf_x));
14462   __ Mov(x2, reinterpret_cast<uintptr_t>(results_scvtf_w));
14463   __ Mov(x3, reinterpret_cast<uintptr_t>(results_ucvtf_w));
14464 
14465   __ Mov(x10, s64);
14466 
14467   // Corrupt the top word, in case it is accidentally used during W-register
14468   // conversions.
14469   __ Mov(x11, 0x5555555555555555);
14470   __ Bfi(x11, x10, 0, kWRegSize);
14471 
14472   // Test integer conversions.
14473   __ Scvtf(d0, x10);
14474   __ Ucvtf(d1, x10);
14475   __ Scvtf(d2, w11);
14476   __ Ucvtf(d3, w11);
14477   __ Str(d0, MemOperand(x0));
14478   __ Str(d1, MemOperand(x1));
14479   __ Str(d2, MemOperand(x2));
14480   __ Str(d3, MemOperand(x3));
14481 
14482   // Test all possible values of fbits.
14483   for (int fbits = 1; fbits <= 32; fbits++) {
14484     __ Scvtf(d0, x10, fbits);
14485     __ Ucvtf(d1, x10, fbits);
14486     __ Scvtf(d2, w11, fbits);
14487     __ Ucvtf(d3, w11, fbits);
14488     __ Str(d0, MemOperand(x0, fbits * kDRegSizeInBytes));
14489     __ Str(d1, MemOperand(x1, fbits * kDRegSizeInBytes));
14490     __ Str(d2, MemOperand(x2, fbits * kDRegSizeInBytes));
14491     __ Str(d3, MemOperand(x3, fbits * kDRegSizeInBytes));
14492   }
14493 
14494   // Conversions from W registers can only handle fbits values <= 32, so just
14495   // test conversions from X registers for 32 < fbits <= 64.
14496   for (int fbits = 33; fbits <= 64; fbits++) {
14497     __ Scvtf(d0, x10, fbits);
14498     __ Ucvtf(d1, x10, fbits);
14499     __ Str(d0, MemOperand(x0, fbits * kDRegSizeInBytes));
14500     __ Str(d1, MemOperand(x1, fbits * kDRegSizeInBytes));
14501   }
14502 
14503   END();
14504   RUN();
14505 
14506   // Check the results.
14507   double expected_scvtf_base = RawbitsToDouble(expected_scvtf_bits);
14508   double expected_ucvtf_base = RawbitsToDouble(expected_ucvtf_bits);
14509 
14510   for (int fbits = 0; fbits <= 32; fbits++) {
14511     double expected_scvtf = expected_scvtf_base / std::pow(2, fbits);
14512     double expected_ucvtf = expected_ucvtf_base / std::pow(2, fbits);
14513     ASSERT_EQUAL_FP64(expected_scvtf, results_scvtf_x[fbits]);
14514     ASSERT_EQUAL_FP64(expected_ucvtf, results_ucvtf_x[fbits]);
14515     if (cvtf_s32) ASSERT_EQUAL_FP64(expected_scvtf, results_scvtf_w[fbits]);
14516     if (cvtf_u32) ASSERT_EQUAL_FP64(expected_ucvtf, results_ucvtf_w[fbits]);
14517   }
14518   for (int fbits = 33; fbits <= 64; fbits++) {
14519     double expected_scvtf = expected_scvtf_base / std::pow(2, fbits);
14520     double expected_ucvtf = expected_ucvtf_base / std::pow(2, fbits);
14521     ASSERT_EQUAL_FP64(expected_scvtf, results_scvtf_x[fbits]);
14522     ASSERT_EQUAL_FP64(expected_ucvtf, results_ucvtf_x[fbits]);
14523   }
14524 
14525   TEARDOWN();
14526 }
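
// For reference, the fixed-point behaviour exercised above reduces to a
// divide by 2^fbits after an ordinary integer-to-double conversion. A
// minimal host-side sketch (assuming the host rounds to nearest, ties to
// even, as the checks above do):
static double ScvtfReference(int64_t value, int fbits) {
  return static_cast<double>(value) / std::pow(2.0, fbits);
}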
14527 
14528 
14529 TEST(scvtf_ucvtf_double) {
14530   // Simple conversions of positive numbers which require no rounding; the
14531   // results should not depend on the rounding mode, and ucvtf and scvtf should
14532   // produce the same result.
14533   TestUScvtfHelper(0x0000000000000000, 0x0000000000000000, 0x0000000000000000);
14534   TestUScvtfHelper(0x0000000000000001, 0x3ff0000000000000, 0x3ff0000000000000);
14535   TestUScvtfHelper(0x0000000040000000, 0x41d0000000000000, 0x41d0000000000000);
14536   TestUScvtfHelper(0x0000000100000000, 0x41f0000000000000, 0x41f0000000000000);
14537   TestUScvtfHelper(0x4000000000000000, 0x43d0000000000000, 0x43d0000000000000);
14538   // Test mantissa extremities.
14539   TestUScvtfHelper(0x4000000000000400, 0x43d0000000000001, 0x43d0000000000001);
14540   // The largest int32_t that fits in a double.
14541   TestUScvtfHelper(0x000000007fffffff, 0x41dfffffffc00000, 0x41dfffffffc00000);
14542   // Values that would be negative if treated as an int32_t.
14543   TestUScvtfHelper(0x00000000ffffffff, 0x41efffffffe00000, 0x41efffffffe00000);
14544   TestUScvtfHelper(0x0000000080000000, 0x41e0000000000000, 0x41e0000000000000);
14545   TestUScvtfHelper(0x0000000080000001, 0x41e0000000200000, 0x41e0000000200000);
14546   // The largest int64_t that fits in a double.
14547   TestUScvtfHelper(0x7ffffffffffffc00, 0x43dfffffffffffff, 0x43dfffffffffffff);
14548   // Check for bit pattern reproduction.
14549   TestUScvtfHelper(0x0123456789abcde0, 0x43723456789abcde, 0x43723456789abcde);
14550   TestUScvtfHelper(0x0000000012345678, 0x41b2345678000000, 0x41b2345678000000);
14551 
14552   // Simple conversions of negative int64_t values. These require no rounding,
14553   // and the results should not depend on the rounding mode.
14554   TestUScvtfHelper(0xffffffffc0000000, 0xc1d0000000000000, 0x43effffffff80000);
14555   TestUScvtfHelper(0xffffffff00000000, 0xc1f0000000000000, 0x43efffffffe00000);
14556   TestUScvtfHelper(0xc000000000000000, 0xc3d0000000000000, 0x43e8000000000000);
14557 
14558   // Conversions which require rounding.
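  // These inputs have 61 significant bits, eight more than a double can
  // hold, so scvtf and ucvtf must round to nearest, ties to even:
  // 0x1000000000000080 is a tie and rounds down to the even significand,
  // while 0x1000000000000180 is a tie and rounds up.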
14559   TestUScvtfHelper(0x1000000000000000, 0x43b0000000000000, 0x43b0000000000000);
14560   TestUScvtfHelper(0x1000000000000001, 0x43b0000000000000, 0x43b0000000000000);
14561   TestUScvtfHelper(0x1000000000000080, 0x43b0000000000000, 0x43b0000000000000);
14562   TestUScvtfHelper(0x1000000000000081, 0x43b0000000000001, 0x43b0000000000001);
14563   TestUScvtfHelper(0x1000000000000100, 0x43b0000000000001, 0x43b0000000000001);
14564   TestUScvtfHelper(0x1000000000000101, 0x43b0000000000001, 0x43b0000000000001);
14565   TestUScvtfHelper(0x1000000000000180, 0x43b0000000000002, 0x43b0000000000002);
14566   TestUScvtfHelper(0x1000000000000181, 0x43b0000000000002, 0x43b0000000000002);
14567   TestUScvtfHelper(0x1000000000000200, 0x43b0000000000002, 0x43b0000000000002);
14568   TestUScvtfHelper(0x1000000000000201, 0x43b0000000000002, 0x43b0000000000002);
14569   TestUScvtfHelper(0x1000000000000280, 0x43b0000000000002, 0x43b0000000000002);
14570   TestUScvtfHelper(0x1000000000000281, 0x43b0000000000003, 0x43b0000000000003);
14571   TestUScvtfHelper(0x1000000000000300, 0x43b0000000000003, 0x43b0000000000003);
14572   // Check rounding of negative int64_t values (and large uint64_t values).
14573   TestUScvtfHelper(0x8000000000000000, 0xc3e0000000000000, 0x43e0000000000000);
14574   TestUScvtfHelper(0x8000000000000001, 0xc3e0000000000000, 0x43e0000000000000);
14575   TestUScvtfHelper(0x8000000000000200, 0xc3e0000000000000, 0x43e0000000000000);
14576   TestUScvtfHelper(0x8000000000000201, 0xc3dfffffffffffff, 0x43e0000000000000);
14577   TestUScvtfHelper(0x8000000000000400, 0xc3dfffffffffffff, 0x43e0000000000000);
14578   TestUScvtfHelper(0x8000000000000401, 0xc3dfffffffffffff, 0x43e0000000000001);
14579   TestUScvtfHelper(0x8000000000000600, 0xc3dffffffffffffe, 0x43e0000000000001);
14580   TestUScvtfHelper(0x8000000000000601, 0xc3dffffffffffffe, 0x43e0000000000001);
14581   TestUScvtfHelper(0x8000000000000800, 0xc3dffffffffffffe, 0x43e0000000000001);
14582   TestUScvtfHelper(0x8000000000000801, 0xc3dffffffffffffe, 0x43e0000000000001);
14583   TestUScvtfHelper(0x8000000000000a00, 0xc3dffffffffffffe, 0x43e0000000000001);
14584   TestUScvtfHelper(0x8000000000000a01, 0xc3dffffffffffffd, 0x43e0000000000001);
14585   TestUScvtfHelper(0x8000000000000c00, 0xc3dffffffffffffd, 0x43e0000000000002);
14586   // Round up to produce a result that's too big for the input to represent.
14587   TestUScvtfHelper(0x7ffffffffffffe00, 0x43e0000000000000, 0x43e0000000000000);
14588   TestUScvtfHelper(0x7fffffffffffffff, 0x43e0000000000000, 0x43e0000000000000);
14589   TestUScvtfHelper(0xfffffffffffffc00, 0xc090000000000000, 0x43f0000000000000);
14590   TestUScvtfHelper(0xffffffffffffffff, 0xbff0000000000000, 0x43f0000000000000);
14591 }
14592 
14593 
14594 // The same as TestUScvtfHelper, but convert to floats.
14595 static void TestUScvtf32Helper(uint64_t in,
14596                                uint32_t expected_scvtf_bits,
14597                                uint32_t expected_ucvtf_bits) {
14598   uint64_t u64 = in;
14599   uint32_t u32 = u64 & 0xffffffff;
14600   int64_t s64 = static_cast<int64_t>(in);
14601   int32_t s32 = s64 & 0x7fffffff;
14602 
14603   bool cvtf_s32 = (s64 == s32);
14604   bool cvtf_u32 = (u64 == u32);
14605 
14606   float results_scvtf_x[65];
14607   float results_ucvtf_x[65];
14608   float results_scvtf_w[33];
14609   float results_ucvtf_w[33];
14610 
14611   SETUP_WITH_FEATURES(CPUFeatures::kFP);
14612 
14613   START();
14614 
14615   __ Mov(x0, reinterpret_cast<uintptr_t>(results_scvtf_x));
14616   __ Mov(x1, reinterpret_cast<uintptr_t>(results_ucvtf_x));
14617   __ Mov(x2, reinterpret_cast<uintptr_t>(results_scvtf_w));
14618   __ Mov(x3, reinterpret_cast<uintptr_t>(results_ucvtf_w));
14619 
14620   __ Mov(x10, s64);
14621 
14622   // Corrupt the top word, in case it is accidentally used during W-register
14623   // conversions.
14624   __ Mov(x11, 0x5555555555555555);
14625   __ Bfi(x11, x10, 0, kWRegSize);
14626 
14627   // Test integer conversions.
14628   __ Scvtf(s0, x10);
14629   __ Ucvtf(s1, x10);
14630   __ Scvtf(s2, w11);
14631   __ Ucvtf(s3, w11);
14632   __ Str(s0, MemOperand(x0));
14633   __ Str(s1, MemOperand(x1));
14634   __ Str(s2, MemOperand(x2));
14635   __ Str(s3, MemOperand(x3));
14636 
14637   // Test all possible values of fbits.
14638   for (int fbits = 1; fbits <= 32; fbits++) {
14639     __ Scvtf(s0, x10, fbits);
14640     __ Ucvtf(s1, x10, fbits);
14641     __ Scvtf(s2, w11, fbits);
14642     __ Ucvtf(s3, w11, fbits);
14643     __ Str(s0, MemOperand(x0, fbits * kSRegSizeInBytes));
14644     __ Str(s1, MemOperand(x1, fbits * kSRegSizeInBytes));
14645     __ Str(s2, MemOperand(x2, fbits * kSRegSizeInBytes));
14646     __ Str(s3, MemOperand(x3, fbits * kSRegSizeInBytes));
14647   }
14648 
14649   // Conversions from W registers can only handle fbits values <= 32, so just
14650   // test conversions from X registers for 32 < fbits <= 64.
14651   for (int fbits = 33; fbits <= 64; fbits++) {
14652     __ Scvtf(s0, x10, fbits);
14653     __ Ucvtf(s1, x10, fbits);
14654     __ Str(s0, MemOperand(x0, fbits * kSRegSizeInBytes));
14655     __ Str(s1, MemOperand(x1, fbits * kSRegSizeInBytes));
14656   }
14657 
14658   END();
14659   RUN();
14660 
14661   // Check the results.
14662   float expected_scvtf_base = RawbitsToFloat(expected_scvtf_bits);
14663   float expected_ucvtf_base = RawbitsToFloat(expected_ucvtf_bits);
14664 
14665   for (int fbits = 0; fbits <= 32; fbits++) {
14666     float expected_scvtf = expected_scvtf_base / std::pow(2.0f, fbits);
14667     float expected_ucvtf = expected_ucvtf_base / std::pow(2.0f, fbits);
14668     ASSERT_EQUAL_FP32(expected_scvtf, results_scvtf_x[fbits]);
14669     ASSERT_EQUAL_FP32(expected_ucvtf, results_ucvtf_x[fbits]);
14670     if (cvtf_s32) ASSERT_EQUAL_FP32(expected_scvtf, results_scvtf_w[fbits]);
14671     if (cvtf_u32) ASSERT_EQUAL_FP32(expected_ucvtf, results_ucvtf_w[fbits]);
14672   }
14673   for (int fbits = 33; fbits <= 64; fbits++) {
14674     float expected_scvtf = expected_scvtf_base / std::pow(2.0f, fbits);
14675     float expected_ucvtf = expected_ucvtf_base / std::pow(2.0f, fbits);
14676     ASSERT_EQUAL_FP32(expected_scvtf, results_scvtf_x[fbits]);
14677     ASSERT_EQUAL_FP32(expected_ucvtf, results_ucvtf_x[fbits]);
14678   }
14679 
14680   TEARDOWN();
14681 }
14682 
14683 
14684 TEST(scvtf_ucvtf_float) {
14685   // Simple conversions of positive numbers which require no rounding; the
14686   // results should not depend on the rounding mode, and ucvtf and scvtf should
14687   // produce the same result.
14688   TestUScvtf32Helper(0x0000000000000000, 0x00000000, 0x00000000);
14689   TestUScvtf32Helper(0x0000000000000001, 0x3f800000, 0x3f800000);
14690   TestUScvtf32Helper(0x0000000040000000, 0x4e800000, 0x4e800000);
14691   TestUScvtf32Helper(0x0000000100000000, 0x4f800000, 0x4f800000);
14692   TestUScvtf32Helper(0x4000000000000000, 0x5e800000, 0x5e800000);
14693   // Test mantissa extremities.
14694   TestUScvtf32Helper(0x0000000000800001, 0x4b000001, 0x4b000001);
14695   TestUScvtf32Helper(0x4000008000000000, 0x5e800001, 0x5e800001);
14696   // The largest int32_t that fits in a float.
14697   TestUScvtf32Helper(0x000000007fffff80, 0x4effffff, 0x4effffff);
14698   // Values that would be negative if treated as an int32_t.
14699   TestUScvtf32Helper(0x00000000ffffff00, 0x4f7fffff, 0x4f7fffff);
14700   TestUScvtf32Helper(0x0000000080000000, 0x4f000000, 0x4f000000);
14701   TestUScvtf32Helper(0x0000000080000100, 0x4f000001, 0x4f000001);
14702   // The largest int64_t that fits in a float.
14703   TestUScvtf32Helper(0x7fffff8000000000, 0x5effffff, 0x5effffff);
14704   // Check for bit pattern reproduction.
14705   TestUScvtf32Helper(0x0000000000876543, 0x4b076543, 0x4b076543);
14706 
14707   // Simple conversions of negative int64_t values. These require no rounding,
14708   // and the results should not depend on the rounding mode.
14709   TestUScvtf32Helper(0xfffffc0000000000, 0xd4800000, 0x5f7ffffc);
14710   TestUScvtf32Helper(0xc000000000000000, 0xde800000, 0x5f400000);
14711 
14712   // Conversions which require rounding.
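  // As above, but floats keep only 24 significant bits, so these 48-bit
  // inputs round at a granularity of 2^24, again with ties to even.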
14713   TestUScvtf32Helper(0x0000800000000000, 0x57000000, 0x57000000);
14714   TestUScvtf32Helper(0x0000800000000001, 0x57000000, 0x57000000);
14715   TestUScvtf32Helper(0x0000800000800000, 0x57000000, 0x57000000);
14716   TestUScvtf32Helper(0x0000800000800001, 0x57000001, 0x57000001);
14717   TestUScvtf32Helper(0x0000800001000000, 0x57000001, 0x57000001);
14718   TestUScvtf32Helper(0x0000800001000001, 0x57000001, 0x57000001);
14719   TestUScvtf32Helper(0x0000800001800000, 0x57000002, 0x57000002);
14720   TestUScvtf32Helper(0x0000800001800001, 0x57000002, 0x57000002);
14721   TestUScvtf32Helper(0x0000800002000000, 0x57000002, 0x57000002);
14722   TestUScvtf32Helper(0x0000800002000001, 0x57000002, 0x57000002);
14723   TestUScvtf32Helper(0x0000800002800000, 0x57000002, 0x57000002);
14724   TestUScvtf32Helper(0x0000800002800001, 0x57000003, 0x57000003);
14725   TestUScvtf32Helper(0x0000800003000000, 0x57000003, 0x57000003);
14726   // Check rounding of negative int64_t values (and large uint64_t values).
14727   TestUScvtf32Helper(0x8000000000000000, 0xdf000000, 0x5f000000);
14728   TestUScvtf32Helper(0x8000000000000001, 0xdf000000, 0x5f000000);
14729   TestUScvtf32Helper(0x8000004000000000, 0xdf000000, 0x5f000000);
14730   TestUScvtf32Helper(0x8000004000000001, 0xdeffffff, 0x5f000000);
14731   TestUScvtf32Helper(0x8000008000000000, 0xdeffffff, 0x5f000000);
14732   TestUScvtf32Helper(0x8000008000000001, 0xdeffffff, 0x5f000001);
14733   TestUScvtf32Helper(0x800000c000000000, 0xdefffffe, 0x5f000001);
14734   TestUScvtf32Helper(0x800000c000000001, 0xdefffffe, 0x5f000001);
14735   TestUScvtf32Helper(0x8000010000000000, 0xdefffffe, 0x5f000001);
14736   TestUScvtf32Helper(0x8000010000000001, 0xdefffffe, 0x5f000001);
14737   TestUScvtf32Helper(0x8000014000000000, 0xdefffffe, 0x5f000001);
14738   TestUScvtf32Helper(0x8000014000000001, 0xdefffffd, 0x5f000001);
14739   TestUScvtf32Helper(0x8000018000000000, 0xdefffffd, 0x5f000002);
14740   // Round up to produce a result that's too big for the input to represent.
14741   TestUScvtf32Helper(0x000000007fffffc0, 0x4f000000, 0x4f000000);
14742   TestUScvtf32Helper(0x000000007fffffff, 0x4f000000, 0x4f000000);
14743   TestUScvtf32Helper(0x00000000ffffff80, 0x4f800000, 0x4f800000);
14744   TestUScvtf32Helper(0x00000000ffffffff, 0x4f800000, 0x4f800000);
14745   TestUScvtf32Helper(0x7fffffc000000000, 0x5f000000, 0x5f000000);
14746   TestUScvtf32Helper(0x7fffffffffffffff, 0x5f000000, 0x5f000000);
14747   TestUScvtf32Helper(0xffffff8000000000, 0xd3000000, 0x5f800000);
14748   TestUScvtf32Helper(0xffffffffffffffff, 0xbf800000, 0x5f800000);
14749 }
14750 
14751 
14752 TEST(system_mrs) {
14753   SETUP();
14754 
14755   START();
14756   __ Mov(w0, 0);
14757   __ Mov(w1, 1);
14758   __ Mov(w2, 0x80000000);
14759 
14760   // Set the Z and C flags.
14761   __ Cmp(w0, w0);
14762   __ Mrs(x3, NZCV);
14763 
14764   // Set the N flag.
14765   __ Cmp(w0, w1);
14766   __ Mrs(x4, NZCV);
14767 
14768   // Set the Z, C and V flags.
14769   __ Adds(w0, w2, w2);
14770   __ Mrs(x5, NZCV);
14771 
14772   // Read the default FPCR.
14773   __ Mrs(x6, FPCR);
14774   END();
14775 
14776   RUN();
14777 
14778   // NZCV
14779   ASSERT_EQUAL_32(ZCFlag, w3);
14780   ASSERT_EQUAL_32(NFlag, w4);
14781   ASSERT_EQUAL_32(ZCVFlag, w5);
14782 
14783   // FPCR
14784   // The default FPCR on Linux-based platforms is 0.
14785   ASSERT_EQUAL_32(0, w6);
14786 
14787   TEARDOWN();
14788 }
14789 
14790 
14791 TEST(system_msr) {
14792   // All FPCR fields that must be implemented: AHP, DN, FZ, RMode
14793   const uint64_t fpcr_core = 0x07c00000;
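  // (AHP is FPCR bit 26, DN bit 25, FZ bit 24 and RMode bits 23-22, which
  // together give the 0x07c00000 mask.)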
14794 
14795   // All FPCR fields (including fields which may be read-as-zero):
14796   //  Stride, Len
14797   //  IDE, IXE, UFE, OFE, DZE, IOE
14798   const uint64_t fpcr_all = fpcr_core | 0x00379f00;
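  // (Stride occupies bits 21-20, Len bits 18-16, IDE bit 15 and the trap
  // enables IXE, UFE, OFE, DZE and IOE bits 12-8, adding 0x00379f00.)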
14799 
14800   SETUP();
14801 
14802   START();
14803   __ Mov(w0, 0);
14804   __ Mov(w1, 0x7fffffff);
14805 
14806   __ Mov(x7, 0);
14807 
14808   __ Mov(x10, NVFlag);
14809   __ Cmp(w0, w0);     // Set Z and C.
14810   __ Msr(NZCV, x10);  // Set N and V.
14811   // The Msr should have overwritten every flag set by the Cmp.
14812   __ Cinc(x7, x7, mi);  // N
14813   __ Cinc(x7, x7, ne);  // !Z
14814   __ Cinc(x7, x7, lo);  // !C
14815   __ Cinc(x7, x7, vs);  // V
14816 
14817   __ Mov(x10, ZCFlag);
14818   __ Cmn(w1, w1);     // Set N and V.
14819   __ Msr(NZCV, x10);  // Set Z and C.
14820   // The Msr should have overwritten every flag set by the Cmn.
14821   __ Cinc(x7, x7, pl);  // !N
14822   __ Cinc(x7, x7, eq);  // Z
14823   __ Cinc(x7, x7, hs);  // C
14824   __ Cinc(x7, x7, vc);  // !V
14825 
14826   // All core FPCR fields must be writable.
14827   __ Mov(x8, fpcr_core);
14828   __ Msr(FPCR, x8);
14829   __ Mrs(x8, FPCR);
14830 
14831   // All FPCR fields, including optional ones. This part of the test doesn't
14832   // achieve much other than ensuring that supported fields can be cleared by
14833   // the next test.
14834   __ Mov(x9, fpcr_all);
14835   __ Msr(FPCR, x9);
14836   __ Mrs(x9, FPCR);
14837   __ And(x9, x9, fpcr_core);
14838 
14839   // The undefined bits must ignore writes.
14840   // It's conceivable that a future version of the architecture could use these
14841   // fields (making this test fail), but in the meantime this is a useful test
14842   // for the simulator.
14843   __ Mov(x10, ~fpcr_all);
14844   __ Msr(FPCR, x10);
14845   __ Mrs(x10, FPCR);
14846 
14847   END();
14848 
14849   RUN();
14850 
14851   // We should have incremented x7 (from 0) exactly 8 times.
14852   ASSERT_EQUAL_64(8, x7);
14853 
14854   ASSERT_EQUAL_64(fpcr_core, x8);
14855   ASSERT_EQUAL_64(fpcr_core, x9);
14856   ASSERT_EQUAL_64(0, x10);
14857 
14858   TEARDOWN();
14859 }
14860 
14861 
14862 TEST(system_pauth_a) {
14863   SETUP_WITH_FEATURES(CPUFeatures::kPAuth);
14864   START();
14865 
14866   // Exclude x16 and x17 from the scratch register list so we can use
14867   // Pac/Autia1716 safely.
14868   UseScratchRegisterScope temps(&masm);
14869   temps.Exclude(x16, x17);
14870   temps.Include(x10, x11);
14871 
14872   // Backup stack pointer.
14873   __ Mov(x20, sp);
14874 
14875   // Modifiers
14876   __ Mov(x16, 0x477d469dec0b8760);
14877   __ Mov(sp, 0x477d469dec0b8760);
14878 
14879   // Generate PACs using the 3 system instructions.
14880   __ Mov(x17, 0x0000000012345678);
14881   __ Pacia1716();
14882   __ Mov(x0, x17);
14883 
14884   __ Mov(lr, 0x0000000012345678);
14885   __ Paciaz();
14886   __ Mov(x1, lr);
14887 
14888   __ Mov(lr, 0x0000000012345678);
14889   __ Paciasp();
14890   __ Mov(x2, lr);
14891 
14892   // Authenticate the pointers above.
14893   __ Mov(x17, x0);
14894   __ Autia1716();
14895   __ Mov(x3, x17);
14896 
14897   __ Mov(lr, x1);
14898   __ Autiaz();
14899   __ Mov(x4, lr);
14900 
14901   __ Mov(lr, x2);
14902   __ Autiasp();
14903   __ Mov(x5, lr);
14904 
14905   // Attempt to authenticate incorrect pointers.
14906   __ Mov(x17, x1);
14907   __ Autia1716();
14908   __ Mov(x6, x17);
14909 
14910   __ Mov(lr, x0);
14911   __ Autiaz();
14912   __ Mov(x7, lr);
14913 
14914   __ Mov(lr, x1);
14915   __ Autiasp();
14916   __ Mov(x8, lr);
14917 
14918   // Strip the PAC code from the pointer in x0.
14919   __ Mov(lr, x0);
14920   __ Xpaclri();
14921   __ Mov(x9, lr);
14922 
14923   // Restore stack pointer.
14924   __ Mov(sp, x20);
14925 
14926   // Mask out just the PAC code bits.
14927   // TODO: use Simulator::CalculatePACMask in a nice way.
14928   __ And(x0, x0, 0x007f000000000000);
14929   __ And(x1, x1, 0x007f000000000000);
14930   __ And(x2, x2, 0x007f000000000000);
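  // The mask selects bits [54:48]. Assuming 48-bit virtual addresses with
  // TBI enabled (which is what the simulator is expected to model here),
  // this is the range where the PAC is inserted; the exact span is
  // configuration-dependent, hence the TODO above about using
  // Simulator::CalculatePACMask.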
14931 
14932   END();
14933 
14934 #ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
14935   RUN();
14936 
14937   // Check PAC codes have been generated and aren't equal.
14938   // NOTE: with a different ComputePAC implementation, there may be a collision.
14939   ASSERT_NOT_EQUAL_64(0, x0);
14940   ASSERT_NOT_EQUAL_64(0, x1);
14941   ASSERT_NOT_EQUAL_64(0, x2);
14942   ASSERT_NOT_EQUAL_64(x0, x1);
14943   ASSERT_EQUAL_64(x0, x2);
14944 
14945   // Pointers correctly authenticated.
14946   ASSERT_EQUAL_64(0x0000000012345678, x3);
14947   ASSERT_EQUAL_64(0x0000000012345678, x4);
14948   ASSERT_EQUAL_64(0x0000000012345678, x5);
14949 
14950   // Pointers corrupted after failing to authenticate.
14951   ASSERT_EQUAL_64(0x0020000012345678, x6);
14952   ASSERT_EQUAL_64(0x0020000012345678, x7);
14953   ASSERT_EQUAL_64(0x0020000012345678, x8);
14954 
14955   // Pointer with code stripped.
14956   ASSERT_EQUAL_64(0x0000000012345678, x9);
14957 #endif  // VIXL_INCLUDE_SIMULATOR_AARCH64
14958 
14959   TEARDOWN();
14960 }
14961 
14962 
14963 TEST(system_pauth_b) {
14964   SETUP_WITH_FEATURES(CPUFeatures::kPAuth);
14965   START();
14966 
14967   // Exclude x16 and x17 from the scratch register list so we can use
14968   // Pac/Autia1716 safely.
14969   UseScratchRegisterScope temps(&masm);
14970   temps.Exclude(x16, x17);
14971   temps.Include(x10, x11);
14972 
14973   // Backup stack pointer.
14974   __ Mov(x20, sp);
14975 
14976   // Modifiers
14977   __ Mov(x16, 0x477d469dec0b8760);
14978   __ Mov(sp, 0x477d469dec0b8760);
14979 
14980   // Generate PACs using the 3 system instructions.
14981   __ Mov(x17, 0x0000000012345678);
14982   __ Pacib1716();
14983   __ Mov(x0, x17);
14984 
14985   __ Mov(lr, 0x0000000012345678);
14986   __ Pacibz();
14987   __ Mov(x1, lr);
14988 
14989   __ Mov(lr, 0x0000000012345678);
14990   __ Pacibsp();
14991   __ Mov(x2, lr);
14992 
14993   // Authenticate the pointers above.
14994   __ Mov(x17, x0);
14995   __ Autib1716();
14996   __ Mov(x3, x17);
14997 
14998   __ Mov(lr, x1);
14999   __ Autibz();
15000   __ Mov(x4, lr);
15001 
15002   __ Mov(lr, x2);
15003   __ Autibsp();
15004   __ Mov(x5, lr);
15005 
15006   // Attempt to authenticate incorrect pointers.
15007   __ Mov(x17, x1);
15008   __ Autib1716();
15009   __ Mov(x6, x17);
15010 
15011   __ Mov(lr, x0);
15012   __ Autibz();
15013   __ Mov(x7, lr);
15014 
15015   __ Mov(lr, x1);
15016   __ Autibsp();
15017   __ Mov(x8, lr);
15018 
15019   // Strip the PAC code from the pointer in x0.
15020   __ Mov(lr, x0);
15021   __ Xpaclri();
15022   __ Mov(x9, lr);
15023 
15024   // Restore stack pointer.
15025   __ Mov(sp, x20);
15026 
15027   // Mask out just the PAC code bits.
15028   // TODO: use Simulator::CalculatePACMask in a nice way.
15029   __ And(x0, x0, 0x007f000000000000);
15030   __ And(x1, x1, 0x007f000000000000);
15031   __ And(x2, x2, 0x007f000000000000);
15032 
15033   END();
15034 
15035 #ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
15036   RUN();
15037 
15038   // Check PAC codes have been generated and aren't equal.
15039   // NOTE: with a different ComputePAC implementation, there may be a collision.
15040   ASSERT_NOT_EQUAL_64(0, x0);
15041   ASSERT_NOT_EQUAL_64(0, x1);
15042   ASSERT_NOT_EQUAL_64(0, x2);
15043   ASSERT_NOT_EQUAL_64(x0, x1);
15044   ASSERT_EQUAL_64(x0, x2);
15045 
15046   // Pointers correctly authenticated.
15047   ASSERT_EQUAL_64(0x0000000012345678, x3);
15048   ASSERT_EQUAL_64(0x0000000012345678, x4);
15049   ASSERT_EQUAL_64(0x0000000012345678, x5);
15050 
15051   // Pointers corrupted after failing to authenticate.
15052   ASSERT_EQUAL_64(0x0040000012345678, x6);
15053   ASSERT_EQUAL_64(0x0040000012345678, x7);
15054   ASSERT_EQUAL_64(0x0040000012345678, x8);
15055 
15056   // Pointer with code stripped.
15057   ASSERT_EQUAL_64(0x0000000012345678, x9);
15058 #endif  // VIXL_INCLUDE_SIMULATOR_AARCH64
15059 
15060   TEARDOWN();
15061 }
15062 
15063 #ifdef VIXL_NEGATIVE_TESTING
15064 TEST(system_pauth_negative_test) {
15065   SETUP_WITH_FEATURES(CPUFeatures::kPAuth);
15066   START();
15067 
15068   // Test for an assert (independent of order).
15069   MUST_FAIL_WITH_MESSAGE(__ Pacia1716(),
15070                          "Assertion failed "
15071                          "(!GetScratchRegisterList()->IncludesAliasOf(");
15072 
15073   // Test for x16 assert.
15074   {
15075     UseScratchRegisterScope temps(&masm);
15076     temps.Exclude(x17);
15077     temps.Include(x16);
15078     MUST_FAIL_WITH_MESSAGE(__ Pacia1716(),
15079                            "Assertion failed "
15080                            "(!GetScratchRegisterList()->IncludesAliasOf(x16))");
15081   }
15082 
15083   // Test for x17 assert.
15084   {
15085     UseScratchRegisterScope temps(&masm);
15086     temps.Exclude(x16);
15087     temps.Include(x17);
15088     MUST_FAIL_WITH_MESSAGE(__ Pacia1716(),
15089                            "Assertion failed "
15090                            "(!GetScratchRegisterList()->IncludesAliasOf(x17))");
15091   }
15092 
15093   // Repeat first test for other 1716 instructions.
15094   MUST_FAIL_WITH_MESSAGE(__ Pacib1716(),
15095                          "Assertion failed "
15096                          "(!GetScratchRegisterList()->IncludesAliasOf(");
15097   MUST_FAIL_WITH_MESSAGE(__ Autia1716(),
15098                          "Assertion failed "
15099                          "(!GetScratchRegisterList()->IncludesAliasOf(");
15100   MUST_FAIL_WITH_MESSAGE(__ Autib1716(),
15101                          "Assertion failed "
15102                          "(!GetScratchRegisterList()->IncludesAliasOf(");
15103 
15104   END();
15105   TEARDOWN();
15106 }
15107 #endif  // VIXL_NEGATIVE_TESTING
15108 
15109 
15110 TEST(system) {
15111   // RegisterDump::Dump uses NEON.
15112   SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kRAS);
15113   RegisterDump before;
15114 
15115   START();
15116   before.Dump(&masm);
15117   __ Nop();
15118   __ Esb();
15119   __ Csdb();
15120   END();
15121 
15122   RUN();
15123 
15124   ASSERT_EQUAL_REGISTERS(before);
15125   ASSERT_EQUAL_NZCV(before.flags_nzcv());
15126 
15127   TEARDOWN();
15128 }
15129 
15130 
15131 TEST(zero_dest) {
15132   // RegisterDump::Dump uses NEON.
15133   SETUP_WITH_FEATURES(CPUFeatures::kNEON);
15134   RegisterDump before;
15135 
15136   START();
15137   // Preserve the stack pointer, in case we clobber it.
15138   __ Mov(x30, sp);
15139   // Initialize the other registers used in this test.
15140   uint64_t literal_base = 0x0100001000100101;
15141   __ Mov(x0, 0);
15142   __ Mov(x1, literal_base);
15143   for (unsigned i = 2; i < x30.GetCode(); i++) {
15144     __ Add(Register::GetXRegFromCode(i), Register::GetXRegFromCode(i - 1), x1);
15145   }
15146   before.Dump(&masm);
15147 
15148   // All of these instructions should be NOPs in these forms, but have
15149   // alternate forms which can write into the stack pointer.
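  // For example, the shifted-register form of add below treats Rd == 31 as
  // xzr, making it an architectural NOP, whereas the extended-register form
  // treats Rd == 31 as sp, so something like
  //   __ add(sp, x0, Operand(x1, UXTX));
  // really would update the stack pointer.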
15150   {
15151     ExactAssemblyScope scope(&masm, 3 * 7 * kInstructionSize);
15152     __ add(xzr, x0, x1);
15153     __ add(xzr, x1, xzr);
15154     __ add(xzr, xzr, x1);
15155 
15156     __ and_(xzr, x0, x2);
15157     __ and_(xzr, x2, xzr);
15158     __ and_(xzr, xzr, x2);
15159 
15160     __ bic(xzr, x0, x3);
15161     __ bic(xzr, x3, xzr);
15162     __ bic(xzr, xzr, x3);
15163 
15164     __ eon(xzr, x0, x4);
15165     __ eon(xzr, x4, xzr);
15166     __ eon(xzr, xzr, x4);
15167 
15168     __ eor(xzr, x0, x5);
15169     __ eor(xzr, x5, xzr);
15170     __ eor(xzr, xzr, x5);
15171 
15172     __ orr(xzr, x0, x6);
15173     __ orr(xzr, x6, xzr);
15174     __ orr(xzr, xzr, x6);
15175 
15176     __ sub(xzr, x0, x7);
15177     __ sub(xzr, x7, xzr);
15178     __ sub(xzr, xzr, x7);
15179   }
15180 
15181   // Swap the saved stack pointer with the real one. If sp was written
15182   // during the test, it will show up in x30. This is done because the test
15183   // framework assumes that sp will be valid at the end of the test.
15184   __ Mov(x29, x30);
15185   __ Mov(x30, sp);
15186   __ Mov(sp, x29);
15187   // We used x29 as a scratch register, so reset it to make sure it doesn't
15188   // trigger a test failure.
15189   __ Add(x29, x28, x1);
15190   END();
15191 
15192   RUN();
15193 
15194   ASSERT_EQUAL_REGISTERS(before);
15195   ASSERT_EQUAL_NZCV(before.flags_nzcv());
15196 
15197   TEARDOWN();
15198 }
15199 
15200 
15201 TEST(zero_dest_setflags) {
15202   // RegisterDump::Dump uses NEON.
15203   SETUP_WITH_FEATURES(CPUFeatures::kNEON);
15204   RegisterDump before;
15205 
15206   START();
15207   // Preserve the stack pointer, in case we clobber it.
15208   __ Mov(x30, sp);
15209   // Initialize the other registers used in this test.
15210   uint64_t literal_base = 0x0100001000100101;
15211   __ Mov(x0, 0);
15212   __ Mov(x1, literal_base);
15213   for (int i = 2; i < 30; i++) {
15214     __ Add(Register::GetXRegFromCode(i), Register::GetXRegFromCode(i - 1), x1);
15215   }
15216   before.Dump(&masm);
15217 
15218   // All of these instructions should only write to the flags in these forms,
15219   // but have alternate forms which can write into the stack pointer.
15220   {
15221     ExactAssemblyScope scope(&masm, 6 * kInstructionSize);
15222     __ adds(xzr, x0, Operand(x1, UXTX));
15223     __ adds(xzr, x1, Operand(xzr, UXTX));
15224     __ adds(xzr, x1, 1234);
15225     __ adds(xzr, x0, x1);
15226     __ adds(xzr, x1, xzr);
15227     __ adds(xzr, xzr, x1);
15228   }
15229 
15230   {
15231     ExactAssemblyScope scope(&masm, 5 * kInstructionSize);
15232     __ ands(xzr, x2, ~0xf);
15233     __ ands(xzr, xzr, ~0xf);
15234     __ ands(xzr, x0, x2);
15235     __ ands(xzr, x2, xzr);
15236     __ ands(xzr, xzr, x2);
15237   }
15238 
15239   {
15240     ExactAssemblyScope scope(&masm, 5 * kInstructionSize);
15241     __ bics(xzr, x3, ~0xf);
15242     __ bics(xzr, xzr, ~0xf);
15243     __ bics(xzr, x0, x3);
15244     __ bics(xzr, x3, xzr);
15245     __ bics(xzr, xzr, x3);
15246   }
15247 
15248   {
15249     ExactAssemblyScope scope(&masm, 6 * kInstructionSize);
15250     __ subs(xzr, x0, Operand(x3, UXTX));
15251     __ subs(xzr, x3, Operand(xzr, UXTX));
15252     __ subs(xzr, x3, 1234);
15253     __ subs(xzr, x0, x3);
15254     __ subs(xzr, x3, xzr);
15255     __ subs(xzr, xzr, x3);
15256   }
15257 
15258   // Swap the saved stack pointer with the real one. If sp was written
15259   // during the test, it will show up in x30. This is done because the test
15260   // framework assumes that sp will be valid at the end of the test.
15261   __ Mov(x29, x30);
15262   __ Mov(x30, sp);
15263   __ Mov(sp, x29);
15264   // We used x29 as a scratch register, so reset it to make sure it doesn't
15265   // trigger a test failure.
15266   __ Add(x29, x28, x1);
15267   END();
15268 
15269   RUN();
15270 
15271   ASSERT_EQUAL_REGISTERS(before);
15272 
15273   TEARDOWN();
15274 }
15275 
15276 
15277 TEST(stack_pointer_override) {
15278   // This test generates some stack maintenance code, but the test only checks
15279   // the reported state.
15280   SETUP();
15281   START();
15282 
15283   // The default stack pointer in VIXL is sp.
15284   VIXL_CHECK(sp.Is(__ StackPointer()));
15285   __ SetStackPointer(x0);
15286   VIXL_CHECK(x0.Is(__ StackPointer()));
15287   __ SetStackPointer(x28);
15288   VIXL_CHECK(x28.Is(__ StackPointer()));
15289   __ SetStackPointer(sp);
15290   VIXL_CHECK(sp.Is(__ StackPointer()));
15291 
15292   END();
15293   RUN();
15294   TEARDOWN();
15295 }
15296 
15297 
15298 TEST(peek_poke_simple) {
15299   SETUP();
15300   START();
15301 
15302   static const RegList x0_to_x3 =
15303       x0.GetBit() | x1.GetBit() | x2.GetBit() | x3.GetBit();
15304   static const RegList x10_to_x13 =
15305       x10.GetBit() | x11.GetBit() | x12.GetBit() | x13.GetBit();
15306 
15307   // The literal base is chosen to have two useful properties:
15308   //  * When multiplied by small values (such as a register index), this value
15309   //    is clearly readable in the result.
15310   //  * The value is not formed from repeating fixed-size smaller values, so it
15311   //    can be used to detect endianness-related errors.
15312   uint64_t literal_base = 0x0100001000100101;
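  // For example, the initialization below gives
  //   x1 = 2 * literal_base = 0x0200002000200202
  //   x3 = 4 * literal_base = 0x0400004000400404
  // so the register index is visible in every field of a dumped value.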
15313 
15314   // Initialize the registers.
15315   __ Mov(x0, literal_base);
15316   __ Add(x1, x0, x0);
15317   __ Add(x2, x1, x0);
15318   __ Add(x3, x2, x0);
15319 
15320   __ Claim(32);
15321 
15322   // Simple exchange.
15323   //  After this test:
15324   //    x0-x3 should be unchanged.
15325   //    w10-w13 should contain the lower words of x0-x3.
15326   __ Poke(x0, 0);
15327   __ Poke(x1, 8);
15328   __ Poke(x2, 16);
15329   __ Poke(x3, 24);
15330   Clobber(&masm, x0_to_x3);
15331   __ Peek(x0, 0);
15332   __ Peek(x1, 8);
15333   __ Peek(x2, 16);
15334   __ Peek(x3, 24);
15335 
15336   __ Poke(w0, 0);
15337   __ Poke(w1, 4);
15338   __ Poke(w2, 8);
15339   __ Poke(w3, 12);
15340   Clobber(&masm, x10_to_x13);
15341   __ Peek(w10, 0);
15342   __ Peek(w11, 4);
15343   __ Peek(w12, 8);
15344   __ Peek(w13, 12);
15345 
15346   __ Drop(32);
15347 
15348   END();
15349   RUN();
15350 
15351   ASSERT_EQUAL_64(literal_base * 1, x0);
15352   ASSERT_EQUAL_64(literal_base * 2, x1);
15353   ASSERT_EQUAL_64(literal_base * 3, x2);
15354   ASSERT_EQUAL_64(literal_base * 4, x3);
15355 
15356   ASSERT_EQUAL_64((literal_base * 1) & 0xffffffff, x10);
15357   ASSERT_EQUAL_64((literal_base * 2) & 0xffffffff, x11);
15358   ASSERT_EQUAL_64((literal_base * 3) & 0xffffffff, x12);
15359   ASSERT_EQUAL_64((literal_base * 4) & 0xffffffff, x13);
15360 
15361   TEARDOWN();
15362 }
15363 
15364 
15365 TEST(peek_poke_unaligned) {
15366   SETUP();
15367   START();
15368 
15369   // The literal base is chosen to have two useful properties:
15370   //  * When multiplied by small values (such as a register index), this value
15371   //    is clearly readable in the result.
15372   //  * The value is not formed from repeating fixed-size smaller values, so it
15373   //    can be used to detect endianness-related errors.
15374   uint64_t literal_base = 0x0100001000100101;
15375 
15376   // Initialize the registers.
15377   __ Mov(x0, literal_base);
15378   __ Add(x1, x0, x0);
15379   __ Add(x2, x1, x0);
15380   __ Add(x3, x2, x0);
15381   __ Add(x4, x3, x0);
15382   __ Add(x5, x4, x0);
15383   __ Add(x6, x5, x0);
15384 
15385   __ Claim(32);
15386 
15387   // Unaligned exchanges.
15388   //  After this test:
15389   //    x0-x6 should be unchanged.
15390   //    w10-w12 should contain the lower words of x0-x2.
15391   __ Poke(x0, 1);
15392   Clobber(&masm, x0.GetBit());
15393   __ Peek(x0, 1);
15394   __ Poke(x1, 2);
15395   Clobber(&masm, x1.GetBit());
15396   __ Peek(x1, 2);
15397   __ Poke(x2, 3);
15398   Clobber(&masm, x2.GetBit());
15399   __ Peek(x2, 3);
15400   __ Poke(x3, 4);
15401   Clobber(&masm, x3.GetBit());
15402   __ Peek(x3, 4);
15403   __ Poke(x4, 5);
15404   Clobber(&masm, x4.GetBit());
15405   __ Peek(x4, 5);
15406   __ Poke(x5, 6);
15407   Clobber(&masm, x5.GetBit());
15408   __ Peek(x5, 6);
15409   __ Poke(x6, 7);
15410   Clobber(&masm, x6.GetBit());
15411   __ Peek(x6, 7);
15412 
15413   __ Poke(w0, 1);
15414   Clobber(&masm, w10.GetBit());
15415   __ Peek(w10, 1);
15416   __ Poke(w1, 2);
15417   Clobber(&masm, w11.GetBit());
15418   __ Peek(w11, 2);
15419   __ Poke(w2, 3);
15420   Clobber(&masm, w12.GetBit());
15421   __ Peek(w12, 3);
15422 
15423   __ Drop(32);
15424 
15425   END();
15426   RUN();
15427 
15428   ASSERT_EQUAL_64(literal_base * 1, x0);
15429   ASSERT_EQUAL_64(literal_base * 2, x1);
15430   ASSERT_EQUAL_64(literal_base * 3, x2);
15431   ASSERT_EQUAL_64(literal_base * 4, x3);
15432   ASSERT_EQUAL_64(literal_base * 5, x4);
15433   ASSERT_EQUAL_64(literal_base * 6, x5);
15434   ASSERT_EQUAL_64(literal_base * 7, x6);
15435 
15436   ASSERT_EQUAL_64((literal_base * 1) & 0xffffffff, x10);
15437   ASSERT_EQUAL_64((literal_base * 2) & 0xffffffff, x11);
15438   ASSERT_EQUAL_64((literal_base * 3) & 0xffffffff, x12);
15439 
15440   TEARDOWN();
15441 }
15442 
15443 
15444 TEST(peek_poke_endianness) {
15445   SETUP();
15446   START();
15447 
15448   // The literal base is chosen to have two useful properties:
15449   //  * When multiplied by small values (such as a register index), this value
15450   //    is clearly readable in the result.
15451   //  * The value is not formed from repeating fixed-size smaller values, so it
15452   //    can be used to detect endianness-related errors.
15453   uint64_t literal_base = 0x0100001000100101;
15454 
15455   // Initialize the registers.
15456   __ Mov(x0, literal_base);
15457   __ Add(x1, x0, x0);
15458 
15459   __ Claim(32);
15460 
15461   // Endianness tests.
15462   //  After this section:
15463   //    x4 should match x0[31:0]:x0[63:32]
15464   //    w5 should match w1[15:0]:w1[31:16]
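  //  This relies on the little-endian data layout exercised here: the Pokes
  //  of x0 place x0[31:0] at bytes [0-3] and x0[63:32] at bytes [4-7], then
  //  repeat the pattern at bytes [8-15], so Peek(x4, 4) reads x0[63:32] into
  //  x4[31:0] and x0[31:0] into x4[63:32]. The w1 case swaps half-words the
  //  same way.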
15465   __ Poke(x0, 0);
15466   __ Poke(x0, 8);
15467   __ Peek(x4, 4);
15468 
15469   __ Poke(w1, 0);
15470   __ Poke(w1, 4);
15471   __ Peek(w5, 2);
15472 
15473   __ Drop(32);
15474 
15475   END();
15476   RUN();
15477 
15478   uint64_t x0_expected = literal_base * 1;
15479   uint64_t x1_expected = literal_base * 2;
15480   uint64_t x4_expected = (x0_expected << 32) | (x0_expected >> 32);
15481   uint64_t x5_expected =
15482       ((x1_expected << 16) & 0xffff0000) | ((x1_expected >> 16) & 0x0000ffff);
15483 
15484   ASSERT_EQUAL_64(x0_expected, x0);
15485   ASSERT_EQUAL_64(x1_expected, x1);
15486   ASSERT_EQUAL_64(x4_expected, x4);
15487   ASSERT_EQUAL_64(x5_expected, x5);
15488 
15489   TEARDOWN();
15490 }
15491 
15492 
15493 TEST(peek_poke_mixed) {
15494   SETUP();
15495   START();
15496 
15497   // Acquire all temps from the MacroAssembler. They are used arbitrarily below.
15498   UseScratchRegisterScope temps(&masm);
15499   temps.ExcludeAll();
15500 
15501   // The literal base is chosen to have two useful properties:
15502   //  * When multiplied by small values (such as a register index), this value
15503   //    is clearly readable in the result.
15504   //  * The value is not formed from repeating fixed-size smaller values, so it
15505   //    can be used to detect endianness-related errors.
15506   uint64_t literal_base = 0x0100001000100101;
15507 
15508   // Initialize the registers.
15509   __ Mov(x0, literal_base);
15510   __ Add(x1, x0, x0);
15511   __ Add(x2, x1, x0);
15512   __ Add(x3, x2, x0);
15513 
15514   __ Claim(32);
15515 
15516   // Mix with other stack operations.
15517   //  After this section:
15518   //    x0-x3 should be unchanged.
15519   //    x6 should match x1[31:0]:x0[63:32]
15520   //    w7 should match x1[15:0]:x0[63:48]
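  //  Sketch of the offsets involved (assuming little-endian data accesses):
  //  after the two Pokes, byte i of the claimed area holds byte i of the
  //  128-bit value x1:x0. Drop(4) raises the stack pointer by four bytes, so
  //  Peek(x6, 0) reads original bytes [4-11]: x0[63:32] in its low word and
  //  x1[31:0] in its high word. Claim(8) then lowers it by eight, so
  //  Peek(w7, 10) reads original bytes [6-9]: x0[63:48] and x1[15:0].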
15521   __ Poke(x1, 8);
15522   __ Poke(x0, 0);
15523   {
15524     VIXL_ASSERT(__ StackPointer().Is(sp));
15525     __ Mov(x4, __ StackPointer());
15526     __ SetStackPointer(x4);
15527 
15528     __ Poke(wzr, 0);  // Clobber the space we're about to drop.
15529     __ Drop(4);
15530     __ Peek(x6, 0);
15531     __ Claim(8);
15532     __ Peek(w7, 10);
15533     __ Poke(x3, 28);
15534     __ Poke(xzr, 0);  // Clobber the space we're about to drop.
15535     __ Drop(8);
15536     __ Poke(x2, 12);
15537     __ Push(w0);
15538 
15539     __ Mov(sp, __ StackPointer());
15540     __ SetStackPointer(sp);
15541   }
15542 
15543   __ Pop(x0, x1, x2, x3);
15544 
15545   END();
15546   RUN();
15547 
15548   uint64_t x0_expected = literal_base * 1;
15549   uint64_t x1_expected = literal_base * 2;
15550   uint64_t x2_expected = literal_base * 3;
15551   uint64_t x3_expected = literal_base * 4;
15552   uint64_t x6_expected = (x1_expected << 32) | (x0_expected >> 32);
15553   uint64_t x7_expected =
15554       ((x1_expected << 16) & 0xffff0000) | ((x0_expected >> 48) & 0x0000ffff);
15555 
15556   ASSERT_EQUAL_64(x0_expected, x0);
15557   ASSERT_EQUAL_64(x1_expected, x1);
15558   ASSERT_EQUAL_64(x2_expected, x2);
15559   ASSERT_EQUAL_64(x3_expected, x3);
15560   ASSERT_EQUAL_64(x6_expected, x6);
15561   ASSERT_EQUAL_64(x7_expected, x7);
15562 
15563   TEARDOWN();
15564 }
15565 
15566 
15567 TEST(peek_poke_reglist) {
15568   SETUP_WITH_FEATURES(CPUFeatures::kFP);
15569 
15570   START();
15571 
15572   // Acquire all temps from the MacroAssembler. They are used arbitrarily below.
15573   UseScratchRegisterScope temps(&masm);
15574   temps.ExcludeAll();
15575 
15576   // The literal base is chosen to have two useful properties:
15577   //  * When multiplied by small values (such as a register index), this value
15578   //    is clearly readable in the result.
15579   //  * The value is not formed from repeating fixed-size smaller values, so it
15580   //    can be used to detect endianness-related errors.
15581   uint64_t base = 0x0100001000100101;
15582 
15583   // Initialize the registers.
15584   __ Mov(x1, base);
15585   __ Add(x2, x1, x1);
15586   __ Add(x3, x2, x1);
15587   __ Add(x4, x3, x1);
15588 
15589   CPURegList list_1(x1, x2, x3, x4);
15590   CPURegList list_2(x11, x12, x13, x14);
15591   int list_1_size = list_1.GetTotalSizeInBytes();
15592 
15593   __ Claim(2 * list_1_size);
15594 
15595   __ PokeCPURegList(list_1, 0);
15596   __ PokeXRegList(list_1.GetList(), list_1_size);
15597   __ PeekCPURegList(list_2, 2 * kXRegSizeInBytes);
15598   __ PeekXRegList(x15.GetBit(), kWRegSizeInBytes);
15599   __ PeekWRegList(w16.GetBit() | w17.GetBit(), 3 * kXRegSizeInBytes);
15600 
15601   __ Drop(2 * list_1_size);
15602 
15603 
15604   uint64_t base_d = 0x1010010001000010;
15605 
15606   // Initialize the registers.
15607   __ Mov(x1, base_d);
15608   __ Add(x2, x1, x1);
15609   __ Add(x3, x2, x1);
15610   __ Add(x4, x3, x1);
15611   __ Fmov(d1, x1);
15612   __ Fmov(d2, x2);
15613   __ Fmov(d3, x3);
15614   __ Fmov(d4, x4);
15615 
15616   CPURegList list_d_1(d1, d2, d3, d4);
15617   CPURegList list_d_2(d11, d12, d13, d14);
15618   int list_d_1_size = list_d_1.GetTotalSizeInBytes();
15619 
15620   __ Claim(2 * list_d_1_size);
15621 
15622   __ PokeCPURegList(list_d_1, 0);
15623   __ PokeDRegList(list_d_1.GetList(), list_d_1_size);
15624   __ PeekCPURegList(list_d_2, 2 * kDRegSizeInBytes);
15625   __ PeekDRegList(d15.GetBit(), kSRegSizeInBytes);
15626   __ PeekSRegList(s16.GetBit() | s17.GetBit(), 3 * kDRegSizeInBytes);
15627 
15628   __ Drop(2 * list_d_1_size);
15629 
15630 
15631   END();
15632   RUN();
15633 
15634   ASSERT_EQUAL_64(3 * base, x11);
15635   ASSERT_EQUAL_64(4 * base, x12);
15636   ASSERT_EQUAL_64(1 * base, x13);
15637   ASSERT_EQUAL_64(2 * base, x14);
15638   ASSERT_EQUAL_64(((1 * base) >> kWRegSize) | ((2 * base) << kWRegSize), x15);
15639   ASSERT_EQUAL_64(2 * base, x14);
15640   ASSERT_EQUAL_32((4 * base) & kWRegMask, w16);
15641   ASSERT_EQUAL_32((4 * base) >> kWRegSize, w17);
15642 
15643   ASSERT_EQUAL_FP64(RawbitsToDouble(3 * base_d), d11);
15644   ASSERT_EQUAL_FP64(RawbitsToDouble(4 * base_d), d12);
15645   ASSERT_EQUAL_FP64(RawbitsToDouble(1 * base_d), d13);
15646   ASSERT_EQUAL_FP64(RawbitsToDouble(2 * base_d), d14);
15647   ASSERT_EQUAL_FP64(RawbitsToDouble((base_d >> kSRegSize) |
15648                                     ((2 * base_d) << kSRegSize)),
15649                     d15);
15650   ASSERT_EQUAL_FP64(RawbitsToDouble(2 * base_d), d14);
15651   ASSERT_EQUAL_FP32(RawbitsToFloat((4 * base_d) & kSRegMask), s16);
15652   ASSERT_EQUAL_FP32(RawbitsToFloat((4 * base_d) >> kSRegSize), s17);
15653 
15654   TEARDOWN();
15655 }
15656 
15657 
15658 TEST(load_store_reglist) {
15659   SETUP_WITH_FEATURES(CPUFeatures::kFP);
15660 
15661   START();
15662 
15663   // The literal base is chosen to have two useful properties:
15664   //  * When multiplied by small values (such as a register index), this value
15665   //    is clearly readable in the result.
15666   //  * The value is not formed from repeating fixed-size smaller values, so it
15667   //    can be used to detect endianness-related errors.
15668   uint64_t high_base = UINT32_C(0x01000010);
15669   uint64_t low_base = UINT32_C(0x00100101);
15670   uint64_t base = (high_base << 32) | low_base;
15671   uint64_t array[21];
15672   memset(array, 0, sizeof(array));
15673 
15674   // Initialize the registers.
15675   __ Mov(x1, base);
15676   __ Add(x2, x1, x1);
15677   __ Add(x3, x2, x1);
15678   __ Add(x4, x3, x1);
15679   __ Fmov(d1, x1);
15680   __ Fmov(d2, x2);
15681   __ Fmov(d3, x3);
15682   __ Fmov(d4, x4);
15683   __ Fmov(d5, x1);
15684   __ Fmov(d6, x2);
15685   __ Fmov(d7, x3);
15686   __ Fmov(d8, x4);
15687 
15688   Register reg_base = x20;
15689   Register reg_index = x21;
15690   int size_stored = 0;
15691 
15692   __ Mov(reg_base, reinterpret_cast<uintptr_t>(&array));
15693 
15694   // Test aligned accesses.
15695   CPURegList list_src(w1, w2, w3, w4);
15696   CPURegList list_dst(w11, w12, w13, w14);
15697   CPURegList list_fp_src_1(d1, d2, d3, d4);
15698   CPURegList list_fp_dst_1(d11, d12, d13, d14);
15699 
15700   __ StoreCPURegList(list_src, MemOperand(reg_base, 0 * sizeof(uint64_t)));
15701   __ LoadCPURegList(list_dst, MemOperand(reg_base, 0 * sizeof(uint64_t)));
15702   size_stored += 4 * kWRegSizeInBytes;
15703 
15704   __ Mov(reg_index, size_stored);
15705   __ StoreCPURegList(list_src, MemOperand(reg_base, reg_index));
15706   __ LoadCPURegList(list_dst, MemOperand(reg_base, reg_index));
15707   size_stored += 4 * kWRegSizeInBytes;
15708 
15709   __ StoreCPURegList(list_fp_src_1, MemOperand(reg_base, size_stored));
15710   __ LoadCPURegList(list_fp_dst_1, MemOperand(reg_base, size_stored));
15711   size_stored += 4 * kDRegSizeInBytes;
15712 
15713   __ Mov(reg_index, size_stored);
15714   __ StoreCPURegList(list_fp_src_1, MemOperand(reg_base, reg_index));
15715   __ LoadCPURegList(list_fp_dst_1, MemOperand(reg_base, reg_index));
15716   size_stored += 4 * kDRegSizeInBytes;
15717 
15718   // Test unaligned accesses.
15719   CPURegList list_fp_src_2(d5, d6, d7, d8);
15720   CPURegList list_fp_dst_2(d15, d16, d17, d18);
15721 
15722   __ Str(wzr, MemOperand(reg_base, size_stored));
15723   size_stored += 1 * kWRegSizeInBytes;
15724   __ StoreCPURegList(list_fp_src_2, MemOperand(reg_base, size_stored));
15725   __ LoadCPURegList(list_fp_dst_2, MemOperand(reg_base, size_stored));
15726   size_stored += 4 * kDRegSizeInBytes;
15727 
15728   __ Mov(reg_index, size_stored);
15729   __ StoreCPURegList(list_fp_src_2, MemOperand(reg_base, reg_index));
15730   __ LoadCPURegList(list_fp_dst_2, MemOperand(reg_base, reg_index));
15731 
15732   END();
15733   RUN();
15734 
15735   VIXL_CHECK(array[0] == (1 * low_base) + (2 * low_base << kWRegSize));
15736   VIXL_CHECK(array[1] == (3 * low_base) + (4 * low_base << kWRegSize));
15737   VIXL_CHECK(array[2] == (1 * low_base) + (2 * low_base << kWRegSize));
15738   VIXL_CHECK(array[3] == (3 * low_base) + (4 * low_base << kWRegSize));
15739   VIXL_CHECK(array[4] == 1 * base);
15740   VIXL_CHECK(array[5] == 2 * base);
15741   VIXL_CHECK(array[6] == 3 * base);
15742   VIXL_CHECK(array[7] == 4 * base);
15743   VIXL_CHECK(array[8] == 1 * base);
15744   VIXL_CHECK(array[9] == 2 * base);
15745   VIXL_CHECK(array[10] == 3 * base);
15746   VIXL_CHECK(array[11] == 4 * base);
15747   VIXL_CHECK(array[12] == ((1 * low_base) << kSRegSize));
15748   VIXL_CHECK(array[13] == (((2 * low_base) << kSRegSize) | (1 * high_base)));
15749   VIXL_CHECK(array[14] == (((3 * low_base) << kSRegSize) | (2 * high_base)));
15750   VIXL_CHECK(array[15] == (((4 * low_base) << kSRegSize) | (3 * high_base)));
15751   VIXL_CHECK(array[16] == (((1 * low_base) << kSRegSize) | (4 * high_base)));
15752   VIXL_CHECK(array[17] == (((2 * low_base) << kSRegSize) | (1 * high_base)));
15753   VIXL_CHECK(array[18] == (((3 * low_base) << kSRegSize) | (2 * high_base)));
15754   VIXL_CHECK(array[19] == (((4 * low_base) << kSRegSize) | (3 * high_base)));
15755   VIXL_CHECK(array[20] == (4 * high_base));
15756 
15757   ASSERT_EQUAL_64(1 * low_base, x11);
15758   ASSERT_EQUAL_64(2 * low_base, x12);
15759   ASSERT_EQUAL_64(3 * low_base, x13);
15760   ASSERT_EQUAL_64(4 * low_base, x14);
15761   ASSERT_EQUAL_FP64(RawbitsToDouble(1 * base), d11);
15762   ASSERT_EQUAL_FP64(RawbitsToDouble(2 * base), d12);
15763   ASSERT_EQUAL_FP64(RawbitsToDouble(3 * base), d13);
15764   ASSERT_EQUAL_FP64(RawbitsToDouble(4 * base), d14);
15765   ASSERT_EQUAL_FP64(RawbitsToDouble(1 * base), d15);
15766   ASSERT_EQUAL_FP64(RawbitsToDouble(2 * base), d16);
15767   ASSERT_EQUAL_FP64(RawbitsToDouble(3 * base), d17);
15768   ASSERT_EQUAL_FP64(RawbitsToDouble(4 * base), d18);
15769 
15770   TEARDOWN();
15771 }
15772 
15773 
15774 // This enum is used only as an argument to the push-pop test helpers.
15775 enum PushPopMethod {
15776   // Push or Pop using the Push and Pop methods, with blocks of up to four
15777   // registers. (Smaller blocks will be used if necessary.)
15778   PushPopByFour,
15779 
15780   // Use Push<Size>RegList and Pop<Size>RegList to transfer the registers.
15781   PushPopRegList
15782 };
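// For example, transferring x0-x3 by each method looks like:
//
//   __ Push(x3, x2, x1, x0);  // PushPopByFour
//   __ PushSizeRegList(x0.GetBit() | x1.GetBit() | x2.GetBit() | x3.GetBit(),
//                      kXRegSize);  // PushPopRegList
//
// Both layouts are expected to match, with x0 at the lowest address, so the
// helpers below can mix the two methods freely.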
15783 
15784 
15785 // For the PushPop* tests, use the maximum number of registers that the test
15786 // supports (where a reg_count argument would otherwise be provided).
15787 static int const kPushPopUseMaxRegCount = -1;
15788 
15789 // Test a simple push-pop pattern:
15790 //  * Claim <claim> bytes to set the stack alignment.
15791 //  * Push <reg_count> registers with size <reg_size>.
15792 //  * Clobber the register contents.
15793 //  * Pop <reg_count> registers to restore the original contents.
15794 //  * Drop <claim> bytes to restore the original stack pointer.
15795 //
15796 // Different push and pop methods can be specified independently to test for
15797 // proper word-endian behaviour.
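// For example,
//   PushPopSimpleHelper(4, 8, kXRegSize, PushPopByFour, PushPopRegList);
// claims eight bytes, pushes four X registers using Push, pops them back
// using PopSizeRegList, and finally drops the claimed bytes.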
15798 static void PushPopSimpleHelper(int reg_count,
15799                                 int claim,
15800                                 int reg_size,
15801                                 PushPopMethod push_method,
15802                                 PushPopMethod pop_method) {
15803   SETUP();
15804 
15805   START();
15806 
15807   // Arbitrarily pick a register to use as a stack pointer.
15808   const Register& stack_pointer = x20;
15809   const RegList allowed = ~stack_pointer.GetBit();
15810   if (reg_count == kPushPopUseMaxRegCount) {
15811     reg_count = CountSetBits(allowed, kNumberOfRegisters);
15812   }
15813   // Work out which registers to use, based on reg_size.
15814   Register r[kNumberOfRegisters];
15815   Register x[kNumberOfRegisters];
15816   RegList list =
15817       PopulateRegisterArray(NULL, x, r, reg_size, reg_count, allowed);
15818 
15819   // Acquire all temps from the MacroAssembler. They are used arbitrarily below.
15820   UseScratchRegisterScope temps(&masm);
15821   temps.ExcludeAll();
15822 
15823   // The literal base is chosen to have two useful properties:
15824   //  * When multiplied by small values (such as a register index), this value
15825   //    is clearly readable in the result.
15826   //  * The value is not formed from repeating fixed-size smaller values, so it
15827   //    can be used to detect endianness-related errors.
15828   uint64_t literal_base = 0x0100001000100101;
15829 
15830   {
15831     VIXL_ASSERT(__ StackPointer().Is(sp));
15832     __ Mov(stack_pointer, __ StackPointer());
15833     __ SetStackPointer(stack_pointer);
15834 
15835     int i;
15836 
15837     // Initialize the registers.
15838     for (i = 0; i < reg_count; i++) {
15839       // Always write into the X register, to ensure that the upper word is
15840       // properly ignored by Push when testing W registers.
15841       __ Mov(x[i], literal_base * i);
15842     }
15843 
15844     // Claim memory first, as requested.
15845     __ Claim(claim);
15846 
15847     switch (push_method) {
15848       case PushPopByFour:
15849         // Push high-numbered registers first (to the highest addresses).
15850         for (i = reg_count; i >= 4; i -= 4) {
15851           __ Push(r[i - 1], r[i - 2], r[i - 3], r[i - 4]);
15852         }
15853         // Finish off the leftovers.
15854         switch (i) {
15855           case 3:
15856             __ Push(r[2], r[1], r[0]);
15857             break;
15858           case 2:
15859             __ Push(r[1], r[0]);
15860             break;
15861           case 1:
15862             __ Push(r[0]);
15863             break;
15864           default:
15865             VIXL_ASSERT(i == 0);
15866             break;
15867         }
15868         break;
15869       case PushPopRegList:
15870         __ PushSizeRegList(list, reg_size);
15871         break;
15872     }
15873 
15874     // Clobber all the registers, to ensure that they get repopulated by Pop.
15875     Clobber(&masm, list);
15876 
15877     switch (pop_method) {
15878       case PushPopByFour:
15879         // Pop low-numbered registers first (from the lowest addresses).
15880         for (i = 0; i <= (reg_count - 4); i += 4) {
15881           __ Pop(r[i], r[i + 1], r[i + 2], r[i + 3]);
15882         }
15883         // Finish off the leftovers.
15884         switch (reg_count - i) {
15885           case 3:
15886             __ Pop(r[i], r[i + 1], r[i + 2]);
15887             break;
15888           case 2:
15889             __ Pop(r[i], r[i + 1]);
15890             break;
15891           case 1:
15892             __ Pop(r[i]);
15893             break;
15894           default:
15895             VIXL_ASSERT(i == reg_count);
15896             break;
15897         }
15898         break;
15899       case PushPopRegList:
15900         __ PopSizeRegList(list, reg_size);
15901         break;
15902     }
15903 
15904     // Drop memory to restore stack_pointer.
15905     __ Drop(claim);
15906 
15907     __ Mov(sp, __ StackPointer());
15908     __ SetStackPointer(sp);
15909   }
15910 
15911   END();
15912 
15913   RUN();
15914 
15915   // Check that the register contents were preserved.
15916   // Always use ASSERT_EQUAL_64, even when testing W registers, so we can test
15917   // that the upper word was properly cleared by Pop.
15918   literal_base &= (0xffffffffffffffff >> (64 - reg_size));
15919   for (int i = 0; i < reg_count; i++) {
15920     if (x[i].Is(xzr)) {
15921       ASSERT_EQUAL_64(0, x[i]);
15922     } else {
15923       ASSERT_EQUAL_64(literal_base * i, x[i]);
15924     }
15925   }
15926 
15927   TEARDOWN();
15928 }
15929 
15930 
15931 TEST(push_pop_xreg_simple_32) {
15932   for (int claim = 0; claim <= 8; claim++) {
15933     for (int count = 0; count <= 8; count++) {
15934       PushPopSimpleHelper(count,
15935                           claim,
15936                           kWRegSize,
15937                           PushPopByFour,
15938                           PushPopByFour);
15939       PushPopSimpleHelper(count,
15940                           claim,
15941                           kWRegSize,
15942                           PushPopByFour,
15943                           PushPopRegList);
15944       PushPopSimpleHelper(count,
15945                           claim,
15946                           kWRegSize,
15947                           PushPopRegList,
15948                           PushPopByFour);
15949       PushPopSimpleHelper(count,
15950                           claim,
15951                           kWRegSize,
15952                           PushPopRegList,
15953                           PushPopRegList);
15954     }
15955     // Test with the maximum number of registers.
15956     PushPopSimpleHelper(kPushPopUseMaxRegCount,
15957                         claim,
15958                         kWRegSize,
15959                         PushPopByFour,
15960                         PushPopByFour);
15961     PushPopSimpleHelper(kPushPopUseMaxRegCount,
15962                         claim,
15963                         kWRegSize,
15964                         PushPopByFour,
15965                         PushPopRegList);
15966     PushPopSimpleHelper(kPushPopUseMaxRegCount,
15967                         claim,
15968                         kWRegSize,
15969                         PushPopRegList,
15970                         PushPopByFour);
15971     PushPopSimpleHelper(kPushPopUseMaxRegCount,
15972                         claim,
15973                         kWRegSize,
15974                         PushPopRegList,
15975                         PushPopRegList);
15976   }
15977 }
15978 
15979 
15980 TEST(push_pop_xreg_simple_64) {
15981   for (int claim = 0; claim <= 8; claim++) {
15982     for (int count = 0; count <= 8; count++) {
15983       PushPopSimpleHelper(count,
15984                           claim,
15985                           kXRegSize,
15986                           PushPopByFour,
15987                           PushPopByFour);
15988       PushPopSimpleHelper(count,
15989                           claim,
15990                           kXRegSize,
15991                           PushPopByFour,
15992                           PushPopRegList);
15993       PushPopSimpleHelper(count,
15994                           claim,
15995                           kXRegSize,
15996                           PushPopRegList,
15997                           PushPopByFour);
15998       PushPopSimpleHelper(count,
15999                           claim,
16000                           kXRegSize,
16001                           PushPopRegList,
16002                           PushPopRegList);
16003     }
16004     // Test with the maximum number of registers.
16005     PushPopSimpleHelper(kPushPopUseMaxRegCount,
16006                         claim,
16007                         kXRegSize,
16008                         PushPopByFour,
16009                         PushPopByFour);
16010     PushPopSimpleHelper(kPushPopUseMaxRegCount,
16011                         claim,
16012                         kXRegSize,
16013                         PushPopByFour,
16014                         PushPopRegList);
16015     PushPopSimpleHelper(kPushPopUseMaxRegCount,
16016                         claim,
16017                         kXRegSize,
16018                         PushPopRegList,
16019                         PushPopByFour);
16020     PushPopSimpleHelper(kPushPopUseMaxRegCount,
16021                         claim,
16022                         kXRegSize,
16023                         PushPopRegList,
16024                         PushPopRegList);
16025   }
16026 }
16027 
16028 // For the PushPopFP* tests, use the maximum number of registers that the test
16029 // supports (where a reg_count argument would otherwise be provided).
16030 static int const kPushPopFPUseMaxRegCount = -1;
16031 
16032 // Test a simple push-pop pattern:
16033 //  * Claim <claim> bytes to set the stack alignment.
16034 //  * Push <reg_count> FP registers with size <reg_size>.
16035 //  * Clobber the register contents.
16036 //  * Pop <reg_count> FP registers to restore the original contents.
16037 //  * Drop <claim> bytes to restore the original stack pointer.
16038 //
16039 // Different push and pop methods can be specified independently to test for
16040 // proper word-endian behaviour.
16041 static void PushPopFPSimpleHelper(int reg_count,
16042                                   int claim,
16043                                   int reg_size,
16044                                   PushPopMethod push_method,
16045                                   PushPopMethod pop_method) {
16046   SETUP_WITH_FEATURES((reg_count == 0) ? CPUFeatures::kNone : CPUFeatures::kFP);
16047 
16048   START();
16049 
16050   // We can use any floating-point register. None of them are reserved for
16051   // debug code, for example.
16052   static RegList const allowed = ~0;
16053   if (reg_count == kPushPopFPUseMaxRegCount) {
16054     reg_count = CountSetBits(allowed, kNumberOfFPRegisters);
16055   }
16056   // Work out which registers to use, based on reg_size.
16057   FPRegister v[kNumberOfRegisters];
16058   FPRegister d[kNumberOfRegisters];
16059   RegList list =
16060       PopulateFPRegisterArray(NULL, d, v, reg_size, reg_count, allowed);
16061 
16062   // Arbitrarily pick a register to use as a stack pointer.
16063   const Register& stack_pointer = x10;
16064 
16065   // Acquire all temps from the MacroAssembler. They are used arbitrarily below.
16066   UseScratchRegisterScope temps(&masm);
16067   temps.ExcludeAll();
16068 
16069   // The literal base is chosen to have two useful properties:
16070   //  * When multiplied (using an integer) by small values (such as a register
16071   //    index), this value is clearly readable in the result.
16072   //  * The value is not formed from repeating fixed-size smaller values, so it
16073   //    can be used to detect endianness-related errors.
16074   //  * It is never a floating-point NaN, and will therefore always compare
16075   //    equal to itself.
16076   uint64_t literal_base = 0x0100001000100101;
16077 
16078   {
16079     VIXL_ASSERT(__ StackPointer().Is(sp));
16080     __ Mov(stack_pointer, __ StackPointer());
16081     __ SetStackPointer(stack_pointer);
16082 
16083     int i;
16084 
16085     // Initialize the registers, using X registers to load the literal.
16086     __ Mov(x0, 0);
16087     __ Mov(x1, literal_base);
16088     for (i = 0; i < reg_count; i++) {
16089       // Always write into the D register, to ensure that the upper word is
16090       // properly ignored by Push when testing S registers.
16091       __ Fmov(d[i], x0);
16092       // Calculate the next literal.
16093       __ Add(x0, x0, x1);
16094     }
16095 
16096     // Claim memory first, as requested.
16097     __ Claim(claim);
16098 
16099     switch (push_method) {
16100       case PushPopByFour:
16101         // Push high-numbered registers first (to the highest addresses).
16102         for (i = reg_count; i >= 4; i -= 4) {
16103           __ Push(v[i - 1], v[i - 2], v[i - 3], v[i - 4]);
16104         }
16105         // Finish off the leftovers.
16106         switch (i) {
16107           case 3:
16108             __ Push(v[2], v[1], v[0]);
16109             break;
16110           case 2:
16111             __ Push(v[1], v[0]);
16112             break;
16113           case 1:
16114             __ Push(v[0]);
16115             break;
16116           default:
16117             VIXL_ASSERT(i == 0);
16118             break;
16119         }
16120         break;
16121       case PushPopRegList:
16122         __ PushSizeRegList(list, reg_size, CPURegister::kVRegister);
16123         break;
16124     }
16125 
16126     // Clobber all the registers, to ensure that they get repopulated by Pop.
16127     ClobberFP(&masm, list);
16128 
16129     switch (pop_method) {
16130       case PushPopByFour:
16131         // Pop low-numbered registers first (from the lowest addresses).
16132         for (i = 0; i <= (reg_count - 4); i += 4) {
16133           __ Pop(v[i], v[i + 1], v[i + 2], v[i + 3]);
16134         }
16135         // Finish off the leftovers.
16136         switch (reg_count - i) {
16137           case 3:
16138             __ Pop(v[i], v[i + 1], v[i + 2]);
16139             break;
16140           case 2:
16141             __ Pop(v[i], v[i + 1]);
16142             break;
16143           case 1:
16144             __ Pop(v[i]);
16145             break;
16146           default:
16147             VIXL_ASSERT(i == reg_count);
16148             break;
16149         }
16150         break;
16151       case PushPopRegList:
16152         __ PopSizeRegList(list, reg_size, CPURegister::kVRegister);
16153         break;
16154     }
16155 
16156     // Drop memory to restore the stack pointer.
16157     __ Drop(claim);
16158 
16159     __ Mov(sp, __ StackPointer());
16160     __ SetStackPointer(sp);
16161   }
16162 
16163   END();
16164 
16165   RUN();
16166 
16167   // Check that the register contents were preserved.
16168   // Always use ASSERT_EQUAL_FP64, even when testing S registers, so we can
16169   // test that the upper word was properly cleared by Pop.
16170   literal_base &= (0xffffffffffffffff >> (64 - reg_size));
16171   for (int i = 0; i < reg_count; i++) {
16172     uint64_t literal = literal_base * i;
16173     double expected;
16174     memcpy(&expected, &literal, sizeof(expected));
16175     ASSERT_EQUAL_FP64(expected, d[i]);
16176   }
16177 
16178   TEARDOWN();
16179 }
16180 
16181 
16182 TEST(push_pop_fp_xreg_simple_32) {
16183   for (int claim = 0; claim <= 8; claim++) {
16184     for (int count = 0; count <= 8; count++) {
16185       PushPopFPSimpleHelper(count,
16186                             claim,
16187                             kSRegSize,
16188                             PushPopByFour,
16189                             PushPopByFour);
16190       PushPopFPSimpleHelper(count,
16191                             claim,
16192                             kSRegSize,
16193                             PushPopByFour,
16194                             PushPopRegList);
16195       PushPopFPSimpleHelper(count,
16196                             claim,
16197                             kSRegSize,
16198                             PushPopRegList,
16199                             PushPopByFour);
16200       PushPopFPSimpleHelper(count,
16201                             claim,
16202                             kSRegSize,
16203                             PushPopRegList,
16204                             PushPopRegList);
16205     }
16206     // Test with the maximum number of registers.
16207     PushPopFPSimpleHelper(kPushPopFPUseMaxRegCount,
16208                           claim,
16209                           kSRegSize,
16210                           PushPopByFour,
16211                           PushPopByFour);
16212     PushPopFPSimpleHelper(kPushPopFPUseMaxRegCount,
16213                           claim,
16214                           kSRegSize,
16215                           PushPopByFour,
16216                           PushPopRegList);
16217     PushPopFPSimpleHelper(kPushPopFPUseMaxRegCount,
16218                           claim,
16219                           kSRegSize,
16220                           PushPopRegList,
16221                           PushPopByFour);
16222     PushPopFPSimpleHelper(kPushPopFPUseMaxRegCount,
16223                           claim,
16224                           kSRegSize,
16225                           PushPopRegList,
16226                           PushPopRegList);
16227   }
16228 }
16229 
16230 
16231 TEST(push_pop_fp_xreg_simple_64) {
16232   for (int claim = 0; claim <= 8; claim++) {
16233     for (int count = 0; count <= 8; count++) {
16234       PushPopFPSimpleHelper(count,
16235                             claim,
16236                             kDRegSize,
16237                             PushPopByFour,
16238                             PushPopByFour);
16239       PushPopFPSimpleHelper(count,
16240                             claim,
16241                             kDRegSize,
16242                             PushPopByFour,
16243                             PushPopRegList);
16244       PushPopFPSimpleHelper(count,
16245                             claim,
16246                             kDRegSize,
16247                             PushPopRegList,
16248                             PushPopByFour);
16249       PushPopFPSimpleHelper(count,
16250                             claim,
16251                             kDRegSize,
16252                             PushPopRegList,
16253                             PushPopRegList);
16254     }
16255     // Test with the maximum number of registers.
16256     PushPopFPSimpleHelper(kPushPopFPUseMaxRegCount,
16257                           claim,
16258                           kDRegSize,
16259                           PushPopByFour,
16260                           PushPopByFour);
16261     PushPopFPSimpleHelper(kPushPopFPUseMaxRegCount,
16262                           claim,
16263                           kDRegSize,
16264                           PushPopByFour,
16265                           PushPopRegList);
16266     PushPopFPSimpleHelper(kPushPopFPUseMaxRegCount,
16267                           claim,
16268                           kDRegSize,
16269                           PushPopRegList,
16270                           PushPopByFour);
16271     PushPopFPSimpleHelper(kPushPopFPUseMaxRegCount,
16272                           claim,
16273                           kDRegSize,
16274                           PushPopRegList,
16275                           PushPopRegList);
16276   }
16277 }
16278 
16279 
16280 // Push and pop data using an overlapping combination of Push/Pop and
16281 // RegList-based methods.
16282 static void PushPopMixedMethodsHelper(int claim, int reg_size) {
16283   SETUP();
16284 
16285   // Arbitrarily pick a register to use as a stack pointer.
16286   const Register& stack_pointer = x5;
16287   const RegList allowed = ~stack_pointer.GetBit();
16288   // Work out which registers to use, based on reg_size.
16289   Register r[10];
16290   Register x[10];
16291   PopulateRegisterArray(NULL, x, r, reg_size, 10, allowed);
16292 
16293   // Calculate some handy register lists.
16294   RegList r0_to_r3 = 0;
16295   for (int i = 0; i <= 3; i++) {
16296     r0_to_r3 |= x[i].GetBit();
16297   }
16298   RegList r4_to_r5 = 0;
16299   for (int i = 4; i <= 5; i++) {
16300     r4_to_r5 |= x[i].GetBit();
16301   }
16302   RegList r6_to_r9 = 0;
16303   for (int i = 6; i <= 9; i++) {
16304     r6_to_r9 |= x[i].GetBit();
16305   }
16306 
16307   // Acquire all temps from the MacroAssembler. They are used arbitrarily below.
16308   UseScratchRegisterScope temps(&masm);
16309   temps.ExcludeAll();
16310 
16311   // The literal base is chosen to have two useful properties:
16312   //  * When multiplied by small values (such as a register index), this value
16313   //    is clearly readable in the result.
16314   //  * The value is not formed from repeating fixed-size smaller values, so it
16315   //    can be used to detect endianness-related errors.
16316   uint64_t literal_base = 0x0100001000100101;
16317 
16318   START();
16319   {
16320     VIXL_ASSERT(__ StackPointer().Is(sp));
16321     __ Mov(stack_pointer, __ StackPointer());
16322     __ SetStackPointer(stack_pointer);
16323 
16324     // Claim memory first, as requested.
16325     __ Claim(claim);
16326 
16327     __ Mov(x[3], literal_base * 3);
16328     __ Mov(x[2], literal_base * 2);
16329     __ Mov(x[1], literal_base * 1);
16330     __ Mov(x[0], literal_base * 0);
16331 
16332     __ PushSizeRegList(r0_to_r3, reg_size);
16333     __ Push(r[3], r[2]);
16334 
16335     Clobber(&masm, r0_to_r3);
16336     __ PopSizeRegList(r0_to_r3, reg_size);
16337 
16338     __ Push(r[2], r[1], r[3], r[0]);
16339 
16340     Clobber(&masm, r4_to_r5);
16341     __ Pop(r[4], r[5]);
16342     Clobber(&masm, r6_to_r9);
16343     __ Pop(r[6], r[7], r[8], r[9]);
16344 
16345     // Drop memory to restore stack_pointer.
16346     __ Drop(claim);
16347 
16348     __ Mov(sp, __ StackPointer());
16349     __ SetStackPointer(sp);
16350   }
16351 
16352   END();
16353 
16354   RUN();
16355 
16356   // Always use ASSERT_EQUAL_64, even when testing W registers, so we can test
16357   // that the upper word was properly cleared by Pop.
16358   literal_base &= (0xffffffffffffffff >> (64 - reg_size));
16359 
16360   ASSERT_EQUAL_64(literal_base * 3, x[9]);
16361   ASSERT_EQUAL_64(literal_base * 2, x[8]);
16362   ASSERT_EQUAL_64(literal_base * 0, x[7]);
16363   ASSERT_EQUAL_64(literal_base * 3, x[6]);
16364   ASSERT_EQUAL_64(literal_base * 1, x[5]);
16365   ASSERT_EQUAL_64(literal_base * 2, x[4]);
16366 
16367   TEARDOWN();
16368 }
16369 
16370 
16371 TEST(push_pop_xreg_mixed_methods_64) {
16372   for (int claim = 0; claim <= 8; claim++) {
16373     PushPopMixedMethodsHelper(claim, kXRegSize);
16374   }
16375 }
16376 
16377 
16378 TEST(push_pop_xreg_mixed_methods_32) {
16379   for (int claim = 0; claim <= 8; claim++) {
16380     PushPopMixedMethodsHelper(claim, kWRegSize);
16381   }
16382 }
16383 
16384 
16385 // Push and pop data using overlapping X- and W-sized quantities.
16386 static void PushPopWXOverlapHelper(int reg_count, int claim) {
16387   SETUP();
16388 
16389   // Arbitrarily pick a register to use as a stack pointer.
16390   const Register& stack_pointer = x10;
16391   const RegList allowed = ~stack_pointer.GetBit();
16392   if (reg_count == kPushPopUseMaxRegCount) {
16393     reg_count = CountSetBits(allowed, kNumberOfRegisters);
16394   }
16395   // Work out which registers to use, based on reg_size.
16396   Register w[kNumberOfRegisters];
16397   Register x[kNumberOfRegisters];
16398   RegList list = PopulateRegisterArray(w, x, NULL, 0, reg_count, allowed);
16399 
16400   // The number of W-sized slots we expect to pop. When we pop, we alternate
16401   // between W and X registers, so we need reg_count*1.5 W-sized slots.
16402   int const requested_w_slots = reg_count + reg_count / 2;
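  // For example, with reg_count == 10, the five X registers occupy two
  // W-sized slots each and the five W registers one each, giving
  // 10 + 10 / 2 == 15 slots.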
16403 
16404   // Track what _should_ be on the stack, using W-sized slots.
16405   static int const kMaxWSlots = kNumberOfRegisters + kNumberOfRegisters / 2;
16406   uint32_t stack[kMaxWSlots];
16407   for (int i = 0; i < kMaxWSlots; i++) {
16408     stack[i] = 0xdeadbeef;
16409   }
16410 
16411   // Acquire all temps from the MacroAssembler. They are used arbitrarily below.
16412   UseScratchRegisterScope temps(&masm);
16413   temps.ExcludeAll();
16414 
16415   // The literal base is chosen to have two useful properties:
16416   //  * When multiplied by small values (such as a register index), this value
16417   //    is clearly readable in the result.
16418   //  * The value is not formed from repeating fixed-size smaller values, so it
16419   //    can be used to detect endianness-related errors.
16420   static uint64_t const literal_base = 0x0100001000100101;
16421   static uint64_t const literal_base_hi = literal_base >> 32;
16422   static uint64_t const literal_base_lo = literal_base & 0xffffffff;
16423   static uint64_t const literal_base_w = literal_base & 0xffffffff;
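  // For example, the first few multiples used below are:
  //
  //   literal_base * 1 = 0x0100001000100101
  //   literal_base * 2 = 0x0200002000200202
  //   literal_base * 3 = 0x0300003000300303
  //
  // so the register index stays readable in the result, and the hi/lo words
  // (0x01000010 and 0x00100101) are easy to spot when an X register is split
  // across two W-sized stack slots.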
16424 
16425   START();
16426   {
16427     VIXL_ASSERT(__ StackPointer().Is(sp));
16428     __ Mov(stack_pointer, __ StackPointer());
16429     __ SetStackPointer(stack_pointer);
16430 
16431     // Initialize the registers.
16432     for (int i = 0; i < reg_count; i++) {
16433       // Always write into the X register, to ensure that the upper word is
16434       // properly ignored by Push when testing W registers.
16435       __ Mov(x[i], literal_base * i);
16436     }
16437 
16438     // Claim memory first, as requested.
16439     __ Claim(claim);
16440 
16441     // The push-pop pattern is as follows:
16442     // Push:           Pop:
16443     //  x[0](hi)   ->   w[0]
16444     //  x[0](lo)   ->   x[1](hi)
16445     //  w[1]       ->   x[1](lo)
16446     //  w[1]       ->   w[2]
16447     //  x[2](hi)   ->   x[2](hi)
16448     //  x[2](lo)   ->   x[2](lo)
16449     //  x[2](hi)   ->   w[3]
16450     //  x[2](lo)   ->   x[4](hi)
16451     //  x[2](hi)   ->   x[4](lo)
16452     //  x[2](lo)   ->   w[5]
16453     //  w[3]       ->   x[5](hi)
16454     //  w[3]       ->   x[6](lo)
16455     //  w[3]       ->   w[7]
16456     //  w[3]       ->   x[8](hi)
16457     //  x[4](hi)   ->   x[8](lo)
16458     //  x[4](lo)   ->   w[9]
16459     // ... pattern continues ...
16460     //
16461     // That is, registers are pushed starting with the lower numbers,
16462     // alternating between x and w registers, and pushing i%4+1 copies of each,
16463     // where i is the register number.
16464     // Registers are popped one at a time, starting with the higher
16465     // numbers, alternating between x and w registers.
16466     //
16467     // This pattern provides a wide variety of alignment effects and overlaps.
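    // As a concrete trace (assuming reg_count == 4, so requested_w_slots is
    // 6): the push loop pushes x[0] once (2 slots), w[1] twice (2 slots) and
    // x[2] three times (6 slots), then stops with active_w_slots == 10 and
    // drops the four excess W-sized slots before popping.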
16468 
16469     // ---- Push ----
16470 
16471     int active_w_slots = 0;
16472     for (int i = 0; active_w_slots < requested_w_slots; i++) {
16473       VIXL_ASSERT(i < reg_count);
16474       // In order to test various arguments to PushMultipleTimes, and to try to
16475       // exercise different alignment and overlap effects, we push each
16476       // register a different number of times.
16477       int times = i % 4 + 1;
16478       if (i & 1) {
16479         // Push odd-numbered registers as W registers.
16480         __ PushMultipleTimes(times, w[i]);
16481         // Fill in the expected stack slots.
16482         for (int j = 0; j < times; j++) {
16483           if (w[i].Is(wzr)) {
16484             // The zero register always writes zeroes.
16485             stack[active_w_slots++] = 0;
16486           } else {
16487             stack[active_w_slots++] = literal_base_w * i;
16488           }
16489         }
16490       } else {
16491         // Push even-numbered registers as X registers.
16492         __ PushMultipleTimes(times, x[i]);
16493         // Fill in the expected stack slots.
16494         for (int j = 0; j < times; j++) {
16495           if (x[i].Is(xzr)) {
16496             // The zero register always writes zeroes.
16497             stack[active_w_slots++] = 0;
16498             stack[active_w_slots++] = 0;
16499           } else {
16500             stack[active_w_slots++] = literal_base_hi * i;
16501             stack[active_w_slots++] = literal_base_lo * i;
16502           }
16503         }
16504       }
16505     }
16506     // Because we were pushing several registers at a time, we probably pushed
16507     // more than we needed to.
16508     if (active_w_slots > requested_w_slots) {
16509       __ Drop((active_w_slots - requested_w_slots) * kWRegSizeInBytes);
16510       // Bump the number of active W-sized slots back to where it should be,
16511       // and fill the empty space with a dummy value.
16512       do {
16513         stack[active_w_slots--] = 0xdeadbeef;
16514       } while (active_w_slots > requested_w_slots);
16515     }
16516 
16517     // ---- Pop ----
16518 
16519     Clobber(&masm, list);
16520 
16521     // If popping an even number of registers, the first one will be X-sized.
16522     // Otherwise, the first one will be W-sized.
16523     bool next_is_64 = !(reg_count & 1);
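    // For example, with reg_count == 4 the pops are x[3], w[2], x[1] and
    // w[0], consuming 2 + 1 + 2 + 1 = 6 W-sized slots, which matches
    // requested_w_slots.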
16524     for (int i = reg_count - 1; i >= 0; i--) {
16525       if (next_is_64) {
16526         __ Pop(x[i]);
16527         active_w_slots -= 2;
16528       } else {
16529         __ Pop(w[i]);
16530         active_w_slots -= 1;
16531       }
16532       next_is_64 = !next_is_64;
16533     }
16534     VIXL_ASSERT(active_w_slots == 0);
16535 
16536     // Drop memory to restore stack_pointer.
16537     __ Drop(claim);
16538 
16539     __ Mov(sp, __ StackPointer());
16540     __ SetStackPointer(sp);
16541   }
16542 
16543   END();
16544 
16545   RUN();
16546 
16547   int slot = 0;
16548   for (int i = 0; i < reg_count; i++) {
16549     // Even-numbered registers were written as W registers.
16550     // Odd-numbered registers were written as X registers.
16551     bool expect_64 = (i & 1);
16552     uint64_t expected;
16553 
16554     if (expect_64) {
16555       uint64_t hi = stack[slot++];
16556       uint64_t lo = stack[slot++];
16557       expected = (hi << 32) | lo;
16558     } else {
16559       expected = stack[slot++];
16560     }
16561 
16562     // Always use ASSERT_EQUAL_64, even when testing W registers, so we can
16563     // test that the upper word was properly cleared by Pop.
16564     if (x[i].Is(xzr)) {
16565       ASSERT_EQUAL_64(0, x[i]);
16566     } else {
16567       ASSERT_EQUAL_64(expected, x[i]);
16568     }
16569   }
16570   VIXL_ASSERT(slot == requested_w_slots);
16571 
16572   TEARDOWN();
16573 }
16574 
16575 
16576 TEST(push_pop_xreg_wx_overlap) {
16577   for (int claim = 0; claim <= 8; claim++) {
16578     for (int count = 1; count <= 8; count++) {
16579       PushPopWXOverlapHelper(count, claim);
16580     }
16581     // Test with the maximum number of registers.
16582     PushPopWXOverlapHelper(kPushPopUseMaxRegCount, claim);
16583   }
16584 }
16585 
16586 
16587 TEST(push_pop_sp) {
16588   SETUP();
16589 
16590   START();
16591 
16592   VIXL_ASSERT(sp.Is(__ StackPointer()));
16593 
16594   // Acquire all temps from the MacroAssembler. They are used arbitrarily below.
16595   UseScratchRegisterScope temps(&masm);
16596   temps.ExcludeAll();
16597 
16598   __ Mov(x3, 0x3333333333333333);
16599   __ Mov(x2, 0x2222222222222222);
16600   __ Mov(x1, 0x1111111111111111);
16601   __ Mov(x0, 0x0000000000000000);
16602   __ Claim(2 * kXRegSizeInBytes);
16603   __ PushXRegList(x0.GetBit() | x1.GetBit() | x2.GetBit() | x3.GetBit());
16604   __ Push(x3, x2);
16605   __ PopXRegList(x0.GetBit() | x1.GetBit() | x2.GetBit() | x3.GetBit());
16606   __ Push(x2, x1, x3, x0);
16607   __ Pop(x4, x5);
16608   __ Pop(x6, x7, x8, x9);
16609 
16610   __ Claim(2 * kXRegSizeInBytes);
16611   __ PushWRegList(w0.GetBit() | w1.GetBit() | w2.GetBit() | w3.GetBit());
16612   __ Push(w3, w1, w2, w0);
16613   __ PopWRegList(w10.GetBit() | w11.GetBit() | w12.GetBit() | w13.GetBit());
16614   __ Pop(w14, w15, w16, w17);
16615 
16616   __ Claim(2 * kXRegSizeInBytes);
16617   __ Push(w2, w2, w1, w1);
16618   __ Push(x3, x3);
16619   __ Pop(w18, w19, w20, w21);
16620   __ Pop(x22, x23);
16621 
16622   __ Claim(2 * kXRegSizeInBytes);
16623   __ PushXRegList(x1.GetBit() | x22.GetBit());
16624   __ PopXRegList(x24.GetBit() | x26.GetBit());
16625 
16626   __ Claim(2 * kXRegSizeInBytes);
16627   __ PushWRegList(w1.GetBit() | w2.GetBit() | w4.GetBit() | w22.GetBit());
16628   __ PopWRegList(w25.GetBit() | w27.GetBit() | w28.GetBit() | w29.GetBit());
16629 
16630   __ Claim(2 * kXRegSizeInBytes);
16631   __ PushXRegList(0);
16632   __ PopXRegList(0);
16633   __ PushXRegList(0xffffffff);
16634   __ PopXRegList(0xffffffff);
16635   __ Drop(12 * kXRegSizeInBytes);
16636   END();
16637 
16638   RUN();
16639 
16640   ASSERT_EQUAL_64(0x1111111111111111, x3);
16641   ASSERT_EQUAL_64(0x0000000000000000, x2);
16642   ASSERT_EQUAL_64(0x3333333333333333, x1);
16643   ASSERT_EQUAL_64(0x2222222222222222, x0);
16644   ASSERT_EQUAL_64(0x3333333333333333, x9);
16645   ASSERT_EQUAL_64(0x2222222222222222, x8);
16646   ASSERT_EQUAL_64(0x0000000000000000, x7);
16647   ASSERT_EQUAL_64(0x3333333333333333, x6);
16648   ASSERT_EQUAL_64(0x1111111111111111, x5);
16649   ASSERT_EQUAL_64(0x2222222222222222, x4);
16650 
16651   ASSERT_EQUAL_32(0x11111111U, w13);
16652   ASSERT_EQUAL_32(0x33333333U, w12);
16653   ASSERT_EQUAL_32(0x00000000U, w11);
16654   ASSERT_EQUAL_32(0x22222222U, w10);
16655   ASSERT_EQUAL_32(0x11111111U, w17);
16656   ASSERT_EQUAL_32(0x00000000U, w16);
16657   ASSERT_EQUAL_32(0x33333333U, w15);
16658   ASSERT_EQUAL_32(0x22222222U, w14);
16659 
16660   ASSERT_EQUAL_32(0x11111111U, w18);
16661   ASSERT_EQUAL_32(0x11111111U, w19);
16662   ASSERT_EQUAL_32(0x11111111U, w20);
16663   ASSERT_EQUAL_32(0x11111111U, w21);
16664   ASSERT_EQUAL_64(0x3333333333333333, x22);
16665   ASSERT_EQUAL_64(0x0000000000000000, x23);
16666 
16667   ASSERT_EQUAL_64(0x3333333333333333, x24);
16668   ASSERT_EQUAL_64(0x3333333333333333, x26);
16669 
16670   ASSERT_EQUAL_32(0x33333333U, w25);
16671   ASSERT_EQUAL_32(0x00000000U, w27);
16672   ASSERT_EQUAL_32(0x22222222U, w28);
16673   ASSERT_EQUAL_32(0x33333333U, w29);
16674   TEARDOWN();
16675 }
16676 
16677 
16678 TEST(printf) {
16679   // RegisterDump::Dump uses NEON.
16680   // Printf uses FP to cast FP arguments to doubles.
16681   SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);
16682 
16683   START();
16684 
16685   char const* test_plain_string = "Printf with no arguments.\n";
16686   char const* test_substring = "'This is a substring.'";
16687   RegisterDump before;
16688 
16689   // Initialize x29 to the value of the stack pointer. We will use x29 as a
16690   // temporary stack pointer later, and initializing it in this way allows the
16691   // RegisterDump check to pass.
16692   __ Mov(x29, __ StackPointer());
16693 
16694   // Test simple integer arguments.
16695   __ Mov(x0, 1234);
16696   __ Mov(x1, 0x1234);
16697 
16698   // Test simple floating-point arguments.
16699   __ Fmov(d0, 1.234);
16700 
16701   // Test pointer (string) arguments.
16702   __ Mov(x2, reinterpret_cast<uintptr_t>(test_substring));
16703 
16704   // Test the maximum number of arguments, and sign extension.
16705   __ Mov(w3, 0xffffffff);
16706   __ Mov(w4, 0xffffffff);
16707   __ Mov(x5, 0xffffffffffffffff);
16708   __ Mov(x6, 0xffffffffffffffff);
16709   __ Fmov(s1, 1.234);
16710   __ Fmov(s2, 2.345);
16711   __ Fmov(d3, 3.456);
16712   __ Fmov(d4, 4.567);
16713 
16714   // Test printing callee-saved registers.
16715   __ Mov(x28, 0x123456789abcdef);
16716   __ Fmov(d10, 42.0);
16717 
16718   // Test with three arguments.
16719   __ Mov(x10, 3);
16720   __ Mov(x11, 40);
16721   __ Mov(x12, 500);
16722 
16723   // A single character.
16724   __ Mov(w13, 'x');
16725 
16726   // Check that we don't clobber any registers.
16727   before.Dump(&masm);
16728 
16729   __ Printf(test_plain_string);  // NOLINT(runtime/printf)
16730   __ Printf("x0: %" PRId64 ", x1: 0x%08" PRIx64 "\n", x0, x1);
16731   __ Printf("w5: %" PRId32 ", x5: %" PRId64 "\n", w5, x5);
16732   __ Printf("d0: %f\n", d0);
16733   __ Printf("Test %%s: %s\n", x2);
16734   __ Printf("w3(uint32): %" PRIu32 "\nw4(int32): %" PRId32
16735             "\n"
16736             "x5(uint64): %" PRIu64 "\nx6(int64): %" PRId64 "\n",
16737             w3,
16738             w4,
16739             x5,
16740             x6);
16741   __ Printf("%%f: %f\n%%g: %g\n%%e: %e\n%%E: %E\n", s1, s2, d3, d4);
16742   __ Printf("0x%" PRIx32 ", 0x%" PRIx64 "\n", w28, x28);
16743   __ Printf("%g\n", d10);
16744   __ Printf("%%%%%s%%%c%%\n", x2, w13);
16745 
16746   // Print the stack pointer (sp).
16747   __ Printf("StackPointer(sp): 0x%016" PRIx64 ", 0x%08" PRIx32 "\n",
16748             __ StackPointer(),
16749             __ StackPointer().W());
16750 
16751   // Test with a different stack pointer.
16752   const Register old_stack_pointer = __ StackPointer();
16753   __ Mov(x29, old_stack_pointer);
16754   __ SetStackPointer(x29);
16755   // Print the stack pointer (not sp).
16756   __ Printf("StackPointer(not sp): 0x%016" PRIx64 ", 0x%08" PRIx32 "\n",
16757             __ StackPointer(),
16758             __ StackPointer().W());
16759   __ Mov(old_stack_pointer, __ StackPointer());
16760   __ SetStackPointer(old_stack_pointer);
16761 
16762   // Test with three arguments.
16763   __ Printf("3=%u, 4=%u, 5=%u\n", x10, x11, x12);
16764 
16765   // Mixed argument types.
16766   __ Printf("w3: %" PRIu32 ", s1: %f, x5: %" PRIu64 ", d3: %f\n",
16767             w3,
16768             s1,
16769             x5,
16770             d3);
16771   __ Printf("s1: %f, d3: %f, w3: %" PRId32 ", x5: %" PRId64 "\n",
16772             s1,
16773             d3,
16774             w3,
16775             x5);
16776 
16777   END();
16778   RUN();
16779 
16780   // We cannot easily test the output of the Printf sequences, and because
16781   // Printf preserves all registers by default, we can't look at the number of
16782   // bytes that were printed. However, the printf_no_preserve test should check
16783   // that, and here we just test that we didn't clobber any registers.
16784   ASSERT_EQUAL_REGISTERS(before);
16785 
16786   TEARDOWN();
16787 }
16788 
16789 
16790 TEST(printf_no_preserve) {
16791   // PrintfNoPreserve uses FP to cast FP arguments to doubles.
16792   SETUP_WITH_FEATURES(CPUFeatures::kFP);
16793 
16794   START();
16795 
16796   char const* test_plain_string = "Printf with no arguments.\n";
16797   char const* test_substring = "'This is a substring.'";
16798 
16799   __ PrintfNoPreserve(test_plain_string);
16800   __ Mov(x19, x0);
16801 
16802   // Test simple integer arguments.
16803   __ Mov(x0, 1234);
16804   __ Mov(x1, 0x1234);
16805   __ PrintfNoPreserve("x0: %" PRId64 ", x1: 0x%08" PRIx64 "\n", x0, x1);
16806   __ Mov(x20, x0);
16807 
16808   // Test simple floating-point arguments.
16809   __ Fmov(d0, 1.234);
16810   __ PrintfNoPreserve("d0: %f\n", d0);
16811   __ Mov(x21, x0);
16812 
16813   // Test pointer (string) arguments.
16814   __ Mov(x2, reinterpret_cast<uintptr_t>(test_substring));
16815   __ PrintfNoPreserve("Test %%s: %s\n", x2);
16816   __ Mov(x22, x0);
16817 
16818   // Test the maximum number of arguments, and sign extension.
16819   __ Mov(w3, 0xffffffff);
16820   __ Mov(w4, 0xffffffff);
16821   __ Mov(x5, 0xffffffffffffffff);
16822   __ Mov(x6, 0xffffffffffffffff);
16823   __ PrintfNoPreserve("w3(uint32): %" PRIu32 "\nw4(int32): %" PRId32
16824                       "\n"
16825                       "x5(uint64): %" PRIu64 "\nx6(int64): %" PRId64 "\n",
16826                       w3,
16827                       w4,
16828                       x5,
16829                       x6);
16830   __ Mov(x23, x0);
16831 
16832   __ Fmov(s1, 1.234);
16833   __ Fmov(s2, 2.345);
16834   __ Fmov(d3, 3.456);
16835   __ Fmov(d4, 4.567);
16836   __ PrintfNoPreserve("%%f: %f\n%%g: %g\n%%e: %e\n%%E: %E\n", s1, s2, d3, d4);
16837   __ Mov(x24, x0);
16838 
16839   // Test printing callee-saved registers.
16840   __ Mov(x28, 0x123456789abcdef);
16841   __ PrintfNoPreserve("0x%" PRIx32 ", 0x%" PRIx64 "\n", w28, x28);
16842   __ Mov(x25, x0);
16843 
16844   __ Fmov(d10, 42.0);
16845   __ PrintfNoPreserve("%g\n", d10);
16846   __ Mov(x26, x0);
16847 
16848   // Test with a different stack pointer.
16849   const Register old_stack_pointer = __ StackPointer();
16850   __ Mov(x29, old_stack_pointer);
16851   __ SetStackPointer(x29);
16852   // Print the stack pointer (not sp).
16853   __ PrintfNoPreserve("StackPointer(not sp): 0x%016" PRIx64 ", 0x%08" PRIx32
16854                       "\n",
16855                       __ StackPointer(),
16856                       __ StackPointer().W());
16857   __ Mov(x27, x0);
16858   __ Mov(old_stack_pointer, __ StackPointer());
16859   __ SetStackPointer(old_stack_pointer);
16860 
16861   // Test with three arguments.
16862   __ Mov(x3, 3);
16863   __ Mov(x4, 40);
16864   __ Mov(x5, 500);
16865   __ PrintfNoPreserve("3=%u, 4=%u, 5=%u\n", x3, x4, x5);
16866   __ Mov(x28, x0);
16867 
16868   // Mixed argument types.
16869   __ Mov(w3, 0xffffffff);
16870   __ Fmov(s1, 1.234);
16871   __ Mov(x5, 0xffffffffffffffff);
16872   __ Fmov(d3, 3.456);
16873   __ PrintfNoPreserve("w3: %" PRIu32 ", s1: %f, x5: %" PRIu64 ", d3: %f\n",
16874                       w3,
16875                       s1,
16876                       x5,
16877                       d3);
16878   __ Mov(x29, x0);
16879 
16880   END();
16881   RUN();
16882 
16883   // We cannot easily test the exact output of the Printf sequences, but we can
16884   // use the return code to check that the string length was correct.
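  // For example, "x0: 1234, x1: 0x00001234\n" is 4 + 4 + 2 + 4 + 10 + 1 = 25
  // characters, matching the value checked against x20 below.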
16885 
16886   // Printf with no arguments.
16887   ASSERT_EQUAL_64(strlen(test_plain_string), x19);
16888   // x0: 1234, x1: 0x00001234
16889   ASSERT_EQUAL_64(25, x20);
16890   // d0: 1.234000
16891   ASSERT_EQUAL_64(13, x21);
16892   // Test %s: 'This is a substring.'
16893   ASSERT_EQUAL_64(32, x22);
16894   // w3(uint32): 4294967295
16895   // w4(int32): -1
16896   // x5(uint64): 18446744073709551615
16897   // x6(int64): -1
16898   ASSERT_EQUAL_64(23 + 14 + 33 + 14, x23);
16899   // %f: 1.234000
16900   // %g: 2.345
16901   // %e: 3.456000e+00
16902   // %E: 4.567000E+00
16903   ASSERT_EQUAL_64(13 + 10 + 17 + 17, x24);
16904   // 0x89abcdef, 0x123456789abcdef
16905   ASSERT_EQUAL_64(30, x25);
16906   // 42
16907   ASSERT_EQUAL_64(3, x26);
16908   // StackPointer(not sp): 0x00007fb037ae2370, 0x37ae2370
16909   // Note: This is an example value, but the field width is fixed here so the
16910   // string length is still predictable.
16911   ASSERT_EQUAL_64(53, x27);
16912   // 3=3, 4=40, 5=500
16913   ASSERT_EQUAL_64(17, x28);
16914   // w3: 4294967295, s1: 1.234000, x5: 18446744073709551615, d3: 3.456000
16915   ASSERT_EQUAL_64(69, x29);
16916 
16917   TEARDOWN();
16918 }
16919 
16920 
16921 #ifndef VIXL_INCLUDE_SIMULATOR_AARCH64
16922 TEST(trace) {
16923   // The Trace helper should not generate any code unless the simulator is being
16924   // used.
16925   SETUP();
16926   START();
16927 
16928   Label start;
16929   __ Bind(&start);
16930   __ Trace(LOG_ALL, TRACE_ENABLE);
16931   __ Trace(LOG_ALL, TRACE_DISABLE);
16932   VIXL_CHECK(__ GetSizeOfCodeGeneratedSince(&start) == 0);
16933 
16934   END();
16935   TEARDOWN();
16936 }
16937 #endif
16938 
16939 
16940 #ifndef VIXL_INCLUDE_SIMULATOR_AARCH64
16941 TEST(log) {
16942   // The Log helper should not generate any code unless the simulator is being
16943   // used.
16944   SETUP();
16945   START();
16946 
16947   Label start;
16948   __ Bind(&start);
16949   __ Log(LOG_ALL);
16950   VIXL_CHECK(__ GetSizeOfCodeGeneratedSince(&start) == 0);
16951 
16952   END();
16953   TEARDOWN();
16954 }
16955 #endif
16956 
16957 
16958 TEST(blr_lr) {
16959   // A simple test to check that the simulator correctly handles "blr lr".
16960   SETUP();
16961 
16962   START();
16963   Label target;
16964   Label end;
16965 
16966   __ Mov(x0, 0x0);
16967   __ Adr(lr, &target);
16968 
16969   __ Blr(lr);
16970   __ Mov(x0, 0xdeadbeef);
16971   __ B(&end);
16972 
16973   __ Bind(&target);
16974   __ Mov(x0, 0xc001c0de);
16975 
16976   __ Bind(&end);
16977   END();
16978 
16979   RUN();
16980 
16981   ASSERT_EQUAL_64(0xc001c0de, x0);
16982 
16983   TEARDOWN();
16984 }
16985 
16986 
16987 TEST(barriers) {
16988   // Generate all supported barriers; this is just a smoke test.
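  // As a reminder: DMB orders memory accesses, DSB additionally waits for
  // outstanding accesses to complete, and ISB acts as a context
  // synchronization event so that later instructions see earlier
  // context-changing operations.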
16989   SETUP();
16990 
16991   START();
16992 
16993   // DMB
16994   __ Dmb(FullSystem, BarrierAll);
16995   __ Dmb(FullSystem, BarrierReads);
16996   __ Dmb(FullSystem, BarrierWrites);
16997   __ Dmb(FullSystem, BarrierOther);
16998 
16999   __ Dmb(InnerShareable, BarrierAll);
17000   __ Dmb(InnerShareable, BarrierReads);
17001   __ Dmb(InnerShareable, BarrierWrites);
17002   __ Dmb(InnerShareable, BarrierOther);
17003 
17004   __ Dmb(NonShareable, BarrierAll);
17005   __ Dmb(NonShareable, BarrierReads);
17006   __ Dmb(NonShareable, BarrierWrites);
17007   __ Dmb(NonShareable, BarrierOther);
17008 
17009   __ Dmb(OuterShareable, BarrierAll);
17010   __ Dmb(OuterShareable, BarrierReads);
17011   __ Dmb(OuterShareable, BarrierWrites);
17012   __ Dmb(OuterShareable, BarrierOther);
17013 
17014   // DSB
17015   __ Dsb(FullSystem, BarrierAll);
17016   __ Dsb(FullSystem, BarrierReads);
17017   __ Dsb(FullSystem, BarrierWrites);
17018   __ Dsb(FullSystem, BarrierOther);
17019 
17020   __ Dsb(InnerShareable, BarrierAll);
17021   __ Dsb(InnerShareable, BarrierReads);
17022   __ Dsb(InnerShareable, BarrierWrites);
17023   __ Dsb(InnerShareable, BarrierOther);
17024 
17025   __ Dsb(NonShareable, BarrierAll);
17026   __ Dsb(NonShareable, BarrierReads);
17027   __ Dsb(NonShareable, BarrierWrites);
17028   __ Dsb(NonShareable, BarrierOther);
17029 
17030   __ Dsb(OuterShareable, BarrierAll);
17031   __ Dsb(OuterShareable, BarrierReads);
17032   __ Dsb(OuterShareable, BarrierWrites);
17033   __ Dsb(OuterShareable, BarrierOther);
17034 
17035   // ISB
17036   __ Isb();
17037 
17038   END();
17039 
17040   RUN();
17041 
17042   TEARDOWN();
17043 }
17044 
17045 
17046 TEST(process_nan_double) {
17047   // Make sure that NaN propagation works correctly.
17048   double sn = RawbitsToDouble(0x7ff5555511111111);
17049   double qn = RawbitsToDouble(0x7ffaaaaa11111111);
17050   VIXL_ASSERT(IsSignallingNaN(sn));
17051   VIXL_ASSERT(IsQuietNaN(qn));
17052 
17053   // The input NaNs after passing through ProcessNaN.
17054   double sn_proc = RawbitsToDouble(0x7ffd555511111111);
17055   double qn_proc = qn;
17056   VIXL_ASSERT(IsQuietNaN(sn_proc));
17057   VIXL_ASSERT(IsQuietNaN(qn_proc));
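  // ProcessNaN quietens a signalling NaN by setting the top fraction bit
  // (bit 51 for doubles), so 0x7ff5555511111111 becomes 0x7ffd555511111111;
  // a quiet NaN already has that bit set and passes through unchanged.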
17058 
17059   SETUP_WITH_FEATURES(CPUFeatures::kFP);
17060 
17061   START();
17062 
17063   // Execute a number of instructions which all use ProcessNaN, and check that
17064   // they all handle the NaN correctly.
17065   __ Fmov(d0, sn);
17066   __ Fmov(d10, qn);
17067 
17068   // Operations that always propagate NaNs unchanged, even signalling NaNs.
17069   //   - Signalling NaN
17070   __ Fmov(d1, d0);
17071   __ Fabs(d2, d0);
17072   __ Fneg(d3, d0);
17073   //   - Quiet NaN
17074   __ Fmov(d11, d10);
17075   __ Fabs(d12, d10);
17076   __ Fneg(d13, d10);
17077 
17078   // Operations that use ProcessNaN.
17079   //   - Signalling NaN
17080   __ Fsqrt(d4, d0);
17081   __ Frinta(d5, d0);
17082   __ Frintn(d6, d0);
17083   __ Frintz(d7, d0);
17084   //   - Quiet NaN
17085   __ Fsqrt(d14, d10);
17086   __ Frinta(d15, d10);
17087   __ Frintn(d16, d10);
17088   __ Frintz(d17, d10);
17089 
17090   // The behaviour of fcvt is checked in TEST(fcvt_sd).
17091 
17092   END();
17093   RUN();
17094 
17095   uint64_t qn_raw = DoubleToRawbits(qn);
17096   uint64_t sn_raw = DoubleToRawbits(sn);
17097 
17098   //   - Signalling NaN
17099   ASSERT_EQUAL_FP64(sn, d1);
17100   ASSERT_EQUAL_FP64(RawbitsToDouble(sn_raw & ~kDSignMask), d2);
17101   ASSERT_EQUAL_FP64(RawbitsToDouble(sn_raw ^ kDSignMask), d3);
17102   //   - Quiet NaN
17103   ASSERT_EQUAL_FP64(qn, d11);
17104   ASSERT_EQUAL_FP64(RawbitsToDouble(qn_raw & ~kDSignMask), d12);
17105   ASSERT_EQUAL_FP64(RawbitsToDouble(qn_raw ^ kDSignMask), d13);
17106 
17107   //   - Signalling NaN
17108   ASSERT_EQUAL_FP64(sn_proc, d4);
17109   ASSERT_EQUAL_FP64(sn_proc, d5);
17110   ASSERT_EQUAL_FP64(sn_proc, d6);
17111   ASSERT_EQUAL_FP64(sn_proc, d7);
17112   //   - Quiet NaN
17113   ASSERT_EQUAL_FP64(qn_proc, d14);
17114   ASSERT_EQUAL_FP64(qn_proc, d15);
17115   ASSERT_EQUAL_FP64(qn_proc, d16);
17116   ASSERT_EQUAL_FP64(qn_proc, d17);
17117 
17118   TEARDOWN();
17119 }
17120 
17121 
17122 TEST(process_nan_float) {
17123   // Make sure that NaN propagation works correctly.
17124   float sn = RawbitsToFloat(0x7f951111);
17125   float qn = RawbitsToFloat(0x7fea1111);
17126   VIXL_ASSERT(IsSignallingNaN(sn));
17127   VIXL_ASSERT(IsQuietNaN(qn));
17128 
17129   // The input NaNs after passing through ProcessNaN.
17130   float sn_proc = RawbitsToFloat(0x7fd51111);
17131   float qn_proc = qn;
17132   VIXL_ASSERT(IsQuietNaN(sn_proc));
17133   VIXL_ASSERT(IsQuietNaN(qn_proc));
17134 
17135   SETUP_WITH_FEATURES(CPUFeatures::kFP);
17136 
17137   START();
17138 
17139   // Execute a number of instructions which all use ProcessNaN, and check that
17140   // they all handle the NaN correctly.
17141   __ Fmov(s0, sn);
17142   __ Fmov(s10, qn);
17143 
17144   // Operations that always propagate NaNs unchanged, even signalling NaNs.
17145   //   - Signalling NaN
17146   __ Fmov(s1, s0);
17147   __ Fabs(s2, s0);
17148   __ Fneg(s3, s0);
17149   //   - Quiet NaN
17150   __ Fmov(s11, s10);
17151   __ Fabs(s12, s10);
17152   __ Fneg(s13, s10);
17153 
17154   // Operations that use ProcessNaN.
17155   //   - Signalling NaN
17156   __ Fsqrt(s4, s0);
17157   __ Frinta(s5, s0);
17158   __ Frintn(s6, s0);
17159   __ Frintz(s7, s0);
17160   //   - Quiet NaN
17161   __ Fsqrt(s14, s10);
17162   __ Frinta(s15, s10);
17163   __ Frintn(s16, s10);
17164   __ Frintz(s17, s10);
17165 
17166   // The behaviour of fcvt is checked in TEST(fcvt_sd).
17167 
17168   END();
17169   RUN();
17170 
17171   uint32_t qn_raw = FloatToRawbits(qn);
17172   uint32_t sn_raw = FloatToRawbits(sn);
17173 
17174   //   - Signalling NaN
17175   ASSERT_EQUAL_FP32(sn, s1);
17176   ASSERT_EQUAL_FP32(RawbitsToFloat(sn_raw & ~kSSignMask), s2);
17177   ASSERT_EQUAL_FP32(RawbitsToFloat(sn_raw ^ kSSignMask), s3);
17178   //   - Quiet NaN
17179   ASSERT_EQUAL_FP32(qn, s11);
17180   ASSERT_EQUAL_FP32(RawbitsToFloat(qn_raw & ~kSSignMask), s12);
17181   ASSERT_EQUAL_FP32(RawbitsToFloat(qn_raw ^ kSSignMask), s13);
17182 
17183   //   - Signalling NaN
17184   ASSERT_EQUAL_FP32(sn_proc, s4);
17185   ASSERT_EQUAL_FP32(sn_proc, s5);
17186   ASSERT_EQUAL_FP32(sn_proc, s6);
17187   ASSERT_EQUAL_FP32(sn_proc, s7);
17188   //   - Quiet NaN
17189   ASSERT_EQUAL_FP32(qn_proc, s14);
17190   ASSERT_EQUAL_FP32(qn_proc, s15);
17191   ASSERT_EQUAL_FP32(qn_proc, s16);
17192   ASSERT_EQUAL_FP32(qn_proc, s17);
17193 
17194   TEARDOWN();
17195 }
17196 
17197 // TODO: TEST(process_nan_half) {}
17198 
17199 static void ProcessNaNsHelper(double n, double m, double expected) {
17200   VIXL_ASSERT(IsNaN(n) || IsNaN(m));
17201   VIXL_ASSERT(IsNaN(expected));
17202 
17203   SETUP_WITH_FEATURES(CPUFeatures::kFP);
17204 
17205   START();
17206 
17207   // Execute a number of instructions which all use ProcessNaNs, and check that
17208   // they all propagate NaNs correctly.
17209   __ Fmov(d0, n);
17210   __ Fmov(d1, m);
17211 
17212   __ Fadd(d2, d0, d1);
17213   __ Fsub(d3, d0, d1);
17214   __ Fmul(d4, d0, d1);
17215   __ Fdiv(d5, d0, d1);
17216   __ Fmax(d6, d0, d1);
17217   __ Fmin(d7, d0, d1);
17218 
17219   END();
17220   RUN();
17221 
17222   ASSERT_EQUAL_FP64(expected, d2);
17223   ASSERT_EQUAL_FP64(expected, d3);
17224   ASSERT_EQUAL_FP64(expected, d4);
17225   ASSERT_EQUAL_FP64(expected, d5);
17226   ASSERT_EQUAL_FP64(expected, d6);
17227   ASSERT_EQUAL_FP64(expected, d7);
17228 
17229   TEARDOWN();
17230 }
17231 
17232 
17233 TEST(process_nans_double) {
17234   // Make sure that NaN propagation works correctly.
17235   double sn = RawbitsToDouble(0x7ff5555511111111);
17236   double sm = RawbitsToDouble(0x7ff5555522222222);
17237   double qn = RawbitsToDouble(0x7ffaaaaa11111111);
17238   double qm = RawbitsToDouble(0x7ffaaaaa22222222);
17239   VIXL_ASSERT(IsSignallingNaN(sn));
17240   VIXL_ASSERT(IsSignallingNaN(sm));
17241   VIXL_ASSERT(IsQuietNaN(qn));
17242   VIXL_ASSERT(IsQuietNaN(qm));
17243 
17244   // The input NaNs after passing through ProcessNaN.
17245   double sn_proc = RawbitsToDouble(0x7ffd555511111111);
17246   double sm_proc = RawbitsToDouble(0x7ffd555522222222);
17247   double qn_proc = qn;
17248   double qm_proc = qm;
17249   VIXL_ASSERT(IsQuietNaN(sn_proc));
17250   VIXL_ASSERT(IsQuietNaN(sm_proc));
17251   VIXL_ASSERT(IsQuietNaN(qn_proc));
17252   VIXL_ASSERT(IsQuietNaN(qm_proc));
17253 
17254   // Quiet NaNs are propagated.
17255   ProcessNaNsHelper(qn, 0, qn_proc);
17256   ProcessNaNsHelper(0, qm, qm_proc);
17257   ProcessNaNsHelper(qn, qm, qn_proc);
17258 
17259   // Signalling NaNs are propagated, and made quiet.
17260   ProcessNaNsHelper(sn, 0, sn_proc);
17261   ProcessNaNsHelper(0, sm, sm_proc);
17262   ProcessNaNsHelper(sn, sm, sn_proc);
17263 
17264   // Signalling NaNs take precedence over quiet NaNs.
17265   ProcessNaNsHelper(sn, qm, sn_proc);
17266   ProcessNaNsHelper(qn, sm, sm_proc);
17267   ProcessNaNsHelper(sn, sm, sn_proc);
17268 }
17269 
17270 
17271 static void ProcessNaNsHelper(float n, float m, float expected) {
17272   VIXL_ASSERT(IsNaN(n) || IsNaN(m));
17273   VIXL_ASSERT(IsNaN(expected));
17274 
17275   SETUP_WITH_FEATURES(CPUFeatures::kFP);
17276 
17277   START();
17278 
17279   // Execute a number of instructions which all use ProcessNaNs, and check that
17280   // they all propagate NaNs correctly.
17281   __ Fmov(s0, n);
17282   __ Fmov(s1, m);
17283 
17284   __ Fadd(s2, s0, s1);
17285   __ Fsub(s3, s0, s1);
17286   __ Fmul(s4, s0, s1);
17287   __ Fdiv(s5, s0, s1);
17288   __ Fmax(s6, s0, s1);
17289   __ Fmin(s7, s0, s1);
17290 
17291   END();
17292   RUN();
17293 
17294   ASSERT_EQUAL_FP32(expected, s2);
17295   ASSERT_EQUAL_FP32(expected, s3);
17296   ASSERT_EQUAL_FP32(expected, s4);
17297   ASSERT_EQUAL_FP32(expected, s5);
17298   ASSERT_EQUAL_FP32(expected, s6);
17299   ASSERT_EQUAL_FP32(expected, s7);
17300 
17301   TEARDOWN();
17302 }
17303 
17304 
17305 TEST(process_nans_float) {
17306   // Make sure that NaN propagation works correctly.
17307   float sn = RawbitsToFloat(0x7f951111);
17308   float sm = RawbitsToFloat(0x7f952222);
17309   float qn = RawbitsToFloat(0x7fea1111);
17310   float qm = RawbitsToFloat(0x7fea2222);
17311   VIXL_ASSERT(IsSignallingNaN(sn));
17312   VIXL_ASSERT(IsSignallingNaN(sm));
17313   VIXL_ASSERT(IsQuietNaN(qn));
17314   VIXL_ASSERT(IsQuietNaN(qm));
17315 
17316   // The input NaNs after passing through ProcessNaN.
17317   float sn_proc = RawbitsToFloat(0x7fd51111);
17318   float sm_proc = RawbitsToFloat(0x7fd52222);
17319   float qn_proc = qn;
17320   float qm_proc = qm;
17321   VIXL_ASSERT(IsQuietNaN(sn_proc));
17322   VIXL_ASSERT(IsQuietNaN(sm_proc));
17323   VIXL_ASSERT(IsQuietNaN(qn_proc));
17324   VIXL_ASSERT(IsQuietNaN(qm_proc));
17325 
17326   // Quiet NaNs are propagated.
17327   ProcessNaNsHelper(qn, 0, qn_proc);
17328   ProcessNaNsHelper(0, qm, qm_proc);
17329   ProcessNaNsHelper(qn, qm, qn_proc);
17330 
17331   // Signalling NaNs are propagated, and made quiet.
17332   ProcessNaNsHelper(sn, 0, sn_proc);
17333   ProcessNaNsHelper(0, sm, sm_proc);
17334   ProcessNaNsHelper(sn, sm, sn_proc);
17335 
17336   // Signalling NaNs take precedence over quiet NaNs.
17337   ProcessNaNsHelper(sn, qm, sn_proc);
17338   ProcessNaNsHelper(qn, sm, sm_proc);
17339   ProcessNaNsHelper(sn, sm, sn_proc);
17340 }
17341 
17342 
17343 static void ProcessNaNsHelper(Float16 n, Float16 m, Float16 expected) {
17344   VIXL_ASSERT(IsNaN(n) || IsNaN(m));
17345   VIXL_ASSERT(IsNaN(expected));
17346 
17347   SETUP_WITH_FEATURES(CPUFeatures::kFP, CPUFeatures::kFPHalf);
17348 
17349   START();
17350 
17351   // Execute a number of instructions which all use ProcessNaNs, and check that
17352   // they all propagate NaNs correctly.
17353   __ Fmov(h0, n);
17354   __ Fmov(h1, m);
17355 
17356   __ Fadd(h2, h0, h1);
17357   __ Fsub(h3, h0, h1);
17358   __ Fmul(h4, h0, h1);
17359   __ Fdiv(h5, h0, h1);
17360   __ Fmax(h6, h0, h1);
17361   __ Fmin(h7, h0, h1);
17362 
17363   END();
17364 
17365 #ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
17366   RUN();
17367   ASSERT_EQUAL_FP16(expected, h2);
17368   ASSERT_EQUAL_FP16(expected, h3);
17369   ASSERT_EQUAL_FP16(expected, h4);
17370   ASSERT_EQUAL_FP16(expected, h5);
17371   ASSERT_EQUAL_FP16(expected, h6);
17372   ASSERT_EQUAL_FP16(expected, h7);
17373 #else
17374   USE(expected);
17375 #endif
17376 
17377   TEARDOWN();
17378 }
17379 
17380 
17381 TEST(process_nans_half) {
17382   // Make sure that NaN propagation works correctly.
17383   Float16 sn(RawbitsToFloat16(0x7c11));
17384   Float16 sm(RawbitsToFloat16(0xfc22));
17385   Float16 qn(RawbitsToFloat16(0x7e33));
17386   Float16 qm(RawbitsToFloat16(0xfe44));
17387   VIXL_ASSERT(IsSignallingNaN(sn));
17388   VIXL_ASSERT(IsSignallingNaN(sm));
17389   VIXL_ASSERT(IsQuietNaN(qn));
17390   VIXL_ASSERT(IsQuietNaN(qm));
17391 
17392   // The input NaNs after passing through ProcessNaN.
17393   Float16 sn_proc(RawbitsToFloat16(0x7e11));
17394   Float16 sm_proc(RawbitsToFloat16(0xfe22));
17395   Float16 qn_proc = qn;
17396   Float16 qm_proc = qm;
17397   VIXL_ASSERT(IsQuietNaN(sn_proc));
17398   VIXL_ASSERT(IsQuietNaN(sm_proc));
17399   VIXL_ASSERT(IsQuietNaN(qn_proc));
17400   VIXL_ASSERT(IsQuietNaN(qm_proc));
17401 
17402   // Quiet NaNs are propagated.
17403   ProcessNaNsHelper(qn, Float16(), qn_proc);
17404   ProcessNaNsHelper(Float16(), qm, qm_proc);
17405   ProcessNaNsHelper(qn, qm, qn_proc);
17406 
17407   // Signalling NaNs are propagated, and made quiet.
17408   ProcessNaNsHelper(sn, Float16(), sn_proc);
17409   ProcessNaNsHelper(Float16(), sm, sm_proc);
17410   ProcessNaNsHelper(sn, sm, sn_proc);
17411 
17412   // Signalling NaNs take precedence over quiet NaNs.
17413   ProcessNaNsHelper(sn, qm, sn_proc);
17414   ProcessNaNsHelper(qn, sm, sm_proc);
17415   ProcessNaNsHelper(sn, sm, sn_proc);
17416 }
17417 
17418 
17419 static void DefaultNaNHelper(float n, float m, float a) {
17420   VIXL_ASSERT(IsNaN(n) || IsNaN(m) || IsNaN(a));
17421 
17422   bool test_1op = IsNaN(n);
17423   bool test_2op = IsNaN(n) || IsNaN(m);
17424 
17425   SETUP_WITH_FEATURES(CPUFeatures::kFP);
17426   START();
17427 
17428   // Enable Default-NaN mode in the FPCR.
17429   __ Mrs(x0, FPCR);
17430   __ Orr(x1, x0, DN_mask);
17431   __ Msr(FPCR, x1);
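  // FPCR.DN is bit 25; while it is set, any NaN result is replaced with the
  // default NaN (0x7fc00000 for single precision, 0x7ff8000000000000 for
  // double precision) instead of a propagated input NaN.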
17432 
17433   // Execute a number of instructions which all use ProcessNaNs, and check that
17434   // they all produce the default NaN.
17435   __ Fmov(s0, n);
17436   __ Fmov(s1, m);
17437   __ Fmov(s2, a);
17438 
17439   if (test_1op) {
17440     // Operations that always propagate NaNs unchanged, even signalling NaNs.
17441     __ Fmov(s10, s0);
17442     __ Fabs(s11, s0);
17443     __ Fneg(s12, s0);
17444 
17445     // Operations that use ProcessNaN.
17446     __ Fsqrt(s13, s0);
17447     __ Frinta(s14, s0);
17448     __ Frintn(s15, s0);
17449     __ Frintz(s16, s0);
17450 
17451     // Fcvt usually has special NaN handling, but it respects default-NaN mode.
17452     __ Fcvt(d17, s0);
17453   }
17454 
17455   if (test_2op) {
17456     __ Fadd(s18, s0, s1);
17457     __ Fsub(s19, s0, s1);
17458     __ Fmul(s20, s0, s1);
17459     __ Fdiv(s21, s0, s1);
17460     __ Fmax(s22, s0, s1);
17461     __ Fmin(s23, s0, s1);
17462   }
17463 
17464   __ Fmadd(s24, s0, s1, s2);
17465   __ Fmsub(s25, s0, s1, s2);
17466   __ Fnmadd(s26, s0, s1, s2);
17467   __ Fnmsub(s27, s0, s1, s2);
17468 
17469   // Restore FPCR.
17470   __ Msr(FPCR, x0);
17471 
17472   END();
17473   RUN();
17474 
17475   if (test_1op) {
17476     uint32_t n_raw = FloatToRawbits(n);
17477     ASSERT_EQUAL_FP32(n, s10);
17478     ASSERT_EQUAL_FP32(RawbitsToFloat(n_raw & ~kSSignMask), s11);
17479     ASSERT_EQUAL_FP32(RawbitsToFloat(n_raw ^ kSSignMask), s12);
17480     ASSERT_EQUAL_FP32(kFP32DefaultNaN, s13);
17481     ASSERT_EQUAL_FP32(kFP32DefaultNaN, s14);
17482     ASSERT_EQUAL_FP32(kFP32DefaultNaN, s15);
17483     ASSERT_EQUAL_FP32(kFP32DefaultNaN, s16);
17484     ASSERT_EQUAL_FP64(kFP64DefaultNaN, d17);
17485   }
17486 
17487   if (test_2op) {
17488     ASSERT_EQUAL_FP32(kFP32DefaultNaN, s18);
17489     ASSERT_EQUAL_FP32(kFP32DefaultNaN, s19);
17490     ASSERT_EQUAL_FP32(kFP32DefaultNaN, s20);
17491     ASSERT_EQUAL_FP32(kFP32DefaultNaN, s21);
17492     ASSERT_EQUAL_FP32(kFP32DefaultNaN, s22);
17493     ASSERT_EQUAL_FP32(kFP32DefaultNaN, s23);
17494   }
17495 
17496   ASSERT_EQUAL_FP32(kFP32DefaultNaN, s24);
17497   ASSERT_EQUAL_FP32(kFP32DefaultNaN, s25);
17498   ASSERT_EQUAL_FP32(kFP32DefaultNaN, s26);
17499   ASSERT_EQUAL_FP32(kFP32DefaultNaN, s27);
17500 
17501   TEARDOWN();
17502 }
17503 
17504 
17505 TEST(default_nan_float) {
17506   float sn = RawbitsToFloat(0x7f951111);
17507   float sm = RawbitsToFloat(0x7f952222);
17508   float sa = RawbitsToFloat(0x7f95aaaa);
17509   float qn = RawbitsToFloat(0x7fea1111);
17510   float qm = RawbitsToFloat(0x7fea2222);
17511   float qa = RawbitsToFloat(0x7feaaaaa);
17512   VIXL_ASSERT(IsSignallingNaN(sn));
17513   VIXL_ASSERT(IsSignallingNaN(sm));
17514   VIXL_ASSERT(IsSignallingNaN(sa));
17515   VIXL_ASSERT(IsQuietNaN(qn));
17516   VIXL_ASSERT(IsQuietNaN(qm));
17517   VIXL_ASSERT(IsQuietNaN(qa));
17518 
17519   //   - Signalling NaNs
17520   DefaultNaNHelper(sn, 0.0f, 0.0f);
17521   DefaultNaNHelper(0.0f, sm, 0.0f);
17522   DefaultNaNHelper(0.0f, 0.0f, sa);
17523   DefaultNaNHelper(sn, sm, 0.0f);
17524   DefaultNaNHelper(0.0f, sm, sa);
17525   DefaultNaNHelper(sn, 0.0f, sa);
17526   DefaultNaNHelper(sn, sm, sa);
17527   //   - Quiet NaNs
17528   DefaultNaNHelper(qn, 0.0f, 0.0f);
17529   DefaultNaNHelper(0.0f, qm, 0.0f);
17530   DefaultNaNHelper(0.0f, 0.0f, qa);
17531   DefaultNaNHelper(qn, qm, 0.0f);
17532   DefaultNaNHelper(0.0f, qm, qa);
17533   DefaultNaNHelper(qn, 0.0f, qa);
17534   DefaultNaNHelper(qn, qm, qa);
17535   //   - Mixed NaNs
17536   DefaultNaNHelper(qn, sm, sa);
17537   DefaultNaNHelper(sn, qm, sa);
17538   DefaultNaNHelper(sn, sm, qa);
17539   DefaultNaNHelper(qn, qm, sa);
17540   DefaultNaNHelper(sn, qm, qa);
17541   DefaultNaNHelper(qn, sm, qa);
17542   DefaultNaNHelper(qn, qm, qa);
17543 }
17544 
17545 
17546 static void DefaultNaNHelper(double n, double m, double a) {
17547   VIXL_ASSERT(IsNaN(n) || IsNaN(m) || IsNaN(a));
17548 
17549   bool test_1op = IsNaN(n);
17550   bool test_2op = IsNaN(n) || IsNaN(m);
17551 
17552   SETUP_WITH_FEATURES(CPUFeatures::kFP);
17553 
17554   START();
17555 
17556   // Enable Default-NaN mode in the FPCR.
17557   __ Mrs(x0, FPCR);
17558   __ Orr(x1, x0, DN_mask);
17559   __ Msr(FPCR, x1);
17560 
17561   // Execute a number of instructions which all use ProcessNaNs, and check that
17562   // they all produce the default NaN.
17563   __ Fmov(d0, n);
17564   __ Fmov(d1, m);
17565   __ Fmov(d2, a);
17566 
17567   if (test_1op) {
17568     // Operations that always propagate NaNs unchanged, even signalling NaNs.
17569     __ Fmov(d10, d0);
17570     __ Fabs(d11, d0);
17571     __ Fneg(d12, d0);
17572 
17573     // Operations that use ProcessNaN.
17574     __ Fsqrt(d13, d0);
17575     __ Frinta(d14, d0);
17576     __ Frintn(d15, d0);
17577     __ Frintz(d16, d0);
17578 
17579     // Fcvt usually has special NaN handling, but it respects default-NaN mode.
17580     __ Fcvt(s17, d0);
17581   }
17582 
17583   if (test_2op) {
17584     __ Fadd(d18, d0, d1);
17585     __ Fsub(d19, d0, d1);
17586     __ Fmul(d20, d0, d1);
17587     __ Fdiv(d21, d0, d1);
17588     __ Fmax(d22, d0, d1);
17589     __ Fmin(d23, d0, d1);
17590   }
17591 
17592   __ Fmadd(d24, d0, d1, d2);
17593   __ Fmsub(d25, d0, d1, d2);
17594   __ Fnmadd(d26, d0, d1, d2);
17595   __ Fnmsub(d27, d0, d1, d2);
17596 
17597   // Restore FPCR.
17598   __ Msr(FPCR, x0);
17599 
17600   END();
17601   RUN();
17602 
17603   if (test_1op) {
17604     uint64_t n_raw = DoubleToRawbits(n);
17605     ASSERT_EQUAL_FP64(n, d10);
17606     ASSERT_EQUAL_FP64(RawbitsToDouble(n_raw & ~kDSignMask), d11);
17607     ASSERT_EQUAL_FP64(RawbitsToDouble(n_raw ^ kDSignMask), d12);
17608     ASSERT_EQUAL_FP64(kFP64DefaultNaN, d13);
17609     ASSERT_EQUAL_FP64(kFP64DefaultNaN, d14);
17610     ASSERT_EQUAL_FP64(kFP64DefaultNaN, d15);
17611     ASSERT_EQUAL_FP64(kFP64DefaultNaN, d16);
17612     ASSERT_EQUAL_FP32(kFP32DefaultNaN, s17);
17613   }
17614 
17615   if (test_2op) {
17616     ASSERT_EQUAL_FP64(kFP64DefaultNaN, d18);
17617     ASSERT_EQUAL_FP64(kFP64DefaultNaN, d19);
17618     ASSERT_EQUAL_FP64(kFP64DefaultNaN, d20);
17619     ASSERT_EQUAL_FP64(kFP64DefaultNaN, d21);
17620     ASSERT_EQUAL_FP64(kFP64DefaultNaN, d22);
17621     ASSERT_EQUAL_FP64(kFP64DefaultNaN, d23);
17622   }
17623 
17624   ASSERT_EQUAL_FP64(kFP64DefaultNaN, d24);
17625   ASSERT_EQUAL_FP64(kFP64DefaultNaN, d25);
17626   ASSERT_EQUAL_FP64(kFP64DefaultNaN, d26);
17627   ASSERT_EQUAL_FP64(kFP64DefaultNaN, d27);
17628 
17629   TEARDOWN();
17630 }
17631 
17632 
17633 TEST(default_nan_double) {
17634   double sn = RawbitsToDouble(0x7ff5555511111111);
17635   double sm = RawbitsToDouble(0x7ff5555522222222);
17636   double sa = RawbitsToDouble(0x7ff55555aaaaaaaa);
17637   double qn = RawbitsToDouble(0x7ffaaaaa11111111);
17638   double qm = RawbitsToDouble(0x7ffaaaaa22222222);
17639   double qa = RawbitsToDouble(0x7ffaaaaaaaaaaaaa);
17640   VIXL_ASSERT(IsSignallingNaN(sn));
17641   VIXL_ASSERT(IsSignallingNaN(sm));
17642   VIXL_ASSERT(IsSignallingNaN(sa));
17643   VIXL_ASSERT(IsQuietNaN(qn));
17644   VIXL_ASSERT(IsQuietNaN(qm));
17645   VIXL_ASSERT(IsQuietNaN(qa));
17646 
17647   //   - Signalling NaNs
17648   DefaultNaNHelper(sn, 0.0, 0.0);
17649   DefaultNaNHelper(0.0, sm, 0.0);
17650   DefaultNaNHelper(0.0, 0.0, sa);
17651   DefaultNaNHelper(sn, sm, 0.0);
17652   DefaultNaNHelper(0.0, sm, sa);
17653   DefaultNaNHelper(sn, 0.0, sa);
17654   DefaultNaNHelper(sn, sm, sa);
17655   //   - Quiet NaNs
17656   DefaultNaNHelper(qn, 0.0, 0.0);
17657   DefaultNaNHelper(0.0, qm, 0.0);
17658   DefaultNaNHelper(0.0, 0.0, qa);
17659   DefaultNaNHelper(qn, qm, 0.0);
17660   DefaultNaNHelper(0.0, qm, qa);
17661   DefaultNaNHelper(qn, 0.0, qa);
17662   DefaultNaNHelper(qn, qm, qa);
17663   //   - Mixed NaNs
17664   DefaultNaNHelper(qn, sm, sa);
17665   DefaultNaNHelper(sn, qm, sa);
17666   DefaultNaNHelper(sn, sm, qa);
17667   DefaultNaNHelper(qn, qm, sa);
17668   DefaultNaNHelper(sn, qm, qa);
17669   DefaultNaNHelper(qn, sm, qa);
17670   DefaultNaNHelper(qn, qm, qa);
17671 }
17672 
17673 
17674 TEST(ldar_stlr) {
17675   // The middle value is read, modified, and written. The padding exists only to
17676   // check for over-write.
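  // Ldar/Stlr are load-acquire/store-release instructions. In a
  // single-threaded test they behave like ordinary loads and stores, so only
  // the data path is checked here, not the ordering semantics.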
17677   uint8_t b[] = {0, 0x12, 0};
17678   uint16_t h[] = {0, 0x1234, 0};
17679   uint32_t w[] = {0, 0x12345678, 0};
17680   uint64_t x[] = {0, 0x123456789abcdef0, 0};
17681 
17682   SETUP();
17683   START();
17684 
17685   __ Mov(x10, reinterpret_cast<uintptr_t>(&b[1]));
17686   __ Ldarb(w0, MemOperand(x10));
17687   __ Add(w0, w0, 1);
17688   __ Stlrb(w0, MemOperand(x10));
17689 
17690   __ Mov(x10, reinterpret_cast<uintptr_t>(&h[1]));
17691   __ Ldarh(w0, MemOperand(x10));
17692   __ Add(w0, w0, 1);
17693   __ Stlrh(w0, MemOperand(x10));
17694 
17695   __ Mov(x10, reinterpret_cast<uintptr_t>(&w[1]));
17696   __ Ldar(w0, MemOperand(x10));
17697   __ Add(w0, w0, 1);
17698   __ Stlr(w0, MemOperand(x10));
17699 
17700   __ Mov(x10, reinterpret_cast<uintptr_t>(&x[1]));
17701   __ Ldar(x0, MemOperand(x10));
17702   __ Add(x0, x0, 1);
17703   __ Stlr(x0, MemOperand(x10));
17704 
17705   END();
17706   RUN();
17707 
17708   ASSERT_EQUAL_32(0x13, b[1]);
17709   ASSERT_EQUAL_32(0x1235, h[1]);
17710   ASSERT_EQUAL_32(0x12345679, w[1]);
17711   ASSERT_EQUAL_64(0x123456789abcdef1, x[1]);
17712 
17713   // Check for over-write.
17714   ASSERT_EQUAL_32(0, b[0]);
17715   ASSERT_EQUAL_32(0, b[2]);
17716   ASSERT_EQUAL_32(0, h[0]);
17717   ASSERT_EQUAL_32(0, h[2]);
17718   ASSERT_EQUAL_32(0, w[0]);
17719   ASSERT_EQUAL_32(0, w[2]);
17720   ASSERT_EQUAL_64(0, x[0]);
17721   ASSERT_EQUAL_64(0, x[2]);
17722 
17723   TEARDOWN();
17724 }
17725 
17726 
17727 TEST(ldlar_stllr) {
17728   // The middle value is read, modified, and written. The padding exists only to
17729   // check for over-write.
17730   uint8_t b[] = {0, 0x12, 0};
17731   uint16_t h[] = {0, 0x1234, 0};
17732   uint32_t w[] = {0, 0x12345678, 0};
17733   uint64_t x[] = {0, 0x123456789abcdef0, 0};
17734 
17735   SETUP_WITH_FEATURES(CPUFeatures::kLORegions);
17736 
17737   START();
17738 
17739   __ Mov(x10, reinterpret_cast<uintptr_t>(&b[1]));
17740   __ Ldlarb(w0, MemOperand(x10));
17741   __ Add(w0, w0, 1);
17742   __ Stllrb(w0, MemOperand(x10));
17743 
17744   __ Mov(x10, reinterpret_cast<uintptr_t>(&h[1]));
17745   __ Ldlarh(w0, MemOperand(x10));
17746   __ Add(w0, w0, 1);
17747   __ Stllrh(w0, MemOperand(x10));
17748 
17749   __ Mov(x10, reinterpret_cast<uintptr_t>(&w[1]));
17750   __ Ldlar(w0, MemOperand(x10));
17751   __ Add(w0, w0, 1);
17752   __ Stllr(w0, MemOperand(x10));
17753 
17754   __ Mov(x10, reinterpret_cast<uintptr_t>(&x[1]));
17755   __ Ldlar(x0, MemOperand(x10));
17756   __ Add(x0, x0, 1);
17757   __ Stllr(x0, MemOperand(x10));
17758 
17759   END();
17760 
17761 #ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
17762   RUN();
17763 
17764   ASSERT_EQUAL_32(0x13, b[1]);
17765   ASSERT_EQUAL_32(0x1235, h[1]);
17766   ASSERT_EQUAL_32(0x12345679, w[1]);
17767   ASSERT_EQUAL_64(0x123456789abcdef1, x[1]);
17768 
17769   // Check for over-write.
17770   ASSERT_EQUAL_32(0, b[0]);
17771   ASSERT_EQUAL_32(0, b[2]);
17772   ASSERT_EQUAL_32(0, h[0]);
17773   ASSERT_EQUAL_32(0, h[2]);
17774   ASSERT_EQUAL_32(0, w[0]);
17775   ASSERT_EQUAL_32(0, w[2]);
17776   ASSERT_EQUAL_64(0, x[0]);
17777   ASSERT_EQUAL_64(0, x[2]);
17778 #endif  // VIXL_INCLUDE_SIMULATOR_AARCH64
17779 
17780   TEARDOWN();
17781 }
17782 
17783 
17784 TEST(ldxr_stxr) {
17785   // The middle value is read, modified, and written. The padding exists only to
17786   // check for over-write.
17787   uint8_t b[] = {0, 0x12, 0};
17788   uint16_t h[] = {0, 0x1234, 0};
17789   uint32_t w[] = {0, 0x12345678, 0};
17790   uint64_t x[] = {0, 0x123456789abcdef0, 0};
17791 
17792   // As above, but get suitably-aligned values for ldxp and stxp.
17793   uint32_t wp_data[] = {0, 0, 0, 0, 0};
17794   uint32_t* wp = AlignUp(wp_data + 1, kWRegSizeInBytes * 2) - 1;
17795   wp[1] = 0x12345678;  // wp[1] is 64-bit-aligned.
17796   wp[2] = 0x87654321;
17797   uint64_t xp_data[] = {0, 0, 0, 0, 0};
17798   uint64_t* xp = AlignUp(xp_data + 1, kXRegSizeInBytes * 2) - 1;
17799   xp[1] = 0x123456789abcdef0;  // xp[1] is 128-bit-aligned.
17800   xp[2] = 0x0fedcba987654321;
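  // AlignUp(wp_data + 1, 8) returns the first 8-byte-aligned address at or
  // after &wp_data[1]; stepping back one element then makes &wp[1], rather
  // than wp itself, the doubleword-aligned address that the exclusive pair
  // accesses require. The xp buffer is set up the same way for 16-byte
  // alignment.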
17801 
17802   SETUP();
17803   START();
17804 
17805   __ Mov(x10, reinterpret_cast<uintptr_t>(&b[1]));
17806   Label try_b;
17807   __ Bind(&try_b);
17808   __ Ldxrb(w0, MemOperand(x10));
17809   __ Add(w0, w0, 1);
17810   __ Stxrb(w5, w0, MemOperand(x10));
17811   __ Cbnz(w5, &try_b);
17812 
17813   __ Mov(x10, reinterpret_cast<uintptr_t>(&h[1]));
17814   Label try_h;
17815   __ Bind(&try_h);
17816   __ Ldxrh(w0, MemOperand(x10));
17817   __ Add(w0, w0, 1);
17818   __ Stxrh(w5, w0, MemOperand(x10));
17819   __ Cbnz(w5, &try_h);
17820 
17821   __ Mov(x10, reinterpret_cast<uintptr_t>(&w[1]));
17822   Label try_w;
17823   __ Bind(&try_w);
17824   __ Ldxr(w0, MemOperand(x10));
17825   __ Add(w0, w0, 1);
17826   __ Stxr(w5, w0, MemOperand(x10));
17827   __ Cbnz(w5, &try_w);
17828 
17829   __ Mov(x10, reinterpret_cast<uintptr_t>(&x[1]));
17830   Label try_x;
17831   __ Bind(&try_x);
17832   __ Ldxr(x0, MemOperand(x10));
17833   __ Add(x0, x0, 1);
17834   __ Stxr(w5, x0, MemOperand(x10));
17835   __ Cbnz(w5, &try_x);
17836 
17837   __ Mov(x10, reinterpret_cast<uintptr_t>(&wp[1]));
17838   Label try_wp;
17839   __ Bind(&try_wp);
17840   __ Ldxp(w0, w1, MemOperand(x10));
17841   __ Add(w0, w0, 1);
17842   __ Add(w1, w1, 1);
17843   __ Stxp(w5, w0, w1, MemOperand(x10));
17844   __ Cbnz(w5, &try_wp);
17845 
17846   __ Mov(x10, reinterpret_cast<uintptr_t>(&xp[1]));
17847   Label try_xp;
17848   __ Bind(&try_xp);
17849   __ Ldxp(x0, x1, MemOperand(x10));
17850   __ Add(x0, x0, 1);
17851   __ Add(x1, x1, 1);
17852   __ Stxp(w5, x0, x1, MemOperand(x10));
17853   __ Cbnz(w5, &try_xp);
17854 
17855   END();
17856   RUN();
17857 
17858   ASSERT_EQUAL_32(0x13, b[1]);
17859   ASSERT_EQUAL_32(0x1235, h[1]);
17860   ASSERT_EQUAL_32(0x12345679, w[1]);
17861   ASSERT_EQUAL_64(0x123456789abcdef1, x[1]);
17862   ASSERT_EQUAL_32(0x12345679, wp[1]);
17863   ASSERT_EQUAL_32(0x87654322, wp[2]);
17864   ASSERT_EQUAL_64(0x123456789abcdef1, xp[1]);
17865   ASSERT_EQUAL_64(0x0fedcba987654322, xp[2]);
17866 
17867   // Check for over-write.
17868   ASSERT_EQUAL_32(0, b[0]);
17869   ASSERT_EQUAL_32(0, b[2]);
17870   ASSERT_EQUAL_32(0, h[0]);
17871   ASSERT_EQUAL_32(0, h[2]);
17872   ASSERT_EQUAL_32(0, w[0]);
17873   ASSERT_EQUAL_32(0, w[2]);
17874   ASSERT_EQUAL_64(0, x[0]);
17875   ASSERT_EQUAL_64(0, x[2]);
17876   ASSERT_EQUAL_32(0, wp[0]);
17877   ASSERT_EQUAL_32(0, wp[3]);
17878   ASSERT_EQUAL_64(0, xp[0]);
17879   ASSERT_EQUAL_64(0, xp[3]);
17880 
17881   TEARDOWN();
17882 }
17883 
17884 
17885 TEST(ldaxr_stlxr) {
17886   // The middle value is read, modified, and written. The padding exists only to
17887   // check for over-write.
17888   uint8_t b[] = {0, 0x12, 0};
17889   uint16_t h[] = {0, 0x1234, 0};
17890   uint32_t w[] = {0, 0x12345678, 0};
17891   uint64_t x[] = {0, 0x123456789abcdef0, 0};
17892 
17893   // As above, but get suitably-aligned values for ldaxp and stlxp.
17894   uint32_t wp_data[] = {0, 0, 0, 0, 0};
17895   uint32_t* wp = AlignUp(wp_data + 1, kWRegSizeInBytes * 2) - 1;
17896   wp[1] = 0x12345678;  // wp[1] is 64-bit-aligned.
17897   wp[2] = 0x87654321;
17898   uint64_t xp_data[] = {0, 0, 0, 0, 0};
17899   uint64_t* xp = AlignUp(xp_data + 1, kXRegSizeInBytes * 2) - 1;
17900   xp[1] = 0x123456789abcdef0;  // xp[1] is 128-bit-aligned.
17901   xp[2] = 0x0fedcba987654321;
17902 
17903   SETUP();
17904   START();
17905 
17906   __ Mov(x10, reinterpret_cast<uintptr_t>(&b[1]));
17907   Label try_b;
17908   __ Bind(&try_b);
17909   __ Ldaxrb(w0, MemOperand(x10));
17910   __ Add(w0, w0, 1);
17911   __ Stlxrb(w5, w0, MemOperand(x10));
17912   __ Cbnz(w5, &try_b);
17913 
17914   __ Mov(x10, reinterpret_cast<uintptr_t>(&h[1]));
17915   Label try_h;
17916   __ Bind(&try_h);
17917   __ Ldaxrh(w0, MemOperand(x10));
17918   __ Add(w0, w0, 1);
17919   __ Stlxrh(w5, w0, MemOperand(x10));
17920   __ Cbnz(w5, &try_h);
17921 
17922   __ Mov(x10, reinterpret_cast<uintptr_t>(&w[1]));
17923   Label try_w;
17924   __ Bind(&try_w);
17925   __ Ldaxr(w0, MemOperand(x10));
17926   __ Add(w0, w0, 1);
17927   __ Stlxr(w5, w0, MemOperand(x10));
17928   __ Cbnz(w5, &try_w);
17929 
17930   __ Mov(x10, reinterpret_cast<uintptr_t>(&x[1]));
17931   Label try_x;
17932   __ Bind(&try_x);
17933   __ Ldaxr(x0, MemOperand(x10));
17934   __ Add(x0, x0, 1);
17935   __ Stlxr(w5, x0, MemOperand(x10));
17936   __ Cbnz(w5, &try_x);
17937 
17938   __ Mov(x10, reinterpret_cast<uintptr_t>(&wp[1]));
17939   Label try_wp;
17940   __ Bind(&try_wp);
17941   __ Ldaxp(w0, w1, MemOperand(x10));
17942   __ Add(w0, w0, 1);
17943   __ Add(w1, w1, 1);
17944   __ Stlxp(w5, w0, w1, MemOperand(x10));
17945   __ Cbnz(w5, &try_wp);
17946 
17947   __ Mov(x10, reinterpret_cast<uintptr_t>(&xp[1]));
17948   Label try_xp;
17949   __ Bind(&try_xp);
17950   __ Ldaxp(x0, x1, MemOperand(x10));
17951   __ Add(x0, x0, 1);
17952   __ Add(x1, x1, 1);
17953   __ Stlxp(w5, x0, x1, MemOperand(x10));
17954   __ Cbnz(w5, &try_xp);
17955 
17956   END();
17957   RUN();
17958 
17959   ASSERT_EQUAL_32(0x13, b[1]);
17960   ASSERT_EQUAL_32(0x1235, h[1]);
17961   ASSERT_EQUAL_32(0x12345679, w[1]);
17962   ASSERT_EQUAL_64(0x123456789abcdef1, x[1]);
17963   ASSERT_EQUAL_32(0x12345679, wp[1]);
17964   ASSERT_EQUAL_32(0x87654322, wp[2]);
17965   ASSERT_EQUAL_64(0x123456789abcdef1, xp[1]);
17966   ASSERT_EQUAL_64(0x0fedcba987654322, xp[2]);
17967 
17968   // Check for over-write.
17969   ASSERT_EQUAL_32(0, b[0]);
17970   ASSERT_EQUAL_32(0, b[2]);
17971   ASSERT_EQUAL_32(0, h[0]);
17972   ASSERT_EQUAL_32(0, h[2]);
17973   ASSERT_EQUAL_32(0, w[0]);
17974   ASSERT_EQUAL_32(0, w[2]);
17975   ASSERT_EQUAL_64(0, x[0]);
17976   ASSERT_EQUAL_64(0, x[2]);
17977   ASSERT_EQUAL_32(0, wp[0]);
17978   ASSERT_EQUAL_32(0, wp[3]);
17979   ASSERT_EQUAL_64(0, xp[0]);
17980   ASSERT_EQUAL_64(0, xp[3]);
17981 
17982   TEARDOWN();
17983 }
17984 
17985 
17986 TEST(clrex) {
17987   // This data should never be written.
17988   uint64_t data[] = {0, 0, 0};
17989   uint64_t* data_aligned = AlignUp(data, kXRegSizeInBytes * 2);
17990 
17991   SETUP();
17992   START();
17993 
17994   __ Mov(x10, reinterpret_cast<uintptr_t>(data_aligned));
17995   __ Mov(w6, 0);
17996 
17997   __ Ldxrb(w0, MemOperand(x10));
17998   __ Clrex();
17999   __ Add(w0, w0, 1);
18000   __ Stxrb(w5, w0, MemOperand(x10));
18001   __ Add(w6, w6, w5);
18002 
18003   __ Ldxrh(w0, MemOperand(x10));
18004   __ Clrex();
18005   __ Add(w0, w0, 1);
18006   __ Stxrh(w5, w0, MemOperand(x10));
18007   __ Add(w6, w6, w5);
18008 
18009   __ Ldxr(w0, MemOperand(x10));
18010   __ Clrex();
18011   __ Add(w0, w0, 1);
18012   __ Stxr(w5, w0, MemOperand(x10));
18013   __ Add(w6, w6, w5);
18014 
18015   __ Ldxr(x0, MemOperand(x10));
18016   __ Clrex();
18017   __ Add(x0, x0, 1);
18018   __ Stxr(w5, x0, MemOperand(x10));
18019   __ Add(w6, w6, w5);
18020 
18021   __ Ldxp(w0, w1, MemOperand(x10));
18022   __ Clrex();
18023   __ Add(w0, w0, 1);
18024   __ Add(w1, w1, 1);
18025   __ Stxp(w5, w0, w1, MemOperand(x10));
18026   __ Add(w6, w6, w5);
18027 
18028   __ Ldxp(x0, x1, MemOperand(x10));
18029   __ Clrex();
18030   __ Add(x0, x0, 1);
18031   __ Add(x1, x1, 1);
18032   __ Stxp(w5, x0, x1, MemOperand(x10));
18033   __ Add(w6, w6, w5);
18034 
18035   // Acquire-release variants.
18036 
18037   __ Ldaxrb(w0, MemOperand(x10));
18038   __ Clrex();
18039   __ Add(w0, w0, 1);
18040   __ Stlxrb(w5, w0, MemOperand(x10));
18041   __ Add(w6, w6, w5);
18042 
18043   __ Ldaxrh(w0, MemOperand(x10));
18044   __ Clrex();
18045   __ Add(w0, w0, 1);
18046   __ Stlxrh(w5, w0, MemOperand(x10));
18047   __ Add(w6, w6, w5);
18048 
18049   __ Ldaxr(w0, MemOperand(x10));
18050   __ Clrex();
18051   __ Add(w0, w0, 1);
18052   __ Stlxr(w5, w0, MemOperand(x10));
18053   __ Add(w6, w6, w5);
18054 
18055   __ Ldaxr(x0, MemOperand(x10));
18056   __ Clrex();
18057   __ Add(x0, x0, 1);
18058   __ Stlxr(w5, x0, MemOperand(x10));
18059   __ Add(w6, w6, w5);
18060 
18061   __ Ldaxp(w0, w1, MemOperand(x10));
18062   __ Clrex();
18063   __ Add(w0, w0, 1);
18064   __ Add(w1, w1, 1);
18065   __ Stlxp(w5, w0, w1, MemOperand(x10));
18066   __ Add(w6, w6, w5);
18067 
18068   __ Ldaxp(x0, x1, MemOperand(x10));
18069   __ Clrex();
18070   __ Add(x0, x0, 1);
18071   __ Add(x1, x1, 1);
18072   __ Stlxp(w5, x0, x1, MemOperand(x10));
18073   __ Add(w6, w6, w5);
18074 
18075   END();
18076   RUN();
18077 
18078   // None of the 12 store-exclusives should have succeeded.
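  // Each store-exclusive writes 0 to its status register on success and 1 on
  // failure; since Clrex clears the local monitor before every store, all
  // twelve attempts fail and w6 accumulates twelve ones.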
18079   ASSERT_EQUAL_32(12, w6);
18080 
18081   ASSERT_EQUAL_64(0, data[0]);
18082   ASSERT_EQUAL_64(0, data[1]);
18083   ASSERT_EQUAL_64(0, data[2]);
18084 
18085   TEARDOWN();
18086 }
18087 
18088 
18089 #ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
18090 // Check that the simulator occasionally makes store-exclusive fail.
18091 TEST(ldxr_stxr_fail) {
18092   uint64_t data[] = {0, 0, 0};
18093   uint64_t* data_aligned = AlignUp(data, kXRegSizeInBytes * 2);
18094 
18095   // Impose a hard limit on the number of attempts, so the test cannot hang.
18096   static const uint64_t kWatchdog = 10000;
18097   Label done;
18098 
18099   SETUP();
18100   START();
18101 
18102   __ Mov(x10, reinterpret_cast<uintptr_t>(data_aligned));
18103   __ Mov(x11, kWatchdog);
18104 
18105   // This loop is the opposite of what we normally do with ldxr and stxr; we
18106   // keep trying until we fail (or the watchdog counter runs out).
  Label try_b;
  __ Bind(&try_b);
  __ Ldxrb(w0, MemOperand(x10));
  __ Stxrb(w5, w0, MemOperand(x10));
  // Check the watchdog counter.
  __ Sub(x11, x11, 1);
  __ Cbz(x11, &done);
  // Check the exclusive-store result.
  __ Cbz(w5, &try_b);

  Label try_h;
  __ Bind(&try_h);
  __ Ldxrh(w0, MemOperand(x10));
  __ Stxrh(w5, w0, MemOperand(x10));
  __ Sub(x11, x11, 1);
  __ Cbz(x11, &done);
  __ Cbz(w5, &try_h);

  Label try_w;
  __ Bind(&try_w);
  __ Ldxr(w0, MemOperand(x10));
  __ Stxr(w5, w0, MemOperand(x10));
  __ Sub(x11, x11, 1);
  __ Cbz(x11, &done);
  __ Cbz(w5, &try_w);

  Label try_x;
  __ Bind(&try_x);
  __ Ldxr(x0, MemOperand(x10));
  __ Stxr(w5, x0, MemOperand(x10));
  __ Sub(x11, x11, 1);
  __ Cbz(x11, &done);
  __ Cbz(w5, &try_x);

  Label try_wp;
  __ Bind(&try_wp);
  __ Ldxp(w0, w1, MemOperand(x10));
  __ Stxp(w5, w0, w1, MemOperand(x10));
  __ Sub(x11, x11, 1);
  __ Cbz(x11, &done);
  __ Cbz(w5, &try_wp);

  Label try_xp;
  __ Bind(&try_xp);
  __ Ldxp(x0, x1, MemOperand(x10));
  __ Stxp(w5, x0, x1, MemOperand(x10));
  __ Sub(x11, x11, 1);
  __ Cbz(x11, &done);
  __ Cbz(w5, &try_xp);

  __ Bind(&done);
  // Trigger an error if x11 (watchdog) is zero.
  __ Cmp(x11, 0);
  __ Cset(x12, eq);

  END();
  RUN();

  // Check that the watchdog counter didn't run out.
  ASSERT_EQUAL_64(0, x12);

  TEARDOWN();
}
#endif


#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
// Check that the simulator occasionally makes store-exclusive fail.
TEST(ldaxr_stlxr_fail) {
  uint64_t data[] = {0, 0, 0};
  uint64_t* data_aligned = AlignUp(data, kXRegSizeInBytes * 2);

  // Impose a hard limit on the number of attempts, so the test cannot hang.
  static const uint64_t kWatchdog = 10000;
  Label done;

  SETUP();
  START();

  __ Mov(x10, reinterpret_cast<uintptr_t>(data_aligned));
  __ Mov(x11, kWatchdog);

  // This loop is the opposite of what we normally do with ldaxr and stlxr;
  // we keep trying until we fail (or the watchdog counter runs out).
  Label try_b;
  __ Bind(&try_b);
  __ Ldaxrb(w0, MemOperand(x10));
  __ Stlxrb(w5, w0, MemOperand(x10));
  // Check the watchdog counter.
  __ Sub(x11, x11, 1);
  __ Cbz(x11, &done);
  // Check the exclusive-store result.
  __ Cbz(w5, &try_b);

  Label try_h;
  __ Bind(&try_h);
  __ Ldaxrh(w0, MemOperand(x10));
  __ Stlxrh(w5, w0, MemOperand(x10));
  __ Sub(x11, x11, 1);
  __ Cbz(x11, &done);
  __ Cbz(w5, &try_h);

  Label try_w;
  __ Bind(&try_w);
  __ Ldaxr(w0, MemOperand(x10));
  __ Stlxr(w5, w0, MemOperand(x10));
  __ Sub(x11, x11, 1);
  __ Cbz(x11, &done);
  __ Cbz(w5, &try_w);

  Label try_x;
  __ Bind(&try_x);
  __ Ldaxr(x0, MemOperand(x10));
  __ Stlxr(w5, x0, MemOperand(x10));
  __ Sub(x11, x11, 1);
  __ Cbz(x11, &done);
  __ Cbz(w5, &try_x);

  Label try_wp;
  __ Bind(&try_wp);
  __ Ldaxp(w0, w1, MemOperand(x10));
  __ Stlxp(w5, w0, w1, MemOperand(x10));
  __ Sub(x11, x11, 1);
  __ Cbz(x11, &done);
  __ Cbz(w5, &try_wp);

  Label try_xp;
  __ Bind(&try_xp);
  __ Ldaxp(x0, x1, MemOperand(x10));
  __ Stlxp(w5, x0, x1, MemOperand(x10));
  __ Sub(x11, x11, 1);
  __ Cbz(x11, &done);
  __ Cbz(w5, &try_xp);

  __ Bind(&done);
  // Trigger an error if x11 (watchdog) is zero.
  __ Cmp(x11, 0);
  __ Cset(x12, eq);

  END();
  RUN();

  // Check that the watchdog counter didn't run out.
  ASSERT_EQUAL_64(0, x12);

  TEARDOWN();
}
#endif

TEST(cas_casa_casl_casal_w) {
  uint64_t data1[] = {0x01234567, 0};
  uint64_t data2[] = {0x01234567, 0};
  uint64_t data3[] = {0x01234567, 0};
  uint64_t data4[] = {0x01234567, 0};
  uint64_t data5[] = {0x01234567, 0};
  uint64_t data6[] = {0x01234567, 0};
  uint64_t data7[] = {0x01234567, 0};
  uint64_t data8[] = {0x01234567, 0};

  uint64_t* data1_aligned = AlignUp(data1, kXRegSizeInBytes * 2);
  uint64_t* data2_aligned = AlignUp(data2, kXRegSizeInBytes * 2);
  uint64_t* data3_aligned = AlignUp(data3, kXRegSizeInBytes * 2);
  uint64_t* data4_aligned = AlignUp(data4, kXRegSizeInBytes * 2);
  uint64_t* data5_aligned = AlignUp(data5, kXRegSizeInBytes * 2);
  uint64_t* data6_aligned = AlignUp(data6, kXRegSizeInBytes * 2);
  uint64_t* data7_aligned = AlignUp(data7, kXRegSizeInBytes * 2);
  uint64_t* data8_aligned = AlignUp(data8, kXRegSizeInBytes * 2);

  SETUP_WITH_FEATURES(CPUFeatures::kAtomics);

  START();

  __ Mov(x21, reinterpret_cast<uintptr_t>(data1_aligned));
  __ Mov(x22, reinterpret_cast<uintptr_t>(data2_aligned));
  __ Mov(x23, reinterpret_cast<uintptr_t>(data3_aligned));
  __ Mov(x24, reinterpret_cast<uintptr_t>(data4_aligned));
  __ Mov(x25, reinterpret_cast<uintptr_t>(data5_aligned));
  __ Mov(x26, reinterpret_cast<uintptr_t>(data6_aligned));
  __ Mov(x27, reinterpret_cast<uintptr_t>(data7_aligned));
  __ Mov(x28, reinterpret_cast<uintptr_t>(data8_aligned));

  __ Mov(x0, 0xffffffff);

  __ Mov(x1, 0x76543210);
  __ Mov(x2, 0x01234567);
  __ Mov(x3, 0x76543210);
  __ Mov(x4, 0x01234567);
  __ Mov(x5, 0x76543210);
  __ Mov(x6, 0x01234567);
  __ Mov(x7, 0x76543210);
  __ Mov(x8, 0x01234567);

  __ Cas(w1, w0, MemOperand(x21));
  __ Cas(w2, w0, MemOperand(x22));
  __ Casa(w3, w0, MemOperand(x23));
  __ Casa(w4, w0, MemOperand(x24));
  __ Casl(w5, w0, MemOperand(x25));
  __ Casl(w6, w0, MemOperand(x26));
  __ Casal(w7, w0, MemOperand(x27));
  __ Casal(w8, w0, MemOperand(x28));

  END();

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
  RUN();

  ASSERT_EQUAL_64(0x01234567, x1);
  ASSERT_EQUAL_64(0x01234567, x2);
  ASSERT_EQUAL_64(0x01234567, x3);
  ASSERT_EQUAL_64(0x01234567, x4);
  ASSERT_EQUAL_64(0x01234567, x5);
  ASSERT_EQUAL_64(0x01234567, x6);
  ASSERT_EQUAL_64(0x01234567, x7);
  ASSERT_EQUAL_64(0x01234567, x8);

  ASSERT_EQUAL_64(0x01234567, data1[0]);
  ASSERT_EQUAL_64(0xffffffff, data2[0]);
  ASSERT_EQUAL_64(0x01234567, data3[0]);
  ASSERT_EQUAL_64(0xffffffff, data4[0]);
  ASSERT_EQUAL_64(0x01234567, data5[0]);
  ASSERT_EQUAL_64(0xffffffff, data6[0]);
  ASSERT_EQUAL_64(0x01234567, data7[0]);
  ASSERT_EQUAL_64(0xffffffff, data8[0]);
#endif  // VIXL_INCLUDE_SIMULATOR_AARCH64

  TEARDOWN();
}

TEST(cas_casa_casl_casal_x) {
  uint64_t data1[] = {0x0123456789abcdef, 0};
  uint64_t data2[] = {0x0123456789abcdef, 0};
  uint64_t data3[] = {0x0123456789abcdef, 0};
  uint64_t data4[] = {0x0123456789abcdef, 0};
  uint64_t data5[] = {0x0123456789abcdef, 0};
  uint64_t data6[] = {0x0123456789abcdef, 0};
  uint64_t data7[] = {0x0123456789abcdef, 0};
  uint64_t data8[] = {0x0123456789abcdef, 0};

  uint64_t* data1_aligned = AlignUp(data1, kXRegSizeInBytes * 2);
  uint64_t* data2_aligned = AlignUp(data2, kXRegSizeInBytes * 2);
  uint64_t* data3_aligned = AlignUp(data3, kXRegSizeInBytes * 2);
  uint64_t* data4_aligned = AlignUp(data4, kXRegSizeInBytes * 2);
  uint64_t* data5_aligned = AlignUp(data5, kXRegSizeInBytes * 2);
  uint64_t* data6_aligned = AlignUp(data6, kXRegSizeInBytes * 2);
  uint64_t* data7_aligned = AlignUp(data7, kXRegSizeInBytes * 2);
  uint64_t* data8_aligned = AlignUp(data8, kXRegSizeInBytes * 2);

  SETUP_WITH_FEATURES(CPUFeatures::kAtomics);

  START();

  __ Mov(x21, reinterpret_cast<uintptr_t>(data1_aligned));
  __ Mov(x22, reinterpret_cast<uintptr_t>(data2_aligned));
  __ Mov(x23, reinterpret_cast<uintptr_t>(data3_aligned));
  __ Mov(x24, reinterpret_cast<uintptr_t>(data4_aligned));
  __ Mov(x25, reinterpret_cast<uintptr_t>(data5_aligned));
  __ Mov(x26, reinterpret_cast<uintptr_t>(data6_aligned));
  __ Mov(x27, reinterpret_cast<uintptr_t>(data7_aligned));
  __ Mov(x28, reinterpret_cast<uintptr_t>(data8_aligned));

  __ Mov(x0, 0xffffffffffffffff);

  __ Mov(x1, 0xfedcba9876543210);
  __ Mov(x2, 0x0123456789abcdef);
  __ Mov(x3, 0xfedcba9876543210);
  __ Mov(x4, 0x0123456789abcdef);
  __ Mov(x5, 0xfedcba9876543210);
  __ Mov(x6, 0x0123456789abcdef);
  __ Mov(x7, 0xfedcba9876543210);
  __ Mov(x8, 0x0123456789abcdef);

  __ Cas(x1, x0, MemOperand(x21));
  __ Cas(x2, x0, MemOperand(x22));
  __ Casa(x3, x0, MemOperand(x23));
  __ Casa(x4, x0, MemOperand(x24));
  __ Casl(x5, x0, MemOperand(x25));
  __ Casl(x6, x0, MemOperand(x26));
  __ Casal(x7, x0, MemOperand(x27));
  __ Casal(x8, x0, MemOperand(x28));

  END();

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
  RUN();

  ASSERT_EQUAL_64(0x0123456789abcdef, x1);
  ASSERT_EQUAL_64(0x0123456789abcdef, x2);
  ASSERT_EQUAL_64(0x0123456789abcdef, x3);
  ASSERT_EQUAL_64(0x0123456789abcdef, x4);
  ASSERT_EQUAL_64(0x0123456789abcdef, x5);
  ASSERT_EQUAL_64(0x0123456789abcdef, x6);
  ASSERT_EQUAL_64(0x0123456789abcdef, x7);
  ASSERT_EQUAL_64(0x0123456789abcdef, x8);

  ASSERT_EQUAL_64(0x0123456789abcdef, data1[0]);
  ASSERT_EQUAL_64(0xffffffffffffffff, data2[0]);
  ASSERT_EQUAL_64(0x0123456789abcdef, data3[0]);
  ASSERT_EQUAL_64(0xffffffffffffffff, data4[0]);
  ASSERT_EQUAL_64(0x0123456789abcdef, data5[0]);
  ASSERT_EQUAL_64(0xffffffffffffffff, data6[0]);
  ASSERT_EQUAL_64(0x0123456789abcdef, data7[0]);
  ASSERT_EQUAL_64(0xffffffffffffffff, data8[0]);
#endif  // VIXL_INCLUDE_SIMULATOR_AARCH64

  TEARDOWN();
}

TEST(casb_casab_caslb_casalb) {
  uint64_t data1[] = {0x01234567, 0};
  uint64_t data2[] = {0x01234567, 0};
  uint64_t data3[] = {0x01234567, 0};
  uint64_t data4[] = {0x01234567, 0};
  uint64_t data5[] = {0x01234567, 0};
  uint64_t data6[] = {0x01234567, 0};
  uint64_t data7[] = {0x01234567, 0};
  uint64_t data8[] = {0x01234567, 0};

  uint64_t* data1_aligned = AlignUp(data1, kXRegSizeInBytes * 2);
  uint64_t* data2_aligned = AlignUp(data2, kXRegSizeInBytes * 2);
  uint64_t* data3_aligned = AlignUp(data3, kXRegSizeInBytes * 2);
  uint64_t* data4_aligned = AlignUp(data4, kXRegSizeInBytes * 2);
  uint64_t* data5_aligned = AlignUp(data5, kXRegSizeInBytes * 2);
  uint64_t* data6_aligned = AlignUp(data6, kXRegSizeInBytes * 2);
  uint64_t* data7_aligned = AlignUp(data7, kXRegSizeInBytes * 2);
  uint64_t* data8_aligned = AlignUp(data8, kXRegSizeInBytes * 2);

  SETUP_WITH_FEATURES(CPUFeatures::kAtomics);

  START();

  __ Mov(x21, reinterpret_cast<uintptr_t>(data1_aligned));
  __ Mov(x22, reinterpret_cast<uintptr_t>(data2_aligned));
  __ Mov(x23, reinterpret_cast<uintptr_t>(data3_aligned));
  __ Mov(x24, reinterpret_cast<uintptr_t>(data4_aligned));
  __ Mov(x25, reinterpret_cast<uintptr_t>(data5_aligned));
  __ Mov(x26, reinterpret_cast<uintptr_t>(data6_aligned));
  __ Mov(x27, reinterpret_cast<uintptr_t>(data7_aligned));
  __ Mov(x28, reinterpret_cast<uintptr_t>(data8_aligned));

  __ Mov(x0, 0xffffffff);

  __ Mov(x1, 0x76543210);
  __ Mov(x2, 0x01234567);
  __ Mov(x3, 0x76543210);
  __ Mov(x4, 0x01234567);
  __ Mov(x5, 0x76543210);
  __ Mov(x6, 0x01234567);
  __ Mov(x7, 0x76543210);
  __ Mov(x8, 0x01234567);

  __ Casb(w1, w0, MemOperand(x21));
  __ Casb(w2, w0, MemOperand(x22));
  __ Casab(w3, w0, MemOperand(x23));
  __ Casab(w4, w0, MemOperand(x24));
  __ Caslb(w5, w0, MemOperand(x25));
  __ Caslb(w6, w0, MemOperand(x26));
  __ Casalb(w7, w0, MemOperand(x27));
  __ Casalb(w8, w0, MemOperand(x28));

  END();

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
  RUN();

  ASSERT_EQUAL_64(0x00000067, x1);
  ASSERT_EQUAL_64(0x00000067, x2);
  ASSERT_EQUAL_64(0x00000067, x3);
  ASSERT_EQUAL_64(0x00000067, x4);
  ASSERT_EQUAL_64(0x00000067, x5);
  ASSERT_EQUAL_64(0x00000067, x6);
  ASSERT_EQUAL_64(0x00000067, x7);
  ASSERT_EQUAL_64(0x00000067, x8);

  ASSERT_EQUAL_64(0x01234567, data1[0]);
  ASSERT_EQUAL_64(0x012345ff, data2[0]);
  ASSERT_EQUAL_64(0x01234567, data3[0]);
  ASSERT_EQUAL_64(0x012345ff, data4[0]);
  ASSERT_EQUAL_64(0x01234567, data5[0]);
  ASSERT_EQUAL_64(0x012345ff, data6[0]);
  ASSERT_EQUAL_64(0x01234567, data7[0]);
  ASSERT_EQUAL_64(0x012345ff, data8[0]);
#endif  // VIXL_INCLUDE_SIMULATOR_AARCH64

  TEARDOWN();
}

TEST(cash_casah_caslh_casalh) {
  uint64_t data1[] = {0x01234567, 0};
  uint64_t data2[] = {0x01234567, 0};
  uint64_t data3[] = {0x01234567, 0};
  uint64_t data4[] = {0x01234567, 0};
  uint64_t data5[] = {0x01234567, 0};
  uint64_t data6[] = {0x01234567, 0};
  uint64_t data7[] = {0x01234567, 0};
  uint64_t data8[] = {0x01234567, 0};

  uint64_t* data1_aligned = AlignUp(data1, kXRegSizeInBytes * 2);
  uint64_t* data2_aligned = AlignUp(data2, kXRegSizeInBytes * 2);
  uint64_t* data3_aligned = AlignUp(data3, kXRegSizeInBytes * 2);
  uint64_t* data4_aligned = AlignUp(data4, kXRegSizeInBytes * 2);
  uint64_t* data5_aligned = AlignUp(data5, kXRegSizeInBytes * 2);
  uint64_t* data6_aligned = AlignUp(data6, kXRegSizeInBytes * 2);
  uint64_t* data7_aligned = AlignUp(data7, kXRegSizeInBytes * 2);
  uint64_t* data8_aligned = AlignUp(data8, kXRegSizeInBytes * 2);

  SETUP_WITH_FEATURES(CPUFeatures::kAtomics);

  START();

  __ Mov(x21, reinterpret_cast<uintptr_t>(data1_aligned));
  __ Mov(x22, reinterpret_cast<uintptr_t>(data2_aligned));
  __ Mov(x23, reinterpret_cast<uintptr_t>(data3_aligned));
  __ Mov(x24, reinterpret_cast<uintptr_t>(data4_aligned));
  __ Mov(x25, reinterpret_cast<uintptr_t>(data5_aligned));
  __ Mov(x26, reinterpret_cast<uintptr_t>(data6_aligned));
  __ Mov(x27, reinterpret_cast<uintptr_t>(data7_aligned));
  __ Mov(x28, reinterpret_cast<uintptr_t>(data8_aligned));

  __ Mov(x0, 0xffffffff);

  __ Mov(x1, 0x76543210);
  __ Mov(x2, 0x01234567);
  __ Mov(x3, 0x76543210);
  __ Mov(x4, 0x01234567);
  __ Mov(x5, 0x76543210);
  __ Mov(x6, 0x01234567);
  __ Mov(x7, 0x76543210);
  __ Mov(x8, 0x01234567);

  __ Cash(w1, w0, MemOperand(x21));
  __ Cash(w2, w0, MemOperand(x22));
  __ Casah(w3, w0, MemOperand(x23));
  __ Casah(w4, w0, MemOperand(x24));
  __ Caslh(w5, w0, MemOperand(x25));
  __ Caslh(w6, w0, MemOperand(x26));
  __ Casalh(w7, w0, MemOperand(x27));
  __ Casalh(w8, w0, MemOperand(x28));

  END();

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
  RUN();

  ASSERT_EQUAL_64(0x00004567, x1);
  ASSERT_EQUAL_64(0x00004567, x2);
  ASSERT_EQUAL_64(0x00004567, x3);
  ASSERT_EQUAL_64(0x00004567, x4);
  ASSERT_EQUAL_64(0x00004567, x5);
  ASSERT_EQUAL_64(0x00004567, x6);
  ASSERT_EQUAL_64(0x00004567, x7);
  ASSERT_EQUAL_64(0x00004567, x8);

  ASSERT_EQUAL_64(0x01234567, data1[0]);
  ASSERT_EQUAL_64(0x0123ffff, data2[0]);
  ASSERT_EQUAL_64(0x01234567, data3[0]);
  ASSERT_EQUAL_64(0x0123ffff, data4[0]);
  ASSERT_EQUAL_64(0x01234567, data5[0]);
  ASSERT_EQUAL_64(0x0123ffff, data6[0]);
  ASSERT_EQUAL_64(0x01234567, data7[0]);
  ASSERT_EQUAL_64(0x0123ffff, data8[0]);
#endif  // VIXL_INCLUDE_SIMULATOR_AARCH64

  TEARDOWN();
}

TEST(casp_caspa_caspl_caspal) {
  uint64_t data1[] = {0x89abcdef01234567, 0};
  uint64_t data2[] = {0x89abcdef01234567, 0};
  uint64_t data3[] = {0x89abcdef01234567, 0};
  uint64_t data4[] = {0x89abcdef01234567, 0};
  uint64_t data5[] = {0x89abcdef01234567, 0};
  uint64_t data6[] = {0x89abcdef01234567, 0};
  uint64_t data7[] = {0x89abcdef01234567, 0};
  uint64_t data8[] = {0x89abcdef01234567, 0};

  uint64_t* data1_aligned = AlignUp(data1, kXRegSizeInBytes * 2);
  uint64_t* data2_aligned = AlignUp(data2, kXRegSizeInBytes * 2);
  uint64_t* data3_aligned = AlignUp(data3, kXRegSizeInBytes * 2);
  uint64_t* data4_aligned = AlignUp(data4, kXRegSizeInBytes * 2);
  uint64_t* data5_aligned = AlignUp(data5, kXRegSizeInBytes * 2);
  uint64_t* data6_aligned = AlignUp(data6, kXRegSizeInBytes * 2);
  uint64_t* data7_aligned = AlignUp(data7, kXRegSizeInBytes * 2);
  uint64_t* data8_aligned = AlignUp(data8, kXRegSizeInBytes * 2);

  SETUP_WITH_FEATURES(CPUFeatures::kAtomics);

  START();

  __ Mov(x21, reinterpret_cast<uintptr_t>(data1_aligned));
  __ Mov(x22, reinterpret_cast<uintptr_t>(data2_aligned));
  __ Mov(x23, reinterpret_cast<uintptr_t>(data3_aligned));
  __ Mov(x24, reinterpret_cast<uintptr_t>(data4_aligned));
  __ Mov(x25, reinterpret_cast<uintptr_t>(data5_aligned));
  __ Mov(x26, reinterpret_cast<uintptr_t>(data6_aligned));
  __ Mov(x27, reinterpret_cast<uintptr_t>(data7_aligned));
  __ Mov(x28, reinterpret_cast<uintptr_t>(data8_aligned));

  __ Mov(x0, 0xffffffff);
  __ Mov(x1, 0xffffffff);

  __ Mov(x2, 0x76543210);
  __ Mov(x3, 0xfedcba98);
  __ Mov(x4, 0x89abcdef);
  __ Mov(x5, 0x01234567);

  __ Mov(x6, 0x76543210);
  __ Mov(x7, 0xfedcba98);
  __ Mov(x8, 0x89abcdef);
  __ Mov(x9, 0x01234567);

  __ Mov(x10, 0x76543210);
  __ Mov(x11, 0xfedcba98);
  __ Mov(x12, 0x89abcdef);
  __ Mov(x13, 0x01234567);

  __ Mov(x14, 0x76543210);
  __ Mov(x15, 0xfedcba98);
  __ Mov(x16, 0x89abcdef);
  __ Mov(x17, 0x01234567);

  __ Casp(w2, w3, w0, w1, MemOperand(x21));
  __ Casp(w4, w5, w0, w1, MemOperand(x22));
  __ Caspa(w6, w7, w0, w1, MemOperand(x23));
  __ Caspa(w8, w9, w0, w1, MemOperand(x24));
  __ Caspl(w10, w11, w0, w1, MemOperand(x25));
  __ Caspl(w12, w13, w0, w1, MemOperand(x26));
  __ Caspal(w14, w15, w0, w1, MemOperand(x27));
  __ Caspal(w16, w17, w0, w1, MemOperand(x28));

  END();

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
  RUN();

  ASSERT_EQUAL_64(0x89abcdef, x2);
  ASSERT_EQUAL_64(0x01234567, x3);
  ASSERT_EQUAL_64(0x89abcdef, x4);
  ASSERT_EQUAL_64(0x01234567, x5);
  ASSERT_EQUAL_64(0x89abcdef, x6);
  ASSERT_EQUAL_64(0x01234567, x7);
  ASSERT_EQUAL_64(0x89abcdef, x8);
  ASSERT_EQUAL_64(0x01234567, x9);
  ASSERT_EQUAL_64(0x89abcdef, x10);
  ASSERT_EQUAL_64(0x01234567, x11);
  ASSERT_EQUAL_64(0x89abcdef, x12);
  ASSERT_EQUAL_64(0x01234567, x13);
  ASSERT_EQUAL_64(0x89abcdef, x14);
  ASSERT_EQUAL_64(0x01234567, x15);
  ASSERT_EQUAL_64(0x89abcdef, x16);
  ASSERT_EQUAL_64(0x01234567, x17);

  ASSERT_EQUAL_64(0x89abcdef01234567, data1[0]);
  ASSERT_EQUAL_64(0xffffffffffffffff, data2[0]);
  ASSERT_EQUAL_64(0x89abcdef01234567, data3[0]);
  ASSERT_EQUAL_64(0xffffffffffffffff, data4[0]);
  ASSERT_EQUAL_64(0x89abcdef01234567, data5[0]);
  ASSERT_EQUAL_64(0xffffffffffffffff, data6[0]);
  ASSERT_EQUAL_64(0x89abcdef01234567, data7[0]);
  ASSERT_EQUAL_64(0xffffffffffffffff, data8[0]);
#endif  // VIXL_INCLUDE_SIMULATOR_AARCH64

  TEARDOWN();
}

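// The helpers below drive the MacroAssembler through tables of
// pointer-to-member functions, so a single routine can exercise every
// ordering variant (plain, acquire, release, acquire-release) of an atomic
// memory-operation family via the (masm.*fn)(...) call syntax.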
typedef void (MacroAssembler::*AtomicMemoryLoadSignature)(
    const Register& rs, const Register& rt, const MemOperand& src);
typedef void (MacroAssembler::*AtomicMemoryStoreSignature)(
    const Register& rs, const MemOperand& src);

void AtomicMemoryWHelper(AtomicMemoryLoadSignature* load_funcs,
                         AtomicMemoryStoreSignature* store_funcs,
                         uint64_t arg1,
                         uint64_t arg2,
                         uint64_t expected,
                         uint64_t result_mask) {
  uint64_t data0[] __attribute__((aligned(kXRegSizeInBytes * 2))) = {arg2, 0};
  uint64_t data1[] __attribute__((aligned(kXRegSizeInBytes * 2))) = {arg2, 0};
  uint64_t data2[] __attribute__((aligned(kXRegSizeInBytes * 2))) = {arg2, 0};
  uint64_t data3[] __attribute__((aligned(kXRegSizeInBytes * 2))) = {arg2, 0};
  uint64_t data4[] __attribute__((aligned(kXRegSizeInBytes * 2))) = {arg2, 0};
  uint64_t data5[] __attribute__((aligned(kXRegSizeInBytes * 2))) = {arg2, 0};

  SETUP_WITH_FEATURES(CPUFeatures::kAtomics);
  START();

  __ Mov(x20, reinterpret_cast<uintptr_t>(data0));
  __ Mov(x21, reinterpret_cast<uintptr_t>(data1));
  __ Mov(x22, reinterpret_cast<uintptr_t>(data2));
  __ Mov(x23, reinterpret_cast<uintptr_t>(data3));

  __ Mov(x0, arg1);
  __ Mov(x1, arg1);
  __ Mov(x2, arg1);
  __ Mov(x3, arg1);

  (masm.*(load_funcs[0]))(w0, w10, MemOperand(x20));
  (masm.*(load_funcs[1]))(w1, w11, MemOperand(x21));
  (masm.*(load_funcs[2]))(w2, w12, MemOperand(x22));
  (masm.*(load_funcs[3]))(w3, w13, MemOperand(x23));

  if (store_funcs != NULL) {
    __ Mov(x24, reinterpret_cast<uintptr_t>(data4));
    __ Mov(x25, reinterpret_cast<uintptr_t>(data5));
    __ Mov(x4, arg1);
    __ Mov(x5, arg1);

    (masm.*(store_funcs[0]))(w4, MemOperand(x24));
    (masm.*(store_funcs[1]))(w5, MemOperand(x25));
  }

  END();

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
  RUN();

  uint64_t stored_value = arg2 & result_mask;
  ASSERT_EQUAL_64(stored_value, x10);
  ASSERT_EQUAL_64(stored_value, x11);
  ASSERT_EQUAL_64(stored_value, x12);
  ASSERT_EQUAL_64(stored_value, x13);

  // The data fields already contain arg2, so only the bits selected by
  // result_mask are overwritten.
  uint64_t final_expected = (arg2 & ~result_mask) | (expected & result_mask);
  ASSERT_EQUAL_64(final_expected, data0[0]);
  ASSERT_EQUAL_64(final_expected, data1[0]);
  ASSERT_EQUAL_64(final_expected, data2[0]);
  ASSERT_EQUAL_64(final_expected, data3[0]);

  if (store_funcs != NULL) {
    ASSERT_EQUAL_64(final_expected, data4[0]);
    ASSERT_EQUAL_64(final_expected, data5[0]);
  }
#else
  USE(expected, result_mask);
#endif  // VIXL_INCLUDE_SIMULATOR_AARCH64

  TEARDOWN();
}

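// The X-sized helper below mirrors AtomicMemoryWHelper, but needs no result
// mask: X-sized operations read and write the full 64-bit data fields.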
void AtomicMemoryXHelper(AtomicMemoryLoadSignature* load_funcs,
                         AtomicMemoryStoreSignature* store_funcs,
                         uint64_t arg1,
                         uint64_t arg2,
                         uint64_t expected) {
  uint64_t data0[] __attribute__((aligned(kXRegSizeInBytes * 2))) = {arg2, 0};
  uint64_t data1[] __attribute__((aligned(kXRegSizeInBytes * 2))) = {arg2, 0};
  uint64_t data2[] __attribute__((aligned(kXRegSizeInBytes * 2))) = {arg2, 0};
  uint64_t data3[] __attribute__((aligned(kXRegSizeInBytes * 2))) = {arg2, 0};
  uint64_t data4[] __attribute__((aligned(kXRegSizeInBytes * 2))) = {arg2, 0};
  uint64_t data5[] __attribute__((aligned(kXRegSizeInBytes * 2))) = {arg2, 0};

  SETUP_WITH_FEATURES(CPUFeatures::kAtomics);
  START();

  __ Mov(x20, reinterpret_cast<uintptr_t>(data0));
  __ Mov(x21, reinterpret_cast<uintptr_t>(data1));
  __ Mov(x22, reinterpret_cast<uintptr_t>(data2));
  __ Mov(x23, reinterpret_cast<uintptr_t>(data3));

  __ Mov(x0, arg1);
  __ Mov(x1, arg1);
  __ Mov(x2, arg1);
  __ Mov(x3, arg1);

  (masm.*(load_funcs[0]))(x0, x10, MemOperand(x20));
  (masm.*(load_funcs[1]))(x1, x11, MemOperand(x21));
  (masm.*(load_funcs[2]))(x2, x12, MemOperand(x22));
  (masm.*(load_funcs[3]))(x3, x13, MemOperand(x23));

  if (store_funcs != NULL) {
    __ Mov(x24, reinterpret_cast<uintptr_t>(data4));
    __ Mov(x25, reinterpret_cast<uintptr_t>(data5));
    __ Mov(x4, arg1);
    __ Mov(x5, arg1);

    (masm.*(store_funcs[0]))(x4, MemOperand(x24));
    (masm.*(store_funcs[1]))(x5, MemOperand(x25));
  }

  END();

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
  RUN();

  ASSERT_EQUAL_64(arg2, x10);
  ASSERT_EQUAL_64(arg2, x11);
  ASSERT_EQUAL_64(arg2, x12);
  ASSERT_EQUAL_64(arg2, x13);

  ASSERT_EQUAL_64(expected, data0[0]);
  ASSERT_EQUAL_64(expected, data1[0]);
  ASSERT_EQUAL_64(expected, data2[0]);
  ASSERT_EQUAL_64(expected, data3[0]);

  if (store_funcs != NULL) {
    ASSERT_EQUAL_64(expected, data4[0]);
    ASSERT_EQUAL_64(expected, data5[0]);
  }
#else
  USE(expected);
#endif  // VIXL_INCLUDE_SIMULATOR_AARCH64

  TEARDOWN();
}

// clang-format off
#define MAKE_LOADS(NAME)           \
    {&MacroAssembler::Ld##NAME,    \
     &MacroAssembler::Ld##NAME##a, \
     &MacroAssembler::Ld##NAME##l, \
     &MacroAssembler::Ld##NAME##al}
#define MAKE_STORES(NAME) \
    {&MacroAssembler::St##NAME, &MacroAssembler::St##NAME##l}

#define MAKE_B_LOADS(NAME)          \
    {&MacroAssembler::Ld##NAME##b,  \
     &MacroAssembler::Ld##NAME##ab, \
     &MacroAssembler::Ld##NAME##lb, \
     &MacroAssembler::Ld##NAME##alb}
#define MAKE_B_STORES(NAME) \
    {&MacroAssembler::St##NAME##b, &MacroAssembler::St##NAME##lb}

#define MAKE_H_LOADS(NAME)          \
    {&MacroAssembler::Ld##NAME##h,  \
     &MacroAssembler::Ld##NAME##ah, \
     &MacroAssembler::Ld##NAME##lh, \
     &MacroAssembler::Ld##NAME##alh}
#define MAKE_H_STORES(NAME) \
    {&MacroAssembler::St##NAME##h, &MacroAssembler::St##NAME##lh}
// clang-format on
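// For example, MAKE_LOADS(add) expands to the four ordering variants:
//   {&MacroAssembler::Ldadd, &MacroAssembler::Ldadda,
//    &MacroAssembler::Ldaddl, &MacroAssembler::Ldaddal}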

TEST(atomic_memory_add) {
  AtomicMemoryLoadSignature loads[] = MAKE_LOADS(add);
  AtomicMemoryStoreSignature stores[] = MAKE_STORES(add);
  AtomicMemoryLoadSignature b_loads[] = MAKE_B_LOADS(add);
  AtomicMemoryStoreSignature b_stores[] = MAKE_B_STORES(add);
  AtomicMemoryLoadSignature h_loads[] = MAKE_H_LOADS(add);
  AtomicMemoryStoreSignature h_stores[] = MAKE_H_STORES(add);

  // The arguments are chosen to have two useful properties:
  //  * When multiplied by small values (such as a register index), this value
  //    is clearly readable in the result.
  //  * The value is not formed from repeating fixed-size smaller values, so it
  //    can be used to detect endianness-related errors.
  uint64_t arg1 = 0x0100001000100101;
  uint64_t arg2 = 0x0200002000200202;
  uint64_t expected = arg1 + arg2;

  AtomicMemoryWHelper(b_loads, b_stores, arg1, arg2, expected, kByteMask);
  AtomicMemoryWHelper(h_loads, h_stores, arg1, arg2, expected, kHalfWordMask);
  AtomicMemoryWHelper(loads, stores, arg1, arg2, expected, kWordMask);
  AtomicMemoryXHelper(loads, stores, arg1, arg2, expected);
}

TEST(atomic_memory_clr) {
  AtomicMemoryLoadSignature loads[] = MAKE_LOADS(clr);
  AtomicMemoryStoreSignature stores[] = MAKE_STORES(clr);
  AtomicMemoryLoadSignature b_loads[] = MAKE_B_LOADS(clr);
  AtomicMemoryStoreSignature b_stores[] = MAKE_B_STORES(clr);
  AtomicMemoryLoadSignature h_loads[] = MAKE_H_LOADS(clr);
  AtomicMemoryStoreSignature h_stores[] = MAKE_H_STORES(clr);

  uint64_t arg1 = 0x0300003000300303;
  uint64_t arg2 = 0x0500005000500505;
  uint64_t expected = arg2 & ~arg1;

  AtomicMemoryWHelper(b_loads, b_stores, arg1, arg2, expected, kByteMask);
  AtomicMemoryWHelper(h_loads, h_stores, arg1, arg2, expected, kHalfWordMask);
  AtomicMemoryWHelper(loads, stores, arg1, arg2, expected, kWordMask);
  AtomicMemoryXHelper(loads, stores, arg1, arg2, expected);
}

TEST(atomic_memory_eor) {
  AtomicMemoryLoadSignature loads[] = MAKE_LOADS(eor);
  AtomicMemoryStoreSignature stores[] = MAKE_STORES(eor);
  AtomicMemoryLoadSignature b_loads[] = MAKE_B_LOADS(eor);
  AtomicMemoryStoreSignature b_stores[] = MAKE_B_STORES(eor);
  AtomicMemoryLoadSignature h_loads[] = MAKE_H_LOADS(eor);
  AtomicMemoryStoreSignature h_stores[] = MAKE_H_STORES(eor);

  uint64_t arg1 = 0x0300003000300303;
  uint64_t arg2 = 0x0500005000500505;
  uint64_t expected = arg1 ^ arg2;

  AtomicMemoryWHelper(b_loads, b_stores, arg1, arg2, expected, kByteMask);
  AtomicMemoryWHelper(h_loads, h_stores, arg1, arg2, expected, kHalfWordMask);
  AtomicMemoryWHelper(loads, stores, arg1, arg2, expected, kWordMask);
  AtomicMemoryXHelper(loads, stores, arg1, arg2, expected);
}

TEST(atomic_memory_set) {
  AtomicMemoryLoadSignature loads[] = MAKE_LOADS(set);
  AtomicMemoryStoreSignature stores[] = MAKE_STORES(set);
  AtomicMemoryLoadSignature b_loads[] = MAKE_B_LOADS(set);
  AtomicMemoryStoreSignature b_stores[] = MAKE_B_STORES(set);
  AtomicMemoryLoadSignature h_loads[] = MAKE_H_LOADS(set);
  AtomicMemoryStoreSignature h_stores[] = MAKE_H_STORES(set);

  uint64_t arg1 = 0x0300003000300303;
  uint64_t arg2 = 0x0500005000500505;
  uint64_t expected = arg1 | arg2;

  AtomicMemoryWHelper(b_loads, b_stores, arg1, arg2, expected, kByteMask);
  AtomicMemoryWHelper(h_loads, h_stores, arg1, arg2, expected, kHalfWordMask);
  AtomicMemoryWHelper(loads, stores, arg1, arg2, expected, kWordMask);
  AtomicMemoryXHelper(loads, stores, arg1, arg2, expected);
}

TEST(atomic_memory_smax) {
  AtomicMemoryLoadSignature loads[] = MAKE_LOADS(smax);
  AtomicMemoryStoreSignature stores[] = MAKE_STORES(smax);
  AtomicMemoryLoadSignature b_loads[] = MAKE_B_LOADS(smax);
  AtomicMemoryStoreSignature b_stores[] = MAKE_B_STORES(smax);
  AtomicMemoryLoadSignature h_loads[] = MAKE_H_LOADS(smax);
  AtomicMemoryStoreSignature h_stores[] = MAKE_H_STORES(smax);

  uint64_t arg1 = 0x8100000080108181;
  uint64_t arg2 = 0x0100001000100101;
  uint64_t expected = 0x0100001000100101;

  AtomicMemoryWHelper(b_loads, b_stores, arg1, arg2, expected, kByteMask);
  AtomicMemoryWHelper(h_loads, h_stores, arg1, arg2, expected, kHalfWordMask);
  AtomicMemoryWHelper(loads, stores, arg1, arg2, expected, kWordMask);
  AtomicMemoryXHelper(loads, stores, arg1, arg2, expected);
}

TEST(atomic_memory_smin) {
  AtomicMemoryLoadSignature loads[] = MAKE_LOADS(smin);
  AtomicMemoryStoreSignature stores[] = MAKE_STORES(smin);
  AtomicMemoryLoadSignature b_loads[] = MAKE_B_LOADS(smin);
  AtomicMemoryStoreSignature b_stores[] = MAKE_B_STORES(smin);
  AtomicMemoryLoadSignature h_loads[] = MAKE_H_LOADS(smin);
  AtomicMemoryStoreSignature h_stores[] = MAKE_H_STORES(smin);

  uint64_t arg1 = 0x8100000080108181;
  uint64_t arg2 = 0x0100001000100101;
  uint64_t expected = 0x8100000080108181;

  AtomicMemoryWHelper(b_loads, b_stores, arg1, arg2, expected, kByteMask);
  AtomicMemoryWHelper(h_loads, h_stores, arg1, arg2, expected, kHalfWordMask);
  AtomicMemoryWHelper(loads, stores, arg1, arg2, expected, kWordMask);
  AtomicMemoryXHelper(loads, stores, arg1, arg2, expected);
}

TEST(atomic_memory_umax) {
  AtomicMemoryLoadSignature loads[] = MAKE_LOADS(umax);
  AtomicMemoryStoreSignature stores[] = MAKE_STORES(umax);
  AtomicMemoryLoadSignature b_loads[] = MAKE_B_LOADS(umax);
  AtomicMemoryStoreSignature b_stores[] = MAKE_B_STORES(umax);
  AtomicMemoryLoadSignature h_loads[] = MAKE_H_LOADS(umax);
  AtomicMemoryStoreSignature h_stores[] = MAKE_H_STORES(umax);

  uint64_t arg1 = 0x8100000080108181;
  uint64_t arg2 = 0x0100001000100101;
  uint64_t expected = 0x8100000080108181;

  AtomicMemoryWHelper(b_loads, b_stores, arg1, arg2, expected, kByteMask);
  AtomicMemoryWHelper(h_loads, h_stores, arg1, arg2, expected, kHalfWordMask);
  AtomicMemoryWHelper(loads, stores, arg1, arg2, expected, kWordMask);
  AtomicMemoryXHelper(loads, stores, arg1, arg2, expected);
}

TEST(atomic_memory_umin) {
  AtomicMemoryLoadSignature loads[] = MAKE_LOADS(umin);
  AtomicMemoryStoreSignature stores[] = MAKE_STORES(umin);
  AtomicMemoryLoadSignature b_loads[] = MAKE_B_LOADS(umin);
  AtomicMemoryStoreSignature b_stores[] = MAKE_B_STORES(umin);
  AtomicMemoryLoadSignature h_loads[] = MAKE_H_LOADS(umin);
  AtomicMemoryStoreSignature h_stores[] = MAKE_H_STORES(umin);

  uint64_t arg1 = 0x8100000080108181;
  uint64_t arg2 = 0x0100001000100101;
  uint64_t expected = 0x0100001000100101;

  AtomicMemoryWHelper(b_loads, b_stores, arg1, arg2, expected, kByteMask);
  AtomicMemoryWHelper(h_loads, h_stores, arg1, arg2, expected, kHalfWordMask);
  AtomicMemoryWHelper(loads, stores, arg1, arg2, expected, kWordMask);
  AtomicMemoryXHelper(loads, stores, arg1, arg2, expected);
}

TEST(atomic_memory_swp) {
  AtomicMemoryLoadSignature loads[] = {&MacroAssembler::Swp,
                                       &MacroAssembler::Swpa,
                                       &MacroAssembler::Swpl,
                                       &MacroAssembler::Swpal};
  AtomicMemoryLoadSignature b_loads[] = {&MacroAssembler::Swpb,
                                         &MacroAssembler::Swpab,
                                         &MacroAssembler::Swplb,
                                         &MacroAssembler::Swpalb};
  AtomicMemoryLoadSignature h_loads[] = {&MacroAssembler::Swph,
                                         &MacroAssembler::Swpah,
                                         &MacroAssembler::Swplh,
                                         &MacroAssembler::Swpalh};

  uint64_t arg1 = 0x0100001000100101;
  uint64_t arg2 = 0x0200002000200202;
  uint64_t expected = 0x0100001000100101;

  // SWP functions have equivalent signatures to the Atomic Memory LD functions
  // so we can use the same helper but without the ST aliases.
  AtomicMemoryWHelper(b_loads, NULL, arg1, arg2, expected, kByteMask);
  AtomicMemoryWHelper(h_loads, NULL, arg1, arg2, expected, kHalfWordMask);
  AtomicMemoryWHelper(loads, NULL, arg1, arg2, expected, kWordMask);
  AtomicMemoryXHelper(loads, NULL, arg1, arg2, expected);
}


TEST(ldaprb_ldaprh_ldapr) {
  uint64_t data0[] = {0x1010101010101010, 0};
  uint64_t data1[] = {0x1010101010101010, 0};
  uint64_t data2[] = {0x1010101010101010, 0};
  uint64_t data3[] = {0x1010101010101010, 0};

  uint64_t* data0_aligned = AlignUp(data0, kXRegSizeInBytes * 2);
  uint64_t* data1_aligned = AlignUp(data1, kXRegSizeInBytes * 2);
  uint64_t* data2_aligned = AlignUp(data2, kXRegSizeInBytes * 2);
  uint64_t* data3_aligned = AlignUp(data3, kXRegSizeInBytes * 2);

  SETUP_WITH_FEATURES(CPUFeatures::kRCpc);
  START();

  __ Mov(x20, reinterpret_cast<uintptr_t>(data0_aligned));
  __ Mov(x21, reinterpret_cast<uintptr_t>(data1_aligned));
  __ Mov(x22, reinterpret_cast<uintptr_t>(data2_aligned));
  __ Mov(x23, reinterpret_cast<uintptr_t>(data3_aligned));

  __ Ldaprb(w0, MemOperand(x20));
  __ Ldaprh(w1, MemOperand(x21));
  __ Ldapr(w2, MemOperand(x22));
  __ Ldapr(x3, MemOperand(x23));

  END();

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
  RUN();
  ASSERT_EQUAL_64(0x10, x0);
  ASSERT_EQUAL_64(0x1010, x1);
  ASSERT_EQUAL_64(0x10101010, x2);
  ASSERT_EQUAL_64(0x1010101010101010, x3);
#endif

  TEARDOWN();
}

TEST(load_store_tagged_immediate_offset) {
  uint64_t tags[] = {0x00, 0x1, 0x55, 0xff};
  int tag_count = sizeof(tags) / sizeof(tags[0]);

  const int kMaxDataLength = 160;

  for (int i = 0; i < tag_count; i++) {
    unsigned char src[kMaxDataLength];
    uint64_t src_raw = reinterpret_cast<uint64_t>(src);
    uint64_t src_tag = tags[i];
    uint64_t src_tagged = CPU::SetPointerTag(src_raw, src_tag);

    for (int k = 0; k < kMaxDataLength; k++) {
      src[k] = k + 1;
    }

    for (int j = 0; j < tag_count; j++) {
      unsigned char dst[kMaxDataLength];
      uint64_t dst_raw = reinterpret_cast<uint64_t>(dst);
      uint64_t dst_tag = tags[j];
      uint64_t dst_tagged = CPU::SetPointerTag(dst_raw, dst_tag);

      memset(dst, 0, kMaxDataLength);

      SETUP_WITH_FEATURES(CPUFeatures::kNEON);
      START();

      __ Mov(x0, src_tagged);
      __ Mov(x1, dst_tagged);

      int offset = 0;

      // Scaled-immediate offsets.
      {
        ExactAssemblyScope scope(&masm, 2 * kInstructionSize);
        __ ldp(q0, q1, MemOperand(x0, offset));
        __ stp(q0, q1, MemOperand(x1, offset));
      }
      offset += 2 * kQRegSizeInBytes;

      {
        ExactAssemblyScope scope(&masm, 2 * kInstructionSize);
        __ ldp(x2, x3, MemOperand(x0, offset));
        __ stp(x2, x3, MemOperand(x1, offset));
      }
      offset += 2 * kXRegSizeInBytes;

      {
        ExactAssemblyScope scope(&masm, 2 * kInstructionSize);
        __ ldpsw(x2, x3, MemOperand(x0, offset));
        __ stp(w2, w3, MemOperand(x1, offset));
      }
      offset += 2 * kWRegSizeInBytes;

      {
        ExactAssemblyScope scope(&masm, 2 * kInstructionSize);
        __ ldp(d0, d1, MemOperand(x0, offset));
        __ stp(d0, d1, MemOperand(x1, offset));
      }
      offset += 2 * kDRegSizeInBytes;

      {
        ExactAssemblyScope scope(&masm, 2 * kInstructionSize);
        __ ldp(w2, w3, MemOperand(x0, offset));
        __ stp(w2, w3, MemOperand(x1, offset));
      }
      offset += 2 * kWRegSizeInBytes;

      {
        ExactAssemblyScope scope(&masm, 2 * kInstructionSize);
        __ ldp(s0, s1, MemOperand(x0, offset));
        __ stp(s0, s1, MemOperand(x1, offset));
      }
      offset += 2 * kSRegSizeInBytes;

      {
        ExactAssemblyScope scope(&masm, 2 * kInstructionSize);
        __ ldr(x2, MemOperand(x0, offset), RequireScaledOffset);
        __ str(x2, MemOperand(x1, offset), RequireScaledOffset);
      }
      offset += kXRegSizeInBytes;

      {
        ExactAssemblyScope scope(&masm, 2 * kInstructionSize);
        __ ldr(d0, MemOperand(x0, offset), RequireScaledOffset);
        __ str(d0, MemOperand(x1, offset), RequireScaledOffset);
      }
      offset += kDRegSizeInBytes;

      {
        ExactAssemblyScope scope(&masm, 2 * kInstructionSize);
        __ ldr(w2, MemOperand(x0, offset), RequireScaledOffset);
        __ str(w2, MemOperand(x1, offset), RequireScaledOffset);
      }
      offset += kWRegSizeInBytes;

      {
        ExactAssemblyScope scope(&masm, 2 * kInstructionSize);
        __ ldr(s0, MemOperand(x0, offset), RequireScaledOffset);
        __ str(s0, MemOperand(x1, offset), RequireScaledOffset);
      }
      offset += kSRegSizeInBytes;

      {
        ExactAssemblyScope scope(&masm, 2 * kInstructionSize);
        __ ldrh(w2, MemOperand(x0, offset), RequireScaledOffset);
        __ strh(w2, MemOperand(x1, offset), RequireScaledOffset);
      }
      offset += 2;

      {
        ExactAssemblyScope scope(&masm, 2 * kInstructionSize);
        __ ldrsh(w2, MemOperand(x0, offset), RequireScaledOffset);
        __ strh(w2, MemOperand(x1, offset), RequireScaledOffset);
      }
      offset += 2;

      {
        ExactAssemblyScope scope(&masm, 2 * kInstructionSize);
        __ ldrb(w2, MemOperand(x0, offset), RequireScaledOffset);
        __ strb(w2, MemOperand(x1, offset), RequireScaledOffset);
      }
      offset += 1;

      {
        ExactAssemblyScope scope(&masm, 2 * kInstructionSize);
        __ ldrsb(w2, MemOperand(x0, offset), RequireScaledOffset);
        __ strb(w2, MemOperand(x1, offset), RequireScaledOffset);
      }
      offset += 1;

      // Unscaled-immediate offsets.

      {
        ExactAssemblyScope scope(&masm, 2 * kInstructionSize);
        __ ldur(x2, MemOperand(x0, offset), RequireUnscaledOffset);
        __ stur(x2, MemOperand(x1, offset), RequireUnscaledOffset);
      }
      offset += kXRegSizeInBytes;

      {
        ExactAssemblyScope scope(&masm, 2 * kInstructionSize);
        __ ldur(d0, MemOperand(x0, offset), RequireUnscaledOffset);
        __ stur(d0, MemOperand(x1, offset), RequireUnscaledOffset);
      }
      offset += kDRegSizeInBytes;

      {
        ExactAssemblyScope scope(&masm, 2 * kInstructionSize);
        __ ldur(w2, MemOperand(x0, offset), RequireUnscaledOffset);
        __ stur(w2, MemOperand(x1, offset), RequireUnscaledOffset);
      }
      offset += kWRegSizeInBytes;

      {
        ExactAssemblyScope scope(&masm, 2 * kInstructionSize);
        __ ldur(s0, MemOperand(x0, offset), RequireUnscaledOffset);
        __ stur(s0, MemOperand(x1, offset), RequireUnscaledOffset);
      }
      offset += kSRegSizeInBytes;

      {
        ExactAssemblyScope scope(&masm, 2 * kInstructionSize);
        __ ldurh(w2, MemOperand(x0, offset), RequireUnscaledOffset);
        __ sturh(w2, MemOperand(x1, offset), RequireUnscaledOffset);
      }
      offset += 2;

      {
        ExactAssemblyScope scope(&masm, 2 * kInstructionSize);
        __ ldursh(w2, MemOperand(x0, offset), RequireUnscaledOffset);
        __ sturh(w2, MemOperand(x1, offset), RequireUnscaledOffset);
      }
      offset += 2;

      {
        ExactAssemblyScope scope(&masm, 2 * kInstructionSize);
        __ ldurb(w2, MemOperand(x0, offset), RequireUnscaledOffset);
        __ sturb(w2, MemOperand(x1, offset), RequireUnscaledOffset);
      }
      offset += 1;

      {
        ExactAssemblyScope scope(&masm, 2 * kInstructionSize);
        __ ldursb(w2, MemOperand(x0, offset), RequireUnscaledOffset);
        __ sturb(w2, MemOperand(x1, offset), RequireUnscaledOffset);
      }
      offset += 1;

      // Extract the tag (so we can test that it was preserved correctly).
      __ Ubfx(x0, x0, kAddressTagOffset, kAddressTagWidth);
      __ Ubfx(x1, x1, kAddressTagOffset, kAddressTagWidth);

      VIXL_ASSERT(kMaxDataLength >= offset);

      END();
      RUN();

      ASSERT_EQUAL_64(src_tag, x0);
      ASSERT_EQUAL_64(dst_tag, x1);

      for (int k = 0; k < offset; k++) {
        VIXL_CHECK(src[k] == dst[k]);
      }

      TEARDOWN();
    }
  }
}


TEST(load_store_tagged_immediate_preindex) {
  uint64_t tags[] = {0x00, 0x1, 0x55, 0xff};
  int tag_count = sizeof(tags) / sizeof(tags[0]);

  const int kMaxDataLength = 128;

  for (int i = 0; i < tag_count; i++) {
    unsigned char src[kMaxDataLength];
    uint64_t src_raw = reinterpret_cast<uint64_t>(src);
    uint64_t src_tag = tags[i];
    uint64_t src_tagged = CPU::SetPointerTag(src_raw, src_tag);

    for (int k = 0; k < kMaxDataLength; k++) {
      src[k] = k + 1;
    }

    for (int j = 0; j < tag_count; j++) {
      unsigned char dst[kMaxDataLength];
      uint64_t dst_raw = reinterpret_cast<uint64_t>(dst);
      uint64_t dst_tag = tags[j];
      uint64_t dst_tagged = CPU::SetPointerTag(dst_raw, dst_tag);

      for (int k = 0; k < kMaxDataLength; k++) {
        dst[k] = 0;
      }

      SETUP_WITH_FEATURES(CPUFeatures::kNEON);
      START();

      // Each MemOperand must apply a pre-index equal to the size of the
      // previous access.

      // Start with a non-zero preindex.
      int preindex = 62 * kXRegSizeInBytes;
      int data_length = 0;

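      // Bias the base registers down by the initial pre-index so that the
      // first accesses land at the start of src and dst.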
19300       __ Mov(x0, src_tagged - preindex);
19301       __ Mov(x1, dst_tagged - preindex);
19302 
19303       {
19304         ExactAssemblyScope scope(&masm, 2 * kInstructionSize);
19305         __ ldp(q0, q1, MemOperand(x0, preindex, PreIndex));
19306         __ stp(q0, q1, MemOperand(x1, preindex, PreIndex));
19307       }
19308       preindex = 2 * kQRegSizeInBytes;
19309       data_length = preindex;
19310 
19311       {
19312         ExactAssemblyScope scope(&masm, 2 * kInstructionSize);
19313         __ ldp(x2, x3, MemOperand(x0, preindex, PreIndex));
19314         __ stp(x2, x3, MemOperand(x1, preindex, PreIndex));
19315       }
19316       preindex = 2 * kXRegSizeInBytes;
19317       data_length += preindex;
19318 
19319       {
19320         ExactAssemblyScope scope(&masm, 2 * kInstructionSize);
19321         __ ldpsw(x2, x3, MemOperand(x0, preindex, PreIndex));
19322         __ stp(w2, w3, MemOperand(x1, preindex, PreIndex));
19323       }
19324       preindex = 2 * kWRegSizeInBytes;
19325       data_length += preindex;
19326 
19327       {
19328         ExactAssemblyScope scope(&masm, 2 * kInstructionSize);
19329         __ ldp(d0, d1, MemOperand(x0, preindex, PreIndex));
19330         __ stp(d0, d1, MemOperand(x1, preindex, PreIndex));
19331       }
19332       preindex = 2 * kDRegSizeInBytes;
19333       data_length += preindex;
19334 
19335       {
19336         ExactAssemblyScope scope(&masm, 2 * kInstructionSize);
19337         __ ldp(w2, w3, MemOperand(x0, preindex, PreIndex));
19338         __ stp(w2, w3, MemOperand(x1, preindex, PreIndex));
19339       }
19340       preindex = 2 * kWRegSizeInBytes;
19341       data_length += preindex;
19342 
19343       {
19344         ExactAssemblyScope scope(&masm, 2 * kInstructionSize);
19345         __ ldp(s0, s1, MemOperand(x0, preindex, PreIndex));
19346         __ stp(s0, s1, MemOperand(x1, preindex, PreIndex));
19347       }
19348       preindex = 2 * kSRegSizeInBytes;
19349       data_length += preindex;
19350 
19351       {
19352         ExactAssemblyScope scope(&masm, 2 * kInstructionSize);
19353         __ ldr(x2, MemOperand(x0, preindex, PreIndex));
19354         __ str(x2, MemOperand(x1, preindex, PreIndex));
19355       }
19356       preindex = kXRegSizeInBytes;
19357       data_length += preindex;
19358 
19359       {
19360         ExactAssemblyScope scope(&masm, 2 * kInstructionSize);
19361         __ ldr(d0, MemOperand(x0, preindex, PreIndex));
19362         __ str(d0, MemOperand(x1, preindex, PreIndex));
19363       }
19364       preindex = kDRegSizeInBytes;
19365       data_length += preindex;
19366 
19367       {
19368         ExactAssemblyScope scope(&masm, 2 * kInstructionSize);
19369         __ ldr(w2, MemOperand(x0, preindex, PreIndex));
19370         __ str(w2, MemOperand(x1, preindex, PreIndex));
19371       }
19372       preindex = kWRegSizeInBytes;
19373       data_length += preindex;
19374 
19375       {
19376         ExactAssemblyScope scope(&masm, 2 * kInstructionSize);
19377         __ ldr(s0, MemOperand(x0, preindex, PreIndex));
19378         __ str(s0, MemOperand(x1, preindex, PreIndex));
19379       }
19380       preindex = kSRegSizeInBytes;
19381       data_length += preindex;
19382 
19383       {
19384         ExactAssemblyScope scope(&masm, 2 * kInstructionSize);
19385         __ ldrh(w2, MemOperand(x0, preindex, PreIndex));
19386         __ strh(w2, MemOperand(x1, preindex, PreIndex));
19387       }
19388       preindex = 2;
19389       data_length += preindex;
19390 
19391       {
19392         ExactAssemblyScope scope(&masm, 2 * kInstructionSize);
19393         __ ldrsh(w2, MemOperand(x0, preindex, PreIndex));
19394         __ strh(w2, MemOperand(x1, preindex, PreIndex));
19395       }
19396       preindex = 2;
19397       data_length += preindex;
19398 
19399       {
19400         ExactAssemblyScope scope(&masm, 2 * kInstructionSize);
19401         __ ldrb(w2, MemOperand(x0, preindex, PreIndex));
19402         __ strb(w2, MemOperand(x1, preindex, PreIndex));
19403       }
19404       preindex = 1;
19405       data_length += preindex;
19406 
19407       {
19408         ExactAssemblyScope scope(&masm, 2 * kInstructionSize);
19409         __ ldrsb(w2, MemOperand(x0, preindex, PreIndex));
19410         __ strb(w2, MemOperand(x1, preindex, PreIndex));
19411       }
19412       preindex = 1;
19413       data_length += preindex;
19414 
19415       VIXL_ASSERT(kMaxDataLength >= data_length);
19416 
19417       END();
19418       RUN();
19419 
19420       // Check that the preindex was correctly applied in each operation, and
19421       // that the tag was preserved.
19422       ASSERT_EQUAL_64(src_tagged + data_length - preindex, x0);
19423       ASSERT_EQUAL_64(dst_tagged + data_length - preindex, x1);
19424 
19425       for (int k = 0; k < data_length; k++) {
19426         VIXL_CHECK(src[k] == dst[k]);
19427       }
19428 
19429       TEARDOWN();
19430     }
19431   }
19432 }
19433 
19434 
TEST(load_store_tagged_immediate_postindex)19435 TEST(load_store_tagged_immediate_postindex) {
19436   uint64_t tags[] = {0x00, 0x1, 0x55, 0xff};
19437   int tag_count = sizeof(tags) / sizeof(tags[0]);
19438 
19439   const int kMaxDataLength = 128;
19440 
19441   for (int i = 0; i < tag_count; i++) {
19442     unsigned char src[kMaxDataLength];
19443     uint64_t src_raw = reinterpret_cast<uint64_t>(src);
19444     uint64_t src_tag = tags[i];
19445     uint64_t src_tagged = CPU::SetPointerTag(src_raw, src_tag);
19446 
19447     for (int k = 0; k < kMaxDataLength; k++) {
19448       src[k] = k + 1;
19449     }
19450 
19451     for (int j = 0; j < tag_count; j++) {
19452       unsigned char dst[kMaxDataLength];
19453       uint64_t dst_raw = reinterpret_cast<uint64_t>(dst);
19454       uint64_t dst_tag = tags[j];
19455       uint64_t dst_tagged = CPU::SetPointerTag(dst_raw, dst_tag);
19456 
19457       for (int k = 0; k < kMaxDataLength; k++) {
19458         dst[k] = 0;
19459       }
19460 
19461       SETUP_WITH_FEATURES(CPUFeatures::kNEON);
19462       START();
19463 
      int postindex = 2 * kXRegSizeInBytes;
      int data_length = 0;

      __ Mov(x0, src_tagged);
      __ Mov(x1, dst_tagged);

      {
        ExactAssemblyScope scope(&masm, 2 * kInstructionSize);
        __ ldp(x2, x3, MemOperand(x0, postindex, PostIndex));
        __ stp(x2, x3, MemOperand(x1, postindex, PostIndex));
      }
      data_length = postindex;

      postindex = 2 * kQRegSizeInBytes;
      {
        ExactAssemblyScope scope(&masm, 2 * kInstructionSize);
        __ ldp(q0, q1, MemOperand(x0, postindex, PostIndex));
        __ stp(q0, q1, MemOperand(x1, postindex, PostIndex));
      }
      data_length += postindex;

      postindex = 2 * kWRegSizeInBytes;
      {
        ExactAssemblyScope scope(&masm, 2 * kInstructionSize);
        __ ldpsw(x2, x3, MemOperand(x0, postindex, PostIndex));
        __ stp(w2, w3, MemOperand(x1, postindex, PostIndex));
      }
      data_length += postindex;

      postindex = 2 * kDRegSizeInBytes;
      {
        ExactAssemblyScope scope(&masm, 2 * kInstructionSize);
        __ ldp(d0, d1, MemOperand(x0, postindex, PostIndex));
        __ stp(d0, d1, MemOperand(x1, postindex, PostIndex));
      }
      data_length += postindex;

      postindex = 2 * kWRegSizeInBytes;
      {
        ExactAssemblyScope scope(&masm, 2 * kInstructionSize);
        __ ldp(w2, w3, MemOperand(x0, postindex, PostIndex));
        __ stp(w2, w3, MemOperand(x1, postindex, PostIndex));
      }
      data_length += postindex;

      postindex = 2 * kSRegSizeInBytes;
      {
        ExactAssemblyScope scope(&masm, 2 * kInstructionSize);
        __ ldp(s0, s1, MemOperand(x0, postindex, PostIndex));
        __ stp(s0, s1, MemOperand(x1, postindex, PostIndex));
      }
      data_length += postindex;

      postindex = kXRegSizeInBytes;
      {
        ExactAssemblyScope scope(&masm, 2 * kInstructionSize);
        __ ldr(x2, MemOperand(x0, postindex, PostIndex));
        __ str(x2, MemOperand(x1, postindex, PostIndex));
      }
      data_length += postindex;

      postindex = kDRegSizeInBytes;
      {
        ExactAssemblyScope scope(&masm, 2 * kInstructionSize);
        __ ldr(d0, MemOperand(x0, postindex, PostIndex));
        __ str(d0, MemOperand(x1, postindex, PostIndex));
      }
      data_length += postindex;

      postindex = kWRegSizeInBytes;
      {
        ExactAssemblyScope scope(&masm, 2 * kInstructionSize);
        __ ldr(w2, MemOperand(x0, postindex, PostIndex));
        __ str(w2, MemOperand(x1, postindex, PostIndex));
      }
      data_length += postindex;

      postindex = kSRegSizeInBytes;
      {
        ExactAssemblyScope scope(&masm, 2 * kInstructionSize);
        __ ldr(s0, MemOperand(x0, postindex, PostIndex));
        __ str(s0, MemOperand(x1, postindex, PostIndex));
      }
      data_length += postindex;

      postindex = 2;
      {
        ExactAssemblyScope scope(&masm, 2 * kInstructionSize);
        __ ldrh(w2, MemOperand(x0, postindex, PostIndex));
        __ strh(w2, MemOperand(x1, postindex, PostIndex));
      }
      data_length += postindex;

      postindex = 2;
      {
        ExactAssemblyScope scope(&masm, 2 * kInstructionSize);
        __ ldrsh(w2, MemOperand(x0, postindex, PostIndex));
        __ strh(w2, MemOperand(x1, postindex, PostIndex));
      }
      data_length += postindex;

      postindex = 1;
      {
        ExactAssemblyScope scope(&masm, 2 * kInstructionSize);
        __ ldrb(w2, MemOperand(x0, postindex, PostIndex));
        __ strb(w2, MemOperand(x1, postindex, PostIndex));
      }
      data_length += postindex;

      postindex = 1;
      {
        ExactAssemblyScope scope(&masm, 2 * kInstructionSize);
        __ ldrsb(w2, MemOperand(x0, postindex, PostIndex));
        __ strb(w2, MemOperand(x1, postindex, PostIndex));
      }
      data_length += postindex;

      VIXL_ASSERT(kMaxDataLength >= data_length);

      END();
      RUN();

      // Check that the postindex was correctly applied in each operation, and
      // that the tag was preserved.
      ASSERT_EQUAL_64(src_tagged + data_length, x0);
      ASSERT_EQUAL_64(dst_tagged + data_length, x1);

      for (int k = 0; k < data_length; k++) {
        VIXL_CHECK(src[k] == dst[k]);
      }

      TEARDOWN();
    }
  }
}

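// The tagged-address tests below rely on AArch64's top-byte-ignore (TBI)
// rules: bits [63:56] of a pointer may carry a tag that data accesses ignore,
// so a load or store through a tagged pointer behaves exactly like one
// through the untagged pointer. Each test copies a buffer through
// variously-tagged base (and offset) registers, then checks that the data
// arrived intact and that the registers kept their tags.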
TEST(load_store_tagged_register_offset) {
  uint64_t tags[] = {0x00, 0x1, 0x55, 0xff};
  int tag_count = sizeof(tags) / sizeof(tags[0]);

  const int kMaxDataLength = 128;

  for (int i = 0; i < tag_count; i++) {
    unsigned char src[kMaxDataLength];
    uint64_t src_raw = reinterpret_cast<uint64_t>(src);
    uint64_t src_tag = tags[i];
    uint64_t src_tagged = CPU::SetPointerTag(src_raw, src_tag);

    for (int k = 0; k < kMaxDataLength; k++) {
      src[k] = k + 1;
    }

    for (int j = 0; j < tag_count; j++) {
      unsigned char dst[kMaxDataLength];
      uint64_t dst_raw = reinterpret_cast<uint64_t>(dst);
      uint64_t dst_tag = tags[j];
      uint64_t dst_tagged = CPU::SetPointerTag(dst_raw, dst_tag);

      // Also tag the offset register; the operation should still succeed.
      for (int o = 0; o < tag_count; o++) {
        uint64_t offset_base = CPU::SetPointerTag(UINT64_C(0), tags[o]);
        int data_length = 0;

        for (int k = 0; k < kMaxDataLength; k++) {
          dst[k] = 0;
        }

        SETUP_WITH_FEATURES(CPUFeatures::kNEON);
        START();

        __ Mov(x0, src_tagged);
        __ Mov(x1, dst_tagged);

        __ Mov(x10, offset_base + data_length);
        {
          ExactAssemblyScope scope(&masm, 2 * kInstructionSize);
          __ ldr(x2, MemOperand(x0, x10));
          __ str(x2, MemOperand(x1, x10));
        }
        data_length += kXRegSizeInBytes;

        __ Mov(x10, offset_base + data_length);
        {
          ExactAssemblyScope scope(&masm, 2 * kInstructionSize);
          __ ldr(d0, MemOperand(x0, x10));
          __ str(d0, MemOperand(x1, x10));
        }
        data_length += kDRegSizeInBytes;

        __ Mov(x10, offset_base + data_length);
        {
          ExactAssemblyScope scope(&masm, 2 * kInstructionSize);
          __ ldr(w2, MemOperand(x0, x10));
          __ str(w2, MemOperand(x1, x10));
        }
        data_length += kWRegSizeInBytes;

        __ Mov(x10, offset_base + data_length);
        {
          ExactAssemblyScope scope(&masm, 2 * kInstructionSize);
          __ ldr(s0, MemOperand(x0, x10));
          __ str(s0, MemOperand(x1, x10));
        }
        data_length += kSRegSizeInBytes;

        __ Mov(x10, offset_base + data_length);
        {
          ExactAssemblyScope scope(&masm, 2 * kInstructionSize);
          __ ldrh(w2, MemOperand(x0, x10));
          __ strh(w2, MemOperand(x1, x10));
        }
        data_length += 2;

        __ Mov(x10, offset_base + data_length);
        {
          ExactAssemblyScope scope(&masm, 2 * kInstructionSize);
          __ ldrsh(w2, MemOperand(x0, x10));
          __ strh(w2, MemOperand(x1, x10));
        }
        data_length += 2;

        __ Mov(x10, offset_base + data_length);
        {
          ExactAssemblyScope scope(&masm, 2 * kInstructionSize);
          __ ldrb(w2, MemOperand(x0, x10));
          __ strb(w2, MemOperand(x1, x10));
        }
        data_length += 1;

        __ Mov(x10, offset_base + data_length);
        {
          ExactAssemblyScope scope(&masm, 2 * kInstructionSize);
          __ ldrsb(w2, MemOperand(x0, x10));
          __ strb(w2, MemOperand(x1, x10));
        }
        data_length += 1;

        VIXL_ASSERT(kMaxDataLength >= data_length);

        END();
        RUN();

        // Check that the loads and stores did not modify the base or offset
        // registers (register-offset addressing has no write-back), and that
        // the tags were preserved.
        ASSERT_EQUAL_64(src_tagged, x0);
        ASSERT_EQUAL_64(dst_tagged, x1);
        ASSERT_EQUAL_64(offset_base + data_length - 1, x10);

        for (int k = 0; k < data_length; k++) {
          VIXL_CHECK(src[k] == dst[k]);
        }

        TEARDOWN();
      }
    }
  }
}


TEST(load_store_tagged_register_postindex) {
  uint64_t src[] = {0x0706050403020100, 0x0f0e0d0c0b0a0908};
  uint64_t tags[] = {0x00, 0x1, 0x55, 0xff};
  int tag_count = sizeof(tags) / sizeof(tags[0]);

  for (int j = 0; j < tag_count; j++) {
    for (int i = 0; i < tag_count; i++) {
      SETUP_WITH_FEATURES(CPUFeatures::kNEON);

      uint64_t src_base = reinterpret_cast<uint64_t>(src);
      uint64_t src_tagged = CPU::SetPointerTag(src_base, tags[i]);
      uint64_t offset_tagged = CPU::SetPointerTag(UINT64_C(0), tags[j]);

      START();
      __ Mov(x10, src_tagged);
      __ Mov(x11, offset_tagged);
      __ Ld1(v0.V16B(), MemOperand(x10, x11, PostIndex));
      // TODO: add other instructions (ld2-4, st1-4) as they become available.
      END();

      RUN();

      ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q0);
      ASSERT_EQUAL_64(src_tagged + offset_tagged, x10);

      TEARDOWN();
    }
  }
}

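// Branch via a tagged code address once for every possible tag value. x1
// counts the jumps that land back on `loop`, so at the end it should equal
// 1 << kAddressTagWidth (one successful branch per tag).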
TEST(branch_tagged) {
  SETUP();
  START();

  Label loop, loop_entry, done;
  __ Adr(x0, &loop);
  __ Mov(x1, 0);
  __ B(&loop_entry);

  __ Bind(&loop);
  __ Add(x1, x1, 1);  // Count successful jumps.

  // Advance to the next tag, then bail out if we've come back around to tag 0.
  __ Add(x0, x0, UINT64_C(1) << kAddressTagOffset);
  __ Tst(x0, kAddressTagMask);
  __ B(eq, &done);

  __ Bind(&loop_entry);
  __ Br(x0);

  __ Bind(&done);

  END();
  RUN();

  ASSERT_EQUAL_64(1 << kAddressTagWidth, x1);

  TEARDOWN();
}


TEST(branch_and_link_tagged) {
  SETUP();
  START();

  Label loop, loop_entry, done;
  __ Adr(x0, &loop);
  __ Mov(x1, 0);
  __ B(&loop_entry);

  __ Bind(&loop);

  // Bail out (before counting a successful jump) if lr appears to be tagged.
  __ Tst(lr, kAddressTagMask);
  __ B(ne, &done);

  __ Add(x1, x1, 1);  // Count successful jumps.

  // Advance to the next tag, then bail out if we've come back around to tag 0.
  __ Add(x0, x0, UINT64_C(1) << kAddressTagOffset);
  __ Tst(x0, kAddressTagMask);
  __ B(eq, &done);

  __ Bind(&loop_entry);
  __ Blr(x0);

  __ Bind(&done);

  END();
  RUN();

  ASSERT_EQUAL_64(1 << kAddressTagWidth, x1);

  TEARDOWN();
}


TEST(branch_tagged_and_adr_adrp) {
  SETUP_CUSTOM(kPageSize, PageOffsetDependentCode);
  START();

  Label loop, loop_entry, done;
  __ Adr(x0, &loop);
  __ Mov(x1, 0);
  __ B(&loop_entry);

  __ Bind(&loop);

  // Bail out (before counting a successful jump) if the result of
  // `adr x10, ...` is tagged.
  __ Adr(x10, &done);
  __ Tst(x10, kAddressTagMask);
  __ B(ne, &done);

  // Bail out (before counting a successful jump) if the result of
  // `adrp x11, ...` is tagged.
  __ Adrp(x11, &done);
  __ Tst(x11, kAddressTagMask);
  __ B(ne, &done);

  __ Add(x1, x1, 1);  // Count successful iterations.

  // Advance to the next tag, then bail out if we've come back around to tag 0.
  __ Add(x0, x0, UINT64_C(1) << kAddressTagOffset);
  __ Tst(x0, kAddressTagMask);
  __ B(eq, &done);

  __ Bind(&loop_entry);
  __ Br(x0);

  __ Bind(&done);

  END();
  RUN_CUSTOM();

  ASSERT_EQUAL_64(1 << kAddressTagWidth, x1);

  TEARDOWN_CUSTOM();
}

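// ADDP adds adjacent pairs of elements: the low half of the destination holds
// the pairwise sums from the first source and the high half the pairwise sums
// from the second, using modulo (non-saturating) arithmetic.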
TEST(neon_3same_addp) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa);
  __ Movi(v1.V2D(), 0x000055aaff55ff00, 0xaa55ff55555500ff);
  __ Addp(v16.V16B(), v0.V16B(), v1.V16B());

  END();

  RUN();
  ASSERT_EQUAL_128(0x00ff54ffff54aaff, 0xffffffffffffffff, q16);
  TEARDOWN();
}

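// SQDMULH computes the high half of 2*a*b with signed saturation; SQRDMULH
// additionally adds a rounding constant before discarding the low half. The
// only input pair that saturates is "most negative times most negative": for
// halfwords, 2 * (-2^15) * (-2^15) = +2^31 does not fit in a 16-bit result,
// so the lane yields 0x7fff, as the expected values below show.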
TEST(neon_3same_sqdmulh_sqrdmulh) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x0000000000000000, 0x0000040004008000);
  __ Movi(v1.V2D(), 0x0000000000000000, 0x0000002000108000);
  __ Movi(v2.V2D(), 0x0400000080000000, 0x0400000080000000);
  __ Movi(v3.V2D(), 0x0000002080000000, 0x0000001080000000);

  __ Sqdmulh(v16.V4H(), v0.V4H(), v1.V4H());
  __ Sqdmulh(v17.V4S(), v2.V4S(), v3.V4S());
  __ Sqdmulh(h18, h0, h1);
  __ Sqdmulh(s19, s2, s3);

  __ Sqrdmulh(v20.V4H(), v0.V4H(), v1.V4H());
  __ Sqrdmulh(v21.V4S(), v2.V4S(), v3.V4S());
  __ Sqrdmulh(h22, h0, h1);
  __ Sqrdmulh(s23, s2, s3);

  END();

  RUN();
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000100007fff, q16);
  ASSERT_EQUAL_128(0x000000017fffffff, 0x000000007fffffff, q17);
  ASSERT_EQUAL_128(0, 0x7fff, q18);
  ASSERT_EQUAL_128(0, 0x7fffffff, q19);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000100017fff, q20);
  ASSERT_EQUAL_128(0x000000017fffffff, 0x000000017fffffff, q21);
  ASSERT_EQUAL_128(0, 0x7fff, q22);
  ASSERT_EQUAL_128(0, 0x7fffffff, q23);
  TEARDOWN();
}

TEST(neon_byelement_sqdmulh_sqrdmulh) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x0000000000000000, 0x0000040004008000);
  __ Movi(v1.V2D(), 0x0000000000000000, 0x0000002000108000);
  __ Movi(v2.V2D(), 0x0400000080000000, 0x0400000080000000);
  __ Movi(v3.V2D(), 0x0000002080000000, 0x0000001080000000);

  __ Sqdmulh(v16.V4H(), v0.V4H(), v1.H(), 1);
  __ Sqdmulh(v17.V4S(), v2.V4S(), v3.S(), 1);
  __ Sqdmulh(h18, h0, v1.H(), 0);
  __ Sqdmulh(s19, s2, v3.S(), 0);

  __ Sqrdmulh(v20.V4H(), v0.V4H(), v1.H(), 1);
  __ Sqrdmulh(v21.V4S(), v2.V4S(), v3.S(), 1);
  __ Sqrdmulh(h22, h0, v1.H(), 0);
  __ Sqrdmulh(s23, s2, v3.S(), 0);

  END();

  RUN();
  ASSERT_EQUAL_128(0x0000000000000000, 0x000000000000fff0, q16);
  ASSERT_EQUAL_128(0x00000000fffffff0, 0x00000000fffffff0, q17);
  ASSERT_EQUAL_128(0, 0x7fff, q18);
  ASSERT_EQUAL_128(0, 0x7fffffff, q19);
  ASSERT_EQUAL_128(0x0000000000000000, 0x000000010001fff0, q20);
  ASSERT_EQUAL_128(0x00000001fffffff0, 0x00000001fffffff0, q21);
  ASSERT_EQUAL_128(0, 0x7fff, q22);
  ASSERT_EQUAL_128(0, 0x7fffffff, q23);
  TEARDOWN();
}

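// SQRDMLAH (from the ARMv8.1 RDM extension) is the accumulating form: the
// rounded doubling product's high half is added to the destination element,
// with saturation on the final addition. The results are only checked under
// the simulator, since kRDM support cannot be assumed on the host.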
TEST(neon_3same_sqrdmlah) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kRDM);

  START();

  __ Movi(v0.V2D(), 0x0000000000000000, 0x0000040004008000);
  __ Movi(v1.V2D(), 0x0000000000000000, 0x0000002000108000);
  __ Movi(v2.V2D(), 0x0400000080000000, 0x0400000080000000);
  __ Movi(v3.V2D(), 0x0000002080000000, 0x0000001080000000);

  __ Movi(v16.V2D(), 0x0000040004008000, 0x0000040004008000);
  __ Movi(v17.V2D(), 0x0000000000000000, 0x0000002000108000);
  __ Movi(v18.V2D(), 0x0400000080000000, 0x0400000080000000);
  __ Movi(v19.V2D(), 0x0000002080000000, 0x0000001080000000);

  __ Sqrdmlah(v16.V4H(), v0.V4H(), v1.V4H());
  __ Sqrdmlah(v17.V4S(), v2.V4S(), v3.V4S());
  __ Sqrdmlah(h18, h0, h1);
  __ Sqrdmlah(s19, s2, s3);

  END();

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
  RUN();
  ASSERT_EQUAL_128(0, 0x0000040104010000, q16);
  ASSERT_EQUAL_128(0x000000017fffffff, 0x000000217fffffff, q17);
  ASSERT_EQUAL_128(0, 0x7fff, q18);
  ASSERT_EQUAL_128(0, 0, q19);
#endif
  TEARDOWN();
}

TEST(neon_byelement_sqrdmlah) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kRDM);

  START();

  __ Movi(v0.V2D(), 0x0000000000000000, 0x0000040004008000);
  __ Movi(v1.V2D(), 0x0000000000000000, 0x0000002000108000);
  __ Movi(v2.V2D(), 0x0400000080000000, 0x0400000080000000);
  __ Movi(v3.V2D(), 0x0000002080000000, 0x0000001080000000);

  __ Movi(v16.V2D(), 0x0000040004008000, 0x0000040004008000);
  __ Movi(v17.V2D(), 0x0000000000000000, 0x0000002000108000);
  __ Movi(v18.V2D(), 0x0400000080000000, 0x0400000080000000);
  __ Movi(v19.V2D(), 0x0000002080000000, 0x0000001080000000);

  __ Sqrdmlah(v16.V4H(), v0.V4H(), v1.H(), 1);
  __ Sqrdmlah(v17.V4S(), v2.V4S(), v3.S(), 1);
  __ Sqrdmlah(h18, h0, v1.H(), 0);
  __ Sqrdmlah(s19, s2, v3.S(), 0);

  END();

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
  RUN();
  ASSERT_EQUAL_128(0, 0x0000040104018000, q16);
  ASSERT_EQUAL_128(0x00000001fffffff0, 0x0000002100107ff0, q17);
  ASSERT_EQUAL_128(0, 0x7fff, q18);
  ASSERT_EQUAL_128(0, 0, q19);
#endif
  TEARDOWN();
}

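// SQRDMLSH is the subtracting counterpart of SQRDMLAH: the rounded doubling
// product's high half is subtracted from the destination, with saturation.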
TEST(neon_3same_sqrdmlsh) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kRDM);

  START();

  __ Movi(v0.V2D(), 0x0000000000000000, 0x0000040004000500);
  __ Movi(v1.V2D(), 0x0000000000000000, 0x0000002000100080);
  __ Movi(v2.V2D(), 0x0400000080000000, 0x0400000080000000);
  __ Movi(v3.V2D(), 0x0000002080000000, 0x0000001080000000);

  __ Movi(v16.V2D(), 0x4000400040004000, 0x4000400040004000);
  __ Movi(v17.V2D(), 0x4000400040004000, 0x4000400040004000);
  __ Movi(v18.V2D(), 0x4000400040004000, 0x4000400040004000);
  __ Movi(v19.V2D(), 0x4000400040004000, 0x4000400040004000);

  __ Sqrdmlsh(v16.V4H(), v0.V4H(), v1.V4H());
  __ Sqrdmlsh(v17.V4S(), v2.V4S(), v3.V4S());
  __ Sqrdmlsh(h18, h0, h1);
  __ Sqrdmlsh(s19, s2, s3);

  END();

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
  RUN();
  ASSERT_EQUAL_128(0, 0x40003fff40003ffb, q16);
  ASSERT_EQUAL_128(0x40003fffc0004000, 0x40004000c0004000, q17);
  ASSERT_EQUAL_128(0, 0x3ffb, q18);
  ASSERT_EQUAL_128(0, 0xc0004000, q19);
#endif
  TEARDOWN();
}

TEST(neon_byelement_sqrdmlsh) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kRDM);

  START();

  __ Movi(v0.V2D(), 0x0000000000000000, 0x0000040004008000);
  __ Movi(v1.V2D(), 0x0000000000000000, 0x0000002000108000);
  __ Movi(v2.V2D(), 0x0400000080000000, 0x0400000080000000);
  __ Movi(v3.V2D(), 0x0000002080000000, 0x0000001080000000);

  __ Movi(v16.V2D(), 0x4000400040004000, 0x4000400040004000);
  __ Movi(v17.V2D(), 0x4000400040004000, 0x4000400040004000);
  __ Movi(v18.V2D(), 0x4000400040004000, 0x4000400040004000);
  __ Movi(v19.V2D(), 0x4000400040004000, 0x4000400040004000);

  __ Sqrdmlsh(v16.V4H(), v0.V4H(), v1.H(), 1);
  __ Sqrdmlsh(v17.V4S(), v2.V4S(), v3.S(), 1);
  __ Sqrdmlsh(h18, h0, v1.H(), 0);
  __ Sqrdmlsh(s19, s2, v3.S(), 0);

  END();

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
  RUN();
  ASSERT_EQUAL_128(0, 0x4000400040004010, q16);
  ASSERT_EQUAL_128(0x4000400040004010, 0x4000400040004010, q17);
  ASSERT_EQUAL_128(0, 0xc000, q18);
  ASSERT_EQUAL_128(0, 0xc0004000, q19);
#endif
  TEARDOWN();
}

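// SDOT/UDOT (kDotProduct) compute byte dot products: each 32-bit lane of the
// destination accumulates the sum of four 8-bit products, signed for SDOT and
// unsigned for UDOT, taken from the corresponding byte groups of the sources.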
TEST(neon_3same_sdot_udot) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kDotProduct);

  START();

  __ Movi(v0.V2D(), 0x7122712271227122, 0x7122712271227122);
  __ Movi(v1.V2D(), 0xe245e245f245f245, 0xe245e245f245f245);
  __ Movi(v2.V2D(), 0x3939393900000000, 0x3939393900000000);

  __ Movi(v16.V2D(), 0x0000400000004000, 0x0000400000004000);
  __ Movi(v17.V2D(), 0x0000400000004000, 0x0000400000004000);
  __ Movi(v18.V2D(), 0x0000400000004000, 0x0000400000004000);
  __ Movi(v19.V2D(), 0x0000400000004000, 0x0000400000004000);

  __ Sdot(v16.V4S(), v0.V16B(), v1.V16B());
  __ Sdot(v17.V2S(), v1.V8B(), v2.V8B());

  __ Udot(v18.V4S(), v0.V16B(), v1.V16B());
  __ Udot(v19.V2S(), v1.V8B(), v2.V8B());

  END();

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
  RUN();
  ASSERT_EQUAL_128(0x000037d8000045f8, 0x000037d8000045f8, q16);
  ASSERT_EQUAL_128(0, 0x0000515e00004000, q17);
  ASSERT_EQUAL_128(0x000119d8000127f8, 0x000119d8000127f8, q18);
  ASSERT_EQUAL_128(0, 0x0000c35e00004000, q19);
#endif
  TEARDOWN();
}

TEST(neon_byelement_sdot_udot) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kDotProduct);

  START();

  __ Movi(v0.V2D(), 0x7122712271227122, 0x7122712271227122);
  __ Movi(v1.V2D(), 0xe245e245f245f245, 0xe245e245f245f245);
  __ Movi(v2.V2D(), 0x3939393900000000, 0x3939393900000000);

  __ Movi(v16.V2D(), 0x0000400000004000, 0x0000400000004000);
  __ Movi(v17.V2D(), 0x0000400000004000, 0x0000400000004000);
  __ Movi(v18.V2D(), 0x0000400000004000, 0x0000400000004000);
  __ Movi(v19.V2D(), 0x0000400000004000, 0x0000400000004000);

  __ Sdot(v16.V4S(), v0.V16B(), v1.S4B(), 1);
  __ Sdot(v17.V2S(), v1.V8B(), v2.S4B(), 1);

  __ Udot(v18.V4S(), v0.V16B(), v1.S4B(), 1);
  __ Udot(v19.V2S(), v1.V8B(), v2.S4B(), 1);

  END();

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
  RUN();
  ASSERT_EQUAL_128(0x000037d8000037d8, 0x000037d8000037d8, q16);
  ASSERT_EQUAL_128(0, 0x0000515e0000587e, q17);
  ASSERT_EQUAL_128(0x000119d8000119d8, 0x000119d8000119d8, q18);
  ASSERT_EQUAL_128(0, 0x0000c35e0000ca7e, q19);
#endif
  TEARDOWN();
}

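// SADDLP/UADDLP add adjacent pairs of elements within a single source vector,
// sign- or zero-extending each pair to twice the element width first, so an
// 8H result is produced from a 16B input, a 4S result from an 8H input, etc.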
TEST(neon_2regmisc_saddlp) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);

  __ Saddlp(v16.V8H(), v0.V16B());
  __ Saddlp(v17.V4H(), v0.V8B());

  __ Saddlp(v18.V4S(), v0.V8H());
  __ Saddlp(v19.V2S(), v0.V4H());

  __ Saddlp(v20.V2D(), v0.V4S());
  __ Saddlp(v21.V1D(), v0.V2S());

  END();

  RUN();
  ASSERT_EQUAL_128(0x0080ffffff010080, 0xff01ffff0080ff01, q16);
  ASSERT_EQUAL_128(0x0000000000000000, 0xff01ffff0080ff01, q17);
  ASSERT_EQUAL_128(0x0000800000000081, 0xffff7f81ffff8200, q18);
  ASSERT_EQUAL_128(0x0000000000000000, 0xffff7f81ffff8200, q19);
  ASSERT_EQUAL_128(0x0000000000818000, 0xffffffff82017f81, q20);
  ASSERT_EQUAL_128(0x0000000000000000, 0xffffffff82017f81, q21);
  TEARDOWN();
}

TEST(neon_2regmisc_uaddlp) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);

  __ Uaddlp(v16.V8H(), v0.V16B());
  __ Uaddlp(v17.V4H(), v0.V8B());

  __ Uaddlp(v18.V4S(), v0.V8H());
  __ Uaddlp(v19.V2S(), v0.V4H());

  __ Uaddlp(v20.V2D(), v0.V4S());
  __ Uaddlp(v21.V1D(), v0.V2S());

  END();

  RUN();
  ASSERT_EQUAL_128(0x008000ff01010080, 0x010100ff00800101, q16);
  ASSERT_EQUAL_128(0x0000000000000000, 0x010100ff00800101, q17);
  ASSERT_EQUAL_128(0x0000800000010081, 0x00017f8100008200, q18);
  ASSERT_EQUAL_128(0x0000000000000000, 0x00017f8100008200, q19);
  ASSERT_EQUAL_128(0x0000000100818000, 0x0000000082017f81, q20);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000082017f81, q21);
  TEARDOWN();
}

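// SADALP/UADALP are the accumulating forms: the widened pairwise sums are
// added to the existing destination lanes, which is why each destination is
// preloaded with a Mov before the instruction below.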
TEST(neon_2regmisc_sadalp) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  __ Mov(v16.V16B(), v1.V16B());
  __ Mov(v17.V16B(), v1.V16B());
  __ Sadalp(v16.V8H(), v0.V16B());
  __ Sadalp(v17.V4H(), v0.V8B());

  __ Mov(v18.V16B(), v2.V16B());
  __ Mov(v19.V16B(), v2.V16B());
  __ Sadalp(v18.V4S(), v1.V8H());
  __ Sadalp(v19.V2S(), v1.V4H());

  __ Mov(v20.V16B(), v3.V16B());
  __ Mov(v21.V16B(), v4.V16B());
  __ Sadalp(v20.V2D(), v2.V4S());
  __ Sadalp(v21.V1D(), v2.V2S());

  END();

  RUN();
  ASSERT_EQUAL_128(0x80808000ff000080, 0xff00ffff00817f00, q16);
  ASSERT_EQUAL_128(0x0000000000000000, 0xff00ffff00817f00, q17);
  ASSERT_EQUAL_128(0x7fff0001fffffffe, 0xffffffff80007fff, q18);
  ASSERT_EQUAL_128(0x0000000000000000, 0xffffffff80007fff, q19);
  ASSERT_EQUAL_128(0x7fffffff80000000, 0x800000007ffffffe, q20);
  ASSERT_EQUAL_128(0x0000000000000000, 0x000000007fffffff, q21);
  TEARDOWN();
}

TEST(neon_2regmisc_uadalp) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  __ Mov(v16.V16B(), v1.V16B());
  __ Mov(v17.V16B(), v1.V16B());
  __ Uadalp(v16.V8H(), v0.V16B());
  __ Uadalp(v17.V4H(), v0.V8B());

  __ Mov(v18.V16B(), v2.V16B());
  __ Mov(v19.V16B(), v2.V16B());
  __ Uadalp(v18.V4S(), v1.V8H());
  __ Uadalp(v19.V2S(), v1.V4H());

  __ Mov(v20.V16B(), v3.V16B());
  __ Mov(v21.V16B(), v4.V16B());
  __ Uadalp(v20.V2D(), v2.V4S());
  __ Uadalp(v21.V1D(), v2.V2S());

  END();

  RUN();
  ASSERT_EQUAL_128(0x8080810001000080, 0x010000ff00818100, q16);
  ASSERT_EQUAL_128(0x0000000000000000, 0x010000ff00818100, q17);
  ASSERT_EQUAL_128(0x800100010000fffe, 0x0000ffff80007fff, q18);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000ffff80007fff, q19);
  ASSERT_EQUAL_128(0x8000000180000000, 0x800000007ffffffe, q20);
  ASSERT_EQUAL_128(0x0000000000000000, 0x000000007fffffff, q21);
  TEARDOWN();
}

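// MUL, MLA and MLS use plain modulo arithmetic per lane: MUL writes the
// product, MLA adds it to the destination and MLS subtracts it.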
TEST(neon_3same_mul) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa);
  __ Movi(v1.V2D(), 0x000055aaff55ff00, 0xaa55ff55555500ff);
  __ Movi(v16.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);
  __ Movi(v17.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);

  __ Mla(v16.V16B(), v0.V16B(), v1.V16B());
  __ Mls(v17.V16B(), v0.V16B(), v1.V16B());
  __ Mul(v18.V16B(), v0.V16B(), v1.V16B());

  END();

  RUN();
  ASSERT_EQUAL_128(0x0102757605b1b208, 0x5f0a61450db90f56, q16);
  ASSERT_EQUAL_128(0x01029192055b5c08, 0xb30ab5d30d630faa, q17);
  ASSERT_EQUAL_128(0x0000727200abab00, 0x5600563900ab0056, q18);
  TEARDOWN();
}

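// SABD/UABD compute per-lane absolute differences, treating the elements as
// signed or unsigned respectively; SABA/UABA accumulate that difference into
// the destination.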
TEST(neon_3same_absdiff) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa);
  __ Movi(v1.V2D(), 0x000055aaff55ff00, 0xaa55ff55555500ff);
  __ Movi(v16.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);
  __ Movi(v17.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);

  __ Saba(v16.V16B(), v0.V16B(), v1.V16B());
  __ Uaba(v17.V16B(), v0.V16B(), v1.V16B());
  __ Sabd(v18.V16B(), v0.V16B(), v1.V16B());
  __ Uabd(v19.V16B(), v0.V16B(), v1.V16B());

  END();

  RUN();
  ASSERT_EQUAL_128(0x0202aeaf065c5d5e, 0x5e5f600c62646455, q16);
  ASSERT_EQUAL_128(0x0002585904b0b1b2, 0x5e5f600c62b86455, q17);
  ASSERT_EQUAL_128(0x0100abab01565656, 0x5555550055565555, q18);
  ASSERT_EQUAL_128(0xff005555ffaaaaaa, 0x5555550055aa5555, q19);
  TEARDOWN();
}

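// The by-element forms multiply every lane of the first source by one
// selected lane of the second: v1.H() with index 0 broadcasts halfword 0 of
// v1, index 7 broadcasts halfword 7, and so on.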
TEST(neon_byelement_mul) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa);
  __ Movi(v1.V2D(), 0x000155aaff55ff00, 0xaa55ff55555500ff);


  __ Mul(v16.V4H(), v0.V4H(), v1.H(), 0);
  __ Mul(v17.V8H(), v0.V8H(), v1.H(), 7);
  __ Mul(v18.V2S(), v0.V2S(), v1.S(), 0);
  __ Mul(v19.V4S(), v0.V4S(), v1.S(), 3);

  __ Movi(v20.V2D(), 0x0000000000000000, 0x0001000200030004);
  __ Movi(v21.V2D(), 0x0005000600070008, 0x0001000200030004);
  __ Mla(v20.V4H(), v0.V4H(), v1.H(), 0);
  __ Mla(v21.V8H(), v0.V8H(), v1.H(), 7);

  __ Movi(v22.V2D(), 0x0000000000000000, 0x0000000200000004);
  __ Movi(v23.V2D(), 0x0000000600000008, 0x0000000200000004);
  __ Mla(v22.V2S(), v0.V2S(), v1.S(), 0);
  __ Mla(v23.V4S(), v0.V4S(), v1.S(), 3);

  __ Movi(v24.V2D(), 0x0000000000000000, 0x0100aaabfe015456);
  __ Movi(v25.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa);
  __ Mls(v24.V4H(), v0.V4H(), v1.H(), 0);
  __ Mls(v25.V8H(), v0.V8H(), v1.H(), 7);

  __ Movi(v26.V2D(), 0x0000000000000000, 0xc8e2aaabe1c85456);
  __ Movi(v27.V2D(), 0x39545572c6aa54e4, 0x39545572c6aa54e4);
  __ Mls(v26.V2S(), v0.V2S(), v1.S(), 0);
  __ Mls(v27.V4S(), v0.V4S(), v1.S(), 3);

  END();

  RUN();
  ASSERT_EQUAL_128(0x0000000000000000, 0x0100aaabfe015456, q16);
  ASSERT_EQUAL_128(0xff00aa5500ff55aa, 0xff00aa5500ff55aa, q17);
  ASSERT_EQUAL_128(0x0000000000000000, 0xc8e2aaabe1c85456, q18);
  ASSERT_EQUAL_128(0x39545572c6aa54e4, 0x39545572c6aa54e4, q19);

  ASSERT_EQUAL_128(0x0000000000000000, 0x0101aaadfe04545a, q20);
  ASSERT_EQUAL_128(0xff05aa5b010655b2, 0xff01aa57010255ae, q21);
  ASSERT_EQUAL_128(0x0000000000000000, 0xc8e2aaade1c8545a, q22);
  ASSERT_EQUAL_128(0x39545578c6aa54ec, 0x39545574c6aa54e8, q23);

  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q24);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q25);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q26);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q27);
  TEARDOWN();
}


TEST(neon_byelement_mull) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0xaa55ff55555500ff, 0xff00aa5500ff55aa);
  __ Movi(v1.V2D(), 0x000155aaff55ff00, 0xaa55ff55555500ff);


  __ Smull(v16.V4S(), v0.V4H(), v1.H(), 7);
  __ Smull2(v17.V4S(), v0.V8H(), v1.H(), 0);
  __ Umull(v18.V4S(), v0.V4H(), v1.H(), 7);
  __ Umull2(v19.V4S(), v0.V8H(), v1.H(), 0);

  __ Movi(v20.V2D(), 0x0000000100000002, 0x0000000200000001);
  __ Movi(v21.V2D(), 0x0000000100000002, 0x0000000200000001);
  __ Movi(v22.V2D(), 0x0000000100000002, 0x0000000200000001);
  __ Movi(v23.V2D(), 0x0000000100000002, 0x0000000200000001);

  __ Smlal(v20.V4S(), v0.V4H(), v1.H(), 7);
  __ Smlal2(v21.V4S(), v0.V8H(), v1.H(), 0);
  __ Umlal(v22.V4S(), v0.V4H(), v1.H(), 7);
  __ Umlal2(v23.V4S(), v0.V8H(), v1.H(), 0);

  __ Movi(v24.V2D(), 0xffffff00ffffaa55, 0x000000ff000055aa);
  __ Movi(v25.V2D(), 0xffaaaaabffff55ab, 0x0054ffab0000fe01);
  __ Movi(v26.V2D(), 0x0000ff000000aa55, 0x000000ff000055aa);
  __ Movi(v27.V2D(), 0x00a9aaab00fe55ab, 0x0054ffab0000fe01);

  __ Smlsl(v24.V4S(), v0.V4H(), v1.H(), 7);
  __ Smlsl2(v25.V4S(), v0.V8H(), v1.H(), 0);
  __ Umlsl(v26.V4S(), v0.V4H(), v1.H(), 7);
  __ Umlsl2(v27.V4S(), v0.V8H(), v1.H(), 0);

  END();

  RUN();

  ASSERT_EQUAL_128(0xffffff00ffffaa55, 0x000000ff000055aa, q16);
  ASSERT_EQUAL_128(0xffaaaaabffff55ab, 0x0054ffab0000fe01, q17);
  ASSERT_EQUAL_128(0x0000ff000000aa55, 0x000000ff000055aa, q18);
  ASSERT_EQUAL_128(0x00a9aaab00fe55ab, 0x0054ffab0000fe01, q19);

  ASSERT_EQUAL_128(0xffffff01ffffaa57, 0x00000101000055ab, q20);
  ASSERT_EQUAL_128(0xffaaaaacffff55ad, 0x0054ffad0000fe02, q21);
  ASSERT_EQUAL_128(0x0000ff010000aa57, 0x00000101000055ab, q22);
  ASSERT_EQUAL_128(0x00a9aaac00fe55ad, 0x0054ffad0000fe02, q23);

  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q24);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q25);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q26);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q27);

  TEARDOWN();
}


TEST(neon_byelement_sqdmull) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0xaa55ff55555500ff, 0xff00aa5500ff55aa);
  __ Movi(v1.V2D(), 0x000155aaff55ff00, 0xaa55ff55555500ff);

  __ Sqdmull(v16.V4S(), v0.V4H(), v1.H(), 7);
  __ Sqdmull2(v17.V4S(), v0.V8H(), v1.H(), 0);
  __ Sqdmull(s18, h0, v1.H(), 7);

  __ Movi(v20.V2D(), 0x0000000100000002, 0x0000000200000001);
  __ Movi(v21.V2D(), 0x0000000100000002, 0x0000000200000001);
  __ Movi(v22.V2D(), 0x0000000100000002, 0x0000000200000001);

  __ Sqdmlal(v20.V4S(), v0.V4H(), v1.H(), 7);
  __ Sqdmlal2(v21.V4S(), v0.V8H(), v1.H(), 0);
  __ Sqdmlal(s22, h0, v1.H(), 7);

  __ Movi(v24.V2D(), 0xfffffe00ffff54aa, 0x000001fe0000ab54);
  __ Movi(v25.V2D(), 0xff555556fffeab56, 0x00a9ff560001fc02);
  __ Movi(v26.V2D(), 0x0000000000000000, 0x000000000000ab54);

  __ Sqdmlsl(v24.V4S(), v0.V4H(), v1.H(), 7);
  __ Sqdmlsl2(v25.V4S(), v0.V8H(), v1.H(), 0);
  __ Sqdmlsl(s26, h0, v1.H(), 7);

  END();

  RUN();

  ASSERT_EQUAL_128(0xfffffe00ffff54aa, 0x000001fe0000ab54, q16);
  ASSERT_EQUAL_128(0xff555556fffeab56, 0x00a9ff560001fc02, q17);
  ASSERT_EQUAL_128(0, 0x0000ab54, q18);

  ASSERT_EQUAL_128(0xfffffe01ffff54ac, 0x000002000000ab55, q20);
  ASSERT_EQUAL_128(0xff555557fffeab58, 0x00a9ff580001fc03, q21);
  ASSERT_EQUAL_128(0, 0x0000ab55, q22);

  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q24);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q25);
  ASSERT_EQUAL_128(0, 0x00000000, q26);

  TEARDOWN();
}

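// SABAL/UABAL widen as they accumulate: each narrow absolute difference is
// extended to twice the width and added to the destination lanes. The "2"
// variants (SABAL2/UABAL2) read their inputs from the upper halves of the
// source vectors.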
TEST(neon_3diff_absdiff) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0xff00aa5500ff55ab, 0xff00aa5500ff55aa);
  __ Movi(v1.V2D(), 0x000055aaff55ff00, 0xaa55ff55555500ff);
  __ Movi(v16.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);
  __ Movi(v17.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);
  __ Movi(v18.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);
  __ Movi(v19.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);

  __ Sabal(v16.V8H(), v0.V8B(), v1.V8B());
  __ Uabal(v17.V8H(), v0.V8B(), v1.V8B());
  __ Sabal2(v18.V8H(), v0.V16B(), v1.V16B());
  __ Uabal2(v19.V8H(), v0.V16B(), v1.V16B());

  END();

  RUN();
  ASSERT_EQUAL_128(0x01570359055b0708, 0x095f0b620d630f55, q16);
  ASSERT_EQUAL_128(0x01570359055b0708, 0x095f0bb60d630f55, q17);
  ASSERT_EQUAL_128(0x0103030405b107b3, 0x090b0b620d640f55, q18);
  ASSERT_EQUAL_128(0x02010304055b075d, 0x0a090bb60db80fab, q19);
  TEARDOWN();
}

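// SQDMULL writes the doubled widening product with signed saturation. As with
// SQDMULH, only "most negative times most negative" saturates: for halfword
// inputs, 2 * (-2^15) * (-2^15) = +2^31 exceeds INT32_MAX, so the lane
// saturates to 0x7fffffff, as in the expected values below. SQDMLAL and
// SQDMLSL then accumulate or subtract that product, saturating again on the
// final addition or subtraction.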
TEST(neon_3diff_sqdmull) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x7fff7fff80008000, 0x80007fff7fff8000);
  __ Movi(v1.V2D(), 0x80007fff7fff8000, 0x7fff7fff80008000);
  __ Movi(v2.V2D(), 0x800000007fffffff, 0x7fffffff80000000);
  __ Movi(v3.V2D(), 0x8000000080000000, 0x8000000080000000);

  __ Sqdmull(v16.V4S(), v0.V4H(), v1.V4H());
  __ Sqdmull2(v17.V4S(), v0.V8H(), v1.V8H());
  __ Sqdmull(v18.V2D(), v2.V2S(), v3.V2S());
  __ Sqdmull2(v19.V2D(), v2.V4S(), v3.V4S());
  __ Sqdmull(s20, h0, h1);
  __ Sqdmull(d21, s2, s3);

  END();

  RUN();
  ASSERT_EQUAL_128(0x800100007ffe0002, 0x800100007fffffff, q16);
  ASSERT_EQUAL_128(0x800100007ffe0002, 0x800100007fffffff, q17);
  ASSERT_EQUAL_128(0x8000000100000000, 0x7fffffffffffffff, q18);
  ASSERT_EQUAL_128(0x7fffffffffffffff, 0x8000000100000000, q19);
  ASSERT_EQUAL_128(0, 0x7fffffff, q20);
  ASSERT_EQUAL_128(0, 0x7fffffffffffffff, q21);
  TEARDOWN();
}


TEST(neon_3diff_sqdmlal) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x7fff7fff80008000, 0x80007fff7fff8000);
  __ Movi(v1.V2D(), 0x80007fff7fff8000, 0x7fff7fff80008000);
  __ Movi(v2.V2D(), 0x800000007fffffff, 0x7fffffff80000000);
  __ Movi(v3.V2D(), 0x8000000080000000, 0x8000000080000000);

  __ Movi(v16.V2D(), 0xffffffff00000001, 0x8fffffff00000001);
  __ Movi(v17.V2D(), 0x00000001ffffffff, 0x00000001ffffffff);
  __ Movi(v18.V2D(), 0x8000000000000001, 0x0000000000000001);
  __ Movi(v19.V2D(), 0xffffffffffffffff, 0x7fffffffffffffff);
  __ Movi(v20.V2D(), 0, 0x00000001);
  __ Movi(v21.V2D(), 0, 0x00000001);

  __ Sqdmlal(v16.V4S(), v0.V4H(), v1.V4H());
  __ Sqdmlal2(v17.V4S(), v0.V8H(), v1.V8H());
  __ Sqdmlal(v18.V2D(), v2.V2S(), v3.V2S());
  __ Sqdmlal2(v19.V2D(), v2.V4S(), v3.V4S());
  __ Sqdmlal(s20, h0, h1);
  __ Sqdmlal(d21, s2, s3);

  END();

  RUN();
  ASSERT_EQUAL_128(0x8000ffff7ffe0003, 0x800000007fffffff, q16);
  ASSERT_EQUAL_128(0x800100017ffe0001, 0x800100017ffffffe, q17);
  ASSERT_EQUAL_128(0x8000000000000000, 0x7fffffffffffffff, q18);
  ASSERT_EQUAL_128(0x7ffffffffffffffe, 0x00000000ffffffff, q19);
  ASSERT_EQUAL_128(0, 0x7fffffff, q20);
  ASSERT_EQUAL_128(0, 0x7fffffffffffffff, q21);
  TEARDOWN();
}


TEST(neon_3diff_sqdmlsl) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x7fff7fff80008000, 0x80007fff7fff8000);
  __ Movi(v1.V2D(), 0x80007fff7fff8000, 0x7fff7fff80008000);
  __ Movi(v2.V2D(), 0x800000007fffffff, 0x7fffffff80000000);
  __ Movi(v3.V2D(), 0x8000000080000000, 0x8000000080000000);

  __ Movi(v16.V2D(), 0xffffffff00000001, 0x7ffffffe80000001);
  __ Movi(v17.V2D(), 0x00000001ffffffff, 0x7ffffffe00000001);
  __ Movi(v18.V2D(), 0x8000000000000001, 0x8000000000000001);
  __ Movi(v19.V2D(), 0xfffffffffffffffe, 0x7fffffffffffffff);
  __ Movi(v20.V2D(), 0, 0x00000001);
  __ Movi(v21.V2D(), 0, 0x00000001);

  __ Sqdmlsl(v16.V4S(), v0.V4H(), v1.V4H());
  __ Sqdmlsl2(v17.V4S(), v0.V8H(), v1.V8H());
  __ Sqdmlsl(v18.V2D(), v2.V2S(), v3.V2S());
  __ Sqdmlsl2(v19.V2D(), v2.V4S(), v3.V4S());
  __ Sqdmlsl(s20, h0, h1);
  __ Sqdmlsl(d21, s2, s3);

  END();

  RUN();
  ASSERT_EQUAL_128(0x7ffeffff8001ffff, 0x7fffffff80000000, q16);
  ASSERT_EQUAL_128(0x7fff00018001fffd, 0x7fffffff80000002, q17);
  ASSERT_EQUAL_128(0xffffffff00000001, 0x8000000000000000, q18);
  ASSERT_EQUAL_128(0x8000000000000000, 0x7fffffffffffffff, q19);
  ASSERT_EQUAL_128(0, 0x80000002, q20);
  ASSERT_EQUAL_128(0, 0x8000000000000002, q21);

  TEARDOWN();
}


TEST(neon_3diff_mla) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0xff00aa5500ff55ab, 0xff00aa5500ff55aa);
  __ Movi(v1.V2D(), 0x000055aaff55ff00, 0xaa55ff55555500ff);
  __ Movi(v16.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);
  __ Movi(v17.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);
  __ Movi(v18.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);
  __ Movi(v19.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);

  __ Smlal(v16.V8H(), v0.V8B(), v1.V8B());
  __ Umlal(v17.V8H(), v0.V8B(), v1.V8B());
  __ Smlal2(v18.V8H(), v0.V16B(), v1.V16B());
  __ Umlal2(v19.V8H(), v0.V16B(), v1.V16B());

  END();

  RUN();
  ASSERT_EQUAL_128(0x01580304055c2341, 0x090a0ab70d0e0f56, q16);
  ASSERT_EQUAL_128(0xaa580304ae5c2341, 0x090a5fb70d0eb856, q17);
  ASSERT_EQUAL_128(0x01020304e878ea7a, 0x090a0ab70cb90f00, q18);
  ASSERT_EQUAL_128(0x010203043d783f7a, 0x090a5fb761b90f00, q19);
  TEARDOWN();
}


TEST(neon_3diff_mls) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0xff00aa5500ff55ab, 0xff00aa5500ff55aa);
  __ Movi(v1.V2D(), 0x000055aaff55ff00, 0xaa55ff55555500ff);
  __ Movi(v16.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);
  __ Movi(v17.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);
  __ Movi(v18.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);
  __ Movi(v19.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);

  __ Smlsl(v16.V8H(), v0.V8B(), v1.V8B());
  __ Umlsl(v17.V8H(), v0.V8B(), v1.V8B());
  __ Smlsl2(v18.V8H(), v0.V16B(), v1.V16B());
  __ Umlsl2(v19.V8H(), v0.V16B(), v1.V16B());

  END();

  RUN();
  ASSERT_EQUAL_128(0x00ac030404b0eacf, 0x090a0b610d0e0eaa, q16);
  ASSERT_EQUAL_128(0x57ac03045bb0eacf, 0x090ab6610d0e65aa, q17);
  ASSERT_EQUAL_128(0x0102030421942396, 0x090a0b610d630f00, q18);
  ASSERT_EQUAL_128(0x01020304cc94ce96, 0x090ab661b8630f00, q19);
  TEARDOWN();
}

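// The integer compares set a lane to all ones when the condition holds and to
// all zeros otherwise. CMEQ tests bitwise equality, CMGE/CMGT compare as
// signed values, and CMHI/CMHS are the unsigned higher/higher-or-same forms.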
TEST(neon_3same_compare) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa);
  __ Movi(v1.V2D(), 0x000055aaff55ff00, 0xaa55ff55555500ff);

  __ Cmeq(v16.V16B(), v0.V16B(), v0.V16B());
  __ Cmeq(v17.V16B(), v0.V16B(), v1.V16B());
  __ Cmge(v18.V16B(), v0.V16B(), v0.V16B());
  __ Cmge(v19.V16B(), v0.V16B(), v1.V16B());
  __ Cmgt(v20.V16B(), v0.V16B(), v0.V16B());
  __ Cmgt(v21.V16B(), v0.V16B(), v1.V16B());
  __ Cmhi(v22.V16B(), v0.V16B(), v0.V16B());
  __ Cmhi(v23.V16B(), v0.V16B(), v1.V16B());
  __ Cmhs(v24.V16B(), v0.V16B(), v0.V16B());
  __ Cmhs(v25.V16B(), v0.V16B(), v1.V16B());

  END();

  RUN();
  ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, q16);
  ASSERT_EQUAL_128(0x00ff000000000000, 0x000000ff00000000, q17);
  ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, q18);
  ASSERT_EQUAL_128(0x00ff00ffff00ff00, 0xff0000ff0000ff00, q19);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q20);
  ASSERT_EQUAL_128(0x000000ffff00ff00, 0xff0000000000ff00, q21);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q22);
  ASSERT_EQUAL_128(0xff00ff0000ff00ff, 0xff00000000ffff00, q23);
  ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, q24);
  ASSERT_EQUAL_128(0xffffff0000ff00ff, 0xff0000ff00ffff00, q25);
  TEARDOWN();
}


TEST(neon_3same_scalar_compare) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa);
  __ Movi(v1.V2D(), 0x000055aaff55ff00, 0xaa55ff55555500ff);

  __ Cmeq(d16, d0, d0);
  __ Cmeq(d17, d0, d1);
  __ Cmeq(d18, d1, d0);
  __ Cmge(d19, d0, d0);
  __ Cmge(d20, d0, d1);
  __ Cmge(d21, d1, d0);
  __ Cmgt(d22, d0, d0);
  __ Cmgt(d23, d0, d1);
  __ Cmhi(d24, d0, d0);
  __ Cmhi(d25, d0, d1);
  __ Cmhs(d26, d0, d0);
  __ Cmhs(d27, d0, d1);
  __ Cmhs(d28, d1, d0);

  END();

  RUN();

  ASSERT_EQUAL_128(0, 0xffffffffffffffff, q16);
  ASSERT_EQUAL_128(0, 0x0000000000000000, q17);
  ASSERT_EQUAL_128(0, 0x0000000000000000, q18);
  ASSERT_EQUAL_128(0, 0xffffffffffffffff, q19);
  ASSERT_EQUAL_128(0, 0xffffffffffffffff, q20);
  ASSERT_EQUAL_128(0, 0x0000000000000000, q21);
  ASSERT_EQUAL_128(0, 0x0000000000000000, q22);
  ASSERT_EQUAL_128(0, 0xffffffffffffffff, q23);
  ASSERT_EQUAL_128(0, 0x0000000000000000, q24);
  ASSERT_EQUAL_128(0, 0xffffffffffffffff, q25);
  ASSERT_EQUAL_128(0, 0xffffffffffffffff, q26);
  ASSERT_EQUAL_128(0, 0xffffffffffffffff, q27);
  ASSERT_EQUAL_128(0, 0x0000000000000000, q28);

  TEARDOWN();
}

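// The half-precision FP compares follow IEEE 754: any comparison against the
// NaN pattern (0xffff) is false, so those lanes stay zero. FACGE/FACGT
// compare absolute values, which is why |-1.0| >= |0.0| holds for them even
// though -1.0 >= 0.0 does not.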
TEST(neon_fcmeq_h)20750 TEST(neon_fcmeq_h) {
20751   SETUP_WITH_FEATURES(CPUFeatures::kNEON,
20752                       CPUFeatures::kFP,
20753                       CPUFeatures::kNEONHalf);
20754 
20755   START();
20756 
20757   __ Movi(v0.V2D(), 0x0000000000000000, 0x0000000000000000);  // 0.
20758   __ Movi(v1.V2D(), 0xffffffffffffffff, 0xffffffffffffffff);  // NaN.
20759   __ Movi(v2.V2D(), 0xbc00bc00bc00bc00, 0xbc00bc00bc00bc00);  // -1.0.
20760   __ Movi(v3.V2D(), 0x3c003c003c003c00, 0x3c003c003c003c00);  // 1.0.
20761 
20762   __ Fcmeq(v4.V8H(), v0.V8H(), v0.V8H());
20763   __ Fcmeq(v5.V8H(), v1.V8H(), v0.V8H());
20764   __ Fcmeq(v6.V8H(), v2.V8H(), v0.V8H());
20765   __ Fcmeq(v7.V8H(), v3.V8H(), v0.V8H());
20766   __ Fcmeq(v8.V4H(), v0.V4H(), v0.V4H());
20767   __ Fcmeq(v9.V4H(), v1.V4H(), v0.V4H());
20768   __ Fcmeq(v10.V4H(), v2.V4H(), v0.V4H());
20769   __ Fcmeq(v11.V4H(), v3.V4H(), v0.V4H());
20770 
20771   END();
20772 
20773 #ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
20774   RUN();
20775 
20776   ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, v4);
20777   ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, v5);
20778   ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, v6);
20779   ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, v7);
20780   ASSERT_EQUAL_128(0, 0xffffffffffffffff, v8);
20781   ASSERT_EQUAL_128(0, 0x0000000000000000, v9);
20782   ASSERT_EQUAL_128(0, 0x0000000000000000, v10);
20783   ASSERT_EQUAL_128(0, 0x0000000000000000, v11);
20784 #endif  // VIXL_INCLUDE_SIMULATOR_AARCH64
20785 
20786   TEARDOWN();
20787 }
20788 
TEST(neon_fcmeq_h_scalar)20789 TEST(neon_fcmeq_h_scalar) {
20790   SETUP_WITH_FEATURES(CPUFeatures::kNEON,
20791                       CPUFeatures::kFP,
20792                       CPUFeatures::kNEONHalf,
20793                       CPUFeatures::kFPHalf);
20794 
20795   START();
20796 
20797   __ Fmov(h0, Float16(0.0));
20798   __ Fmov(h1, RawbitsToFloat16(0xffff));
20799   __ Fmov(h2, Float16(-1.0));
20800   __ Fmov(h3, Float16(1.0));
20801   __ Fcmeq(h4, h0, h0);
20802   __ Fcmeq(h5, h1, h0);
20803   __ Fcmeq(h6, h2, h0);
20804   __ Fcmeq(h7, h3, h0);
20805 
20806   END();
20807 
20808 #ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
20809   RUN();
20810 
20811   ASSERT_EQUAL_FP16(RawbitsToFloat16(0xffff), h4);
20812   ASSERT_EQUAL_FP16(RawbitsToFloat16(0x0000), h5);
20813   ASSERT_EQUAL_FP16(RawbitsToFloat16(0x0000), h6);
20814   ASSERT_EQUAL_FP16(RawbitsToFloat16(0x0000), h7);
20815 #endif  // VIXL_INCLUDE_SIMULATOR_AARCH64
20816 
20817   TEARDOWN();
20818 }
20819 
TEST(neon_fcmge_h)20820 TEST(neon_fcmge_h) {
20821   SETUP_WITH_FEATURES(CPUFeatures::kNEON,
20822                       CPUFeatures::kFP,
20823                       CPUFeatures::kNEONHalf);
20824 
20825   START();
20826 
20827   __ Movi(v0.V2D(), 0x0000000000000000, 0x0000000000000000);  // 0.
20828   __ Movi(v1.V2D(), 0xffffffffffffffff, 0xffffffffffffffff);  // NaN.
20829   __ Movi(v2.V2D(), 0xbc00bc00bc00bc00, 0xbc00bc00bc00bc00);  // -1.0.
20830   __ Movi(v3.V2D(), 0x3c003c003c003c00, 0x3c003c003c003c00);  // 1.0.
20831 
20832   __ Fcmge(v4.V8H(), v0.V8H(), v0.V8H());
20833   __ Fcmge(v5.V8H(), v1.V8H(), v0.V8H());
20834   __ Fcmge(v6.V8H(), v2.V8H(), v0.V8H());
20835   __ Fcmge(v7.V8H(), v3.V8H(), v0.V8H());
20836   __ Fcmge(v8.V4H(), v0.V4H(), v0.V4H());
20837   __ Fcmge(v9.V4H(), v1.V4H(), v0.V4H());
20838   __ Fcmge(v10.V4H(), v2.V4H(), v0.V4H());
20839   __ Fcmge(v11.V4H(), v3.V4H(), v0.V4H());
20840 
20841   END();
20842 
20843 #ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
20844   RUN();
20845 
20846   ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, v4);
20847   ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, v5);
20848   ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, v6);
20849   ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, v7);
20850   ASSERT_EQUAL_128(0, 0xffffffffffffffff, v8);
20851   ASSERT_EQUAL_128(0, 0x0000000000000000, v9);
20852   ASSERT_EQUAL_128(0, 0x0000000000000000, v10);
20853   ASSERT_EQUAL_128(0, 0xffffffffffffffff, v11);
20854 #endif  // VIXL_INCLUDE_SIMULATOR_AARCH64
20855 
20856   TEARDOWN();
20857 }
20858 
TEST(neon_fcmge_h_scalar)20859 TEST(neon_fcmge_h_scalar) {
20860   SETUP_WITH_FEATURES(CPUFeatures::kNEON,
20861                       CPUFeatures::kFP,
20862                       CPUFeatures::kNEONHalf,
20863                       CPUFeatures::kFPHalf);
20864 
20865   START();
20866 
20867   __ Fmov(h0, Float16(0.0));
20868   __ Fmov(h1, RawbitsToFloat16(0xffff));
20869   __ Fmov(h2, Float16(-1.0));
20870   __ Fmov(h3, Float16(1.0));
20871   __ Fcmge(h4, h0, h0);
20872   __ Fcmge(h5, h1, h0);
20873   __ Fcmge(h6, h2, h0);
20874   __ Fcmge(h7, h3, h0);
20875 
20876   END();
20877 
20878 #ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
20879   RUN();
20880 
20881   ASSERT_EQUAL_FP16(RawbitsToFloat16(0xffff), h4);
20882   ASSERT_EQUAL_FP16(RawbitsToFloat16(0x0000), h5);
20883   ASSERT_EQUAL_FP16(RawbitsToFloat16(0x0000), h6);
20884   ASSERT_EQUAL_FP16(RawbitsToFloat16(0xffff), h7);
20885 #endif  // VIXL_INCLUDE_SIMULATOR_AARCH64
20886 
20887   TEARDOWN();
20888 }
20889 
TEST(neon_fcmgt_h)20890 TEST(neon_fcmgt_h) {
20891   SETUP_WITH_FEATURES(CPUFeatures::kNEON,
20892                       CPUFeatures::kFP,
20893                       CPUFeatures::kNEONHalf);
20894 
20895   START();
20896 
20897   __ Movi(v0.V2D(), 0x0000000000000000, 0x0000000000000000);  // 0.
20898   __ Movi(v1.V2D(), 0xffffffffffffffff, 0xffffffffffffffff);  // NaN.
20899   __ Movi(v2.V2D(), 0xbc00bc00bc00bc00, 0xbc00bc00bc00bc00);  // -1.0.
20900   __ Movi(v3.V2D(), 0x3c003c003c003c00, 0x3c003c003c003c00);  // 1.0.
20901 
20902   __ Fcmgt(v4.V8H(), v0.V8H(), v0.V8H());
20903   __ Fcmgt(v5.V8H(), v1.V8H(), v0.V8H());
20904   __ Fcmgt(v6.V8H(), v2.V8H(), v0.V8H());
20905   __ Fcmgt(v7.V8H(), v3.V8H(), v0.V8H());
20906   __ Fcmgt(v8.V4H(), v0.V4H(), v0.V4H());
20907   __ Fcmgt(v9.V4H(), v1.V4H(), v0.V4H());
20908   __ Fcmgt(v10.V4H(), v2.V4H(), v0.V4H());
20909   __ Fcmgt(v11.V4H(), v3.V4H(), v0.V4H());
20910 
20911   END();
20912 
20913 #ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
20914   RUN();
20915 
20916   ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, v4);
20917   ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, v5);
20918   ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, v6);
20919   ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, v7);
20920   ASSERT_EQUAL_128(0, 0x0000000000000000, v8);
20921   ASSERT_EQUAL_128(0, 0x0000000000000000, v9);
20922   ASSERT_EQUAL_128(0, 0x0000000000000000, v10);
20923   ASSERT_EQUAL_128(0, 0xffffffffffffffff, v11);
20924 #endif  // VIXL_INCLUDE_SIMULATOR_AARCH64
20925 
20926   TEARDOWN();
20927 }
20928 
TEST(neon_fcmgt_h_scalar)20929 TEST(neon_fcmgt_h_scalar) {
20930   SETUP_WITH_FEATURES(CPUFeatures::kNEON,
20931                       CPUFeatures::kFP,
20932                       CPUFeatures::kNEONHalf,
20933                       CPUFeatures::kFPHalf);
20934 
20935   START();
20936 
20937   __ Fmov(h0, Float16(0.0));
20938   __ Fmov(h1, RawbitsToFloat16(0xffff));
20939   __ Fmov(h2, Float16(-1.0));
20940   __ Fmov(h3, Float16(1.0));
20941   __ Fcmgt(h4, h0, h0);
20942   __ Fcmgt(h5, h1, h0);
20943   __ Fcmgt(h6, h2, h0);
20944   __ Fcmgt(h7, h3, h0);
20945 
20946   END();
20947 
20948 #ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
20949   RUN();
20950 
20951   ASSERT_EQUAL_FP16(RawbitsToFloat16(0x0000), h4);
20952   ASSERT_EQUAL_FP16(RawbitsToFloat16(0x0000), h5);
20953   ASSERT_EQUAL_FP16(RawbitsToFloat16(0x0000), h6);
20954   ASSERT_EQUAL_FP16(RawbitsToFloat16(0xffff), h7);
20955 #endif  // VIXL_INCLUDE_SIMULATOR_AARCH64
20956 
20957   TEARDOWN();
20958 }
20959 
TEST(neon_facge_h)20960 TEST(neon_facge_h) {
20961   SETUP_WITH_FEATURES(CPUFeatures::kNEON,
20962                       CPUFeatures::kFP,
20963                       CPUFeatures::kNEONHalf);
20964 
20965   START();
20966 
20967   __ Movi(v0.V2D(), 0x0000000000000000, 0x0000000000000000);  // 0.
20968   __ Movi(v1.V2D(), 0xffffffffffffffff, 0xffffffffffffffff);  // NaN.
20969   __ Movi(v2.V2D(), 0xbc00bc00bc00bc00, 0xbc00bc00bc00bc00);  // -1.0.
20970   __ Movi(v3.V2D(), 0x3c003c003c003c00, 0x3c003c003c003c00);  // 1.0.
20971 
20972   __ Facge(v4.V8H(), v0.V8H(), v0.V8H());
20973   __ Facge(v5.V8H(), v1.V8H(), v0.V8H());
20974   __ Facge(v6.V8H(), v2.V8H(), v0.V8H());
20975   __ Facge(v7.V8H(), v3.V8H(), v0.V8H());
20976   __ Facge(v8.V4H(), v0.V4H(), v0.V4H());
20977   __ Facge(v9.V4H(), v1.V4H(), v0.V4H());
20978   __ Facge(v10.V4H(), v2.V4H(), v0.V4H());
20979   __ Facge(v11.V4H(), v3.V4H(), v0.V4H());
20980 
20981   END();
20982 
20983 #ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
20984   RUN();
20985 
20986   ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, v4);
20987   ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, v5);
20988   ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, v6);
20989   ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, v7);
20990   ASSERT_EQUAL_128(0, 0xffffffffffffffff, v8);
20991   ASSERT_EQUAL_128(0, 0x0000000000000000, v9);
20992   ASSERT_EQUAL_128(0, 0xffffffffffffffff, v10);
20993   ASSERT_EQUAL_128(0, 0xffffffffffffffff, v11);
20994 #endif  // VIXL_INCLUDE_SIMULATOR_AARCH64
20995 
20996   TEARDOWN();
20997 }
20998 
TEST(neon_facge_h_scalar)20999 TEST(neon_facge_h_scalar) {
21000   SETUP_WITH_FEATURES(CPUFeatures::kNEON,
21001                       CPUFeatures::kFP,
21002                       CPUFeatures::kNEONHalf,
21003                       CPUFeatures::kFPHalf);
21004 
21005   START();
21006 
21007   __ Fmov(h0, Float16(0.0));
21008   __ Fmov(h1, RawbitsToFloat16(0xffff));
21009   __ Fmov(h2, Float16(-1.0));
21010   __ Fmov(h3, Float16(1.0));
21011   __ Facge(h4, h0, h0);
21012   __ Facge(h5, h1, h0);
21013   __ Facge(h6, h2, h0);
21014   __ Facge(h7, h3, h0);
21015 
21016   END();
21017 
21018 #ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
21019   RUN();
21020 
21021   ASSERT_EQUAL_FP16(RawbitsToFloat16(0xffff), h4);
21022   ASSERT_EQUAL_FP16(RawbitsToFloat16(0x0000), h5);
21023   ASSERT_EQUAL_FP16(RawbitsToFloat16(0xffff), h6);
21024   ASSERT_EQUAL_FP16(RawbitsToFloat16(0xffff), h7);
21025 #endif  // VIXL_INCLUDE_SIMULATOR_AARCH64
21026 
21027   TEARDOWN();
21028 }
21029 
TEST(neon_facgt_h)21030 TEST(neon_facgt_h) {
21031   SETUP_WITH_FEATURES(CPUFeatures::kNEON,
21032                       CPUFeatures::kFP,
21033                       CPUFeatures::kNEONHalf);
21034 
21035   START();
21036 
21037   __ Movi(v0.V2D(), 0x0000000000000000, 0x0000000000000000);  // 0.
21038   __ Movi(v1.V2D(), 0xffffffffffffffff, 0xffffffffffffffff);  // NaN.
21039   __ Movi(v2.V2D(), 0xbc00bc00bc00bc00, 0xbc00bc00bc00bc00);  // -1.0.
21040   __ Movi(v3.V2D(), 0x3c003c003c003c00, 0x3c003c003c003c00);  // 1.0.
21041 
21042   __ Facgt(v4.V8H(), v0.V8H(), v0.V8H());
21043   __ Facgt(v5.V8H(), v1.V8H(), v0.V8H());
21044   __ Facgt(v6.V8H(), v2.V8H(), v0.V8H());
21045   __ Facgt(v7.V8H(), v3.V8H(), v0.V8H());
21046   __ Facgt(v8.V4H(), v0.V4H(), v0.V4H());
21047   __ Facgt(v9.V4H(), v1.V4H(), v0.V4H());
21048   __ Facgt(v10.V4H(), v2.V4H(), v0.V4H());
21049   __ Facgt(v11.V4H(), v3.V4H(), v0.V4H());
21050 
21051   END();
21052 
21053 #ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
21054   RUN();
21055 
21056   ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, v4);
21057   ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, v5);
21058   ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, v6);
21059   ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, v7);
21060   ASSERT_EQUAL_128(0, 0x0000000000000000, v8);
21061   ASSERT_EQUAL_128(0, 0x0000000000000000, v9);
21062   ASSERT_EQUAL_128(0, 0xffffffffffffffff, v10);
21063   ASSERT_EQUAL_128(0, 0xffffffffffffffff, v11);
21064 #endif  // VIXL_INCLUDE_SIMULATOR_AARCH64
21065 
21066   TEARDOWN();
21067 }
21068 
TEST(neon_facgt_h_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf,
                      CPUFeatures::kFPHalf);

  START();

  __ Fmov(h0, Float16(0.0));
  __ Fmov(h1, RawbitsToFloat16(0xffff));
  __ Fmov(h2, Float16(-1.0));
  __ Fmov(h3, Float16(1.0));
  __ Facgt(h4, h0, h0);
  __ Facgt(h5, h1, h0);
  __ Facgt(h6, h2, h0);
  __ Facgt(h7, h3, h0);

  END();

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
  RUN();

  ASSERT_EQUAL_FP16(RawbitsToFloat16(0x0000), h4);
  ASSERT_EQUAL_FP16(RawbitsToFloat16(0x0000), h5);
  ASSERT_EQUAL_FP16(RawbitsToFloat16(0xffff), h6);
  ASSERT_EQUAL_FP16(RawbitsToFloat16(0xffff), h7);
#endif  // VIXL_INCLUDE_SIMULATOR_AARCH64

  TEARDOWN();
}

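// The FCMEQ/FCMGE/FCMGT/FCMLE/FCMLT tests below use the compare-with-zero
// forms, which test each lane against 0.0 and write a per-lane all-ones or
// all-zeros mask. IEEE 754 comparisons involving NaN are always false, so
// the all-ones (NaN) inputs produce zero masks for every predicate.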
TEST(neon_2regmisc_fcmeq) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);

  START();

  __ Movi(v0.V2D(), 0x0000000000000000, 0x0000000000000000);  // Zero.
  __ Movi(v1.V2D(), 0xffffffffffffffff, 0xffffffffffffffff);  // NaN.
  __ Movi(v2.V2D(), 0xbf800000bf800000, 0xbf800000bf800000);  // < 0.
  __ Movi(v3.V2D(), 0x3f8000003f800000, 0x3f8000003f800000);  // > 0.

  __ Fcmeq(s16, s0, 0.0);
  __ Fcmeq(s17, s1, 0.0);
  __ Fcmeq(s18, s2, 0.0);
  __ Fcmeq(d19, d0, 0.0);
  __ Fcmeq(d20, d1, 0.0);
  __ Fcmeq(d21, d2, 0.0);
  __ Fcmeq(v22.V2S(), v0.V2S(), 0.0);
  __ Fcmeq(v23.V4S(), v1.V4S(), 0.0);
  __ Fcmeq(v24.V2D(), v1.V2D(), 0.0);
  __ Fcmeq(v25.V2D(), v2.V2D(), 0.0);

  END();

  RUN();
  ASSERT_EQUAL_128(0, 0xffffffff, q16);
  ASSERT_EQUAL_128(0, 0x00000000, q17);
  ASSERT_EQUAL_128(0, 0x00000000, q18);
  ASSERT_EQUAL_128(0, 0xffffffffffffffff, q19);
  ASSERT_EQUAL_128(0, 0x0000000000000000, q20);
  ASSERT_EQUAL_128(0, 0x0000000000000000, q21);
  ASSERT_EQUAL_128(0, 0xffffffffffffffff, q22);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q23);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q24);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q25);
  TEARDOWN();
}

TEST(neon_2regmisc_fcmge) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);

  START();

  __ Movi(v0.V2D(), 0x0000000000000000, 0x0000000000000000);  // Zero.
  __ Movi(v1.V2D(), 0xffffffffffffffff, 0xffffffffffffffff);  // NaN.
  __ Movi(v2.V2D(), 0xbf800000bf800000, 0xbf800000bf800000);  // < 0.
  __ Movi(v3.V2D(), 0x3f8000003f800000, 0x3f8000003f800000);  // > 0.

  __ Fcmge(s16, s0, 0.0);
  __ Fcmge(s17, s1, 0.0);
  __ Fcmge(s18, s2, 0.0);
  __ Fcmge(d19, d0, 0.0);
  __ Fcmge(d20, d1, 0.0);
  __ Fcmge(d21, d3, 0.0);
  __ Fcmge(v22.V2S(), v0.V2S(), 0.0);
  __ Fcmge(v23.V4S(), v1.V4S(), 0.0);
  __ Fcmge(v24.V2D(), v1.V2D(), 0.0);
  __ Fcmge(v25.V2D(), v3.V2D(), 0.0);

  END();

  RUN();
  ASSERT_EQUAL_128(0, 0xffffffff, q16);
  ASSERT_EQUAL_128(0, 0x00000000, q17);
  ASSERT_EQUAL_128(0, 0x00000000, q18);
  ASSERT_EQUAL_128(0, 0xffffffffffffffff, q19);
  ASSERT_EQUAL_128(0, 0x0000000000000000, q20);
  ASSERT_EQUAL_128(0, 0xffffffffffffffff, q21);
  ASSERT_EQUAL_128(0, 0xffffffffffffffff, q22);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q23);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q24);
  ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, q25);
  TEARDOWN();
}


TEST(neon_2regmisc_fcmgt) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);

  START();

  __ Movi(v0.V2D(), 0x0000000000000000, 0x0000000000000000);  // Zero.
  __ Movi(v1.V2D(), 0xffffffffffffffff, 0xffffffffffffffff);  // NaN.
  __ Movi(v2.V2D(), 0xbf800000bf800000, 0xbf800000bf800000);  // < 0.
  __ Movi(v3.V2D(), 0x3f8000003f800000, 0x3f8000003f800000);  // > 0.

  __ Fcmgt(s16, s0, 0.0);
  __ Fcmgt(s17, s1, 0.0);
  __ Fcmgt(s18, s2, 0.0);
  __ Fcmgt(d19, d0, 0.0);
  __ Fcmgt(d20, d1, 0.0);
  __ Fcmgt(d21, d3, 0.0);
  __ Fcmgt(v22.V2S(), v0.V2S(), 0.0);
  __ Fcmgt(v23.V4S(), v1.V4S(), 0.0);
  __ Fcmgt(v24.V2D(), v1.V2D(), 0.0);
  __ Fcmgt(v25.V2D(), v3.V2D(), 0.0);

  END();

  RUN();
  ASSERT_EQUAL_128(0, 0x00000000, q16);
  ASSERT_EQUAL_128(0, 0x00000000, q17);
  ASSERT_EQUAL_128(0, 0x00000000, q18);
  ASSERT_EQUAL_128(0, 0x0000000000000000, q19);
  ASSERT_EQUAL_128(0, 0x0000000000000000, q20);
  ASSERT_EQUAL_128(0, 0xffffffffffffffff, q21);
  ASSERT_EQUAL_128(0, 0x0000000000000000, q22);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q23);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q24);
  ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, q25);
  TEARDOWN();
}

TEST(neon_2regmisc_fcmle) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);

  START();

  __ Movi(v0.V2D(), 0x0000000000000000, 0x0000000000000000);  // Zero.
  __ Movi(v1.V2D(), 0xffffffffffffffff, 0xffffffffffffffff);  // NaN.
  __ Movi(v2.V2D(), 0xbf800000bf800000, 0xbf800000bf800000);  // < 0.
  __ Movi(v3.V2D(), 0x3f8000003f800000, 0x3f8000003f800000);  // > 0.

  __ Fcmle(s16, s0, 0.0);
  __ Fcmle(s17, s1, 0.0);
  __ Fcmle(s18, s3, 0.0);
  __ Fcmle(d19, d0, 0.0);
  __ Fcmle(d20, d1, 0.0);
  __ Fcmle(d21, d2, 0.0);
  __ Fcmle(v22.V2S(), v0.V2S(), 0.0);
  __ Fcmle(v23.V4S(), v1.V4S(), 0.0);
  __ Fcmle(v24.V2D(), v1.V2D(), 0.0);
  __ Fcmle(v25.V2D(), v2.V2D(), 0.0);

  END();

  RUN();
  ASSERT_EQUAL_128(0, 0xffffffff, q16);
  ASSERT_EQUAL_128(0, 0x00000000, q17);
  ASSERT_EQUAL_128(0, 0x00000000, q18);
  ASSERT_EQUAL_128(0, 0xffffffffffffffff, q19);
  ASSERT_EQUAL_128(0, 0x0000000000000000, q20);
  ASSERT_EQUAL_128(0, 0xffffffffffffffff, q21);
  ASSERT_EQUAL_128(0, 0xffffffffffffffff, q22);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q23);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q24);
  ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, q25);
  TEARDOWN();
}


TEST(neon_2regmisc_fcmlt) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);

  START();

  __ Movi(v0.V2D(), 0x0000000000000000, 0x0000000000000000);  // Zero.
  __ Movi(v1.V2D(), 0xffffffffffffffff, 0xffffffffffffffff);  // NaN.
  __ Movi(v2.V2D(), 0xbf800000bf800000, 0xbf800000bf800000);  // < 0.
  __ Movi(v3.V2D(), 0x3f8000003f800000, 0x3f8000003f800000);  // > 0.

  __ Fcmlt(s16, s0, 0.0);
  __ Fcmlt(s17, s1, 0.0);
  __ Fcmlt(s18, s3, 0.0);
  __ Fcmlt(d19, d0, 0.0);
  __ Fcmlt(d20, d1, 0.0);
  __ Fcmlt(d21, d2, 0.0);
  __ Fcmlt(v22.V2S(), v0.V2S(), 0.0);
  __ Fcmlt(v23.V4S(), v1.V4S(), 0.0);
  __ Fcmlt(v24.V2D(), v1.V2D(), 0.0);
  __ Fcmlt(v25.V2D(), v2.V2D(), 0.0);

  END();

  RUN();
  ASSERT_EQUAL_128(0, 0x00000000, q16);
  ASSERT_EQUAL_128(0, 0x00000000, q17);
  ASSERT_EQUAL_128(0, 0x00000000, q18);
  ASSERT_EQUAL_128(0, 0x0000000000000000, q19);
  ASSERT_EQUAL_128(0, 0x0000000000000000, q20);
  ASSERT_EQUAL_128(0, 0xffffffffffffffff, q21);
  ASSERT_EQUAL_128(0, 0x0000000000000000, q22);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q23);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q24);
  ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, q25);
  TEARDOWN();
}

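// The integer counterparts CMEQ/CMGE/CMGT/CMLE/CMLT with a zero operand
// perform signed comparisons against zero, producing the same style of
// per-lane all-ones/all-zeros masks for each element size.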
TEST(neon_2regmisc_cmeq) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x0001000200030004, 0x0000000000000000);
  __ Movi(v1.V2D(), 0x000055aaff55ff00, 0x0000ff55555500ff);

  __ Cmeq(v16.V8B(), v1.V8B(), 0);
  __ Cmeq(v17.V16B(), v1.V16B(), 0);
  __ Cmeq(v18.V4H(), v1.V4H(), 0);
  __ Cmeq(v19.V8H(), v1.V8H(), 0);
  __ Cmeq(v20.V2S(), v0.V2S(), 0);
  __ Cmeq(v21.V4S(), v0.V4S(), 0);
  __ Cmeq(d22, d0, 0);
  __ Cmeq(d23, d1, 0);
  __ Cmeq(v24.V2D(), v0.V2D(), 0);

  END();

  RUN();
  ASSERT_EQUAL_128(0x0000000000000000, 0xffff00000000ff00, q16);
  ASSERT_EQUAL_128(0xffff0000000000ff, 0xffff00000000ff00, q17);
  ASSERT_EQUAL_128(0x0000000000000000, 0xffff000000000000, q18);
  ASSERT_EQUAL_128(0xffff000000000000, 0xffff000000000000, q19);
  ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q20);
  ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q21);
  ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q22);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q23);
  ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q24);
  TEARDOWN();
}


TEST(neon_2regmisc_cmge) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0xff01000200030004, 0x0000000000000000);
  __ Movi(v1.V2D(), 0x000055aaff55ff00, 0x0000ff55555500ff);

  __ Cmge(v16.V8B(), v1.V8B(), 0);
  __ Cmge(v17.V16B(), v1.V16B(), 0);
  __ Cmge(v18.V4H(), v1.V4H(), 0);
  __ Cmge(v19.V8H(), v1.V8H(), 0);
  __ Cmge(v20.V2S(), v0.V2S(), 0);
  __ Cmge(v21.V4S(), v0.V4S(), 0);
  __ Cmge(d22, d0, 0);
  __ Cmge(d23, d1, 0);
  __ Cmge(v24.V2D(), v0.V2D(), 0);

  END();

  RUN();
  ASSERT_EQUAL_128(0x0000000000000000, 0xffff00ffffffff00, q16);
  ASSERT_EQUAL_128(0xffffff0000ff00ff, 0xffff00ffffffff00, q17);
  ASSERT_EQUAL_128(0x0000000000000000, 0xffff0000ffffffff, q18);
  ASSERT_EQUAL_128(0xffffffff00000000, 0xffff0000ffffffff, q19);
  ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q20);
  ASSERT_EQUAL_128(0x00000000ffffffff, 0xffffffffffffffff, q21);
  ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q22);
  ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q23);
  ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q24);
  TEARDOWN();
}


TEST(neon_2regmisc_cmlt) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x0001000200030004, 0xff00000000000000);
  __ Movi(v1.V2D(), 0x000055aaff55ff00, 0x0000ff55555500ff);

  __ Cmlt(v16.V8B(), v1.V8B(), 0);
  __ Cmlt(v17.V16B(), v1.V16B(), 0);
  __ Cmlt(v18.V4H(), v1.V4H(), 0);
  __ Cmlt(v19.V8H(), v1.V8H(), 0);
  __ Cmlt(v20.V2S(), v1.V2S(), 0);
  __ Cmlt(v21.V4S(), v1.V4S(), 0);
  __ Cmlt(d22, d0, 0);
  __ Cmlt(d23, d1, 0);
  __ Cmlt(v24.V2D(), v0.V2D(), 0);

  END();

  RUN();
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000ff00000000ff, q16);
  ASSERT_EQUAL_128(0x000000ffff00ff00, 0x0000ff00000000ff, q17);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000ffff00000000, q18);
  ASSERT_EQUAL_128(0x00000000ffffffff, 0x0000ffff00000000, q19);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q20);
  ASSERT_EQUAL_128(0x00000000ffffffff, 0x0000000000000000, q21);
  ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q22);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q23);
  ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q24);
  TEARDOWN();
}


TEST(neon_2regmisc_cmle) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x0001000200030004, 0x0000000000000000);
  __ Movi(v1.V2D(), 0x000055aaff55ff00, 0x0000ff55555500ff);

  __ Cmle(v16.V8B(), v1.V8B(), 0);
  __ Cmle(v17.V16B(), v1.V16B(), 0);
  __ Cmle(v18.V4H(), v1.V4H(), 0);
  __ Cmle(v19.V8H(), v1.V8H(), 0);
  __ Cmle(v20.V2S(), v1.V2S(), 0);
  __ Cmle(v21.V4S(), v1.V4S(), 0);
  __ Cmle(d22, d0, 0);
  __ Cmle(d23, d1, 0);
  __ Cmle(v24.V2D(), v0.V2D(), 0);

  END();

  RUN();
  ASSERT_EQUAL_128(0x0000000000000000, 0xffffff000000ffff, q16);
  ASSERT_EQUAL_128(0xffff00ffff00ffff, 0xffffff000000ffff, q17);
  ASSERT_EQUAL_128(0x0000000000000000, 0xffffffff00000000, q18);
  ASSERT_EQUAL_128(0xffff0000ffffffff, 0xffffffff00000000, q19);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q20);
  ASSERT_EQUAL_128(0x00000000ffffffff, 0x0000000000000000, q21);
  ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q22);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q23);
  ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q24);
  TEARDOWN();
}


TEST(neon_2regmisc_cmgt) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x0001000200030004, 0x0000000000000000);
  __ Movi(v1.V2D(), 0x000055aaff55ff00, 0x0000ff55555500ff);

  __ Cmgt(v16.V8B(), v1.V8B(), 0);
  __ Cmgt(v17.V16B(), v1.V16B(), 0);
  __ Cmgt(v18.V4H(), v1.V4H(), 0);
  __ Cmgt(v19.V8H(), v1.V8H(), 0);
  __ Cmgt(v20.V2S(), v0.V2S(), 0);
  __ Cmgt(v21.V4S(), v0.V4S(), 0);
  __ Cmgt(d22, d0, 0);
  __ Cmgt(d23, d1, 0);
  __ Cmgt(v24.V2D(), v0.V2D(), 0);

  END();

  RUN();
  ASSERT_EQUAL_128(0x0000000000000000, 0x000000ffffff0000, q16);
  ASSERT_EQUAL_128(0x0000ff0000ff0000, 0x000000ffffff0000, q17);
  ASSERT_EQUAL_128(0x0000000000000000, 0x00000000ffffffff, q18);
  ASSERT_EQUAL_128(0x0000ffff00000000, 0x00000000ffffffff, q19);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q20);
  ASSERT_EQUAL_128(0xffffffffffffffff, 0x0000000000000000, q21);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q22);
  ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q23);
  ASSERT_EQUAL_128(0xffffffffffffffff, 0x0000000000000000, q24);
  TEARDOWN();
}


TEST(neon_2regmisc_neg) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  __ Neg(v16.V8B(), v0.V8B());
  __ Neg(v17.V16B(), v0.V16B());
  __ Neg(v18.V4H(), v1.V4H());
  __ Neg(v19.V8H(), v1.V8H());
  __ Neg(v20.V2S(), v2.V2S());
  __ Neg(v21.V4S(), v2.V4S());
  __ Neg(d22, d3);
  __ Neg(v23.V2D(), v3.V2D());
  __ Neg(v24.V2D(), v4.V2D());

  END();

  RUN();
  ASSERT_EQUAL_128(0x0000000000000000, 0x807f0100ff81807f, q16);
  ASSERT_EQUAL_128(0x81ff00017f8081ff, 0x807f0100ff81807f, q17);
  ASSERT_EQUAL_128(0x0000000000000000, 0x00010000ffff8001, q18);
  ASSERT_EQUAL_128(0x80007fff00010000, 0x00010000ffff8001, q19);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000080000001, q20);
  ASSERT_EQUAL_128(0x8000000000000001, 0x0000000080000001, q21);
  ASSERT_EQUAL_128(0x0000000000000000, 0x8000000000000001, q22);
  ASSERT_EQUAL_128(0x7fffffffffffffff, 0x8000000000000001, q23);
  ASSERT_EQUAL_128(0x8000000000000000, 0x0000000000000000, q24);

  TEARDOWN();
}

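// Unlike NEG, SQNEG saturates: negating the most negative representable
// value of an element (0x80 for bytes, 0x8000 for halfwords, and so on)
// yields the maximum positive value instead of wrapping back to itself.
// SQABS, further below, saturates the same way.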
TEST(neon_2regmisc_sqneg) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  __ Sqneg(v16.V8B(), v0.V8B());
  __ Sqneg(v17.V16B(), v0.V16B());
  __ Sqneg(v18.V4H(), v1.V4H());
  __ Sqneg(v19.V8H(), v1.V8H());
  __ Sqneg(v20.V2S(), v2.V2S());
  __ Sqneg(v21.V4S(), v2.V4S());
  __ Sqneg(v22.V2D(), v3.V2D());
  __ Sqneg(v23.V2D(), v4.V2D());

  __ Sqneg(b24, b0);
  __ Sqneg(h25, h1);
  __ Sqneg(s26, s2);
  __ Sqneg(d27, d3);

  END();

  RUN();
  ASSERT_EQUAL_128(0x0000000000000000, 0x7f7f0100ff817f7f, q16);
  ASSERT_EQUAL_128(0x81ff00017f7f81ff, 0x7f7f0100ff817f7f, q17);
  ASSERT_EQUAL_128(0x0000000000000000, 0x00010000ffff8001, q18);
  ASSERT_EQUAL_128(0x7fff7fff00010000, 0x00010000ffff8001, q19);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000080000001, q20);
  ASSERT_EQUAL_128(0x7fffffff00000001, 0x0000000080000001, q21);
  ASSERT_EQUAL_128(0x7fffffffffffffff, 0x8000000000000001, q22);
  ASSERT_EQUAL_128(0x7fffffffffffffff, 0x0000000000000000, q23);

  ASSERT_EQUAL_128(0, 0x7f, q24);
  ASSERT_EQUAL_128(0, 0x8001, q25);
  ASSERT_EQUAL_128(0, 0x80000001, q26);
  ASSERT_EQUAL_128(0, 0x8000000000000001, q27);

  TEARDOWN();
}


TEST(neon_2regmisc_abs) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  __ Abs(v16.V8B(), v0.V8B());
  __ Abs(v17.V16B(), v0.V16B());
  __ Abs(v18.V4H(), v1.V4H());
  __ Abs(v19.V8H(), v1.V8H());
  __ Abs(v20.V2S(), v2.V2S());
  __ Abs(v21.V4S(), v2.V4S());
  __ Abs(d22, d3);
  __ Abs(v23.V2D(), v3.V2D());
  __ Abs(v24.V2D(), v4.V2D());

  END();

  RUN();
  ASSERT_EQUAL_128(0x0000000000000000, 0x807f0100017f807f, q16);
  ASSERT_EQUAL_128(0x7f0100017f807f01, 0x807f0100017f807f, q17);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0001000000017fff, q18);
  ASSERT_EQUAL_128(0x80007fff00010000, 0x0001000000017fff, q19);
  ASSERT_EQUAL_128(0x0000000000000000, 0x000000007fffffff, q20);
  ASSERT_EQUAL_128(0x8000000000000001, 0x000000007fffffff, q21);
  ASSERT_EQUAL_128(0x0000000000000000, 0x7fffffffffffffff, q22);
  ASSERT_EQUAL_128(0x7fffffffffffffff, 0x7fffffffffffffff, q23);
  ASSERT_EQUAL_128(0x8000000000000000, 0x0000000000000000, q24);

  TEARDOWN();
}


TEST(neon_2regmisc_sqabs) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  __ Sqabs(v16.V8B(), v0.V8B());
  __ Sqabs(v17.V16B(), v0.V16B());
  __ Sqabs(v18.V4H(), v1.V4H());
  __ Sqabs(v19.V8H(), v1.V8H());
  __ Sqabs(v20.V2S(), v2.V2S());
  __ Sqabs(v21.V4S(), v2.V4S());
  __ Sqabs(v22.V2D(), v3.V2D());
  __ Sqabs(v23.V2D(), v4.V2D());

  __ Sqabs(b24, b0);
  __ Sqabs(h25, h1);
  __ Sqabs(s26, s2);
  __ Sqabs(d27, d3);

  END();

  RUN();
  ASSERT_EQUAL_128(0x0000000000000000, 0x7f7f0100017f7f7f, q16);
  ASSERT_EQUAL_128(0x7f0100017f7f7f01, 0x7f7f0100017f7f7f, q17);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0001000000017fff, q18);
  ASSERT_EQUAL_128(0x7fff7fff00010000, 0x0001000000017fff, q19);
  ASSERT_EQUAL_128(0x0000000000000000, 0x000000007fffffff, q20);
  ASSERT_EQUAL_128(0x7fffffff00000001, 0x000000007fffffff, q21);
  ASSERT_EQUAL_128(0x7fffffffffffffff, 0x7fffffffffffffff, q22);
  ASSERT_EQUAL_128(0x7fffffffffffffff, 0x0000000000000000, q23);

  ASSERT_EQUAL_128(0, 0x7f, q24);
  ASSERT_EQUAL_128(0, 0x7fff, q25);
  ASSERT_EQUAL_128(0, 0x7fffffff, q26);
  ASSERT_EQUAL_128(0, 0x7fffffffffffffff, q27);

  TEARDOWN();
}

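// SUQADD accumulates an unsigned operand into a signed destination and
// saturates the result to the signed range; USQADD, tested next, is the
// converse, accumulating a signed operand into an unsigned destination with
// unsigned saturation.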
TEST(neon_2regmisc_suqadd) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x017f8081ff00017f, 0x010080ff7f0180ff);

  __ Movi(v2.V2D(), 0x80008001ffff0000, 0xffff000000017ffd);
  __ Movi(v3.V2D(), 0xffff000080008001, 0x00017fffffff0001);

  __ Movi(v4.V2D(), 0x80000000fffffffe, 0xfffffff17ffffffe);
  __ Movi(v5.V2D(), 0xffffffff80000000, 0x7fffffff00000002);

  __ Movi(v6.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v7.V2D(), 0x8000000000000000, 0x8000000000000002);

  __ Mov(v16.V2D(), v0.V2D());
  __ Mov(v17.V2D(), v0.V2D());
  __ Mov(v18.V2D(), v2.V2D());
  __ Mov(v19.V2D(), v2.V2D());
  __ Mov(v20.V2D(), v4.V2D());
  __ Mov(v21.V2D(), v4.V2D());
  __ Mov(v22.V2D(), v6.V2D());

  __ Mov(v23.V2D(), v0.V2D());
  __ Mov(v24.V2D(), v2.V2D());
  __ Mov(v25.V2D(), v4.V2D());
  __ Mov(v26.V2D(), v6.V2D());

  __ Suqadd(v16.V8B(), v1.V8B());
  __ Suqadd(v17.V16B(), v1.V16B());
  __ Suqadd(v18.V4H(), v3.V4H());
  __ Suqadd(v19.V8H(), v3.V8H());
  __ Suqadd(v20.V2S(), v5.V2S());
  __ Suqadd(v21.V4S(), v5.V4S());
  __ Suqadd(v22.V2D(), v7.V2D());

  __ Suqadd(b23, b1);
  __ Suqadd(h24, h3);
  __ Suqadd(s25, s5);
  __ Suqadd(d26, d7);

  END();

  RUN();
  ASSERT_EQUAL_128(0x0000000000000000, 0x81817f7f7f7f007f, q16);
  ASSERT_EQUAL_128(0x7f7f7f7f7f807f7f, 0x81817f7f7f7f007f, q17);
  ASSERT_EQUAL_128(0x0000000000000000, 0x00007fff7fff7ffe, q18);
  ASSERT_EQUAL_128(0x7fff80017fff7fff, 0x00007fff7fff7ffe, q19);
  ASSERT_EQUAL_128(0x0000000000000000, 0x7ffffff07fffffff, q20);
  ASSERT_EQUAL_128(0x7fffffff7ffffffe, 0x7ffffff07fffffff, q21);
  ASSERT_EQUAL_128(0x0000000000000001, 0x7fffffffffffffff, q22);

  ASSERT_EQUAL_128(0, 0x7f, q23);
  ASSERT_EQUAL_128(0, 0x7ffe, q24);
  ASSERT_EQUAL_128(0, 0x7fffffff, q25);
  ASSERT_EQUAL_128(0, 0x7fffffffffffffff, q26);
  TEARDOWN();
}

TEST(neon_2regmisc_usqadd) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f7ffe);
  __ Movi(v1.V2D(), 0x017f8081ff00017f, 0x010080ff7f018002);

  __ Movi(v2.V2D(), 0x80008001fffe0000, 0xffff000000017ffd);
  __ Movi(v3.V2D(), 0xffff000000028001, 0x00017fffffff0001);

  __ Movi(v4.V2D(), 0x80000000fffffffe, 0x00000001fffffffe);
  __ Movi(v5.V2D(), 0xffffffff80000000, 0xfffffffe00000002);

  __ Movi(v6.V2D(), 0x8000000000000002, 0x7fffffffffffffff);
  __ Movi(v7.V2D(), 0x7fffffffffffffff, 0x8000000000000000);

  __ Mov(v16.V2D(), v0.V2D());
  __ Mov(v17.V2D(), v0.V2D());
  __ Mov(v18.V2D(), v2.V2D());
  __ Mov(v19.V2D(), v2.V2D());
  __ Mov(v20.V2D(), v4.V2D());
  __ Mov(v21.V2D(), v4.V2D());
  __ Mov(v22.V2D(), v6.V2D());

  __ Mov(v23.V2D(), v0.V2D());
  __ Mov(v24.V2D(), v2.V2D());
  __ Mov(v25.V2D(), v4.V2D());
  __ Mov(v26.V2D(), v6.V2D());

  __ Usqadd(v16.V8B(), v1.V8B());
  __ Usqadd(v17.V16B(), v1.V16B());
  __ Usqadd(v18.V4H(), v3.V4H());
  __ Usqadd(v19.V8H(), v3.V8H());
  __ Usqadd(v20.V2S(), v5.V2S());
  __ Usqadd(v21.V4S(), v5.V4S());
  __ Usqadd(v22.V2D(), v7.V2D());

  __ Usqadd(b23, b1);
  __ Usqadd(h24, h3);
  __ Usqadd(s25, s5);
  __ Usqadd(d26, d7);

  END();

  RUN();
  ASSERT_EQUAL_128(0x0000000000000000, 0x81817f00808000ff, q16);
  ASSERT_EQUAL_128(0x8080008080808080, 0x81817f00808000ff, q17);
  ASSERT_EQUAL_128(0x0000000000000000, 0xffff7fff00007ffe, q18);
  ASSERT_EQUAL_128(0x7fff8001ffff0000, 0xffff7fff00007ffe, q19);
  ASSERT_EQUAL_128(0x0000000000000000, 0x00000000ffffffff, q20);
  ASSERT_EQUAL_128(0x7fffffff7ffffffe, 0x00000000ffffffff, q21);
  ASSERT_EQUAL_128(0xffffffffffffffff, 0x0000000000000000, q22);

  ASSERT_EQUAL_128(0, 0xff, q23);
  ASSERT_EQUAL_128(0, 0x7ffe, q24);
  ASSERT_EQUAL_128(0, 0xffffffff, q25);
  ASSERT_EQUAL_128(0, 0x0000000000000000, q26);
  TEARDOWN();
}

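// The SYS operands below encode cache maintenance operations that take a
// virtual address in Xt: {3, C7, C5, 1} is IC IVAU, {3, C7, C10, 1} is
// DC CVAC, {3, C7, C11, 1} is DC CVAU and {3, C7, C14, 1} is DC CIVAC.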
TEST(system_sys) {
  SETUP();
  const char* msg = "SYS test!";
  uintptr_t msg_addr = reinterpret_cast<uintptr_t>(msg);

  START();
  __ Mov(x4, msg_addr);
  __ Sys(3, 0x7, 0x5, 1, x4);
  __ Mov(x3, x4);
  __ Sys(3, 0x7, 0xa, 1, x3);
  __ Mov(x2, x3);
  __ Sys(3, 0x7, 0xb, 1, x2);
  __ Mov(x1, x2);
  __ Sys(3, 0x7, 0xe, 1, x1);
  // TODO: Add tests to check ZVA equivalent.
  END();

  RUN();

  TEARDOWN();
}


TEST(system_ic) {
  SETUP();
  const char* msg = "IC test!";
  uintptr_t msg_addr = reinterpret_cast<uintptr_t>(msg);

  START();
  __ Mov(x11, msg_addr);
  __ Ic(IVAU, x11);
  END();

  RUN();

  TEARDOWN();
}


TEST(system_dc) {
  SETUP();
  const char* msg = "DC test!";
  uintptr_t msg_addr = reinterpret_cast<uintptr_t>(msg);

  START();
  __ Mov(x20, msg_addr);
  __ Dc(CVAC, x20);
  __ Mov(x21, x20);
  __ Dc(CVAU, x21);
  __ Mov(x22, x21);
  __ Dc(CIVAC, x22);
  // TODO: Add tests to check ZVA.
  END();

  RUN();

  TEARDOWN();
}

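// XTN writes the low half of each source element to a destination with
// elements half as wide; the XTN2 form fills the upper half of the
// destination instead. SQXTN and UQXTN saturate to the signed and unsigned
// range of the narrower type, and SQXTUN narrows a signed source with
// unsigned saturation.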
TEST(neon_2regmisc_xtn) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  __ Xtn(v16.V8B(), v0.V8H());
  __ Xtn2(v16.V16B(), v1.V8H());
  __ Xtn(v17.V4H(), v1.V4S());
  __ Xtn2(v17.V8H(), v2.V4S());
  __ Xtn(v18.V2S(), v3.V2D());
  __ Xtn2(v18.V4S(), v4.V2D());

  END();

  RUN();
  ASSERT_EQUAL_128(0x0001ff00ff0001ff, 0x01ff800181007f81, q16);
  ASSERT_EQUAL_128(0x0000ffff0000ffff, 0x8001000000007fff, q17);
  ASSERT_EQUAL_128(0x0000000000000000, 0x00000001ffffffff, q18);
  TEARDOWN();
}


TEST(neon_2regmisc_sqxtn) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x7f01007a81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  __ Sqxtn(v16.V8B(), v0.V8H());
  __ Sqxtn2(v16.V16B(), v1.V8H());
  __ Sqxtn(v17.V4H(), v1.V4S());
  __ Sqxtn2(v17.V8H(), v2.V4S());
  __ Sqxtn(v18.V2S(), v3.V2D());
  __ Sqxtn2(v18.V4S(), v4.V2D());
  __ Sqxtn(b19, h0);
  __ Sqxtn(h20, s0);
  __ Sqxtn(s21, d0);

  END();

  RUN();
  ASSERT_EQUAL_128(0x8080ff00ff00017f, 0x7f7a807f80807f80, q16);
  ASSERT_EQUAL_128(0x8000ffff00007fff, 0x8000800080007fff, q17);
  ASSERT_EQUAL_128(0x8000000000000000, 0x800000007fffffff, q18);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000080, q19);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000007fff, q20);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000080000000, q21);
  TEARDOWN();
}


TEST(neon_2regmisc_uqxtn) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x7f01007a81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  __ Uqxtn(v16.V8B(), v0.V8H());
  __ Uqxtn2(v16.V16B(), v1.V8H());
  __ Uqxtn(v17.V4H(), v1.V4S());
  __ Uqxtn2(v17.V8H(), v2.V4S());
  __ Uqxtn(v18.V2S(), v3.V2D());
  __ Uqxtn2(v18.V4S(), v4.V2D());
  __ Uqxtn(b19, h0);
  __ Uqxtn(h20, s0);
  __ Uqxtn(s21, d0);

  END();

  RUN();
  ASSERT_EQUAL_128(0xffffff00ff0001ff, 0xff7affffffffffff, q16);
  ASSERT_EQUAL_128(0xffffffff0000ffff, 0xffffffffffffffff, q17);
  ASSERT_EQUAL_128(0xffffffff00000000, 0xffffffffffffffff, q18);
  ASSERT_EQUAL_128(0x0000000000000000, 0x00000000000000ff, q19);
  ASSERT_EQUAL_128(0x0000000000000000, 0x000000000000ffff, q20);
  ASSERT_EQUAL_128(0x0000000000000000, 0x00000000ffffffff, q21);
  TEARDOWN();
}


TEST(neon_2regmisc_sqxtun) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x7f01007a81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  __ Sqxtun(v16.V8B(), v0.V8H());
  __ Sqxtun2(v16.V16B(), v1.V8H());
  __ Sqxtun(v17.V4H(), v1.V4S());
  __ Sqxtun2(v17.V8H(), v2.V4S());
  __ Sqxtun(v18.V2S(), v3.V2D());
  __ Sqxtun2(v18.V4S(), v4.V2D());
  __ Sqxtun(b19, h0);
  __ Sqxtun(h20, s0);
  __ Sqxtun(s21, d0);

  END();

  RUN();
  ASSERT_EQUAL_128(0x00000000000001ff, 0xff7a00ff0000ff00, q16);
  ASSERT_EQUAL_128(0x000000000000ffff, 0x000000000000ffff, q17);
  ASSERT_EQUAL_128(0x0000000000000000, 0x00000000ffffffff, q18);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q19);
  ASSERT_EQUAL_128(0x0000000000000000, 0x000000000000ffff, q20);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q21);
  TEARDOWN();
}

TEST(neon_3same_and) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa);
  __ Movi(v1.V2D(), 0x00aa55aaff55ff00, 0xaa55ff00555500ff);

  __ And(v16.V16B(), v0.V16B(), v0.V16B());  // self test
  __ And(v17.V16B(), v0.V16B(), v1.V16B());  // all combinations
  __ And(v24.V8B(), v0.V8B(), v0.V8B());     // self test
  __ And(v25.V8B(), v0.V8B(), v1.V8B());     // all combinations
  END();

  RUN();
  ASSERT_EQUAL_128(0xff00aa5500ff55aa, 0xff00aa5500ff55aa, q16);
  ASSERT_EQUAL_128(0x0000000000555500, 0xaa00aa00005500aa, q17);
  ASSERT_EQUAL_128(0, 0xff00aa5500ff55aa, q24);
  ASSERT_EQUAL_128(0, 0xaa00aa00005500aa, q25);
  TEARDOWN();
}

TEST(neon_3same_bic) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa);
  __ Movi(v1.V2D(), 0x00ffaa00aa55aaff, 0xffff005500ff00ff);

  __ Bic(v16.V16B(), v0.V16B(), v0.V16B());  // self test
  __ Bic(v17.V16B(), v0.V16B(), v1.V16B());  // all combinations
  __ Bic(v24.V8B(), v0.V8B(), v0.V8B());     // self test
  __ Bic(v25.V8B(), v0.V8B(), v1.V8B());     // all combinations
  END();

  RUN();
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q16);
  ASSERT_EQUAL_128(0xff00005500aa5500, 0x0000aa0000005500, q17);
  ASSERT_EQUAL_128(0, 0x0000000000000000, q24);
  ASSERT_EQUAL_128(0, 0x0000aa0000005500, q25);
  TEARDOWN();
}

TEST(neon_3same_orr) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa);
  __ Movi(v1.V2D(), 0x00aa55aaff55ff00, 0xaa55ff00555500ff);

  __ Orr(v16.V16B(), v0.V16B(), v0.V16B());  // self test
  __ Orr(v17.V16B(), v0.V16B(), v1.V16B());  // all combinations
  __ Orr(v24.V8B(), v0.V8B(), v0.V8B());     // self test
  __ Orr(v25.V8B(), v0.V8B(), v1.V8B());     // all combinations
  END();

  RUN();
  ASSERT_EQUAL_128(0xff00aa5500ff55aa, 0xff00aa5500ff55aa, q16);
  ASSERT_EQUAL_128(0xffaaffffffffffaa, 0xff55ff5555ff55ff, q17);
  ASSERT_EQUAL_128(0, 0xff00aa5500ff55aa, q24);
  ASSERT_EQUAL_128(0, 0xff55ff5555ff55ff, q25);
  TEARDOWN();
}

TEST(neon_3same_mov) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa);

  __ Mov(v16.V16B(), v0.V16B());
  __ Mov(v17.V8H(), v0.V8H());
  __ Mov(v18.V4S(), v0.V4S());
  __ Mov(v19.V2D(), v0.V2D());

  __ Mov(v24.V8B(), v0.V8B());
  __ Mov(v25.V4H(), v0.V4H());
  __ Mov(v26.V2S(), v0.V2S());
  END();

  RUN();

  ASSERT_EQUAL_128(0xff00aa5500ff55aa, 0xff00aa5500ff55aa, q16);
  ASSERT_EQUAL_128(0xff00aa5500ff55aa, 0xff00aa5500ff55aa, q17);
  ASSERT_EQUAL_128(0xff00aa5500ff55aa, 0xff00aa5500ff55aa, q18);
  ASSERT_EQUAL_128(0xff00aa5500ff55aa, 0xff00aa5500ff55aa, q19);

  ASSERT_EQUAL_128(0x0, 0xff00aa5500ff55aa, q24);
  ASSERT_EQUAL_128(0x0, 0xff00aa5500ff55aa, q25);
  ASSERT_EQUAL_128(0x0, 0xff00aa5500ff55aa, q26);

  TEARDOWN();
}

TEST(neon_3same_orn) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa);
  __ Movi(v1.V2D(), 0x00aa55aaff55ff00, 0xaa55ff00555500ff);

  __ Orn(v16.V16B(), v0.V16B(), v0.V16B());  // self test
  __ Orn(v17.V16B(), v0.V16B(), v1.V16B());  // all combinations
  __ Orn(v24.V8B(), v0.V8B(), v0.V8B());     // self test
  __ Orn(v25.V8B(), v0.V8B(), v1.V8B());     // all combinations
  END();

  RUN();
  ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, q16);
  ASSERT_EQUAL_128(0xff55aa5500ff55ff, 0xffaaaaffaaffffaa, q17);
  ASSERT_EQUAL_128(0, 0xffffffffffffffff, q24);
  ASSERT_EQUAL_128(0, 0xffaaaaffaaffffaa, q25);
  TEARDOWN();
}

TEST(neon_3same_eor) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa);
  __ Movi(v1.V2D(), 0x00ffaa00aa55aaff, 0xffff005500ff00ff);

  __ Eor(v16.V16B(), v0.V16B(), v0.V16B());  // self test
  __ Eor(v17.V16B(), v0.V16B(), v1.V16B());  // all combinations
  __ Eor(v24.V8B(), v0.V8B(), v0.V8B());     // self test
  __ Eor(v25.V8B(), v0.V8B(), v1.V8B());     // all combinations
  END();

  RUN();
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q16);
  ASSERT_EQUAL_128(0xffff0055aaaaff55, 0x00ffaa0000005555, q17);
  ASSERT_EQUAL_128(0, 0x0000000000000000, q24);
  ASSERT_EQUAL_128(0, 0x00ffaa0000005555, q25);
  TEARDOWN();
}

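// The bitwise insert/select instructions combine three registers, differing
// in which register provides the selection mask:
//   BIF: vd = (vd & vm) | (vn & ~vm)   (insert vn bits where vm is clear)
//   BIT: vd = (vd & ~vm) | (vn & vm)   (insert vn bits where vm is set)
//   BSL: vd = (vd & vn) | (~vd & vm)   (the destination acts as the mask)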
TEST(neon_3same_bif) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v16.V2D(), 0xffff0000ff00ffff, 0xffff00000000aaaa);
  __ Movi(v0.V2D(), 0xff00ff00ff005555, 0xaaaa5555aaaaaaaa);
  __ Movi(v1.V2D(), 0x00ff00ffff0055aa, 0x55aa55aa55aa55aa);

  __ Movi(v17.V2D(), 0x5555aa55cccccccc, 0x33333333f0f0f0f0);
  __ Movi(v2.V2D(), 0x555555aaff00ff00, 0xff00ff00ff00ff00);
  __ Movi(v3.V2D(), 0xaa55aa5500ffff00, 0x00ffff0000ffff00);

  __ Movi(v18.V2D(), 0, 0xf0f0f0f00f0f0f0f);
  __ Movi(v4.V2D(), 0, 0xf0f0f0f0f0f0f0f0);
  __ Movi(v5.V2D(), 0, 0x00ffff0000ffff00);

  __ Bif(v16.V16B(), v0.V16B(), v1.V16B());
  __ Bif(v17.V16B(), v2.V16B(), v3.V16B());
  __ Bif(v18.V8B(), v4.V8B(), v5.V8B());
  END();

  RUN();

  ASSERT_EQUAL_128(0xffffff00ff0055ff, 0xffaa0055aa00aaaa, q16);
  ASSERT_EQUAL_128(0x5555ffffffcccc00, 0xff333300fff0f000, q17);
  ASSERT_EQUAL_128(0, 0xf0f0f0f0f00f0ff0, q18);
  TEARDOWN();
}

TEST(neon_3same_bit) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v16.V2D(), 0xffff0000ff00ffff, 0xffff00000000aaaa);
  __ Movi(v0.V2D(), 0xff00ff00ff005555, 0xaaaa5555aaaaaaaa);
  __ Movi(v1.V2D(), 0x00ff00ffff0055aa, 0x55aa55aa55aa55aa);

  __ Movi(v17.V2D(), 0x5555aa55cccccccc, 0x33333333f0f0f0f0);
  __ Movi(v2.V2D(), 0x555555aaff00ff00, 0xff00ff00ff00ff00);
  __ Movi(v3.V2D(), 0xaa55aa5500ffff00, 0x00ffff0000ffff00);

  __ Movi(v18.V2D(), 0, 0xf0f0f0f00f0f0f0f);
  __ Movi(v4.V2D(), 0, 0xf0f0f0f0f0f0f0f0);
  __ Movi(v5.V2D(), 0, 0x00ffff0000ffff00);

  __ Bit(v16.V16B(), v0.V16B(), v1.V16B());
  __ Bit(v17.V16B(), v2.V16B(), v3.V16B());
  __ Bit(v18.V8B(), v4.V8B(), v5.V8B());
  END();

  RUN();

  ASSERT_EQUAL_128(0xff000000ff00ff55, 0xaaff550000aaaaaa, q16);
  ASSERT_EQUAL_128(0x55550000cc00ffcc, 0x3300ff33f000fff0, q17);
  ASSERT_EQUAL_128(0, 0xf0f0f0f00ff0f00f, q18);
  TEARDOWN();
}

TEST(neon_3same_bsl) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v16.V2D(), 0xffff0000ff00ffff, 0xffff00000000aaaa);
  __ Movi(v0.V2D(), 0xff00ff00ff005555, 0xaaaa5555aaaaaaaa);
  __ Movi(v1.V2D(), 0x00ff00ffff0055aa, 0x55aa55aa55aa55aa);

  __ Movi(v17.V2D(), 0x5555aa55cccccccc, 0x33333333f0f0f0f0);
  __ Movi(v2.V2D(), 0x555555aaff00ff00, 0xff00ff00ff00ff00);
  __ Movi(v3.V2D(), 0xaa55aa5500ffff00, 0x00ffff0000ffff00);

  __ Movi(v18.V2D(), 0, 0xf0f0f0f00f0f0f0f);
  __ Movi(v4.V2D(), 0, 0xf0f0f0f0f0f0f0f0);
  __ Movi(v5.V2D(), 0, 0x00ffff0000ffff00);

  __ Bsl(v16.V16B(), v0.V16B(), v1.V16B());
  __ Bsl(v17.V16B(), v2.V16B(), v3.V16B());
  __ Bsl(v18.V8B(), v4.V8B(), v5.V8B());
  END();

  RUN();

  ASSERT_EQUAL_128(0xff0000ffff005555, 0xaaaa55aa55aaffaa, q16);
  ASSERT_EQUAL_128(0xff550000cc33ff00, 0x33ccff00f00fff00, q17);
  ASSERT_EQUAL_128(0, 0xf0fffff000f0f000, q18);
  TEARDOWN();
}


TEST(neon_3same_smax) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0xaaaaaaaa55555555, 0xffffffff0000aa55);
  __ Movi(v1.V2D(), 0x55aa5555aaaaaaaa, 0x00000000ffaa55ff);

  __ Smax(v16.V8B(), v0.V8B(), v1.V8B());
  __ Smax(v18.V4H(), v0.V4H(), v1.V4H());
  __ Smax(v20.V2S(), v0.V2S(), v1.V2S());

  __ Smax(v17.V16B(), v0.V16B(), v1.V16B());
  __ Smax(v19.V8H(), v0.V8H(), v1.V8H());
  __ Smax(v21.V4S(), v0.V4S(), v1.V4S());
  END();

  RUN();

  ASSERT_EQUAL_128(0x0, 0x0000000000005555, q16);
  ASSERT_EQUAL_128(0x0, 0x00000000000055ff, q18);
  ASSERT_EQUAL_128(0x0, 0x000000000000aa55, q20);
  ASSERT_EQUAL_128(0x55aa555555555555, 0x0000000000005555, q17);
  ASSERT_EQUAL_128(0x55aa555555555555, 0x00000000000055ff, q19);
  ASSERT_EQUAL_128(0x55aa555555555555, 0x000000000000aa55, q21);
  TEARDOWN();
}

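// Pairwise operations reduce adjacent pairs of elements: the low half of the
// result is formed from pairs within vn and the high half from pairs within
// vm.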
TEST(neon_3same_smaxp) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0xaaaaaaaa55555555, 0xffffffff0000aa55);
  __ Movi(v1.V2D(), 0x55aa5555aaaaaaaa, 0x00000000ffaa55ff);

  __ Smaxp(v16.V8B(), v0.V8B(), v1.V8B());
  __ Smaxp(v18.V4H(), v0.V4H(), v1.V4H());
  __ Smaxp(v20.V2S(), v0.V2S(), v1.V2S());

  __ Smaxp(v17.V16B(), v0.V16B(), v1.V16B());
  __ Smaxp(v19.V8H(), v0.V8H(), v1.V8H());
  __ Smaxp(v21.V4S(), v0.V4S(), v1.V4S());
  END();

  RUN();

  ASSERT_EQUAL_128(0x0, 0x0000ff55ffff0055, q16);
  ASSERT_EQUAL_128(0x0, 0x000055ffffff0000, q18);
  ASSERT_EQUAL_128(0x0, 0x000000000000aa55, q20);
  ASSERT_EQUAL_128(0x5555aaaa0000ff55, 0xaaaa5555ffff0055, q17);
  ASSERT_EQUAL_128(0x55aaaaaa000055ff, 0xaaaa5555ffff0000, q19);
  ASSERT_EQUAL_128(0x55aa555500000000, 0x555555550000aa55, q21);
  TEARDOWN();
}

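// The scalar ADDP form sums the two 64-bit lanes of its source into a single
// D register, wrapping modulo 2^64; for example,
// 0x0011223344aafe80 + 0x00112233aabbfc00 = 0x00224466ef66fa80 (q16 below).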
TEST(neon_addp_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x0011223344aafe80, 0x00112233aabbfc00);
  __ Movi(v1.V2D(), 0x55aa5555aaaaaaaa, 0x00000000ffaa55ff);
  __ Movi(v2.V2D(), 0xaaaaaaaa66555555, 0xffffffff0000aa00);

  __ Addp(d16, v0.V2D());
  __ Addp(d17, v1.V2D());
  __ Addp(d18, v2.V2D());

  END();

  RUN();

  ASSERT_EQUAL_128(0x0, 0x00224466ef66fa80, q16);
  ASSERT_EQUAL_128(0x0, 0x55aa5556aa5500a9, q17);
  ASSERT_EQUAL_128(0x0, 0xaaaaaaa96655ff55, q18);
  TEARDOWN();
}

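// Across-lanes reductions fold every element of the source into one scalar
// result. ADDV keeps the element size, while SADDLV and UADDLV sign- and
// zero-extend each element before summing, so their result element is twice
// as wide as the source elements.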
TEST(neon_acrosslanes_addv) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x0011223344aafe80, 0x00112233aabbfc00);
  __ Movi(v1.V2D(), 0x55aa5555aaaaaaaa, 0x00000000ffaa55ff);
  __ Movi(v2.V2D(), 0xaaaaaaaa66555555, 0xffffffff0000aa00);

  __ Addv(b16, v0.V8B());
  __ Addv(b17, v0.V16B());
  __ Addv(h18, v1.V4H());
  __ Addv(h19, v1.V8H());
  __ Addv(s20, v2.V4S());

  END();

  RUN();

  ASSERT_EQUAL_128(0x0, 0xc7, q16);
  ASSERT_EQUAL_128(0x0, 0x99, q17);
  ASSERT_EQUAL_128(0x0, 0x55a9, q18);
  ASSERT_EQUAL_128(0x0, 0x55fc, q19);
  ASSERT_EQUAL_128(0x0, 0x1100a9fe, q20);
  TEARDOWN();
}


TEST(neon_acrosslanes_saddlv) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x0011223344aafe80, 0x00112233aabbfc00);
  __ Movi(v1.V2D(), 0x55aa5555aaaaaaaa, 0x00000000ffaa55ff);
  __ Movi(v2.V2D(), 0xaaaaaaaa66555555, 0xffffffff0000aa00);

  __ Saddlv(h16, v0.V8B());
  __ Saddlv(h17, v0.V16B());
  __ Saddlv(s18, v1.V4H());
  __ Saddlv(s19, v1.V8H());
  __ Saddlv(d20, v2.V4S());

  END();

  RUN();

  ASSERT_EQUAL_128(0x0, 0xffc7, q16);
  ASSERT_EQUAL_128(0x0, 0xff99, q17);
  ASSERT_EQUAL_128(0x0, 0x000055a9, q18);
  ASSERT_EQUAL_128(0x0, 0x000055fc, q19);
  ASSERT_EQUAL_128(0x0, 0x0000001100a9fe, q20);
  TEARDOWN();
}


TEST(neon_acrosslanes_uaddlv) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x0011223344aafe80, 0x00112233aabbfc00);
  __ Movi(v1.V2D(), 0x55aa5555aaaaaaaa, 0x00000000ffaa55ff);
  __ Movi(v2.V2D(), 0xaaaaaaaa66555555, 0xffffffff0000aa00);

  __ Uaddlv(h16, v0.V8B());
  __ Uaddlv(h17, v0.V16B());
  __ Uaddlv(s18, v1.V4H());
  __ Uaddlv(s19, v1.V8H());
  __ Uaddlv(d20, v2.V4S());

  END();

  RUN();

  ASSERT_EQUAL_128(0x0, 0x02c7, q16);
  ASSERT_EQUAL_128(0x0, 0x0599, q17);
  ASSERT_EQUAL_128(0x0, 0x000155a9, q18);
  ASSERT_EQUAL_128(0x0, 0x000355fc, q19);
  ASSERT_EQUAL_128(0x0, 0x000000021100a9fe, q20);
  TEARDOWN();
}


TEST(neon_acrosslanes_smaxv) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x0011223344aafe80, 0x00112233aabbfc00);
  __ Movi(v1.V2D(), 0x55aa5555aaaaaaaa, 0x00000000ffaa55ff);
  __ Movi(v2.V2D(), 0xaaaaaaaa66555555, 0xffffffff0000aa00);

  __ Smaxv(b16, v0.V8B());
  __ Smaxv(b17, v0.V16B());
  __ Smaxv(h18, v1.V4H());
  __ Smaxv(h19, v1.V8H());
  __ Smaxv(s20, v2.V4S());

  END();

  RUN();

  ASSERT_EQUAL_128(0x0, 0x33, q16);
  ASSERT_EQUAL_128(0x0, 0x44, q17);
  ASSERT_EQUAL_128(0x0, 0x55ff, q18);
  ASSERT_EQUAL_128(0x0, 0x55ff, q19);
  ASSERT_EQUAL_128(0x0, 0x66555555, q20);
  TEARDOWN();
}


TEST(neon_acrosslanes_sminv) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x0011223344aafe80, 0x00112233aabbfc00);
  __ Movi(v1.V2D(), 0xfffa5555aaaaaaaa, 0x00000000ffaa55ff);
  __ Movi(v2.V2D(), 0xaaaaaaaa66555555, 0xffffffff0000aa00);

  __ Sminv(b16, v0.V8B());
  __ Sminv(b17, v0.V16B());
  __ Sminv(h18, v1.V4H());
  __ Sminv(h19, v1.V8H());
  __ Sminv(s20, v2.V4S());

  END();

  RUN();

  ASSERT_EQUAL_128(0x0, 0xaa, q16);
  ASSERT_EQUAL_128(0x0, 0x80, q17);
  ASSERT_EQUAL_128(0x0, 0xffaa, q18);
  ASSERT_EQUAL_128(0x0, 0xaaaa, q19);
  ASSERT_EQUAL_128(0x0, 0xaaaaaaaa, q20);
  TEARDOWN();
}

TEST(neon_acrosslanes_umaxv) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x0011223344aafe80, 0x00112233aabbfc00);
  __ Movi(v1.V2D(), 0x55aa5555aaaaffab, 0x00000000ffaa55ff);
  __ Movi(v2.V2D(), 0xaaaaaaaa66555555, 0xffffffff0000aa00);

  __ Umaxv(b16, v0.V8B());
  __ Umaxv(b17, v0.V16B());
  __ Umaxv(h18, v1.V4H());
  __ Umaxv(h19, v1.V8H());
  __ Umaxv(s20, v2.V4S());

  END();

  RUN();

  ASSERT_EQUAL_128(0x0, 0xfc, q16);
  ASSERT_EQUAL_128(0x0, 0xfe, q17);
  ASSERT_EQUAL_128(0x0, 0xffaa, q18);
  ASSERT_EQUAL_128(0x0, 0xffab, q19);
  ASSERT_EQUAL_128(0x0, 0xffffffff, q20);
  TEARDOWN();
}


TEST(neon_acrosslanes_uminv) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x0011223344aafe80, 0x02112233aabbfc01);
  __ Movi(v1.V2D(), 0xfffa5555aaaa0000, 0x00010003ffaa55ff);
  __ Movi(v2.V2D(), 0xaaaaaaaa66555555, 0xffffffff0000aa00);

  __ Uminv(b16, v0.V8B());
  __ Uminv(b17, v0.V16B());
  __ Uminv(h18, v1.V4H());
  __ Uminv(h19, v1.V8H());
  __ Uminv(s20, v2.V4S());

  END();

  RUN();

  ASSERT_EQUAL_128(0x0, 0x01, q16);
  ASSERT_EQUAL_128(0x0, 0x00, q17);
  ASSERT_EQUAL_128(0x0, 0x0001, q18);
  ASSERT_EQUAL_128(0x0, 0x0000, q19);
  ASSERT_EQUAL_128(0x0, 0x0000aa00, q20);
  TEARDOWN();
}

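// SMIN, UMAX and UMIN below are the elementwise counterparts: each
// destination lane is the minimum or maximum of the corresponding source
// lanes, using signed or unsigned ordering as named.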
TEST(neon_3same_smin) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0xaaaaaaaa55555555, 0xffffffff0000aa55);
  __ Movi(v1.V2D(), 0x55aa5555aaaaaaaa, 0x00000000ffaa55ff);

  __ Smin(v16.V8B(), v0.V8B(), v1.V8B());
  __ Smin(v18.V4H(), v0.V4H(), v1.V4H());
  __ Smin(v20.V2S(), v0.V2S(), v1.V2S());

  __ Smin(v17.V16B(), v0.V16B(), v1.V16B());
  __ Smin(v19.V8H(), v0.V8H(), v1.V8H());
  __ Smin(v21.V4S(), v0.V4S(), v1.V4S());
  END();

  RUN();

  ASSERT_EQUAL_128(0x0, 0xffffffffffaaaaff, q16);
  ASSERT_EQUAL_128(0x0, 0xffffffffffaaaa55, q18);
  ASSERT_EQUAL_128(0x0, 0xffffffffffaa55ff, q20);
  ASSERT_EQUAL_128(0xaaaaaaaaaaaaaaaa, 0xffffffffffaaaaff, q17);
  ASSERT_EQUAL_128(0xaaaaaaaaaaaaaaaa, 0xffffffffffaaaa55, q19);
  ASSERT_EQUAL_128(0xaaaaaaaaaaaaaaaa, 0xffffffffffaa55ff, q21);
  TEARDOWN();
}


TEST(neon_3same_umax) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0xaaaaaaaa55555555, 0xffffffff0000aa55);
  __ Movi(v1.V2D(), 0x55aa5555aaaaaaaa, 0x00000000ffaa55ff);

  __ Umax(v16.V8B(), v0.V8B(), v1.V8B());
  __ Umax(v18.V4H(), v0.V4H(), v1.V4H());
  __ Umax(v20.V2S(), v0.V2S(), v1.V2S());

  __ Umax(v17.V16B(), v0.V16B(), v1.V16B());
  __ Umax(v19.V8H(), v0.V8H(), v1.V8H());
  __ Umax(v21.V4S(), v0.V4S(), v1.V4S());
  END();

  RUN();

  ASSERT_EQUAL_128(0x0, 0xffffffffffaaaaff, q16);
  ASSERT_EQUAL_128(0x0, 0xffffffffffaaaa55, q18);
  ASSERT_EQUAL_128(0x0, 0xffffffffffaa55ff, q20);
  ASSERT_EQUAL_128(0xaaaaaaaaaaaaaaaa, 0xffffffffffaaaaff, q17);
  ASSERT_EQUAL_128(0xaaaaaaaaaaaaaaaa, 0xffffffffffaaaa55, q19);
  ASSERT_EQUAL_128(0xaaaaaaaaaaaaaaaa, 0xffffffffffaa55ff, q21);
  TEARDOWN();
}


TEST(neon_3same_umin) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0xaaaaaaaa55555555, 0xffffffff0000aa55);
  __ Movi(v1.V2D(), 0x55aa5555aaaaaaaa, 0x00000000ffaa55ff);

  __ Umin(v16.V8B(), v0.V8B(), v1.V8B());
  __ Umin(v18.V4H(), v0.V4H(), v1.V4H());
  __ Umin(v20.V2S(), v0.V2S(), v1.V2S());

  __ Umin(v17.V16B(), v0.V16B(), v1.V16B());
  __ Umin(v19.V8H(), v0.V8H(), v1.V8H());
  __ Umin(v21.V4S(), v0.V4S(), v1.V4S());
  END();

  RUN();

  ASSERT_EQUAL_128(0x0, 0x0000000000005555, q16);
  ASSERT_EQUAL_128(0x0, 0x00000000000055ff, q18);
  ASSERT_EQUAL_128(0x0, 0x000000000000aa55, q20);
  ASSERT_EQUAL_128(0x55aa555555555555, 0x0000000000005555, q17);
  ASSERT_EQUAL_128(0x55aa555555555555, 0x00000000000055ff, q19);
  ASSERT_EQUAL_128(0x55aa555555555555, 0x000000000000aa55, q21);
  TEARDOWN();
}

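// FCADD treats each pair of adjacent lanes as a complex number (real,
// imaginary) and rotates the second operand in the complex plane before
// adding. With #90:
//   d.real = n.real - m.imag;  d.imag = n.imag + m.real
// and with #270:
//   d.real = n.real + m.imag;  d.imag = n.imag - m.real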
TEST(neon_3same_extra_fcadd) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP, CPUFeatures::kFcma);

  START();

  // (0i, 5) (d)
  __ Movi(v0.V2D(), 0x0, 0x4014000000000000);
  // (5i, 0) (d)
  __ Movi(v1.V2D(), 0x4014000000000000, 0x0);
  // (10i, 10) (d)
  __ Movi(v2.V2D(), 0x4024000000000000, 0x4024000000000000);
  // (5i, 5), (5i, 5) (f)
  __ Movi(v3.V2D(), 0x40A0000040A00000, 0x40A0000040A00000);
  // (5i, 5), (0i, 0) (f)
  __ Movi(v4.V2D(), 0x40A0000040A00000, 0x0);
  // (324567i, 16000) (f)
  __ Movi(v5.V2D(), 0x0, 0x489E7AE0467A0000);

  // Subtraction (10, 10) - (5, 5) == (5, 5)
  __ Fcadd(v31.V2D(), v2.V2D(), v1.V2D(), 90);
  __ Fcadd(v31.V2D(), v31.V2D(), v0.V2D(), 270);

  // Addition (10, 10) + (5, 5) == (15, 15)
  __ Fcadd(v30.V2D(), v2.V2D(), v1.V2D(), 270);
  __ Fcadd(v30.V2D(), v30.V2D(), v0.V2D(), 90);

  // 2S
  __ Fcadd(v29.V2S(), v4.V2S(), v5.V2S(), 90);
  __ Fcadd(v28.V2S(), v4.V2S(), v5.V2S(), 270);

  // 4S
  __ Fcadd(v27.V4S(), v3.V4S(), v4.V4S(), 90);
  __ Fcadd(v26.V4S(), v3.V4S(), v4.V4S(), 270);

  END();

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
  RUN();
  ASSERT_EQUAL_128(0x4014000000000000, 0x4014000000000000, q31);
  ASSERT_EQUAL_128(0x402E000000000000, 0x402E000000000000, q30);
  ASSERT_EQUAL_128(0x0, 0x467a0000c89e7ae0, q29);  // (16000i, -324567)
  ASSERT_EQUAL_128(0x0, 0xc67a0000489e7ae0, q28);  // (-16000i, 324567)
  ASSERT_EQUAL_128(0x4120000000000000, 0x40A0000040A00000, q27);
  ASSERT_EQUAL_128(0x0000000041200000, 0x40A0000040A00000, q26);
#endif
  TEARDOWN();
}

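// FCMLA accumulates one partial complex product per instruction, selected
// by the rotation. With d as the accumulator and n, m as the sources:
//   #0:   d.real += n.real * m.real;  d.imag += n.real * m.imag
//   #90:  d.real -= n.imag * m.imag;  d.imag += n.imag * m.real
//   #180: d.real -= n.real * m.real;  d.imag -= n.real * m.imag
//   #270: d.real += n.imag * m.imag;  d.imag -= n.imag * m.real
// A #0/#90 pair therefore accumulates the full complex product n * m.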
TEST(neon_3same_extra_fcmla) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP, CPUFeatures::kFcma);

  START();

  __ Movi(v1.V2D(), 0x0, 0x40A0000040400000);  // (5i, 3) (f)
  __ Movi(v2.V2D(), 0x0, 0x4040000040A00000);  // (3i, 5) (f)

  __ Movi(v3.V2D(), 0x0, 0x4000000040400000);  // (2i, 3) (f)
  __ Movi(v4.V2D(), 0x0, 0x40E000003F800000);  // (7i, 1) (f)

  __ Movi(v5.V2D(), 0x0, 0x4000000040400000);  // (2i, 3) (f)
  __ Movi(v6.V2D(), 0x0, 0x408000003F800000);  // (4i, 1) (f)

  // (1.5i, 2.5), (31.5i, 1024) (f)
  __ Movi(v7.V2D(), 0x3FC0000040200000, 0x41FC000044800000);
  // (2048i, 412.75), (3645i, 0) (f)
  __ Movi(v8.V2D(), 0x4500000043CE6000, 0x4563D00000000000);
  // (2000i, 450000) (d)
  __ Movi(v9.V2D(), 0x409F400000000000, 0x411B774000000000);
  // (30000i, 1250) (d)
  __ Movi(v10.V2D(), 0x40DD4C0000000000, 0x4093880000000000);

  // DST
  __ Movi(v24.V2D(), 0x0, 0x0);
  __ Movi(v25.V2D(), 0x0, 0x0);
  __ Movi(v26.V2D(), 0x0, 0x0);
  __ Movi(v27.V2D(), 0x0, 0x0);
  __ Movi(v28.V2D(), 0x0, 0x0);
  __ Movi(v29.V2D(), 0x0, 0x0);
  __ Movi(v30.V2D(), 0x0, 0x0);
  __ Movi(v31.V2D(), 0x0, 0x0);

  // Full calculations
  __ Fcmla(v31.V2S(), v1.V2S(), v2.V2S(), 90);
  __ Fcmla(v31.V2S(), v1.V2S(), v2.V2S(), 0);

  __ Fcmla(v30.V2S(), v3.V2S(), v4.V2S(), 0);
  __ Fcmla(v30.V2S(), v3.V2S(), v4.V2S(), 90);

  __ Fcmla(v29.V2S(), v5.V2S(), v6.V2S(), 90);
  __ Fcmla(v29.V2S(), v5.V2S(), v6.V2S(), 0);

  __ Fcmla(v28.V2D(), v9.V2D(), v10.V2D(), 0);
  __ Fcmla(v28.V2D(), v9.V2D(), v10.V2D(), 90);

  // Partial checks
  __ Fcmla(v27.V2S(), v1.V2S(), v2.V2S(), 0);
  __ Fcmla(v26.V2S(), v2.V2S(), v1.V2S(), 0);

  __ Fcmla(v25.V4S(), v7.V4S(), v8.V4S(), 270);
  __ Fcmla(v24.V4S(), v7.V4S(), v8.V4S(), 180);

  END();

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
  RUN();

  ASSERT_EQUAL_128(0x0, 0x4208000000000000, q31);  // (34i, 0)
  ASSERT_EQUAL_128(0x0, 0x41B80000C1300000, q30);  // (23i, -11)
  ASSERT_EQUAL_128(0x0, 0x41600000C0A00000, q29);  // (14i, -5)

  // (13502500000i, 502500000)
  ASSERT_EQUAL_128(0x4209267E65000000, 0x41BDF38AA0000000, q28);
  ASSERT_EQUAL_128(0x0, 0x4110000041700000, q27);  //  (9i, 15)
  ASSERT_EQUAL_128(0x0, 0x41C8000041700000, q26);  // (25i, 15)
  // (-619.125i, 3072), (0i, 114817.5)
  ASSERT_EQUAL_128(0xc41ac80045400000, 0x0000000047e040c0, q25);
  // (-5120i, -1031.875), (-3732480i, 0)
  ASSERT_EQUAL_128(0xc5a00000c480fc00, 0xca63d00000000000, q24);
#endif
  TEARDOWN();
}

TEST(neon_byelement_fcmla)22655 TEST(neon_byelement_fcmla) {
22656   SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP, CPUFeatures::kFcma);
22657 
22658   START();
22659 
22660   // (5i, 3), (5i, 3) (f)
22661   __ Movi(v1.V2D(), 0x40A0000040400000, 0x40A0000040400000);
22662   // (3i, 5), (3i, 5) (f)
22663   __ Movi(v2.V2D(), 0x4040000040A00000, 0x4040000040A00000);
22664   // (7i, 1), (5i, 3) (f)
22665   __ Movi(v3.V2D(), 0x40E000003F800000, 0x40A0000040400000);
22666   // (4i, 1), (3i, 5) (f)
22667   __ Movi(v4.V2D(), 0x408000003F800000, 0x4040000040A00000);
22668   // (4i, 1), (7i, 1) (f)
22669   __ Movi(v5.V2D(), 0x408000003F800000, 0x40E000003F800000);
22670   // (2i, 3), (0, 0) (f)
22671   __ Movi(v6.V2D(), 0x4000000040400000, 0x0);
22672 
22673   // DST
22674   __ Movi(v22.V2D(), 0x0, 0x0);
22675   __ Movi(v23.V2D(), 0x0, 0x0);
22676   __ Movi(v24.V2D(), 0x0, 0x0);
22677   __ Movi(v25.V2D(), 0x0, 0x0);
22678   __ Movi(v26.V2D(), 0x0, 0x0);
22679   __ Movi(v27.V2D(), 0x0, 0x0);
22680   __ Movi(v28.V2D(), 0x0, 0x0);
22681   __ Movi(v29.V2D(), 0x0, 0x0);
22682   __ Movi(v30.V2D(), 0x0, 0x0);
22683   __ Movi(v31.V2D(), 0x0, 0x0);
22684 
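  // The by-element form repeats a single complex pair of the second source,
  // selected by the index argument, against every pair of the first source:
  // v2.S(), 0 below selects the pair in S lanes <1:0> of v2, and v6.S(), 1
  // selects the pair in S lanes <3:2> of v6.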
  // Full calculation (pairs)
  __ Fcmla(v31.V4S(), v1.V4S(), v2.S(), 0, 90);
  __ Fcmla(v31.V4S(), v1.V4S(), v2.S(), 0, 0);
  __ Fcmla(v30.V4S(), v5.V4S(), v6.S(), 1, 90);
  __ Fcmla(v30.V4S(), v5.V4S(), v6.S(), 1, 0);

  // Rotations
  __ Fcmla(v29.V4S(), v3.V4S(), v4.S(), 1, 0);
  __ Fcmla(v28.V4S(), v3.V4S(), v4.S(), 1, 90);
  __ Fcmla(v27.V4S(), v3.V4S(), v4.S(), 1, 180);
  __ Fcmla(v26.V4S(), v3.V4S(), v4.S(), 1, 270);
  __ Fcmla(v25.V4S(), v3.V4S(), v4.S(), 0, 270);
  __ Fcmla(v24.V4S(), v3.V4S(), v4.S(), 0, 180);
  __ Fcmla(v23.V4S(), v3.V4S(), v4.S(), 0, 90);
  __ Fcmla(v22.V4S(), v3.V4S(), v4.S(), 0, 0);

  END();

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
  RUN();
  // (34i, 0), (34i, 0)
  ASSERT_EQUAL_128(0x4208000000000000, 0x4208000000000000, q31);
  // (14i, -5), (23i, -11)
  ASSERT_EQUAL_128(0x41600000C0A00000, 0x41B80000C1300000, q30);
  // (4i, 1), (12i, 3)
  ASSERT_EQUAL_128(0x408000003f800000, 0x4140000040400000, q29);
  // (7i, -28), (5i, -20)
  ASSERT_EQUAL_128(0x40e00000c1e00000, 0x40a00000c1a00000, q28);
  // (-4i, -1), (-12i, -3)
  ASSERT_EQUAL_128(0xc0800000bf800000, 0xc1400000c0400000, q27);
  // (-7i, 28), (-5i, 20)
  ASSERT_EQUAL_128(0xc0e0000041e00000, 0xc0a0000041a00000, q26);
  // (-35i, 21), (-25i, 15)
  ASSERT_EQUAL_128(0xc20c000041a80000, 0xc1c8000041700000, q25);
  // (-3i, -5), (-9i, -15)
  ASSERT_EQUAL_128(0xc0400000c0a00000, 0xc1100000c1700000, q24);
  // (35i, -21), (25i, -15)
  ASSERT_EQUAL_128(0x420c0000c1a80000, 0x41c80000c1700000, q23);
  // (3i, 5), (9i, 15)
  ASSERT_EQUAL_128(0x4040000040a00000, 0x4110000041700000, q22);
#endif

  TEARDOWN();
}


TEST(neon_2regmisc_mvn) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x00ff00ffff0055aa, 0x55aa55aa55aa55aa);

  __ Mvn(v16.V16B(), v0.V16B());
  __ Mvn(v17.V8H(), v0.V8H());
  __ Mvn(v18.V4S(), v0.V4S());
  __ Mvn(v19.V2D(), v0.V2D());

  __ Mvn(v24.V8B(), v0.V8B());
  __ Mvn(v25.V4H(), v0.V4H());
  __ Mvn(v26.V2S(), v0.V2S());

  END();

  RUN();

  ASSERT_EQUAL_128(0xff00ff0000ffaa55, 0xaa55aa55aa55aa55, q16);
  ASSERT_EQUAL_128(0xff00ff0000ffaa55, 0xaa55aa55aa55aa55, q17);
  ASSERT_EQUAL_128(0xff00ff0000ffaa55, 0xaa55aa55aa55aa55, q18);
  ASSERT_EQUAL_128(0xff00ff0000ffaa55, 0xaa55aa55aa55aa55, q19);

  ASSERT_EQUAL_128(0x0, 0xaa55aa55aa55aa55, q24);
  ASSERT_EQUAL_128(0x0, 0xaa55aa55aa55aa55, q25);
  ASSERT_EQUAL_128(0x0, 0xaa55aa55aa55aa55, q26);
  TEARDOWN();
}


TEST(neon_2regmisc_not) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x00ff00ffff0055aa, 0x55aa55aa55aa55aa);
  __ Movi(v1.V2D(), 0, 0x00ffff0000ffff00);

  __ Not(v16.V16B(), v0.V16B());
  __ Not(v17.V8B(), v1.V8B());
  END();

  RUN();

  ASSERT_EQUAL_128(0xff00ff0000ffaa55, 0xaa55aa55aa55aa55, q16);
  ASSERT_EQUAL_128(0x0, 0xff0000ffff0000ff, q17);
  TEARDOWN();
}


TEST(neon_2regmisc_cls_clz_cnt) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x0001020304050607, 0x08090a0b0c0d0e0f);
  __ Movi(v1.V2D(), 0xfedcba9876543210, 0x0123456789abcdef);

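  // Cls counts the leading bits that match the sign bit (excluding the sign
  // bit itself), Clz counts leading zeros, and Cnt counts the set bits in
  // each byte. For B-sized lanes, for example, Cls(0x01) == 6,
  // Clz(0x08) == 4 and Cnt(0x0f) == 4.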
  __ Cls(v16.V8B(), v1.V8B());
  __ Cls(v17.V16B(), v1.V16B());
  __ Cls(v18.V4H(), v1.V4H());
  __ Cls(v19.V8H(), v1.V8H());
  __ Cls(v20.V2S(), v1.V2S());
  __ Cls(v21.V4S(), v1.V4S());

  __ Clz(v22.V8B(), v0.V8B());
  __ Clz(v23.V16B(), v0.V16B());
  __ Clz(v24.V4H(), v0.V4H());
  __ Clz(v25.V8H(), v0.V8H());
  __ Clz(v26.V2S(), v0.V2S());
  __ Clz(v27.V4S(), v0.V4S());

  __ Cnt(v28.V8B(), v0.V8B());
  __ Cnt(v29.V16B(), v1.V16B());

  END();

  RUN();

  ASSERT_EQUAL_128(0x0000000000000000, 0x0601000000000102, q16);
  ASSERT_EQUAL_128(0x0601000000000102, 0x0601000000000102, q17);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0006000000000001, q18);
  ASSERT_EQUAL_128(0x0006000000000001, 0x0006000000000001, q19);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000600000000, q20);
  ASSERT_EQUAL_128(0x0000000600000000, 0x0000000600000000, q21);

  ASSERT_EQUAL_128(0x0000000000000000, 0x0404040404040404, q22);
  ASSERT_EQUAL_128(0x0807060605050505, 0x0404040404040404, q23);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0004000400040004, q24);
  ASSERT_EQUAL_128(0x000f000600050005, 0x0004000400040004, q25);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000400000004, q26);
  ASSERT_EQUAL_128(0x0000000f00000005, 0x0000000400000004, q27);

  ASSERT_EQUAL_128(0x0000000000000000, 0x0102020302030304, q28);
  ASSERT_EQUAL_128(0x0705050305030301, 0x0103030503050507, q29);

  TEARDOWN();
}

TEST(neon_2regmisc_rev) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x0001020304050607, 0x08090a0b0c0d0e0f);
  __ Movi(v1.V2D(), 0xfedcba9876543210, 0x0123456789abcdef);

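  // Rev16, Rev32 and Rev64 reverse the byte order within each 16-, 32- or
  // 64-bit container, while Rbit reverses the bit order within each byte.
  // For example, Rev64 maps the low doubleword 0x08090a0b0c0d0e0f to
  // 0x0f0e0d0c0b0a0908.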
  __ Rev16(v16.V8B(), v0.V8B());
  __ Rev16(v17.V16B(), v0.V16B());

  __ Rev32(v18.V8B(), v0.V8B());
  __ Rev32(v19.V16B(), v0.V16B());
  __ Rev32(v20.V4H(), v0.V4H());
  __ Rev32(v21.V8H(), v0.V8H());

  __ Rev64(v22.V8B(), v0.V8B());
  __ Rev64(v23.V16B(), v0.V16B());
  __ Rev64(v24.V4H(), v0.V4H());
  __ Rev64(v25.V8H(), v0.V8H());
  __ Rev64(v26.V2S(), v0.V2S());
  __ Rev64(v27.V4S(), v0.V4S());

  __ Rbit(v28.V8B(), v1.V8B());
  __ Rbit(v29.V16B(), v1.V16B());

  END();

  RUN();

  ASSERT_EQUAL_128(0x0000000000000000, 0x09080b0a0d0c0f0e, q16);
  ASSERT_EQUAL_128(0x0100030205040706, 0x09080b0a0d0c0f0e, q17);

  ASSERT_EQUAL_128(0x0000000000000000, 0x0b0a09080f0e0d0c, q18);
  ASSERT_EQUAL_128(0x0302010007060504, 0x0b0a09080f0e0d0c, q19);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0a0b08090e0f0c0d, q20);
  ASSERT_EQUAL_128(0x0203000106070405, 0x0a0b08090e0f0c0d, q21);

  ASSERT_EQUAL_128(0x0000000000000000, 0x0f0e0d0c0b0a0908, q22);
  ASSERT_EQUAL_128(0x0706050403020100, 0x0f0e0d0c0b0a0908, q23);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0e0f0c0d0a0b0809, q24);
  ASSERT_EQUAL_128(0x0607040502030001, 0x0e0f0c0d0a0b0809, q25);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0c0d0e0f08090a0b, q26);
  ASSERT_EQUAL_128(0x0405060700010203, 0x0c0d0e0f08090a0b, q27);

  ASSERT_EQUAL_128(0x0000000000000000, 0x80c4a2e691d5b3f7, q28);
  ASSERT_EQUAL_128(0x7f3b5d196e2a4c08, 0x80c4a2e691d5b3f7, q29);

  TEARDOWN();
}


TEST(neon_sli) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x0001020304050607, 0x08090a0b0c0d0e0f);
  __ Movi(v1.V2D(), 0xfedcba9876543210, 0x0123456789abcdef);

  __ Mov(v16.V2D(), v0.V2D());
  __ Mov(v17.V2D(), v0.V2D());
  __ Mov(v18.V2D(), v0.V2D());
  __ Mov(v19.V2D(), v0.V2D());
  __ Mov(v20.V2D(), v0.V2D());
  __ Mov(v21.V2D(), v0.V2D());
  __ Mov(v22.V2D(), v0.V2D());
  __ Mov(v23.V2D(), v0.V2D());

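  // Sli (shift left and insert) shifts each source element left and inserts
  // the result into the destination, leaving the low 'shift' bits of each
  // destination element unchanged. For the byte lanes below with shift 4,
  // source 0xef over destination 0x0f gives (0xef << 4) | 0x0f == 0xff.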
  __ Sli(v16.V8B(), v1.V8B(), 4);
  __ Sli(v17.V16B(), v1.V16B(), 7);
  __ Sli(v18.V4H(), v1.V4H(), 8);
  __ Sli(v19.V8H(), v1.V8H(), 15);
  __ Sli(v20.V2S(), v1.V2S(), 0);
  __ Sli(v21.V4S(), v1.V4S(), 31);
  __ Sli(v22.V2D(), v1.V2D(), 48);

  __ Sli(d23, d1, 48);

  END();

  RUN();

  ASSERT_EQUAL_128(0x0000000000000000, 0x18395a7b9cbddeff, q16);
  ASSERT_EQUAL_128(0x0001020304050607, 0x88898a8b8c8d8e8f, q17);
  ASSERT_EQUAL_128(0x0000000000000000, 0x2309670bab0def0f, q18);
  ASSERT_EQUAL_128(0x0001020304050607, 0x88098a0b8c0d8e0f, q19);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0123456789abcdef, q20);
  ASSERT_EQUAL_128(0x0001020304050607, 0x88090a0b8c0d0e0f, q21);
  ASSERT_EQUAL_128(0x3210020304050607, 0xcdef0a0b0c0d0e0f, q22);

  ASSERT_EQUAL_128(0x0000000000000000, 0xcdef0a0b0c0d0e0f, q23);

  TEARDOWN();
}


TEST(neon_sri) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x0001020304050607, 0x08090a0b0c0d0e0f);
  __ Movi(v1.V2D(), 0xfedcba9876543210, 0x0123456789abcdef);

  __ Mov(v16.V2D(), v0.V2D());
  __ Mov(v17.V2D(), v0.V2D());
  __ Mov(v18.V2D(), v0.V2D());
  __ Mov(v19.V2D(), v0.V2D());
  __ Mov(v20.V2D(), v0.V2D());
  __ Mov(v21.V2D(), v0.V2D());
  __ Mov(v22.V2D(), v0.V2D());
  __ Mov(v23.V2D(), v0.V2D());

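  // Sri (shift right and insert) shifts each source element right and
  // inserts the result, leaving the high 'shift' bits of each destination
  // element unchanged. For the byte lanes below with shift 4, source 0xef
  // over destination 0x0f gives (0xef >> 4) | (0x0f & 0xf0) == 0x0e.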
  __ Sri(v16.V8B(), v1.V8B(), 4);
  __ Sri(v17.V16B(), v1.V16B(), 7);
  __ Sri(v18.V4H(), v1.V4H(), 8);
  __ Sri(v19.V8H(), v1.V8H(), 15);
  __ Sri(v20.V2S(), v1.V2S(), 1);
  __ Sri(v21.V4S(), v1.V4S(), 31);
  __ Sri(v22.V2D(), v1.V2D(), 48);

  __ Sri(d23, d1, 48);

  END();

  RUN();

  ASSERT_EQUAL_128(0x0000000000000000, 0x00020406080a0c0e, q16);
  ASSERT_EQUAL_128(0x0101030304040606, 0x08080a0a0d0d0f0f, q17);
  ASSERT_EQUAL_128(0x0000000000000000, 0x08010a450c890ecd, q18);
  ASSERT_EQUAL_128(0x0001020304040606, 0x08080a0a0c0d0e0f, q19);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0091a2b344d5e6f7, q20);
  ASSERT_EQUAL_128(0x0001020304050606, 0x08090a0a0c0d0e0f, q21);
  ASSERT_EQUAL_128(0x000102030405fedc, 0x08090a0b0c0d0123, q22);

  ASSERT_EQUAL_128(0x0000000000000000, 0x08090a0b0c0d0123, q23);

  TEARDOWN();
}


TEST(neon_shrn) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

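  // Shrn shifts each wide element right, truncates it to half width and
  // writes the result to the lower half of the destination; the '2' variants
  // fill the upper half instead, preserving the lower. For example, the
  // halfword 0x8081 shifted right by 8 narrows to the byte 0x80.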
  __ Shrn(v16.V8B(), v0.V8H(), 8);
  __ Shrn2(v16.V16B(), v1.V8H(), 1);
  __ Shrn(v17.V4H(), v1.V4S(), 16);
  __ Shrn2(v17.V8H(), v2.V4S(), 1);
  __ Shrn(v18.V2S(), v3.V2D(), 32);
  __ Shrn2(v18.V4S(), v3.V2D(), 1);

  END();

  RUN();
  ASSERT_EQUAL_128(0x0000ff00ff0000ff, 0x7f00817f80ff0180, q16);
  ASSERT_EQUAL_128(0x0000ffff0000ffff, 0x8000ffffffff0001, q17);
  ASSERT_EQUAL_128(0x00000000ffffffff, 0x800000007fffffff, q18);
  TEARDOWN();
}


TEST(neon_rshrn) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

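  // Rshrn rounds to nearest by adding 1 << (shift - 1) before shifting. For
  // the halfword 0x8081 with a shift of 8, that is (0x8081 + 0x80) >> 8,
  // which narrows to 0x81 rather than the 0x80 that Shrn produces.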
  __ Rshrn(v16.V8B(), v0.V8H(), 8);
  __ Rshrn2(v16.V16B(), v1.V8H(), 1);
  __ Rshrn(v17.V4H(), v1.V4S(), 16);
  __ Rshrn2(v17.V8H(), v2.V4S(), 1);
  __ Rshrn(v18.V2S(), v3.V2D(), 32);
  __ Rshrn2(v18.V4S(), v3.V2D(), 1);

  END();

  RUN();
  ASSERT_EQUAL_128(0x0001000000000100, 0x7f01827f81ff0181, q16);
  ASSERT_EQUAL_128(0x0000000000000000, 0x8001ffffffff0001, q17);
  ASSERT_EQUAL_128(0x0000000100000000, 0x8000000080000000, q18);
  TEARDOWN();
}


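// The next six tests cover the saturating narrowing shifts. Uqshrn treats
// elements as unsigned and saturates to the unsigned narrow range, Sqshrn
// treats them as signed and saturates to the signed narrow range, and
// Sqshrun reads signed elements but saturates to the unsigned narrow range,
// so negative inputs clamp to zero. The rounding forms (Uqrshrn, Sqrshrn,
// Sqrshrun) first add 1 << (shift - 1), then shift, narrow and saturate.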
TEST(neon_uqshrn) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  __ Uqshrn(v16.V8B(), v0.V8H(), 8);
  __ Uqshrn2(v16.V16B(), v1.V8H(), 1);
  __ Uqshrn(v17.V4H(), v1.V4S(), 16);
  __ Uqshrn2(v17.V8H(), v2.V4S(), 1);
  __ Uqshrn(v18.V2S(), v3.V2D(), 32);
  __ Uqshrn2(v18.V4S(), v3.V2D(), 1);

  __ Uqshrn(b19, h0, 8);
  __ Uqshrn(h20, s1, 16);
  __ Uqshrn(s21, d3, 32);

  END();

  RUN();
  ASSERT_EQUAL_128(0xffffff00ff0000ff, 0x7f00817f80ff0180, q16);
  ASSERT_EQUAL_128(0xffffffff0000ffff, 0x8000ffffffff0001, q17);
  ASSERT_EQUAL_128(0xffffffffffffffff, 0x800000007fffffff, q18);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000080, q19);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000001, q20);
  ASSERT_EQUAL_128(0x0000000000000000, 0x000000007fffffff, q21);
  TEARDOWN();
}


TEST(neon_uqrshrn) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  __ Uqrshrn(v16.V8B(), v0.V8H(), 8);
  __ Uqrshrn2(v16.V16B(), v1.V8H(), 1);
  __ Uqrshrn(v17.V4H(), v1.V4S(), 16);
  __ Uqrshrn2(v17.V8H(), v2.V4S(), 1);
  __ Uqrshrn(v18.V2S(), v3.V2D(), 32);
  __ Uqrshrn2(v18.V4S(), v3.V2D(), 1);

  __ Uqrshrn(b19, h0, 8);
  __ Uqrshrn(h20, s1, 16);
  __ Uqrshrn(s21, d3, 32);

  END();

  RUN();
  ASSERT_EQUAL_128(0xffffff00ff0001ff, 0x7f01827f81ff0181, q16);
  ASSERT_EQUAL_128(0xffffffff0000ffff, 0x8001ffffffff0001, q17);
  ASSERT_EQUAL_128(0xffffffffffffffff, 0x8000000080000000, q18);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000081, q19);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000001, q20);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000080000000, q21);
  TEARDOWN();
}


TEST(neon_sqshrn) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  __ Sqshrn(v16.V8B(), v0.V8H(), 8);
  __ Sqshrn2(v16.V16B(), v1.V8H(), 1);
  __ Sqshrn(v17.V4H(), v1.V4S(), 16);
  __ Sqshrn2(v17.V8H(), v2.V4S(), 1);
  __ Sqshrn(v18.V2S(), v3.V2D(), 32);
  __ Sqshrn2(v18.V4S(), v3.V2D(), 1);

  __ Sqshrn(b19, h0, 8);
  __ Sqshrn(h20, s1, 16);
  __ Sqshrn(s21, d3, 32);

  END();

  RUN();
  ASSERT_EQUAL_128(0x8080ff00ff00007f, 0x7f00817f80ff0180, q16);
  ASSERT_EQUAL_128(0x8000ffff00007fff, 0x8000ffffffff0001, q17);
  ASSERT_EQUAL_128(0x800000007fffffff, 0x800000007fffffff, q18);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000080, q19);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000001, q20);
  ASSERT_EQUAL_128(0x0000000000000000, 0x000000007fffffff, q21);
  TEARDOWN();
}


TEST(neon_sqrshrn) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  __ Sqrshrn(v16.V8B(), v0.V8H(), 8);
  __ Sqrshrn2(v16.V16B(), v1.V8H(), 1);
  __ Sqrshrn(v17.V4H(), v1.V4S(), 16);
  __ Sqrshrn2(v17.V8H(), v2.V4S(), 1);
  __ Sqrshrn(v18.V2S(), v3.V2D(), 32);
  __ Sqrshrn2(v18.V4S(), v3.V2D(), 1);

  __ Sqrshrn(b19, h0, 8);
  __ Sqrshrn(h20, s1, 16);
  __ Sqrshrn(s21, d3, 32);

  END();

  RUN();
  ASSERT_EQUAL_128(0x808000000000017f, 0x7f01827f81ff0181, q16);
  ASSERT_EQUAL_128(0x8000000000007fff, 0x8001ffffffff0001, q17);
  ASSERT_EQUAL_128(0x800000007fffffff, 0x800000007fffffff, q18);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000081, q19);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000001, q20);
  ASSERT_EQUAL_128(0x0000000000000000, 0x000000007fffffff, q21);
  TEARDOWN();
}


TEST(neon_sqshrun) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  __ Sqshrun(v16.V8B(), v0.V8H(), 8);
  __ Sqshrun2(v16.V16B(), v1.V8H(), 1);
  __ Sqshrun(v17.V4H(), v1.V4S(), 16);
  __ Sqshrun2(v17.V8H(), v2.V4S(), 1);
  __ Sqshrun(v18.V2S(), v3.V2D(), 32);
  __ Sqshrun2(v18.V4S(), v3.V2D(), 1);

  __ Sqshrun(b19, h0, 8);
  __ Sqshrun(h20, s1, 16);
  __ Sqshrun(s21, d3, 32);

  END();

  RUN();
  ASSERT_EQUAL_128(0x00000000000000ff, 0x7f00007f00000100, q16);
  ASSERT_EQUAL_128(0x000000000000ffff, 0x0000000000000001, q17);
  ASSERT_EQUAL_128(0x00000000ffffffff, 0x000000007fffffff, q18);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q19);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000001, q20);
  ASSERT_EQUAL_128(0x0000000000000000, 0x000000007fffffff, q21);
  TEARDOWN();
}


TEST(neon_sqrshrun) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  __ Sqrshrun(v16.V8B(), v0.V8H(), 8);
  __ Sqrshrun2(v16.V16B(), v1.V8H(), 1);
  __ Sqrshrun(v17.V4H(), v1.V4S(), 16);
  __ Sqrshrun2(v17.V8H(), v2.V4S(), 1);
  __ Sqrshrun(v18.V2S(), v3.V2D(), 32);
  __ Sqrshrun2(v18.V4S(), v3.V2D(), 1);

  __ Sqrshrun(b19, h0, 8);
  __ Sqrshrun(h20, s1, 16);
  __ Sqrshrun(s21, d3, 32);

  END();

  RUN();
  ASSERT_EQUAL_128(0x00000000000001ff, 0x7f01007f00000100, q16);
  ASSERT_EQUAL_128(0x000000000000ffff, 0x0000000000000001, q17);
  ASSERT_EQUAL_128(0x00000000ffffffff, 0x0000000080000000, q18);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q19);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000001, q20);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000080000000, q21);
  TEARDOWN();
}

TEST(neon_modimm_bic) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v16.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
  __ Movi(v17.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
  __ Movi(v18.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
  __ Movi(v19.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
  __ Movi(v20.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
  __ Movi(v21.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
  __ Movi(v22.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
  __ Movi(v23.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
  __ Movi(v24.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
  __ Movi(v25.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
  __ Movi(v26.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
  __ Movi(v27.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);

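  // The immediate form of Bic clears bits: each element is ANDed with the
  // inverse of (imm8 << shift). Orr, tested further below, is the
  // counterpart that ORs (imm8 << shift) in. For example,
  // Bic(v17.V4H(), 0xff, 8) clears the high byte of every halfword, turning
  // 0x5555 into 0x0055.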
  __ Bic(v16.V4H(), 0x00, 0);
  __ Bic(v17.V4H(), 0xff, 8);
  __ Bic(v18.V8H(), 0x00, 0);
  __ Bic(v19.V8H(), 0xff, 8);

  __ Bic(v20.V2S(), 0x00, 0);
  __ Bic(v21.V2S(), 0xff, 8);
  __ Bic(v22.V2S(), 0x00, 16);
  __ Bic(v23.V2S(), 0xff, 24);

  __ Bic(v24.V4S(), 0xff, 0);
  __ Bic(v25.V4S(), 0x00, 8);
  __ Bic(v26.V4S(), 0xff, 16);
  __ Bic(v27.V4S(), 0x00, 24);

  END();

  RUN();

  ASSERT_EQUAL_128(0x0, 0x5555ffff0000aaaa, q16);
  ASSERT_EQUAL_128(0x0, 0x005500ff000000aa, q17);
  ASSERT_EQUAL_128(0x00aaff55ff0055aa, 0x5555ffff0000aaaa, q18);
  ASSERT_EQUAL_128(0x00aa0055000000aa, 0x005500ff000000aa, q19);

  ASSERT_EQUAL_128(0x0, 0x5555ffff0000aaaa, q20);
  ASSERT_EQUAL_128(0x0, 0x555500ff000000aa, q21);
  ASSERT_EQUAL_128(0x0, 0x5555ffff0000aaaa, q22);
  ASSERT_EQUAL_128(0x0, 0x0055ffff0000aaaa, q23);

  ASSERT_EQUAL_128(0x00aaff00ff005500, 0x5555ff000000aa00, q24);
  ASSERT_EQUAL_128(0x00aaff55ff0055aa, 0x5555ffff0000aaaa, q25);
  ASSERT_EQUAL_128(0x0000ff55ff0055aa, 0x5500ffff0000aaaa, q26);
  ASSERT_EQUAL_128(0x00aaff55ff0055aa, 0x5555ffff0000aaaa, q27);

  TEARDOWN();
}


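// The '_any' Movi tests exercise the MacroAssembler rather than a single
// instruction: for values that no one MOVI/MVNI encoding can produce (such
// as 0xabcd below), Movi falls back to synthesising the immediate with a
// short instruction sequence.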
TEST(neon_modimm_movi_16bit_any) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V4H(), 0xabab);
  __ Movi(v1.V4H(), 0xab00);
  __ Movi(v2.V4H(), 0xabff);
  __ Movi(v3.V8H(), 0x00ab);
  __ Movi(v4.V8H(), 0xffab);
  __ Movi(v5.V8H(), 0xabcd);

  END();

  RUN();

  ASSERT_EQUAL_128(0x0, 0xabababababababab, q0);
  ASSERT_EQUAL_128(0x0, 0xab00ab00ab00ab00, q1);
  ASSERT_EQUAL_128(0x0, 0xabffabffabffabff, q2);
  ASSERT_EQUAL_128(0x00ab00ab00ab00ab, 0x00ab00ab00ab00ab, q3);
  ASSERT_EQUAL_128(0xffabffabffabffab, 0xffabffabffabffab, q4);
  ASSERT_EQUAL_128(0xabcdabcdabcdabcd, 0xabcdabcdabcdabcd, q5);

  TEARDOWN();
}


TEST(neon_modimm_movi_32bit_any) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2S(), 0x000000ab);
  __ Movi(v1.V2S(), 0x0000ab00);
  __ Movi(v2.V4S(), 0x00ab0000);
  __ Movi(v3.V4S(), 0xab000000);

  __ Movi(v4.V2S(), 0xffffffab);
  __ Movi(v5.V2S(), 0xffffabff);
  __ Movi(v6.V4S(), 0xffabffff);
  __ Movi(v7.V4S(), 0xabffffff);

  __ Movi(v16.V2S(), 0x0000abff);
  __ Movi(v17.V2S(), 0x00abffff);
  __ Movi(v18.V4S(), 0xffab0000);
  __ Movi(v19.V4S(), 0xffffab00);

  __ Movi(v20.V4S(), 0xabababab);
  __ Movi(v21.V4S(), 0xabcdabcd);
  __ Movi(v22.V4S(), 0xabcdef01);
  __ Movi(v23.V4S(), 0x00ffff00);

  END();

  RUN();

  ASSERT_EQUAL_128(0x0, 0x000000ab000000ab, q0);
  ASSERT_EQUAL_128(0x0, 0x0000ab000000ab00, q1);
  ASSERT_EQUAL_128(0x00ab000000ab0000, 0x00ab000000ab0000, q2);
  ASSERT_EQUAL_128(0xab000000ab000000, 0xab000000ab000000, q3);

  ASSERT_EQUAL_128(0x0, 0xffffffabffffffab, q4);
  ASSERT_EQUAL_128(0x0, 0xffffabffffffabff, q5);
  ASSERT_EQUAL_128(0xffabffffffabffff, 0xffabffffffabffff, q6);
  ASSERT_EQUAL_128(0xabffffffabffffff, 0xabffffffabffffff, q7);

  ASSERT_EQUAL_128(0x0, 0x0000abff0000abff, q16);
  ASSERT_EQUAL_128(0x0, 0x00abffff00abffff, q17);
  ASSERT_EQUAL_128(0xffab0000ffab0000, 0xffab0000ffab0000, q18);
  ASSERT_EQUAL_128(0xffffab00ffffab00, 0xffffab00ffffab00, q19);

  ASSERT_EQUAL_128(0xabababababababab, 0xabababababababab, q20);
  ASSERT_EQUAL_128(0xabcdabcdabcdabcd, 0xabcdabcdabcdabcd, q21);
  ASSERT_EQUAL_128(0xabcdef01abcdef01, 0xabcdef01abcdef01, q22);
  ASSERT_EQUAL_128(0x00ffff0000ffff00, 0x00ffff0000ffff00, q23);
  TEARDOWN();
}


TEST(neon_modimm_movi_64bit_any) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V1D(), 0x00ffff0000ffffff);
  __ Movi(v1.V2D(), 0xabababababababab);
  __ Movi(v2.V2D(), 0xabcdabcdabcdabcd);
  __ Movi(v3.V2D(), 0xabcdef01abcdef01);
  __ Movi(v4.V1D(), 0xabcdef0123456789);
  __ Movi(v5.V2D(), 0xabcdef0123456789);

  END();

  RUN();

  ASSERT_EQUAL_64(0x00ffff0000ffffff, d0);
  ASSERT_EQUAL_128(0xabababababababab, 0xabababababababab, q1);
  ASSERT_EQUAL_128(0xabcdabcdabcdabcd, 0xabcdabcdabcdabcd, q2);
  ASSERT_EQUAL_128(0xabcdef01abcdef01, 0xabcdef01abcdef01, q3);
  ASSERT_EQUAL_64(0xabcdef0123456789, d4);
  ASSERT_EQUAL_128(0xabcdef0123456789, 0xabcdef0123456789, q5);

  TEARDOWN();
}


TEST(neon_modimm_movi) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V8B(), 0xaa);
  __ Movi(v1.V16B(), 0x55);

  __ Movi(d2, 0x00ffff0000ffffff);
  __ Movi(v3.V2D(), 0x00ffff0000ffffff);

  __ Movi(v16.V4H(), 0x00, LSL, 0);
  __ Movi(v17.V4H(), 0xff, LSL, 8);
  __ Movi(v18.V8H(), 0x00, LSL, 0);
  __ Movi(v19.V8H(), 0xff, LSL, 8);

  __ Movi(v20.V2S(), 0x00, LSL, 0);
  __ Movi(v21.V2S(), 0xff, LSL, 8);
  __ Movi(v22.V2S(), 0x00, LSL, 16);
  __ Movi(v23.V2S(), 0xff, LSL, 24);

  __ Movi(v24.V4S(), 0xff, LSL, 0);
  __ Movi(v25.V4S(), 0x00, LSL, 8);
  __ Movi(v26.V4S(), 0xff, LSL, 16);
  __ Movi(v27.V4S(), 0x00, LSL, 24);

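  // MSL is the "shifting ones" modifier: the immediate is shifted left and
  // the vacated low bits are filled with ones rather than zeros, so
  // Movi(v28.V2S(), 0xaa, MSL, 8) produces 0x0000aaff in each S lane.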
  __ Movi(v28.V2S(), 0xaa, MSL, 8);
  __ Movi(v29.V2S(), 0x55, MSL, 16);
  __ Movi(v30.V4S(), 0xff, MSL, 8);
  __ Movi(v31.V4S(), 0x00, MSL, 16);

  END();

  RUN();

  ASSERT_EQUAL_128(0x0, 0xaaaaaaaaaaaaaaaa, q0);
  ASSERT_EQUAL_128(0x5555555555555555, 0x5555555555555555, q1);

  ASSERT_EQUAL_128(0x0, 0x00ffff0000ffffff, q2);
  ASSERT_EQUAL_128(0x00ffff0000ffffff, 0x00ffff0000ffffff, q3);

  ASSERT_EQUAL_128(0x0, 0x0000000000000000, q16);
  ASSERT_EQUAL_128(0x0, 0xff00ff00ff00ff00, q17);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q18);
  ASSERT_EQUAL_128(0xff00ff00ff00ff00, 0xff00ff00ff00ff00, q19);

  ASSERT_EQUAL_128(0x0, 0x0000000000000000, q20);
  ASSERT_EQUAL_128(0x0, 0x0000ff000000ff00, q21);
  ASSERT_EQUAL_128(0x0, 0x0000000000000000, q22);
  ASSERT_EQUAL_128(0x0, 0xff000000ff000000, q23);

  ASSERT_EQUAL_128(0x000000ff000000ff, 0x000000ff000000ff, q24);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q25);
  ASSERT_EQUAL_128(0x00ff000000ff0000, 0x00ff000000ff0000, q26);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q27);

  ASSERT_EQUAL_128(0x0, 0x0000aaff0000aaff, q28);
  ASSERT_EQUAL_128(0x0, 0x0055ffff0055ffff, q29);
  ASSERT_EQUAL_128(0x0000ffff0000ffff, 0x0000ffff0000ffff, q30);
  ASSERT_EQUAL_128(0x0000ffff0000ffff, 0x0000ffff0000ffff, q31);

  TEARDOWN();
}


TEST(neon_modimm_mvni) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Mvni(v16.V4H(), 0x00, LSL, 0);
  __ Mvni(v17.V4H(), 0xff, LSL, 8);
  __ Mvni(v18.V8H(), 0x00, LSL, 0);
  __ Mvni(v19.V8H(), 0xff, LSL, 8);

  __ Mvni(v20.V2S(), 0x00, LSL, 0);
  __ Mvni(v21.V2S(), 0xff, LSL, 8);
  __ Mvni(v22.V2S(), 0x00, LSL, 16);
  __ Mvni(v23.V2S(), 0xff, LSL, 24);

  __ Mvni(v24.V4S(), 0xff, LSL, 0);
  __ Mvni(v25.V4S(), 0x00, LSL, 8);
  __ Mvni(v26.V4S(), 0xff, LSL, 16);
  __ Mvni(v27.V4S(), 0x00, LSL, 24);

  __ Mvni(v28.V2S(), 0xaa, MSL, 8);
  __ Mvni(v29.V2S(), 0x55, MSL, 16);
  __ Mvni(v30.V4S(), 0xff, MSL, 8);
  __ Mvni(v31.V4S(), 0x00, MSL, 16);

  END();

  RUN();

  ASSERT_EQUAL_128(0x0, 0xffffffffffffffff, q16);
  ASSERT_EQUAL_128(0x0, 0x00ff00ff00ff00ff, q17);
  ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, q18);
  ASSERT_EQUAL_128(0x00ff00ff00ff00ff, 0x00ff00ff00ff00ff, q19);

  ASSERT_EQUAL_128(0x0, 0xffffffffffffffff, q20);
  ASSERT_EQUAL_128(0x0, 0xffff00ffffff00ff, q21);
  ASSERT_EQUAL_128(0x0, 0xffffffffffffffff, q22);
  ASSERT_EQUAL_128(0x0, 0x00ffffff00ffffff, q23);

  ASSERT_EQUAL_128(0xffffff00ffffff00, 0xffffff00ffffff00, q24);
  ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, q25);
  ASSERT_EQUAL_128(0xff00ffffff00ffff, 0xff00ffffff00ffff, q26);
  ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, q27);

  ASSERT_EQUAL_128(0x0, 0xffff5500ffff5500, q28);
  ASSERT_EQUAL_128(0x0, 0xffaa0000ffaa0000, q29);
  ASSERT_EQUAL_128(0xffff0000ffff0000, 0xffff0000ffff0000, q30);
  ASSERT_EQUAL_128(0xffff0000ffff0000, 0xffff0000ffff0000, q31);

  TEARDOWN();
}


TEST(neon_modimm_orr) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v16.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
  __ Movi(v17.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
  __ Movi(v18.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
  __ Movi(v19.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
  __ Movi(v20.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
  __ Movi(v21.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
  __ Movi(v22.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
  __ Movi(v23.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
  __ Movi(v24.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
  __ Movi(v25.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
  __ Movi(v26.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
  __ Movi(v27.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);

  __ Orr(v16.V4H(), 0x00, 0);
  __ Orr(v17.V4H(), 0xff, 8);
  __ Orr(v18.V8H(), 0x00, 0);
  __ Orr(v19.V8H(), 0xff, 8);

  __ Orr(v20.V2S(), 0x00, 0);
  __ Orr(v21.V2S(), 0xff, 8);
  __ Orr(v22.V2S(), 0x00, 16);
  __ Orr(v23.V2S(), 0xff, 24);

  __ Orr(v24.V4S(), 0xff, 0);
  __ Orr(v25.V4S(), 0x00, 8);
  __ Orr(v26.V4S(), 0xff, 16);
  __ Orr(v27.V4S(), 0x00, 24);

  END();

  RUN();

  ASSERT_EQUAL_128(0x0, 0x5555ffff0000aaaa, q16);
  ASSERT_EQUAL_128(0x0, 0xff55ffffff00ffaa, q17);
  ASSERT_EQUAL_128(0x00aaff55ff0055aa, 0x5555ffff0000aaaa, q18);
  ASSERT_EQUAL_128(0xffaaff55ff00ffaa, 0xff55ffffff00ffaa, q19);

  ASSERT_EQUAL_128(0x0, 0x5555ffff0000aaaa, q20);
  ASSERT_EQUAL_128(0x0, 0x5555ffff0000ffaa, q21);
  ASSERT_EQUAL_128(0x0, 0x5555ffff0000aaaa, q22);
  ASSERT_EQUAL_128(0x0, 0xff55ffffff00aaaa, q23);

  ASSERT_EQUAL_128(0x00aaffffff0055ff, 0x5555ffff0000aaff, q24);
  ASSERT_EQUAL_128(0x00aaff55ff0055aa, 0x5555ffff0000aaaa, q25);
  ASSERT_EQUAL_128(0x00ffff55ffff55aa, 0x55ffffff00ffaaaa, q26);
  ASSERT_EQUAL_128(0x00aaff55ff0055aa, 0x5555ffff0000aaaa, q27);

  TEARDOWN();
}


// TODO: add arbitrary values once load literal to Q registers is supported.
TEST(neon_modimm_fmov) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);

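  // The vector form of Fmov encodes its immediate in 8 bits, which covers
  // the values +/- (16..31) / 16 * 2^n with n in [-3, 4]; 1.0, 0.5 and -13.0
  // all have this form, while 255.0 and 12.3456 do not and must be
  // materialised another way.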
  // Immediates which can be encoded in the instructions.
  const float kOne = 1.0f;
  const float kPointFive = 0.5f;
  const double kMinusThirteen = -13.0;
  // Immediates which cannot be encoded in the instructions.
  const float kNonImmFP32 = 255.0f;
  const double kNonImmFP64 = 12.3456;

  START();
  __ Fmov(v11.V2S(), kOne);
  __ Fmov(v12.V4S(), kPointFive);
  __ Fmov(v22.V2D(), kMinusThirteen);
  __ Fmov(v13.V2S(), kNonImmFP32);
  __ Fmov(v14.V4S(), kNonImmFP32);
  __ Fmov(v23.V2D(), kNonImmFP64);
  __ Fmov(v1.V2S(), 0.0);
  __ Fmov(v2.V4S(), 0.0);
  __ Fmov(v3.V2D(), 0.0);
  __ Fmov(v4.V2S(), kFP32PositiveInfinity);
  __ Fmov(v5.V4S(), kFP32PositiveInfinity);
  __ Fmov(v6.V2D(), kFP64PositiveInfinity);
  END();

  RUN();

  const uint64_t kOne1S = FloatToRawbits(1.0);
  const uint64_t kOne2S = (kOne1S << 32) | kOne1S;
  const uint64_t kPointFive1S = FloatToRawbits(0.5);
  const uint64_t kPointFive2S = (kPointFive1S << 32) | kPointFive1S;
  const uint64_t kMinusThirteen1D = DoubleToRawbits(-13.0);
  const uint64_t kNonImmFP321S = FloatToRawbits(kNonImmFP32);
  const uint64_t kNonImmFP322S = (kNonImmFP321S << 32) | kNonImmFP321S;
  const uint64_t kNonImmFP641D = DoubleToRawbits(kNonImmFP64);
  const uint64_t kFP32Inf1S = FloatToRawbits(kFP32PositiveInfinity);
  const uint64_t kFP32Inf2S = (kFP32Inf1S << 32) | kFP32Inf1S;
  const uint64_t kFP64Inf1D = DoubleToRawbits(kFP64PositiveInfinity);

  ASSERT_EQUAL_128(0x0, kOne2S, q11);
  ASSERT_EQUAL_128(kPointFive2S, kPointFive2S, q12);
  ASSERT_EQUAL_128(kMinusThirteen1D, kMinusThirteen1D, q22);
  ASSERT_EQUAL_128(0x0, kNonImmFP322S, q13);
  ASSERT_EQUAL_128(kNonImmFP322S, kNonImmFP322S, q14);
  ASSERT_EQUAL_128(kNonImmFP641D, kNonImmFP641D, q23);
  ASSERT_EQUAL_128(0x0, 0x0, q1);
  ASSERT_EQUAL_128(0x0, 0x0, q2);
  ASSERT_EQUAL_128(0x0, 0x0, q3);
  ASSERT_EQUAL_128(0x0, kFP32Inf2S, q4);
  ASSERT_EQUAL_128(kFP32Inf2S, kFP32Inf2S, q5);
  ASSERT_EQUAL_128(kFP64Inf1D, kFP64Inf1D, q6);

  TEARDOWN();
}


TEST(neon_perm) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x0001020304050607, 0x08090a0b0c0d0e0f);
  __ Movi(v1.V2D(), 0x1011121314151617, 0x18191a1b1c1d1e1f);

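  // Trn1/Trn2 interleave the even/odd-numbered elements of the two sources,
  // Zip1/Zip2 interleave their low/high halves, and Uzp1/Uzp2 de-interleave
  // the even/odd-numbered elements.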
  __ Trn1(v16.V16B(), v0.V16B(), v1.V16B());
  __ Trn2(v17.V16B(), v0.V16B(), v1.V16B());
  __ Zip1(v18.V16B(), v0.V16B(), v1.V16B());
  __ Zip2(v19.V16B(), v0.V16B(), v1.V16B());
  __ Uzp1(v20.V16B(), v0.V16B(), v1.V16B());
  __ Uzp2(v21.V16B(), v0.V16B(), v1.V16B());

  END();

  RUN();

  ASSERT_EQUAL_128(0x1101130315051707, 0x19091b0b1d0d1f0f, q16);
  ASSERT_EQUAL_128(0x1000120214041606, 0x18081a0a1c0c1e0e, q17);
  ASSERT_EQUAL_128(0x180819091a0a1b0b, 0x1c0c1d0d1e0e1f0f, q18);
  ASSERT_EQUAL_128(0x1000110112021303, 0x1404150516061707, q19);
  ASSERT_EQUAL_128(0x11131517191b1d1f, 0x01030507090b0d0f, q20);
  ASSERT_EQUAL_128(0x10121416181a1c1e, 0x00020406080a0c0e, q21);

  TEARDOWN();
}


TEST(neon_copy_dup_element) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x0011223344556677, 0x8899aabbccddeeff);
  __ Movi(v1.V2D(), 0xffeddccbbaae9988, 0x7766554433221100);
  __ Movi(v2.V2D(), 0xffeddccbbaae9988, 0x0011223344556677);
  __ Movi(v3.V2D(), 0x7766554433221100, 0x8899aabbccddeeff);
  __ Movi(v4.V2D(), 0x7766554433221100, 0x0123456789abcdef);
  __ Movi(v5.V2D(), 0x0011223344556677, 0x0123456789abcdef);

  __ Dup(v16.V16B(), v0.B(), 0);
  __ Dup(v17.V8H(), v1.H(), 7);
  __ Dup(v18.V4S(), v1.S(), 3);
  __ Dup(v19.V2D(), v0.D(), 0);

  __ Dup(v20.V8B(), v0.B(), 0);
  __ Dup(v21.V4H(), v1.H(), 7);
  __ Dup(v22.V2S(), v1.S(), 3);

  __ Dup(v23.B(), v0.B(), 0);
  __ Dup(v24.H(), v1.H(), 7);
  __ Dup(v25.S(), v1.S(), 3);
  __ Dup(v26.D(), v0.D(), 0);

  __ Dup(v2.V16B(), v2.B(), 0);
  __ Dup(v3.V8H(), v3.H(), 7);
  __ Dup(v4.V4S(), v4.S(), 0);
  __ Dup(v5.V2D(), v5.D(), 1);

  END();

  RUN();

  ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, q16);
  ASSERT_EQUAL_128(0xffedffedffedffed, 0xffedffedffedffed, q17);
  ASSERT_EQUAL_128(0xffeddccbffeddccb, 0xffeddccbffeddccb, q18);
  ASSERT_EQUAL_128(0x8899aabbccddeeff, 0x8899aabbccddeeff, q19);

  ASSERT_EQUAL_128(0, 0xffffffffffffffff, q20);
  ASSERT_EQUAL_128(0, 0xffedffedffedffed, q21);
  ASSERT_EQUAL_128(0, 0xffeddccbffeddccb, q22);

  ASSERT_EQUAL_128(0, 0x00000000000000ff, q23);
  ASSERT_EQUAL_128(0, 0x000000000000ffed, q24);
  ASSERT_EQUAL_128(0, 0x00000000ffeddccb, q25);
  ASSERT_EQUAL_128(0, 0x8899aabbccddeeff, q26);

  ASSERT_EQUAL_128(0x7777777777777777, 0x7777777777777777, q2);
  ASSERT_EQUAL_128(0x7766776677667766, 0x7766776677667766, q3);
  ASSERT_EQUAL_128(0x89abcdef89abcdef, 0x89abcdef89abcdef, q4);
  ASSERT_EQUAL_128(0x0011223344556677, 0x0011223344556677, q5);
  TEARDOWN();
}


TEST(neon_copy_dup_general) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Mov(x0, 0x0011223344556677);

  __ Dup(v16.V16B(), w0);
  __ Dup(v17.V8H(), w0);
  __ Dup(v18.V4S(), w0);
  __ Dup(v19.V2D(), x0);

  __ Dup(v20.V8B(), w0);
  __ Dup(v21.V4H(), w0);
  __ Dup(v22.V2S(), w0);

  __ Dup(v2.V16B(), wzr);
  __ Dup(v3.V8H(), wzr);
  __ Dup(v4.V4S(), wzr);
  __ Dup(v5.V2D(), xzr);

  END();

  RUN();

  ASSERT_EQUAL_128(0x7777777777777777, 0x7777777777777777, q16);
  ASSERT_EQUAL_128(0x6677667766776677, 0x6677667766776677, q17);
  ASSERT_EQUAL_128(0x4455667744556677, 0x4455667744556677, q18);
  ASSERT_EQUAL_128(0x0011223344556677, 0x0011223344556677, q19);

  ASSERT_EQUAL_128(0, 0x7777777777777777, q20);
  ASSERT_EQUAL_128(0, 0x6677667766776677, q21);
  ASSERT_EQUAL_128(0, 0x4455667744556677, q22);

  ASSERT_EQUAL_128(0, 0, q2);
  ASSERT_EQUAL_128(0, 0, q3);
  ASSERT_EQUAL_128(0, 0, q4);
  ASSERT_EQUAL_128(0, 0, q5);
  TEARDOWN();
}


TEST(neon_copy_ins_element) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x0011223344556677, 0x8899aabbccddeeff);
  __ Movi(v1.V2D(), 0xffeddccbbaae9988, 0x7766554433221100);
  __ Movi(v16.V2D(), 0x0123456789abcdef, 0xfedcba9876543210);
  __ Movi(v17.V2D(), 0xfedcba9876543210, 0x0123456789abcdef);
  __ Movi(v18.V2D(), 0x0011223344556677, 0x8899aabbccddeeff);
  __ Movi(v19.V2D(), 0x0011223344556677, 0x8899aabbccddeeff);

  __ Movi(v2.V2D(), 0, 0x0011223344556677);
  __ Movi(v3.V2D(), 0, 0x8899aabbccddeeff);
  __ Movi(v4.V2D(), 0, 0x0123456789abcdef);
  __ Movi(v5.V2D(), 0, 0x0123456789abcdef);

  __ Ins(v16.V16B(), 15, v0.V16B(), 0);
  __ Ins(v17.V8H(), 0, v1.V8H(), 7);
  __ Ins(v18.V4S(), 3, v1.V4S(), 0);
  __ Ins(v19.V2D(), 1, v0.V2D(), 0);

  __ Ins(v2.V16B(), 2, v2.V16B(), 0);
  __ Ins(v3.V8H(), 0, v3.V8H(), 7);
  __ Ins(v4.V4S(), 3, v4.V4S(), 0);
  __ Ins(v5.V2D(), 0, v5.V2D(), 1);

  END();

  RUN();

  ASSERT_EQUAL_128(0xff23456789abcdef, 0xfedcba9876543210, q16);
  ASSERT_EQUAL_128(0xfedcba9876543210, 0x0123456789abffed, q17);
  ASSERT_EQUAL_128(0x3322110044556677, 0x8899aabbccddeeff, q18);
  ASSERT_EQUAL_128(0x8899aabbccddeeff, 0x8899aabbccddeeff, q19);

  ASSERT_EQUAL_128(0, 0x0011223344776677, q2);
  ASSERT_EQUAL_128(0, 0x8899aabbccdd0000, q3);
  ASSERT_EQUAL_128(0x89abcdef00000000, 0x0123456789abcdef, q4);
  ASSERT_EQUAL_128(0, 0, q5);
  TEARDOWN();
}


TEST(neon_copy_mov_element) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x0011223344556677, 0x8899aabbccddeeff);
  __ Movi(v1.V2D(), 0xffeddccbbaae9988, 0x7766554433221100);
  __ Movi(v16.V2D(), 0x0123456789abcdef, 0xfedcba9876543210);
  __ Movi(v17.V2D(), 0xfedcba9876543210, 0x0123456789abcdef);
  __ Movi(v18.V2D(), 0x0011223344556677, 0x8899aabbccddeeff);
  __ Movi(v19.V2D(), 0x0011223344556677, 0x8899aabbccddeeff);

  __ Movi(v2.V2D(), 0, 0x0011223344556677);
  __ Movi(v3.V2D(), 0, 0x8899aabbccddeeff);
  __ Movi(v4.V2D(), 0, 0x0123456789abcdef);
  __ Movi(v5.V2D(), 0, 0x0123456789abcdef);

  __ Mov(v16.V16B(), 15, v0.V16B(), 0);
  __ Mov(v17.V8H(), 0, v1.V8H(), 7);
  __ Mov(v18.V4S(), 3, v1.V4S(), 0);
  __ Mov(v19.V2D(), 1, v0.V2D(), 0);

  __ Mov(v2.V16B(), 2, v2.V16B(), 0);
  __ Mov(v3.V8H(), 0, v3.V8H(), 7);
  __ Mov(v4.V4S(), 3, v4.V4S(), 0);
  __ Mov(v5.V2D(), 0, v5.V2D(), 1);

  END();

  RUN();

  ASSERT_EQUAL_128(0xff23456789abcdef, 0xfedcba9876543210, q16);
  ASSERT_EQUAL_128(0xfedcba9876543210, 0x0123456789abffed, q17);
  ASSERT_EQUAL_128(0x3322110044556677, 0x8899aabbccddeeff, q18);
  ASSERT_EQUAL_128(0x8899aabbccddeeff, 0x8899aabbccddeeff, q19);

  ASSERT_EQUAL_128(0, 0x0011223344776677, q2);
  ASSERT_EQUAL_128(0, 0x8899aabbccdd0000, q3);
  ASSERT_EQUAL_128(0x89abcdef00000000, 0x0123456789abcdef, q4);
  ASSERT_EQUAL_128(0, 0, q5);
  TEARDOWN();
}


TEST(neon_copy_smov) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x0123456789abcdef, 0xfedcba9876543210);

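  // Smov sign-extends the selected lane into a general-purpose register;
  // Umov, tested below, zero-extends instead. Byte 7 of v0 is 0xfe, so
  // Smov(w0, v0.B(), 7) yields 0xfffffffe.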
  __ Smov(w0, v0.B(), 7);
  __ Smov(w1, v0.B(), 15);

  __ Smov(w2, v0.H(), 0);
  __ Smov(w3, v0.H(), 3);

  __ Smov(x4, v0.B(), 7);
  __ Smov(x5, v0.B(), 15);

  __ Smov(x6, v0.H(), 0);
  __ Smov(x7, v0.H(), 3);

  __ Smov(x16, v0.S(), 0);
  __ Smov(x17, v0.S(), 1);

  END();

  RUN();

  ASSERT_EQUAL_32(0xfffffffe, w0);
  ASSERT_EQUAL_32(0x00000001, w1);
  ASSERT_EQUAL_32(0x00003210, w2);
  ASSERT_EQUAL_32(0xfffffedc, w3);
  ASSERT_EQUAL_64(0xfffffffffffffffe, x4);
  ASSERT_EQUAL_64(0x0000000000000001, x5);
  ASSERT_EQUAL_64(0x0000000000003210, x6);
  ASSERT_EQUAL_64(0xfffffffffffffedc, x7);
  ASSERT_EQUAL_64(0x0000000076543210, x16);
  ASSERT_EQUAL_64(0xfffffffffedcba98, x17);

  TEARDOWN();
}


TEST(neon_copy_umov_mov) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x0123456789abcdef, 0xfedcba9876543210);

  __ Umov(w0, v0.B(), 15);
  __ Umov(w1, v0.H(), 0);
  __ Umov(w2, v0.S(), 3);
  __ Umov(x3, v0.D(), 1);

  __ Mov(w4, v0.S(), 3);
  __ Mov(x5, v0.D(), 1);

  END();

  RUN();

  ASSERT_EQUAL_32(0x00000001, w0);
  ASSERT_EQUAL_32(0x00003210, w1);
  ASSERT_EQUAL_32(0x01234567, w2);
  ASSERT_EQUAL_64(0x0123456789abcdef, x3);
  ASSERT_EQUAL_32(0x01234567, w4);
  ASSERT_EQUAL_64(0x0123456789abcdef, x5);

  TEARDOWN();
}


TEST(neon_copy_ins_general) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Mov(x0, 0x0011223344556677);
  __ Movi(v16.V2D(), 0x0123456789abcdef, 0xfedcba9876543210);
  __ Movi(v17.V2D(), 0xfedcba9876543210, 0x0123456789abcdef);
  __ Movi(v18.V2D(), 0x0011223344556677, 0x8899aabbccddeeff);
  __ Movi(v19.V2D(), 0x0011223344556677, 0x8899aabbccddeeff);

  __ Movi(v2.V2D(), 0, 0x0011223344556677);
  __ Movi(v3.V2D(), 0, 0x8899aabbccddeeff);
  __ Movi(v4.V2D(), 0, 0x0123456789abcdef);
  __ Movi(v5.V2D(), 0, 0x0123456789abcdef);

  __ Ins(v16.V16B(), 15, w0);
  __ Ins(v17.V8H(), 0, w0);
  __ Ins(v18.V4S(), 3, w0);
  __ Ins(v19.V2D(), 0, x0);

  __ Ins(v2.V16B(), 2, w0);
  __ Ins(v3.V8H(), 0, w0);
  __ Ins(v4.V4S(), 3, w0);
  __ Ins(v5.V2D(), 1, x0);

  END();

  RUN();

  ASSERT_EQUAL_128(0x7723456789abcdef, 0xfedcba9876543210, q16);
  ASSERT_EQUAL_128(0xfedcba9876543210, 0x0123456789ab6677, q17);
  ASSERT_EQUAL_128(0x4455667744556677, 0x8899aabbccddeeff, q18);
  ASSERT_EQUAL_128(0x0011223344556677, 0x0011223344556677, q19);

  ASSERT_EQUAL_128(0, 0x0011223344776677, q2);
  ASSERT_EQUAL_128(0, 0x8899aabbccdd6677, q3);
  ASSERT_EQUAL_128(0x4455667700000000, 0x0123456789abcdef, q4);
  ASSERT_EQUAL_128(0x0011223344556677, 0x0123456789abcdef, q5);
  TEARDOWN();
}


TEST(neon_extract_ext) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x0011223344556677, 0x8899aabbccddeeff);
  __ Movi(v1.V2D(), 0xffeddccbbaae9988, 0x7766554433221100);

  __ Movi(v2.V2D(), 0, 0x0011223344556677);
  __ Movi(v3.V2D(), 0, 0x8899aabbccddeeff);

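  // Ext extracts a contiguous run of bytes from the concatenation of the two
  // sources, starting at the given byte index of the first source: with an
  // index of 15, the result is byte 15 of v0 followed by bytes 0-14 of v1.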
  __ Ext(v16.V16B(), v0.V16B(), v1.V16B(), 0);
  __ Ext(v17.V16B(), v0.V16B(), v1.V16B(), 15);
  __ Ext(v1.V16B(), v0.V16B(), v1.V16B(), 8);  // Dest is the same as one src.
  __ Ext(v0.V16B(), v0.V16B(), v0.V16B(), 8);  // All regs are the same.

  __ Ext(v18.V8B(), v2.V8B(), v3.V8B(), 0);
  __ Ext(v19.V8B(), v2.V8B(), v3.V8B(), 7);
  __ Ext(v2.V8B(), v2.V8B(), v3.V8B(), 4);  // Dest is the same as one src.
  __ Ext(v3.V8B(), v3.V8B(), v3.V8B(), 4);  // All regs are the same.

  END();

  RUN();

  ASSERT_EQUAL_128(0x0011223344556677, 0x8899aabbccddeeff, q16);
  ASSERT_EQUAL_128(0xeddccbbaae998877, 0x6655443322110000, q17);
  ASSERT_EQUAL_128(0x7766554433221100, 0x0011223344556677, q1);
  ASSERT_EQUAL_128(0x8899aabbccddeeff, 0x0011223344556677, q0);

  ASSERT_EQUAL_128(0, 0x0011223344556677, q18);
  ASSERT_EQUAL_128(0, 0x99aabbccddeeff00, q19);
  ASSERT_EQUAL_128(0, 0xccddeeff00112233, q2);
  ASSERT_EQUAL_128(0, 0xccddeeff8899aabb, q3);
  TEARDOWN();
}


TEST(neon_3different_uaddl) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x0000000000000000, 0x0000000000000000);
  __ Movi(v1.V2D(), 0, 0x00010280810e0fff);
  __ Movi(v2.V2D(), 0, 0x0101010101010101);

  __ Movi(v3.V2D(), 0x0000000000000000, 0x0000000000000000);
  __ Movi(v4.V2D(), 0x0000000000000000, 0x0000000000000000);
  __ Movi(v5.V2D(), 0, 0x0000000180008001);
  __ Movi(v6.V2D(), 0, 0x000e000ff000ffff);
  __ Movi(v7.V2D(), 0, 0x0001000100010001);

  __ Movi(v16.V2D(), 0x0000000000000000, 0x0000000000000000);
  __ Movi(v17.V2D(), 0x0000000000000000, 0x0000000000000000);
  __ Movi(v18.V2D(), 0, 0x0000000000000001);
  __ Movi(v19.V2D(), 0, 0x80000001ffffffff);
  __ Movi(v20.V2D(), 0, 0x0000000100000001);

  __ Uaddl(v0.V8H(), v1.V8B(), v2.V8B());

  __ Uaddl(v3.V4S(), v5.V4H(), v7.V4H());
  __ Uaddl(v4.V4S(), v6.V4H(), v7.V4H());

  __ Uaddl(v16.V2D(), v18.V2S(), v20.V2S());
  __ Uaddl(v17.V2D(), v19.V2S(), v20.V2S());

  END();

  RUN();

  ASSERT_EQUAL_128(0x0001000200030081, 0x0082000f00100100, q0);
  ASSERT_EQUAL_128(0x0000000100000002, 0x0000800100008002, q3);
  ASSERT_EQUAL_128(0x0000000f00000010, 0x0000f00100010000, q4);
  ASSERT_EQUAL_128(0x0000000000000001, 0x0000000000000002, q16);
  ASSERT_EQUAL_128(0x0000000080000002, 0x0000000100000000, q17);
  TEARDOWN();
}


TEST(neon_3different_addhn_subhn) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

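  // Addhn and Subhn add or subtract the wide elements and keep only the high
  // half of each result; Raddhn and Rsubhn round first by adding
  // 1 << (esize - 1). For the low halfwords 0x8081 + 0x7fff == 0x10080, bits
  // <15:8> give 0x00 for Addhn, while rounding (0x10080 + 0x80 == 0x10100)
  // gives 0x01 for Raddhn.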
  __ Addhn(v16.V8B(), v0.V8H(), v1.V8H());
  __ Addhn2(v16.V16B(), v2.V8H(), v3.V8H());
  __ Raddhn(v17.V8B(), v0.V8H(), v1.V8H());
  __ Raddhn2(v17.V16B(), v2.V8H(), v3.V8H());
  __ Subhn(v18.V8B(), v0.V8H(), v1.V8H());
  __ Subhn2(v18.V16B(), v2.V8H(), v3.V8H());
  __ Rsubhn(v19.V8B(), v0.V8H(), v1.V8H());
  __ Rsubhn2(v19.V16B(), v2.V8H(), v3.V8H());

  END();

  RUN();

  ASSERT_EQUAL_128(0x0000ff007fff7fff, 0xff81817f80ff0100, q16);
  ASSERT_EQUAL_128(0x0000000080008000, 0xff81817f81ff0201, q17);
  ASSERT_EQUAL_128(0x0000ffff80008000, 0xff80817f80ff0100, q18);
  ASSERT_EQUAL_128(0x0000000080008000, 0xff81827f81ff0101, q19);
  TEARDOWN();
}

TEST(neon_d_only_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0xaaaaaaaaaaaaaaaa, 0xf0000000f000f0f0);
  __ Movi(v1.V2D(), 0x5555555555555555, 0x7fffffff7fff7f7f);
  __ Movi(v2.V2D(), 0xaaaaaaaaaaaaaaaa, 0x1000000010001010);
  __ Movi(v3.V2D(), 0xffffffffffffffff, 2);
  __ Movi(v4.V2D(), 0xffffffffffffffff, -2);

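  // Ushl and Sshl shift by a signed, per-element amount taken from the low
  // byte of the last operand: positive shifts left, negative shifts right
  // (logically for Ushl, arithmetically for Sshl). d3 holds +2 and d4 holds
  // -2 below.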
  __ Add(d16, d0, d0);
  __ Add(d17, d1, d1);
  __ Add(d18, d2, d2);
  __ Sub(d19, d0, d0);
  __ Sub(d20, d0, d1);
  __ Sub(d21, d1, d0);
  __ Ushl(d22, d0, d3);
  __ Ushl(d23, d0, d4);
  __ Sshl(d24, d0, d3);
  __ Sshl(d25, d0, d4);
  __ Ushr(d26, d0, 1);
  __ Sshr(d27, d0, 3);
  __ Shl(d28, d0, 0);
  __ Shl(d29, d0, 16);

  END();

  RUN();

  ASSERT_EQUAL_128(0, 0xe0000001e001e1e0, q16);
  ASSERT_EQUAL_128(0, 0xfffffffefffefefe, q17);
  ASSERT_EQUAL_128(0, 0x2000000020002020, q18);
  ASSERT_EQUAL_128(0, 0, q19);
  ASSERT_EQUAL_128(0, 0x7000000170017171, q20);
  ASSERT_EQUAL_128(0, 0x8ffffffe8ffe8e8f, q21);
  ASSERT_EQUAL_128(0, 0xc0000003c003c3c0, q22);
  ASSERT_EQUAL_128(0, 0x3c0000003c003c3c, q23);
  ASSERT_EQUAL_128(0, 0xc0000003c003c3c0, q24);
  ASSERT_EQUAL_128(0, 0xfc0000003c003c3c, q25);
  ASSERT_EQUAL_128(0, 0x7800000078007878, q26);
  ASSERT_EQUAL_128(0, 0xfe0000001e001e1e, q27);
  ASSERT_EQUAL_128(0, 0xf0000000f000f0f0, q28);
  ASSERT_EQUAL_128(0, 0x0000f000f0f00000, q29);

  TEARDOWN();
}


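// The next three tests cover the saturating left shifts by immediate: Sqshl
// saturates to the signed range and Uqshl to the unsigned range, while
// Sqshlu reads signed values but saturates to the unsigned range, so
// negative inputs clamp to zero (0x80 << 2 becomes 0x00 in the B-sized
// case).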
TEST(neon_sqshl_imm_scalar)24127 TEST(neon_sqshl_imm_scalar) {
24128   SETUP_WITH_FEATURES(CPUFeatures::kNEON);
24129 
24130   START();
24131 
24132   __ Movi(v0.V2D(), 0x0, 0x7f);
24133   __ Movi(v1.V2D(), 0x0, 0x80);
24134   __ Movi(v2.V2D(), 0x0, 0x01);
24135   __ Sqshl(b16, b0, 1);
24136   __ Sqshl(b17, b1, 1);
24137   __ Sqshl(b18, b2, 1);
24138 
24139   __ Movi(v0.V2D(), 0x0, 0x7fff);
24140   __ Movi(v1.V2D(), 0x0, 0x8000);
24141   __ Movi(v2.V2D(), 0x0, 0x0001);
24142   __ Sqshl(h19, h0, 1);
24143   __ Sqshl(h20, h1, 1);
24144   __ Sqshl(h21, h2, 1);
24145 
24146   __ Movi(v0.V2D(), 0x0, 0x7fffffff);
24147   __ Movi(v1.V2D(), 0x0, 0x80000000);
24148   __ Movi(v2.V2D(), 0x0, 0x00000001);
24149   __ Sqshl(s22, s0, 1);
24150   __ Sqshl(s23, s1, 1);
24151   __ Sqshl(s24, s2, 1);
24152 
24153   __ Movi(v0.V2D(), 0x0, 0x7fffffffffffffff);
24154   __ Movi(v1.V2D(), 0x0, 0x8000000000000000);
24155   __ Movi(v2.V2D(), 0x0, 0x0000000000000001);
24156   __ Sqshl(d25, d0, 1);
24157   __ Sqshl(d26, d1, 1);
24158   __ Sqshl(d27, d2, 1);
24159 
24160   END();
24161 
24162   RUN();
24163 
24164   ASSERT_EQUAL_128(0, 0x7f, q16);
24165   ASSERT_EQUAL_128(0, 0x80, q17);
24166   ASSERT_EQUAL_128(0, 0x02, q18);
24167 
24168   ASSERT_EQUAL_128(0, 0x7fff, q19);
24169   ASSERT_EQUAL_128(0, 0x8000, q20);
24170   ASSERT_EQUAL_128(0, 0x0002, q21);
24171 
24172   ASSERT_EQUAL_128(0, 0x7fffffff, q22);
24173   ASSERT_EQUAL_128(0, 0x80000000, q23);
24174   ASSERT_EQUAL_128(0, 0x00000002, q24);
24175 
24176   ASSERT_EQUAL_128(0, 0x7fffffffffffffff, q25);
24177   ASSERT_EQUAL_128(0, 0x8000000000000000, q26);
24178   ASSERT_EQUAL_128(0, 0x0000000000000002, q27);
24179 
24180   TEARDOWN();
24181 }
24182 
24183 
TEST(neon_uqshl_imm_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x0, 0x7f);
  __ Movi(v1.V2D(), 0x0, 0x80);
  __ Movi(v2.V2D(), 0x0, 0x01);
  __ Uqshl(b16, b0, 1);
  __ Uqshl(b17, b1, 1);
  __ Uqshl(b18, b2, 1);

  __ Movi(v0.V2D(), 0x0, 0x7fff);
  __ Movi(v1.V2D(), 0x0, 0x8000);
  __ Movi(v2.V2D(), 0x0, 0x0001);
  __ Uqshl(h19, h0, 1);
  __ Uqshl(h20, h1, 1);
  __ Uqshl(h21, h2, 1);

  __ Movi(v0.V2D(), 0x0, 0x7fffffff);
  __ Movi(v1.V2D(), 0x0, 0x80000000);
  __ Movi(v2.V2D(), 0x0, 0x00000001);
  __ Uqshl(s22, s0, 1);
  __ Uqshl(s23, s1, 1);
  __ Uqshl(s24, s2, 1);

  __ Movi(v0.V2D(), 0x0, 0x7fffffffffffffff);
  __ Movi(v1.V2D(), 0x0, 0x8000000000000000);
  __ Movi(v2.V2D(), 0x0, 0x0000000000000001);
  __ Uqshl(d25, d0, 1);
  __ Uqshl(d26, d1, 1);
  __ Uqshl(d27, d2, 1);

  END();

  RUN();

  ASSERT_EQUAL_128(0, 0xfe, q16);
  ASSERT_EQUAL_128(0, 0xff, q17);
  ASSERT_EQUAL_128(0, 0x02, q18);

  ASSERT_EQUAL_128(0, 0xfffe, q19);
  ASSERT_EQUAL_128(0, 0xffff, q20);
  ASSERT_EQUAL_128(0, 0x0002, q21);

  ASSERT_EQUAL_128(0, 0xfffffffe, q22);
  ASSERT_EQUAL_128(0, 0xffffffff, q23);
  ASSERT_EQUAL_128(0, 0x00000002, q24);

  ASSERT_EQUAL_128(0, 0xfffffffffffffffe, q25);
  ASSERT_EQUAL_128(0, 0xffffffffffffffff, q26);
  ASSERT_EQUAL_128(0, 0x0000000000000002, q27);

  TEARDOWN();
}

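// SQSHLU shifts a signed input but saturates to the unsigned range: positive
// overflow clamps to all ones (0x7f << 2 = 0x1fc -> 0xff for a byte) and
// negative inputs clamp to zero (0x80, i.e. -128, -> 0x00).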
TEST(neon_sqshlu_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x0, 0x7f);
  __ Movi(v1.V2D(), 0x0, 0x80);
  __ Movi(v2.V2D(), 0x0, 0x01);
  __ Sqshlu(b16, b0, 2);
  __ Sqshlu(b17, b1, 2);
  __ Sqshlu(b18, b2, 2);

  __ Movi(v0.V2D(), 0x0, 0x7fff);
  __ Movi(v1.V2D(), 0x0, 0x8000);
  __ Movi(v2.V2D(), 0x0, 0x0001);
  __ Sqshlu(h19, h0, 2);
  __ Sqshlu(h20, h1, 2);
  __ Sqshlu(h21, h2, 2);

  __ Movi(v0.V2D(), 0x0, 0x7fffffff);
  __ Movi(v1.V2D(), 0x0, 0x80000000);
  __ Movi(v2.V2D(), 0x0, 0x00000001);
  __ Sqshlu(s22, s0, 2);
  __ Sqshlu(s23, s1, 2);
  __ Sqshlu(s24, s2, 2);

  __ Movi(v0.V2D(), 0x0, 0x7fffffffffffffff);
  __ Movi(v1.V2D(), 0x0, 0x8000000000000000);
  __ Movi(v2.V2D(), 0x0, 0x0000000000000001);
  __ Sqshlu(d25, d0, 2);
  __ Sqshlu(d26, d1, 2);
  __ Sqshlu(d27, d2, 2);

  END();

  RUN();

  ASSERT_EQUAL_128(0, 0xff, q16);
  ASSERT_EQUAL_128(0, 0x00, q17);
  ASSERT_EQUAL_128(0, 0x04, q18);

  ASSERT_EQUAL_128(0, 0xffff, q19);
  ASSERT_EQUAL_128(0, 0x0000, q20);
  ASSERT_EQUAL_128(0, 0x0004, q21);

  ASSERT_EQUAL_128(0, 0xffffffff, q22);
  ASSERT_EQUAL_128(0, 0x00000000, q23);
  ASSERT_EQUAL_128(0, 0x00000004, q24);

  ASSERT_EQUAL_128(0, 0xffffffffffffffff, q25);
  ASSERT_EQUAL_128(0, 0x0000000000000000, q26);
  ASSERT_EQUAL_128(0, 0x0000000000000004, q27);

  TEARDOWN();
}

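// SSHLL sign-extends each element to twice its width and then shifts left;
// the "2" variants read their inputs from the high half of the source
// register. SHLL is similar but always shifts by exactly the element size,
// moving each element into the top half of the widened result.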
TEST(neon_sshll) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);

  __ Sshll(v16.V8H(), v0.V8B(), 4);
  __ Sshll2(v17.V8H(), v0.V16B(), 4);

  __ Sshll(v18.V4S(), v1.V4H(), 8);
  __ Sshll2(v19.V4S(), v1.V8H(), 8);

  __ Sshll(v20.V2D(), v2.V2S(), 16);
  __ Sshll2(v21.V2D(), v2.V4S(), 16);

  END();

  RUN();

  ASSERT_EQUAL_128(0xf800f810fff00000, 0x001007f0f800f810, q16);
  ASSERT_EQUAL_128(0x07f000100000fff0, 0xf810f80007f00010, q17);
  ASSERT_EQUAL_128(0xffffff0000000000, 0x00000100007fff00, q18);
  ASSERT_EQUAL_128(0xff800000ff800100, 0xffffff0000000000, q19);
  ASSERT_EQUAL_128(0x0000000000000000, 0x00007fffffff0000, q20);
  ASSERT_EQUAL_128(0xffff800000000000, 0xffffffffffff0000, q21);
  TEARDOWN();
}

TEST(neon_shll) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);

  __ Shll(v16.V8H(), v0.V8B(), 8);
  __ Shll2(v17.V8H(), v0.V16B(), 8);

  __ Shll(v18.V4S(), v1.V4H(), 16);
  __ Shll2(v19.V4S(), v1.V8H(), 16);

  __ Shll(v20.V2D(), v2.V2S(), 32);
  __ Shll2(v21.V2D(), v2.V4S(), 32);

  END();

  RUN();

  ASSERT_EQUAL_128(0x80008100ff000000, 0x01007f0080008100, q16);
  ASSERT_EQUAL_128(0x7f0001000000ff00, 0x810080007f000100, q17);
  ASSERT_EQUAL_128(0xffff000000000000, 0x000100007fff0000, q18);
  ASSERT_EQUAL_128(0x8000000080010000, 0xffff000000000000, q19);
  ASSERT_EQUAL_128(0x0000000000000000, 0x7fffffff00000000, q20);
  ASSERT_EQUAL_128(0x8000000000000000, 0xffffffff00000000, q21);
  TEARDOWN();
}

TEST(neon_ushll) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);

  __ Ushll(v16.V8H(), v0.V8B(), 4);
  __ Ushll2(v17.V8H(), v0.V16B(), 4);

  __ Ushll(v18.V4S(), v1.V4H(), 8);
  __ Ushll2(v19.V4S(), v1.V8H(), 8);

  __ Ushll(v20.V2D(), v2.V2S(), 16);
  __ Ushll2(v21.V2D(), v2.V4S(), 16);

  END();

  RUN();

  ASSERT_EQUAL_128(0x080008100ff00000, 0x001007f008000810, q16);
  ASSERT_EQUAL_128(0x07f0001000000ff0, 0x0810080007f00010, q17);
  ASSERT_EQUAL_128(0x00ffff0000000000, 0x00000100007fff00, q18);
  ASSERT_EQUAL_128(0x0080000000800100, 0x00ffff0000000000, q19);
  ASSERT_EQUAL_128(0x0000000000000000, 0x00007fffffff0000, q20);
  ASSERT_EQUAL_128(0x0000800000000000, 0x0000ffffffff0000, q21);
  TEARDOWN();
}

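// SXTL and UXTL are aliases of SSHLL and USHLL with a shift of zero, i.e.
// pure widening sign- and zero-extensions.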
TEST(neon_sxtl) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);

  __ Sxtl(v16.V8H(), v0.V8B());
  __ Sxtl2(v17.V8H(), v0.V16B());

  __ Sxtl(v18.V4S(), v1.V4H());
  __ Sxtl2(v19.V4S(), v1.V8H());

  __ Sxtl(v20.V2D(), v2.V2S());
  __ Sxtl2(v21.V2D(), v2.V4S());

  END();

  RUN();

  ASSERT_EQUAL_128(0xff80ff81ffff0000, 0x0001007fff80ff81, q16);
  ASSERT_EQUAL_128(0x007f00010000ffff, 0xff81ff80007f0001, q17);
  ASSERT_EQUAL_128(0xffffffff00000000, 0x0000000100007fff, q18);
  ASSERT_EQUAL_128(0xffff8000ffff8001, 0xffffffff00000000, q19);
  ASSERT_EQUAL_128(0x0000000000000000, 0x000000007fffffff, q20);
  ASSERT_EQUAL_128(0xffffffff80000000, 0xffffffffffffffff, q21);
  TEARDOWN();
}


TEST(neon_uxtl) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);

  __ Uxtl(v16.V8H(), v0.V8B());
  __ Uxtl2(v17.V8H(), v0.V16B());

  __ Uxtl(v18.V4S(), v1.V4H());
  __ Uxtl2(v19.V4S(), v1.V8H());

  __ Uxtl(v20.V2D(), v2.V2S());
  __ Uxtl2(v21.V2D(), v2.V4S());

  END();

  RUN();

  ASSERT_EQUAL_128(0x0080008100ff0000, 0x0001007f00800081, q16);
  ASSERT_EQUAL_128(0x007f0001000000ff, 0x00810080007f0001, q17);
  ASSERT_EQUAL_128(0x0000ffff00000000, 0x0000000100007fff, q18);
  ASSERT_EQUAL_128(0x0000800000008001, 0x0000ffff00000000, q19);
  ASSERT_EQUAL_128(0x0000000000000000, 0x000000007fffffff, q20);
  ASSERT_EQUAL_128(0x0000000080000000, 0x00000000ffffffff, q21);
  TEARDOWN();
}

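// SSRA shifts each element of the source right (arithmetically) and
// accumulates the result into the destination. The rounding forms tested
// further down (SRSRA/URSRA) add the rounding constant 1 << (shift - 1)
// before shifting.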
TEST(neon_ssra) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  __ Mov(v16.V2D(), v0.V2D());
  __ Mov(v17.V2D(), v0.V2D());
  __ Mov(v18.V2D(), v1.V2D());
  __ Mov(v19.V2D(), v1.V2D());
  __ Mov(v20.V2D(), v2.V2D());
  __ Mov(v21.V2D(), v2.V2D());
  __ Mov(v22.V2D(), v3.V2D());
  __ Mov(v23.V2D(), v4.V2D());
  __ Mov(v24.V2D(), v3.V2D());
  __ Mov(v25.V2D(), v4.V2D());

  __ Ssra(v16.V8B(), v0.V8B(), 4);
  __ Ssra(v17.V16B(), v0.V16B(), 4);

  __ Ssra(v18.V4H(), v1.V4H(), 8);
  __ Ssra(v19.V8H(), v1.V8H(), 8);

  __ Ssra(v20.V2S(), v2.V2S(), 16);
  __ Ssra(v21.V4S(), v2.V4S(), 16);

  __ Ssra(v22.V2D(), v3.V2D(), 32);
  __ Ssra(v23.V2D(), v4.V2D(), 32);

  __ Ssra(d24, d3, 48);

  END();

  RUN();

  ASSERT_EQUAL_128(0x0000000000000000, 0x7879fe0001867879, q16);
  ASSERT_EQUAL_128(0x860100fe79788601, 0x7879fe0001867879, q17);
  ASSERT_EQUAL_128(0x0000000000000000, 0xfffe00000001807e, q18);
  ASSERT_EQUAL_128(0x7f807f81fffe0000, 0xfffe00000001807e, q19);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000080007ffe, q20);
  ASSERT_EQUAL_128(0x7fff8000fffffffe, 0x0000000080007ffe, q21);
  ASSERT_EQUAL_128(0x7fffffff80000001, 0x800000007ffffffe, q22);
  ASSERT_EQUAL_128(0x7fffffff80000000, 0x0000000000000000, q23);
  ASSERT_EQUAL_128(0x0000000000000000, 0x8000000000007ffe, q24);
  TEARDOWN();
}

TEST(neon_srsra) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  __ Mov(v16.V2D(), v0.V2D());
  __ Mov(v17.V2D(), v0.V2D());
  __ Mov(v18.V2D(), v1.V2D());
  __ Mov(v19.V2D(), v1.V2D());
  __ Mov(v20.V2D(), v2.V2D());
  __ Mov(v21.V2D(), v2.V2D());
  __ Mov(v22.V2D(), v3.V2D());
  __ Mov(v23.V2D(), v4.V2D());
  __ Mov(v24.V2D(), v3.V2D());
  __ Mov(v25.V2D(), v4.V2D());

  __ Srsra(v16.V8B(), v0.V8B(), 4);
  __ Srsra(v17.V16B(), v0.V16B(), 4);

  __ Srsra(v18.V4H(), v1.V4H(), 8);
  __ Srsra(v19.V8H(), v1.V8H(), 8);

  __ Srsra(v20.V2S(), v2.V2S(), 16);
  __ Srsra(v21.V4S(), v2.V4S(), 16);

  __ Srsra(v22.V2D(), v3.V2D(), 32);
  __ Srsra(v23.V2D(), v4.V2D(), 32);

  __ Srsra(d24, d3, 48);

  END();

  RUN();

  ASSERT_EQUAL_128(0x0000000000000000, 0x7879ff0001877879, q16);
  ASSERT_EQUAL_128(0x870100ff79788701, 0x7879ff0001877879, q17);
  ASSERT_EQUAL_128(0x0000000000000000, 0xffff00000001807f, q18);
  ASSERT_EQUAL_128(0x7f807f81ffff0000, 0xffff00000001807f, q19);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000080007fff, q20);
  ASSERT_EQUAL_128(0x7fff8000ffffffff, 0x0000000080007fff, q21);
  ASSERT_EQUAL_128(0x7fffffff80000001, 0x800000007fffffff, q22);
  ASSERT_EQUAL_128(0x7fffffff80000000, 0x0000000000000000, q23);
  ASSERT_EQUAL_128(0x0000000000000000, 0x8000000000007fff, q24);

  TEARDOWN();
}

TEST(neon_usra) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  __ Mov(v16.V2D(), v0.V2D());
  __ Mov(v17.V2D(), v0.V2D());
  __ Mov(v18.V2D(), v1.V2D());
  __ Mov(v19.V2D(), v1.V2D());
  __ Mov(v20.V2D(), v2.V2D());
  __ Mov(v21.V2D(), v2.V2D());
  __ Mov(v22.V2D(), v3.V2D());
  __ Mov(v23.V2D(), v4.V2D());
  __ Mov(v24.V2D(), v3.V2D());
  __ Mov(v25.V2D(), v4.V2D());

  __ Usra(v16.V8B(), v0.V8B(), 4);
  __ Usra(v17.V16B(), v0.V16B(), 4);

  __ Usra(v18.V4H(), v1.V4H(), 8);
  __ Usra(v19.V8H(), v1.V8H(), 8);

  __ Usra(v20.V2S(), v2.V2S(), 16);
  __ Usra(v21.V4S(), v2.V4S(), 16);

  __ Usra(v22.V2D(), v3.V2D(), 32);
  __ Usra(v23.V2D(), v4.V2D(), 32);

  __ Usra(d24, d3, 48);

  END();

  RUN();

  ASSERT_EQUAL_128(0x0000000000000000, 0x88890e0001868889, q16);
  ASSERT_EQUAL_128(0x8601000e89888601, 0x88890e0001868889, q17);
  ASSERT_EQUAL_128(0x0000000000000000, 0x00fe00000001807e, q18);
  ASSERT_EQUAL_128(0x8080808100fe0000, 0x00fe00000001807e, q19);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000080007ffe, q20);
  ASSERT_EQUAL_128(0x800080000000fffe, 0x0000000080007ffe, q21);
  ASSERT_EQUAL_128(0x8000000080000001, 0x800000007ffffffe, q22);
  ASSERT_EQUAL_128(0x8000000080000000, 0x0000000000000000, q23);
  ASSERT_EQUAL_128(0x0000000000000000, 0x8000000000007ffe, q24);

  TEARDOWN();
}

TEST(neon_ursra) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  __ Mov(v16.V2D(), v0.V2D());
  __ Mov(v17.V2D(), v0.V2D());
  __ Mov(v18.V2D(), v1.V2D());
  __ Mov(v19.V2D(), v1.V2D());
  __ Mov(v20.V2D(), v2.V2D());
  __ Mov(v21.V2D(), v2.V2D());
  __ Mov(v22.V2D(), v3.V2D());
  __ Mov(v23.V2D(), v4.V2D());
  __ Mov(v24.V2D(), v3.V2D());
  __ Mov(v25.V2D(), v4.V2D());

  __ Ursra(v16.V8B(), v0.V8B(), 4);
  __ Ursra(v17.V16B(), v0.V16B(), 4);

  __ Ursra(v18.V4H(), v1.V4H(), 8);
  __ Ursra(v19.V8H(), v1.V8H(), 8);

  __ Ursra(v20.V2S(), v2.V2S(), 16);
  __ Ursra(v21.V4S(), v2.V4S(), 16);

  __ Ursra(v22.V2D(), v3.V2D(), 32);
  __ Ursra(v23.V2D(), v4.V2D(), 32);

  __ Ursra(d24, d3, 48);

  END();

  RUN();

  ASSERT_EQUAL_128(0x0000000000000000, 0x88890f0001878889, q16);
  ASSERT_EQUAL_128(0x8701000f89888701, 0x88890f0001878889, q17);
  ASSERT_EQUAL_128(0x0000000000000000, 0x00ff00000001807f, q18);
  ASSERT_EQUAL_128(0x8080808100ff0000, 0x00ff00000001807f, q19);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000080007fff, q20);
  ASSERT_EQUAL_128(0x800080000000ffff, 0x0000000080007fff, q21);
  ASSERT_EQUAL_128(0x8000000080000001, 0x800000007fffffff, q22);
  ASSERT_EQUAL_128(0x8000000080000000, 0x0000000000000000, q23);
  ASSERT_EQUAL_128(0x0000000000000000, 0x8000000000007fff, q24);
  TEARDOWN();
}

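// In the register-operand forms below, the shift count is the signed byte in
// the least significant element of the second source register: positive
// counts shift left, negative counts shift right, with saturation ("q") and
// rounding ("r") behaviour as indicated by the mnemonic.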
TEST(neon_uqshl_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0xaaaaaaaaaaaaaaaa, 0xf0000000f000f0f0);
  __ Movi(v1.V2D(), 0x5555555555555555, 0x7fffffff7fff7f7f);
  __ Movi(v2.V2D(), 0xaaaaaaaaaaaaaaaa, 0x0000000000000001);
  __ Movi(v3.V2D(), 0xaaaaaaaaaaaaaaaa, 0xffffffffffffffff);

  __ Uqshl(b16, b0, b2);
  __ Uqshl(b17, b0, b3);
  __ Uqshl(b18, b1, b2);
  __ Uqshl(b19, b1, b3);
  __ Uqshl(h20, h0, h2);
  __ Uqshl(h21, h0, h3);
  __ Uqshl(h22, h1, h2);
  __ Uqshl(h23, h1, h3);
  __ Uqshl(s24, s0, s2);
  __ Uqshl(s25, s0, s3);
  __ Uqshl(s26, s1, s2);
  __ Uqshl(s27, s1, s3);
  __ Uqshl(d28, d0, d2);
  __ Uqshl(d29, d0, d3);
  __ Uqshl(d30, d1, d2);
  __ Uqshl(d31, d1, d3);

  END();

  RUN();

  ASSERT_EQUAL_128(0, 0xff, q16);
  ASSERT_EQUAL_128(0, 0x78, q17);
  ASSERT_EQUAL_128(0, 0xfe, q18);
  ASSERT_EQUAL_128(0, 0x3f, q19);
  ASSERT_EQUAL_128(0, 0xffff, q20);
  ASSERT_EQUAL_128(0, 0x7878, q21);
  ASSERT_EQUAL_128(0, 0xfefe, q22);
  ASSERT_EQUAL_128(0, 0x3fbf, q23);
  ASSERT_EQUAL_128(0, 0xffffffff, q24);
  ASSERT_EQUAL_128(0, 0x78007878, q25);
  ASSERT_EQUAL_128(0, 0xfffefefe, q26);
  ASSERT_EQUAL_128(0, 0x3fffbfbf, q27);
  ASSERT_EQUAL_128(0, 0xffffffffffffffff, q28);
  ASSERT_EQUAL_128(0, 0x7800000078007878, q29);
  ASSERT_EQUAL_128(0, 0xfffffffefffefefe, q30);
  ASSERT_EQUAL_128(0, 0x3fffffffbfffbfbf, q31);

  TEARDOWN();
}


TEST(neon_sqshl_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0xaaaaaaaaaaaaaaaa, 0xbfffffffbfffbfbf);
  __ Movi(v1.V2D(), 0x5555555555555555, 0x4000000040004040);
  __ Movi(v2.V2D(), 0xaaaaaaaaaaaaaaaa, 0x0000000000000001);
  __ Movi(v3.V2D(), 0xaaaaaaaaaaaaaaaa, 0xffffffffffffffff);

  __ Sqshl(b16, b0, b2);
  __ Sqshl(b17, b0, b3);
  __ Sqshl(b18, b1, b2);
  __ Sqshl(b19, b1, b3);
  __ Sqshl(h20, h0, h2);
  __ Sqshl(h21, h0, h3);
  __ Sqshl(h22, h1, h2);
  __ Sqshl(h23, h1, h3);
  __ Sqshl(s24, s0, s2);
  __ Sqshl(s25, s0, s3);
  __ Sqshl(s26, s1, s2);
  __ Sqshl(s27, s1, s3);
  __ Sqshl(d28, d0, d2);
  __ Sqshl(d29, d0, d3);
  __ Sqshl(d30, d1, d2);
  __ Sqshl(d31, d1, d3);

  END();

  RUN();

  ASSERT_EQUAL_128(0, 0x80, q16);
  ASSERT_EQUAL_128(0, 0xdf, q17);
  ASSERT_EQUAL_128(0, 0x7f, q18);
  ASSERT_EQUAL_128(0, 0x20, q19);
  ASSERT_EQUAL_128(0, 0x8000, q20);
  ASSERT_EQUAL_128(0, 0xdfdf, q21);
  ASSERT_EQUAL_128(0, 0x7fff, q22);
  ASSERT_EQUAL_128(0, 0x2020, q23);
  ASSERT_EQUAL_128(0, 0x80000000, q24);
  ASSERT_EQUAL_128(0, 0xdfffdfdf, q25);
  ASSERT_EQUAL_128(0, 0x7fffffff, q26);
  ASSERT_EQUAL_128(0, 0x20002020, q27);
  ASSERT_EQUAL_128(0, 0x8000000000000000, q28);
  ASSERT_EQUAL_128(0, 0xdfffffffdfffdfdf, q29);
  ASSERT_EQUAL_128(0, 0x7fffffffffffffff, q30);
  ASSERT_EQUAL_128(0, 0x2000000020002020, q31);

  TEARDOWN();
}

TEST(neon_urshl_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0xaaaaaaaaaaaaaaaa, 0xf0000000f000f0f0);
  __ Movi(v1.V2D(), 0x5555555555555555, 0x7fffffff7fff7f7f);
  __ Movi(v2.V2D(), 0xaaaaaaaaaaaaaaaa, 0x0000000000000001);
  __ Movi(v3.V2D(), 0xaaaaaaaaaaaaaaaa, 0xffffffffffffffff);

  __ Urshl(d28, d0, d2);
  __ Urshl(d29, d0, d3);
  __ Urshl(d30, d1, d2);
  __ Urshl(d31, d1, d3);

  END();

  RUN();

  ASSERT_EQUAL_128(0, 0xe0000001e001e1e0, q28);
  ASSERT_EQUAL_128(0, 0x7800000078007878, q29);
  ASSERT_EQUAL_128(0, 0xfffffffefffefefe, q30);
  ASSERT_EQUAL_128(0, 0x3fffffffbfffbfc0, q31);

  TEARDOWN();
}


TEST(neon_srshl_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0xaaaaaaaaaaaaaaaa, 0xbfffffffbfffbfbf);
  __ Movi(v1.V2D(), 0x5555555555555555, 0x4000000040004040);
  __ Movi(v2.V2D(), 0xaaaaaaaaaaaaaaaa, 0x0000000000000001);
  __ Movi(v3.V2D(), 0xaaaaaaaaaaaaaaaa, 0xffffffffffffffff);

  __ Srshl(d28, d0, d2);
  __ Srshl(d29, d0, d3);
  __ Srshl(d30, d1, d2);
  __ Srshl(d31, d1, d3);

  END();

  RUN();

  ASSERT_EQUAL_128(0, 0x7fffffff7fff7f7e, q28);
  ASSERT_EQUAL_128(0, 0xdfffffffdfffdfe0, q29);
  ASSERT_EQUAL_128(0, 0x8000000080008080, q30);
  ASSERT_EQUAL_128(0, 0x2000000020002020, q31);

  TEARDOWN();
}

TEST(neon_uqrshl_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0xaaaaaaaaaaaaaaaa, 0xf0000000f000f0f0);
  __ Movi(v1.V2D(), 0x5555555555555555, 0x7fffffff7fff7f7f);
  __ Movi(v2.V2D(), 0xaaaaaaaaaaaaaaaa, 0x0000000000000001);
  __ Movi(v3.V2D(), 0xaaaaaaaaaaaaaaaa, 0xffffffffffffffff);

  __ Uqrshl(b16, b0, b2);
  __ Uqrshl(b17, b0, b3);
  __ Uqrshl(b18, b1, b2);
  __ Uqrshl(b19, b1, b3);
  __ Uqrshl(h20, h0, h2);
  __ Uqrshl(h21, h0, h3);
  __ Uqrshl(h22, h1, h2);
  __ Uqrshl(h23, h1, h3);
  __ Uqrshl(s24, s0, s2);
  __ Uqrshl(s25, s0, s3);
  __ Uqrshl(s26, s1, s2);
  __ Uqrshl(s27, s1, s3);
  __ Uqrshl(d28, d0, d2);
  __ Uqrshl(d29, d0, d3);
  __ Uqrshl(d30, d1, d2);
  __ Uqrshl(d31, d1, d3);

  END();

  RUN();

  ASSERT_EQUAL_128(0, 0xff, q16);
  ASSERT_EQUAL_128(0, 0x78, q17);
  ASSERT_EQUAL_128(0, 0xfe, q18);
  ASSERT_EQUAL_128(0, 0x40, q19);
  ASSERT_EQUAL_128(0, 0xffff, q20);
  ASSERT_EQUAL_128(0, 0x7878, q21);
  ASSERT_EQUAL_128(0, 0xfefe, q22);
  ASSERT_EQUAL_128(0, 0x3fc0, q23);
  ASSERT_EQUAL_128(0, 0xffffffff, q24);
  ASSERT_EQUAL_128(0, 0x78007878, q25);
  ASSERT_EQUAL_128(0, 0xfffefefe, q26);
  ASSERT_EQUAL_128(0, 0x3fffbfc0, q27);
  ASSERT_EQUAL_128(0, 0xffffffffffffffff, q28);
  ASSERT_EQUAL_128(0, 0x7800000078007878, q29);
  ASSERT_EQUAL_128(0, 0xfffffffefffefefe, q30);
  ASSERT_EQUAL_128(0, 0x3fffffffbfffbfc0, q31);

  TEARDOWN();
}


TEST(neon_sqrshl_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0xaaaaaaaaaaaaaaaa, 0xbfffffffbfffbfbf);
  __ Movi(v1.V2D(), 0x5555555555555555, 0x4000000040004040);
  __ Movi(v2.V2D(), 0xaaaaaaaaaaaaaaaa, 0x0000000000000001);
  __ Movi(v3.V2D(), 0xaaaaaaaaaaaaaaaa, 0xffffffffffffffff);

  __ Sqrshl(b16, b0, b2);
  __ Sqrshl(b17, b0, b3);
  __ Sqrshl(b18, b1, b2);
  __ Sqrshl(b19, b1, b3);
  __ Sqrshl(h20, h0, h2);
  __ Sqrshl(h21, h0, h3);
  __ Sqrshl(h22, h1, h2);
  __ Sqrshl(h23, h1, h3);
  __ Sqrshl(s24, s0, s2);
  __ Sqrshl(s25, s0, s3);
  __ Sqrshl(s26, s1, s2);
  __ Sqrshl(s27, s1, s3);
  __ Sqrshl(d28, d0, d2);
  __ Sqrshl(d29, d0, d3);
  __ Sqrshl(d30, d1, d2);
  __ Sqrshl(d31, d1, d3);

  END();

  RUN();

  ASSERT_EQUAL_128(0, 0x80, q16);
  ASSERT_EQUAL_128(0, 0xe0, q17);
  ASSERT_EQUAL_128(0, 0x7f, q18);
  ASSERT_EQUAL_128(0, 0x20, q19);
  ASSERT_EQUAL_128(0, 0x8000, q20);
  ASSERT_EQUAL_128(0, 0xdfe0, q21);
  ASSERT_EQUAL_128(0, 0x7fff, q22);
  ASSERT_EQUAL_128(0, 0x2020, q23);
  ASSERT_EQUAL_128(0, 0x80000000, q24);
  ASSERT_EQUAL_128(0, 0xdfffdfe0, q25);
  ASSERT_EQUAL_128(0, 0x7fffffff, q26);
  ASSERT_EQUAL_128(0, 0x20002020, q27);
  ASSERT_EQUAL_128(0, 0x8000000000000000, q28);
  ASSERT_EQUAL_128(0, 0xdfffffffdfffdfe0, q29);
  ASSERT_EQUAL_128(0, 0x7fffffffffffffff, q30);
  ASSERT_EQUAL_128(0, 0x2000000020002020, q31);

  TEARDOWN();
}

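// UQADD and SQADD are saturating adds: sums that overflow the element width
// clamp to the unsigned maximum or to the signed extremes respectively.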
TEST(neon_uqadd_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0xaaaaaaaaaaaaaaaa, 0xf0000000f000f0f0);
  __ Movi(v1.V2D(), 0x5555555555555555, 0x7fffffff7fff7f7f);
  __ Movi(v2.V2D(), 0xaaaaaaaaaaaaaaaa, 0x1000000010001010);

  __ Uqadd(b16, b0, b0);
  __ Uqadd(b17, b1, b1);
  __ Uqadd(b18, b2, b2);
  __ Uqadd(h19, h0, h0);
  __ Uqadd(h20, h1, h1);
  __ Uqadd(h21, h2, h2);
  __ Uqadd(s22, s0, s0);
  __ Uqadd(s23, s1, s1);
  __ Uqadd(s24, s2, s2);
  __ Uqadd(d25, d0, d0);
  __ Uqadd(d26, d1, d1);
  __ Uqadd(d27, d2, d2);

  END();

  RUN();

  ASSERT_EQUAL_128(0, 0xff, q16);
  ASSERT_EQUAL_128(0, 0xfe, q17);
  ASSERT_EQUAL_128(0, 0x20, q18);
  ASSERT_EQUAL_128(0, 0xffff, q19);
  ASSERT_EQUAL_128(0, 0xfefe, q20);
  ASSERT_EQUAL_128(0, 0x2020, q21);
  ASSERT_EQUAL_128(0, 0xffffffff, q22);
  ASSERT_EQUAL_128(0, 0xfffefefe, q23);
  ASSERT_EQUAL_128(0, 0x20002020, q24);
  ASSERT_EQUAL_128(0, 0xffffffffffffffff, q25);
  ASSERT_EQUAL_128(0, 0xfffffffefffefefe, q26);
  ASSERT_EQUAL_128(0, 0x2000000020002020, q27);

  TEARDOWN();
}


TEST(neon_sqadd_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0xaaaaaaaaaaaaaaaa, 0x8000000180018181);
  __ Movi(v1.V2D(), 0x5555555555555555, 0x7fffffff7fff7f7f);
  __ Movi(v2.V2D(), 0xaaaaaaaaaaaaaaaa, 0x1000000010001010);

  __ Sqadd(b16, b0, b0);
  __ Sqadd(b17, b1, b1);
  __ Sqadd(b18, b2, b2);
  __ Sqadd(h19, h0, h0);
  __ Sqadd(h20, h1, h1);
  __ Sqadd(h21, h2, h2);
  __ Sqadd(s22, s0, s0);
  __ Sqadd(s23, s1, s1);
  __ Sqadd(s24, s2, s2);
  __ Sqadd(d25, d0, d0);
  __ Sqadd(d26, d1, d1);
  __ Sqadd(d27, d2, d2);

  END();

  RUN();

  ASSERT_EQUAL_128(0, 0x80, q16);
  ASSERT_EQUAL_128(0, 0x7f, q17);
  ASSERT_EQUAL_128(0, 0x20, q18);
  ASSERT_EQUAL_128(0, 0x8000, q19);
  ASSERT_EQUAL_128(0, 0x7fff, q20);
  ASSERT_EQUAL_128(0, 0x2020, q21);
  ASSERT_EQUAL_128(0, 0x80000000, q22);
  ASSERT_EQUAL_128(0, 0x7fffffff, q23);
  ASSERT_EQUAL_128(0, 0x20002020, q24);
  ASSERT_EQUAL_128(0, 0x8000000000000000, q25);
  ASSERT_EQUAL_128(0, 0x7fffffffffffffff, q26);
  ASSERT_EQUAL_128(0, 0x2000000020002020, q27);

  TEARDOWN();
}

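// UQSUB saturates at zero, so subtracting the larger operand from the smaller
// yields 0 rather than wrapping; SQSUB clamps to the signed extremes.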
TEST(neon_uqsub_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0xaaaaaaaaaaaaaaaa, 0xf0000000f000f0f0);
  __ Movi(v1.V2D(), 0x5555555555555555, 0x7fffffff7fff7f7f);

  __ Uqsub(b16, b0, b0);
  __ Uqsub(b17, b0, b1);
  __ Uqsub(b18, b1, b0);
  __ Uqsub(h19, h0, h0);
  __ Uqsub(h20, h0, h1);
  __ Uqsub(h21, h1, h0);
  __ Uqsub(s22, s0, s0);
  __ Uqsub(s23, s0, s1);
  __ Uqsub(s24, s1, s0);
  __ Uqsub(d25, d0, d0);
  __ Uqsub(d26, d0, d1);
  __ Uqsub(d27, d1, d0);

  END();

  RUN();

  ASSERT_EQUAL_128(0, 0, q16);
  ASSERT_EQUAL_128(0, 0x71, q17);
  ASSERT_EQUAL_128(0, 0, q18);

  ASSERT_EQUAL_128(0, 0, q19);
  ASSERT_EQUAL_128(0, 0x7171, q20);
  ASSERT_EQUAL_128(0, 0, q21);

  ASSERT_EQUAL_128(0, 0, q22);
  ASSERT_EQUAL_128(0, 0x70017171, q23);
  ASSERT_EQUAL_128(0, 0, q24);

  ASSERT_EQUAL_128(0, 0, q25);
  ASSERT_EQUAL_128(0, 0x7000000170017171, q26);
  ASSERT_EQUAL_128(0, 0, q27);

  TEARDOWN();
}


TEST(neon_sqsub_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0xaaaaaaaaaaaaaaaa, 0xf0000000f000f0f0);
  __ Movi(v1.V2D(), 0x5555555555555555, 0x7eeeeeee7eee7e7e);

  __ Sqsub(b16, b0, b0);
  __ Sqsub(b17, b0, b1);
  __ Sqsub(b18, b1, b0);
  __ Sqsub(h19, h0, h0);
  __ Sqsub(h20, h0, h1);
  __ Sqsub(h21, h1, h0);
  __ Sqsub(s22, s0, s0);
  __ Sqsub(s23, s0, s1);
  __ Sqsub(s24, s1, s0);
  __ Sqsub(d25, d0, d0);
  __ Sqsub(d26, d0, d1);
  __ Sqsub(d27, d1, d0);

  END();

  RUN();

  ASSERT_EQUAL_128(0, 0, q16);
  ASSERT_EQUAL_128(0, 0x80, q17);
  ASSERT_EQUAL_128(0, 0x7f, q18);

  ASSERT_EQUAL_128(0, 0, q19);
  ASSERT_EQUAL_128(0, 0x8000, q20);
  ASSERT_EQUAL_128(0, 0x7fff, q21);

  ASSERT_EQUAL_128(0, 0, q22);
  ASSERT_EQUAL_128(0, 0x80000000, q23);
  ASSERT_EQUAL_128(0, 0x7fffffff, q24);

  ASSERT_EQUAL_128(0, 0, q25);
  ASSERT_EQUAL_128(0, 0x8000000000000000, q26);
  ASSERT_EQUAL_128(0, 0x7fffffffffffffff, q27);

  TEARDOWN();
}

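// FMLA computes d + (n * m) and FMLS computes d - (n * m), in both cases as
// fused operations with no intermediate rounding of the product.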
TEST(neon_fmla_fmls) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);

  START();
  __ Movi(v0.V2D(), 0x3f80000040000000, 0x4100000000000000);
  __ Movi(v1.V2D(), 0x400000003f800000, 0x000000003f800000);
  __ Movi(v2.V2D(), 0x3f800000ffffffff, 0x7f800000ff800000);
  __ Mov(v16.V16B(), v0.V16B());
  __ Mov(v17.V16B(), v0.V16B());
  __ Mov(v18.V16B(), v0.V16B());
  __ Mov(v19.V16B(), v0.V16B());
  __ Mov(v20.V16B(), v0.V16B());
  __ Mov(v21.V16B(), v0.V16B());

  __ Fmla(v16.V2S(), v1.V2S(), v2.V2S());
  __ Fmla(v17.V4S(), v1.V4S(), v2.V4S());
  __ Fmla(v18.V2D(), v1.V2D(), v2.V2D());
  __ Fmls(v19.V2S(), v1.V2S(), v2.V2S());
  __ Fmls(v20.V4S(), v1.V4S(), v2.V4S());
  __ Fmls(v21.V2D(), v1.V2D(), v2.V2D());
  END();

  RUN();

  ASSERT_EQUAL_128(0x0000000000000000, 0x7fc00000ff800000, q16);
  ASSERT_EQUAL_128(0x40400000ffffffff, 0x7fc00000ff800000, q17);
  ASSERT_EQUAL_128(0x3f9800015f8003f7, 0x41000000000000fe, q18);
  ASSERT_EQUAL_128(0x0000000000000000, 0x7fc000007f800000, q19);
  ASSERT_EQUAL_128(0xbf800000ffffffff, 0x7fc000007f800000, q20);
  ASSERT_EQUAL_128(0xbf8000023f0007ee, 0x40fffffffffffe04, q21);

  TEARDOWN();
}

TEST(neon_fmla_h) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();
  __ Movi(v0.V2D(), 0x4000400040004000, 0x4000400040004000);
  __ Movi(v1.V2D(), 0x51a051a051a051a0, 0x51a051a051a051a0);
  __ Movi(v2.V2D(), 0x7c007c007c007c00, 0x7c007c007c007c00);
  __ Movi(v3.V2D(), 0xfc00fc00fc00fc00, 0xfc00fc00fc00fc00);
  __ Movi(v4.V2D(), 0x7e007e007e007e00, 0x7e007e007e007e00);
  __ Movi(v5.V2D(), 0x7c017c017c017c01, 0x7c017c017c017c01);
  __ Movi(v6.V2D(), 0x0000000000000000, 0x0000000000000000);
  __ Mov(v16.V2D(), v0.V2D());
  __ Mov(v17.V2D(), v0.V2D());
  __ Mov(v18.V2D(), v4.V2D());
  __ Mov(v19.V2D(), v5.V2D());
  __ Mov(v20.V2D(), v0.V2D());
  __ Mov(v21.V2D(), v0.V2D());
  __ Mov(v22.V2D(), v4.V2D());
  __ Mov(v23.V2D(), v5.V2D());

  __ Fmla(v16.V8H(), v0.V8H(), v1.V8H());
  __ Fmla(v17.V8H(), v2.V8H(), v3.V8H());
  __ Fmla(v18.V8H(), v2.V8H(), v6.V8H());
  __ Fmla(v19.V8H(), v3.V8H(), v6.V8H());
  __ Fmla(v20.V4H(), v0.V4H(), v1.V4H());
  __ Fmla(v21.V4H(), v2.V4H(), v3.V4H());
  __ Fmla(v22.V4H(), v2.V4H(), v6.V4H());
  __ Fmla(v23.V4H(), v3.V4H(), v6.V4H());
  END();

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
  RUN();

  ASSERT_EQUAL_128(0x55c055c055c055c0, 0x55c055c055c055c0, v16);
  ASSERT_EQUAL_128(0xfc00fc00fc00fc00, 0xfc00fc00fc00fc00, v17);
  ASSERT_EQUAL_128(0x7e007e007e007e00, 0x7e007e007e007e00, v18);
  ASSERT_EQUAL_128(0x7e017e017e017e01, 0x7e017e017e017e01, v19);
  ASSERT_EQUAL_128(0, 0x55c055c055c055c0, v20);
  ASSERT_EQUAL_128(0, 0xfc00fc00fc00fc00, v21);
  ASSERT_EQUAL_128(0, 0x7e007e007e007e00, v22);
  ASSERT_EQUAL_128(0, 0x7e017e017e017e01, v23);
#endif  // VIXL_INCLUDE_SIMULATOR_AARCH64

  TEARDOWN();
}


TEST(neon_fmls_h) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();
  __ Movi(v0.V2D(), 0x4000400040004000, 0x4000400040004000);
  __ Movi(v1.V2D(), 0x51a051a051a051a0, 0x51a051a051a051a0);
  __ Movi(v2.V2D(), 0x7c007c007c007c00, 0x7c007c007c007c00);
  __ Movi(v3.V2D(), 0xfc00fc00fc00fc00, 0xfc00fc00fc00fc00);
  __ Movi(v4.V2D(), 0x7e007e007e007e00, 0x7e007e007e007e00);
  __ Movi(v5.V2D(), 0x7c017c017c017c01, 0x7c017c017c017c01);
  __ Movi(v6.V2D(), 0x0000000000000000, 0x0000000000000000);
  __ Mov(v16.V2D(), v0.V2D());
  __ Mov(v17.V2D(), v0.V2D());
  __ Mov(v18.V2D(), v4.V2D());
  __ Mov(v19.V2D(), v5.V2D());
  __ Mov(v20.V2D(), v0.V2D());
  __ Mov(v21.V2D(), v0.V2D());
  __ Mov(v22.V2D(), v4.V2D());
  __ Mov(v23.V2D(), v5.V2D());

  __ Fmls(v16.V8H(), v0.V8H(), v1.V8H());
  __ Fmls(v17.V8H(), v2.V8H(), v3.V8H());
  __ Fmls(v18.V8H(), v2.V8H(), v6.V8H());
  __ Fmls(v19.V8H(), v3.V8H(), v6.V8H());
  __ Fmls(v20.V4H(), v0.V4H(), v1.V4H());
  __ Fmls(v21.V4H(), v2.V4H(), v3.V4H());
  __ Fmls(v22.V4H(), v2.V4H(), v6.V4H());
  __ Fmls(v23.V4H(), v3.V4H(), v6.V4H());
  END();

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
  RUN();

  ASSERT_EQUAL_128(0xd580d580d580d580, 0xd580d580d580d580, v16);
  ASSERT_EQUAL_128(0x7c007c007c007c00, 0x7c007c007c007c00, v17);
  ASSERT_EQUAL_128(0x7e007e007e007e00, 0x7e007e007e007e00, v18);
  ASSERT_EQUAL_128(0x7e017e017e017e01, 0x7e017e017e017e01, v19);
  ASSERT_EQUAL_128(0, 0xd580d580d580d580, v20);
  ASSERT_EQUAL_128(0, 0x7c007c007c007c00, v21);
  ASSERT_EQUAL_128(0, 0x7e007e007e007e00, v22);
  ASSERT_EQUAL_128(0, 0x7e017e017e017e01, v23);
#endif  // VIXL_INCLUDE_SIMULATOR_AARCH64

  TEARDOWN();
}

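// FMULX behaves like FMUL except that (+/-0.0) * (+/-infinity) returns
// +/-2.0 (with the sign given by the XOR of the operand signs) rather than
// the default NaN; this is the behaviour required by the reciprocal and
// reciprocal-square-root step algorithms.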
TEST(neon_fmulx_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);

  START();
  __ Fmov(s0, 2.0);
  __ Fmov(s1, 0.5);
  __ Fmov(s2, 0.0);
  __ Fmov(s3, -0.0);
  __ Fmov(s4, kFP32PositiveInfinity);
  __ Fmov(s5, kFP32NegativeInfinity);
  __ Fmulx(s16, s0, s1);
  __ Fmulx(s17, s2, s4);
  __ Fmulx(s18, s2, s5);
  __ Fmulx(s19, s3, s4);
  __ Fmulx(s20, s3, s5);

  __ Fmov(d21, 2.0);
  __ Fmov(d22, 0.5);
  __ Fmov(d23, 0.0);
  __ Fmov(d24, -0.0);
  __ Fmov(d25, kFP64PositiveInfinity);
  __ Fmov(d26, kFP64NegativeInfinity);
  __ Fmulx(d27, d21, d22);
  __ Fmulx(d28, d23, d25);
  __ Fmulx(d29, d23, d26);
  __ Fmulx(d30, d24, d25);
  __ Fmulx(d31, d24, d26);
  END();

  RUN();

  ASSERT_EQUAL_FP32(1.0, s16);
  ASSERT_EQUAL_FP32(2.0, s17);
  ASSERT_EQUAL_FP32(-2.0, s18);
  ASSERT_EQUAL_FP32(-2.0, s19);
  ASSERT_EQUAL_FP32(2.0, s20);
  ASSERT_EQUAL_FP64(1.0, d27);
  ASSERT_EQUAL_FP64(2.0, d28);
  ASSERT_EQUAL_FP64(-2.0, d29);
  ASSERT_EQUAL_FP64(-2.0, d30);
  ASSERT_EQUAL_FP64(2.0, d31);

  TEARDOWN();
}

TEST(neon_fmulx_h) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();
  __ Movi(v0.V2D(), 0x4000400040004000, 0x4000400040004000);
  __ Movi(v1.V2D(), 0x3800380038003800, 0x3800380038003800);
  __ Movi(v2.V2D(), 0x0000000000000000, 0x0000000000000000);
  __ Movi(v3.V2D(), 0x8000800080008000, 0x8000800080008000);
  __ Movi(v4.V2D(), 0x7c007c007c007c00, 0x7c007c007c007c00);
  __ Movi(v5.V2D(), 0xfc00fc00fc00fc00, 0xfc00fc00fc00fc00);
  __ Fmulx(v6.V8H(), v0.V8H(), v1.V8H());
  __ Fmulx(v7.V8H(), v2.V8H(), v4.V8H());
  __ Fmulx(v8.V8H(), v2.V8H(), v5.V8H());
  __ Fmulx(v9.V8H(), v3.V8H(), v4.V8H());
  __ Fmulx(v10.V8H(), v3.V8H(), v5.V8H());
  __ Fmulx(v11.V4H(), v0.V4H(), v1.V4H());
  __ Fmulx(v12.V4H(), v2.V4H(), v4.V4H());
  __ Fmulx(v13.V4H(), v2.V4H(), v5.V4H());
  __ Fmulx(v14.V4H(), v3.V4H(), v4.V4H());
  __ Fmulx(v15.V4H(), v3.V4H(), v5.V4H());
  END();

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
  RUN();
  ASSERT_EQUAL_128(0x3c003c003c003c00, 0x3c003c003c003c00, v6);
  ASSERT_EQUAL_128(0x4000400040004000, 0x4000400040004000, v7);
  ASSERT_EQUAL_128(0xc000c000c000c000, 0xc000c000c000c000, v8);
  ASSERT_EQUAL_128(0xc000c000c000c000, 0xc000c000c000c000, v9);
  ASSERT_EQUAL_128(0x4000400040004000, 0x4000400040004000, v10);
  ASSERT_EQUAL_128(0, 0x3c003c003c003c00, v11);
  ASSERT_EQUAL_128(0, 0x4000400040004000, v12);
  ASSERT_EQUAL_128(0, 0xc000c000c000c000, v13);
  ASSERT_EQUAL_128(0, 0xc000c000c000c000, v14);
  ASSERT_EQUAL_128(0, 0x4000400040004000, v15);
#endif  // VIXL_INCLUDE_SIMULATOR_AARCH64

  TEARDOWN();
}


TEST(neon_fmulx_h_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf,
                      CPUFeatures::kFPHalf);

  START();
  __ Fmov(h0, Float16(2.0));
  __ Fmov(h1, Float16(0.5));
  __ Fmov(h2, Float16(0.0));
  __ Fmov(h3, Float16(-0.0));
  __ Fmov(h4, kFP16PositiveInfinity);
  __ Fmov(h5, kFP16NegativeInfinity);
  __ Fmulx(h6, h0, h1);
  __ Fmulx(h7, h2, h4);
  __ Fmulx(h8, h2, h5);
  __ Fmulx(h9, h3, h4);
  __ Fmulx(h10, h3, h5);
  END();

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
  RUN();
  ASSERT_EQUAL_FP16(Float16(1.0), h6);
  ASSERT_EQUAL_FP16(Float16(2.0), h7);
  ASSERT_EQUAL_FP16(Float16(-2.0), h8);
  ASSERT_EQUAL_FP16(Float16(-2.0), h9);
  ASSERT_EQUAL_FP16(Float16(2.0), h10);
#endif  // VIXL_INCLUDE_SIMULATOR_AARCH64

  TEARDOWN();
}

// We currently disable tests for CRC32 instructions when running natively.
// Support for this family of instructions is optional, and so native
// platforms may simply fail to execute the test.
// TODO: Run the test on native platforms where the CRC32 instructions are
// available.
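// As a rough sketch of how such a runtime check could look on Linux (this is
// an illustration only and is not used by this test harness):
//
//   #include <sys/auxv.h>   // getauxval()
//   #include <asm/hwcap.h>  // HWCAP_CRC32
//
//   bool CpuHasCrc32() {
//     // Query the kernel's hardware-capability bits for this process.
//     return (getauxval(AT_HWCAP) & HWCAP_CRC32) != 0;
//   }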
#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
TEST(crc32b) {
  SETUP_WITH_FEATURES(CPUFeatures::kCRC32);

  START();

  __ Mov(w0, 0);
  __ Mov(w1, 0);
  __ Crc32b(w10, w0, w1);

  __ Mov(w0, 0x1);
  __ Mov(w1, 0x138);
  __ Crc32b(w11, w0, w1);

  __ Mov(w0, 0x1);
  __ Mov(w1, 0x38);
  __ Crc32b(w12, w0, w1);

  __ Mov(w0, 0);
  __ Mov(w1, 128);
  __ Crc32b(w13, w0, w1);

  __ Mov(w0, UINT32_MAX);
  __ Mov(w1, 255);
  __ Crc32b(w14, w0, w1);

  __ Mov(w0, 0x00010001);
  __ Mov(w1, 0x10001000);
  __ Crc32b(w15, w0, w1);

  END();
  RUN();

  ASSERT_EQUAL_64(0x0, x10);
  ASSERT_EQUAL_64(0x5f058808, x11);
  ASSERT_EQUAL_64(0x5f058808, x12);
  ASSERT_EQUAL_64(0xedb88320, x13);
  ASSERT_EQUAL_64(0x00ffffff, x14);
  ASSERT_EQUAL_64(0x77073196, x15);

  TEARDOWN();
}


TEST(crc32h) {
  SETUP_WITH_FEATURES(CPUFeatures::kCRC32);

  START();

  __ Mov(w0, 0);
  __ Mov(w1, 0);
  __ Crc32h(w10, w0, w1);

  __ Mov(w0, 0x1);
  __ Mov(w1, 0x10038);
  __ Crc32h(w11, w0, w1);

  __ Mov(w0, 0x1);
  __ Mov(w1, 0x38);
  __ Crc32h(w12, w0, w1);

  __ Mov(w0, 0);
  __ Mov(w1, 128);
  __ Crc32h(w13, w0, w1);

  __ Mov(w0, UINT32_MAX);
  __ Mov(w1, 255);
  __ Crc32h(w14, w0, w1);

  __ Mov(w0, 0x00010001);
  __ Mov(w1, 0x10001000);
  __ Crc32h(w15, w0, w1);

  END();
  RUN();

  ASSERT_EQUAL_64(0x0, x10);
  ASSERT_EQUAL_64(0x0e848dba, x11);
  ASSERT_EQUAL_64(0x0e848dba, x12);
  ASSERT_EQUAL_64(0x3b83984b, x13);
  ASSERT_EQUAL_64(0x2d021072, x14);
  ASSERT_EQUAL_64(0x04ac2124, x15);

  TEARDOWN();
}

TEST(crc32w) {
  SETUP_WITH_FEATURES(CPUFeatures::kCRC32);

  START();

  __ Mov(w0, 0);
  __ Mov(w1, 0);
  __ Crc32w(w10, w0, w1);

  __ Mov(w0, 0x1);
  __ Mov(w1, 0x80000031);
  __ Crc32w(w11, w0, w1);

  __ Mov(w0, 0);
  __ Mov(w1, 128);
  __ Crc32w(w13, w0, w1);

  __ Mov(w0, UINT32_MAX);
  __ Mov(w1, 255);
  __ Crc32w(w14, w0, w1);

  __ Mov(w0, 0x00010001);
  __ Mov(w1, 0x10001000);
  __ Crc32w(w15, w0, w1);

  END();
  RUN();

  ASSERT_EQUAL_64(0x0, x10);
  ASSERT_EQUAL_64(0x1d937b81, x11);
  ASSERT_EQUAL_64(0xed59b63b, x13);
  ASSERT_EQUAL_64(0x00be2612, x14);
  ASSERT_EQUAL_64(0xa036e530, x15);

  TEARDOWN();
}


TEST(crc32x) {
  SETUP_WITH_FEATURES(CPUFeatures::kCRC32);

  START();

  __ Mov(w0, 0);
  __ Mov(x1, 0);
  __ Crc32x(w10, w0, x1);

  __ Mov(w0, 0x1);
  __ Mov(x1, UINT64_C(0x0000000800000031));
  __ Crc32x(w11, w0, x1);

  __ Mov(w0, 0);
  __ Mov(x1, 128);
  __ Crc32x(w13, w0, x1);

  __ Mov(w0, UINT32_MAX);
  __ Mov(x1, 255);
  __ Crc32x(w14, w0, x1);

  __ Mov(w0, 0x00010001);
  __ Mov(x1, UINT64_C(0x1000100000000000));
  __ Crc32x(w15, w0, x1);

  END();
  RUN();

  ASSERT_EQUAL_64(0x0, x10);
  ASSERT_EQUAL_64(0x40797b92, x11);
  ASSERT_EQUAL_64(0x533b85da, x13);
  ASSERT_EQUAL_64(0xbc962670, x14);
  ASSERT_EQUAL_64(0x0667602f, x15);

  TEARDOWN();
}

TEST(crc32cb) {
  SETUP_WITH_FEATURES(CPUFeatures::kCRC32);

  START();

  __ Mov(w0, 0);
  __ Mov(w1, 0);
  __ Crc32cb(w10, w0, w1);

  __ Mov(w0, 0x1);
  __ Mov(w1, 0x138);
  __ Crc32cb(w11, w0, w1);

  __ Mov(w0, 0x1);
  __ Mov(w1, 0x38);
  __ Crc32cb(w12, w0, w1);

  __ Mov(w0, 0);
  __ Mov(w1, 128);
  __ Crc32cb(w13, w0, w1);

  __ Mov(w0, UINT32_MAX);
  __ Mov(w1, 255);
  __ Crc32cb(w14, w0, w1);

  __ Mov(w0, 0x00010001);
  __ Mov(w1, 0x10001000);
  __ Crc32cb(w15, w0, w1);

  END();
  RUN();

  ASSERT_EQUAL_64(0x0, x10);
  ASSERT_EQUAL_64(0x4851927d, x11);
  ASSERT_EQUAL_64(0x4851927d, x12);
  ASSERT_EQUAL_64(0x82f63b78, x13);
  ASSERT_EQUAL_64(0x00ffffff, x14);
  ASSERT_EQUAL_64(0xf26b8203, x15);

  TEARDOWN();
}


TEST(crc32ch) {
  SETUP_WITH_FEATURES(CPUFeatures::kCRC32);

  START();

  __ Mov(w0, 0);
  __ Mov(w1, 0);
  __ Crc32ch(w10, w0, w1);

  __ Mov(w0, 0x1);
  __ Mov(w1, 0x10038);
  __ Crc32ch(w11, w0, w1);

  __ Mov(w0, 0x1);
  __ Mov(w1, 0x38);
  __ Crc32ch(w12, w0, w1);

  __ Mov(w0, 0);
  __ Mov(w1, 128);
  __ Crc32ch(w13, w0, w1);

  __ Mov(w0, UINT32_MAX);
  __ Mov(w1, 255);
  __ Crc32ch(w14, w0, w1);

  __ Mov(w0, 0x00010001);
  __ Mov(w1, 0x10001000);
  __ Crc32ch(w15, w0, w1);

  END();
  RUN();

  ASSERT_EQUAL_64(0x0, x10);
  ASSERT_EQUAL_64(0xcef8494c, x11);
  ASSERT_EQUAL_64(0xcef8494c, x12);
  ASSERT_EQUAL_64(0xfbc3faf9, x13);
  ASSERT_EQUAL_64(0xad7dacae, x14);
  ASSERT_EQUAL_64(0x03fc5f19, x15);

  TEARDOWN();
}

TEST(crc32cw) {
  SETUP_WITH_FEATURES(CPUFeatures::kCRC32);

  START();

  __ Mov(w0, 0);
  __ Mov(w1, 0);
  __ Crc32cw(w10, w0, w1);

  __ Mov(w0, 0x1);
  __ Mov(w1, 0x80000031);
  __ Crc32cw(w11, w0, w1);

  __ Mov(w0, 0);
  __ Mov(w1, 128);
  __ Crc32cw(w13, w0, w1);

  __ Mov(w0, UINT32_MAX);
  __ Mov(w1, 255);
  __ Crc32cw(w14, w0, w1);

  __ Mov(w0, 0x00010001);
  __ Mov(w1, 0x10001000);
  __ Crc32cw(w15, w0, w1);

  END();
  RUN();

  ASSERT_EQUAL_64(0x0, x10);
  ASSERT_EQUAL_64(0xbcb79ece, x11);
  ASSERT_EQUAL_64(0x52a0c93f, x13);
  ASSERT_EQUAL_64(0x9f9b5c7a, x14);
  ASSERT_EQUAL_64(0xae1b882a, x15);

  TEARDOWN();
}


TEST(crc32cx) {
  SETUP_WITH_FEATURES(CPUFeatures::kCRC32);

  START();

  __ Mov(w0, 0);
  __ Mov(x1, 0);
  __ Crc32cx(w10, w0, x1);

  __ Mov(w0, 0x1);
  __ Mov(x1, UINT64_C(0x0000000800000031));
  __ Crc32cx(w11, w0, x1);

  __ Mov(w0, 0);
  __ Mov(x1, 128);
  __ Crc32cx(w13, w0, x1);

  __ Mov(w0, UINT32_MAX);
  __ Mov(x1, 255);
  __ Crc32cx(w14, w0, x1);

  __ Mov(w0, 0x00010001);
  __ Mov(x1, UINT64_C(0x1000100000000000));
  __ Crc32cx(w15, w0, x1);

  END();
  RUN();

  ASSERT_EQUAL_64(0x0, x10);
  ASSERT_EQUAL_64(0x7f320fcb, x11);
  ASSERT_EQUAL_64(0x34019664, x13);
  ASSERT_EQUAL_64(0x6cc27dd0, x14);
  ASSERT_EQUAL_64(0xc6f0acdb, x15);

  TEARDOWN();
}
#endif  // VIXL_INCLUDE_SIMULATOR_AARCH64

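// FABD returns the absolute value of the difference of its operands; an
// infinite difference produces +infinity, as the expected values below show.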
TEST(neon_fabd_h) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();
  __ Movi(v0.V2D(), 0x4000400040004000, 0x4000400040004000);
  __ Movi(v1.V2D(), 0x3800380038003800, 0x3800380038003800);
  __ Movi(v2.V2D(), 0x0000000000000000, 0x0000000000000000);
  __ Movi(v3.V2D(), 0x8000800080008000, 0x8000800080008000);
  __ Movi(v4.V2D(), 0x7c007c007c007c00, 0x7c007c007c007c00);
  __ Movi(v5.V2D(), 0xfc00fc00fc00fc00, 0xfc00fc00fc00fc00);

  __ Fabd(v6.V8H(), v1.V8H(), v0.V8H());
  __ Fabd(v7.V8H(), v2.V8H(), v3.V8H());
  __ Fabd(v8.V8H(), v2.V8H(), v5.V8H());
  __ Fabd(v9.V8H(), v3.V8H(), v4.V8H());
  __ Fabd(v10.V8H(), v3.V8H(), v5.V8H());
  __ Fabd(v11.V4H(), v1.V4H(), v0.V4H());
  __ Fabd(v12.V4H(), v2.V4H(), v3.V4H());
  __ Fabd(v13.V4H(), v2.V4H(), v5.V4H());
  __ Fabd(v14.V4H(), v3.V4H(), v4.V4H());
  __ Fabd(v15.V4H(), v3.V4H(), v5.V4H());
  END();

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
  RUN();

  ASSERT_EQUAL_128(0x3e003e003e003e00, 0x3e003e003e003e00, v6);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, v7);
  ASSERT_EQUAL_128(0x7c007c007c007c00, 0x7c007c007c007c00, v8);
  ASSERT_EQUAL_128(0x7c007c007c007c00, 0x7c007c007c007c00, v9);
  ASSERT_EQUAL_128(0x7c007c007c007c00, 0x7c007c007c007c00, v10);
  ASSERT_EQUAL_128(0, 0x3e003e003e003e00, v11);
  ASSERT_EQUAL_128(0, 0x0000000000000000, v12);
  ASSERT_EQUAL_128(0, 0x7c007c007c007c00, v13);
  ASSERT_EQUAL_128(0, 0x7c007c007c007c00, v14);
  ASSERT_EQUAL_128(0, 0x7c007c007c007c00, v15);
#endif  // VIXL_INCLUDE_SIMULATOR_AARCH64

  TEARDOWN();
}


TEST(neon_fabd_h_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf,
                      CPUFeatures::kFPHalf);

  START();
  __ Fmov(h0, Float16(2.0));
  __ Fmov(h1, Float16(0.5));
  __ Fmov(h2, Float16(0.0));
  __ Fmov(h3, Float16(-0.0));
  __ Fmov(h4, kFP16PositiveInfinity);
  __ Fmov(h5, kFP16NegativeInfinity);
  __ Fabd(h16, h1, h0);
  __ Fabd(h17, h2, h3);
  __ Fabd(h18, h2, h5);
  __ Fabd(h19, h3, h4);
  __ Fabd(h20, h3, h5);
  END();

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
  RUN();
  ASSERT_EQUAL_FP16(Float16(1.5), h16);
  ASSERT_EQUAL_FP16(Float16(0.0), h17);
  ASSERT_EQUAL_FP16(kFP16PositiveInfinity, h18);
  ASSERT_EQUAL_FP16(kFP16PositiveInfinity, h19);
  ASSERT_EQUAL_FP16(kFP16PositiveInfinity, h20);
#endif

  TEARDOWN();
}

TEST(neon_fabd_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);

  START();
  __ Fmov(s0, 2.0);
  __ Fmov(s1, 0.5);
  __ Fmov(s2, 0.0);
  __ Fmov(s3, -0.0);
  __ Fmov(s4, kFP32PositiveInfinity);
  __ Fmov(s5, kFP32NegativeInfinity);
  __ Fabd(s16, s1, s0);
  __ Fabd(s17, s2, s3);
  __ Fabd(s18, s2, s5);
  __ Fabd(s19, s3, s4);
  __ Fabd(s20, s3, s5);

  __ Fmov(d21, 2.0);
  __ Fmov(d22, 0.5);
  __ Fmov(d23, 0.0);
  __ Fmov(d24, -0.0);
  __ Fmov(d25, kFP64PositiveInfinity);
  __ Fmov(d26, kFP64NegativeInfinity);
  __ Fabd(d27, d21, d22);
  __ Fabd(d28, d23, d24);
  __ Fabd(d29, d23, d26);
  __ Fabd(d30, d24, d25);
  __ Fabd(d31, d24, d26);
  END();

  RUN();

  ASSERT_EQUAL_FP32(1.5, s16);
  ASSERT_EQUAL_FP32(0.0, s17);
  ASSERT_EQUAL_FP32(kFP32PositiveInfinity, s18);
  ASSERT_EQUAL_FP32(kFP32PositiveInfinity, s19);
  ASSERT_EQUAL_FP32(kFP32PositiveInfinity, s20);
  ASSERT_EQUAL_FP64(1.5, d27);
  ASSERT_EQUAL_FP64(0.0, d28);
  ASSERT_EQUAL_FP64(kFP64PositiveInfinity, d29);
  ASSERT_EQUAL_FP64(kFP64PositiveInfinity, d30);
  ASSERT_EQUAL_FP64(kFP64PositiveInfinity, d31);

  TEARDOWN();
}

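// FRECPS computes 2.0 - (n * m), the Newton-Raphson step used to refine a
// reciprocal estimate. For example, Frecps(2.0, 45.0) = 2.0 - 90.0 = -88.0,
// which is the first expected value in the scalar test below.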
TEST(neon_frecps_h) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();
  __ Movi(v0.V2D(), 0x4000400040004000, 0x4000400040004000);
  __ Movi(v1.V2D(), 0xbc00bc00bc00bc00, 0xbc00bc00bc00bc00);
  __ Movi(v2.V2D(), 0x51a051a051a051a0, 0x51a051a051a051a0);
  __ Movi(v3.V2D(), 0x7c007c007c007c00, 0x7c007c007c007c00);
  __ Movi(v4.V2D(), 0xfc00fc00fc00fc00, 0xfc00fc00fc00fc00);

  __ Frecps(v5.V8H(), v0.V8H(), v2.V8H());
  __ Frecps(v6.V8H(), v1.V8H(), v2.V8H());
  __ Frecps(v7.V8H(), v0.V8H(), v3.V8H());
  __ Frecps(v8.V8H(), v0.V8H(), v4.V8H());
  __ Frecps(v9.V4H(), v0.V4H(), v2.V4H());
  __ Frecps(v10.V4H(), v1.V4H(), v2.V4H());
  __ Frecps(v11.V4H(), v0.V4H(), v3.V4H());
  __ Frecps(v12.V4H(), v0.V4H(), v4.V4H());
  END();

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
  RUN();

  ASSERT_EQUAL_128(0xd580d580d580d580, 0xd580d580d580d580, v5);
  ASSERT_EQUAL_128(0x51e051e051e051e0, 0x51e051e051e051e0, v6);
  ASSERT_EQUAL_128(0xfc00fc00fc00fc00, 0xfc00fc00fc00fc00, v7);
  ASSERT_EQUAL_128(0x7c007c007c007c00, 0x7c007c007c007c00, v8);
  ASSERT_EQUAL_128(0, 0xd580d580d580d580, v9);
  ASSERT_EQUAL_128(0, 0x51e051e051e051e0, v10);
  ASSERT_EQUAL_128(0, 0xfc00fc00fc00fc00, v11);
  ASSERT_EQUAL_128(0, 0x7c007c007c007c00, v12);

#endif  // VIXL_INCLUDE_SIMULATOR_AARCH64

  TEARDOWN();
}


TEST(neon_frecps_h_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf,
                      CPUFeatures::kFPHalf);

  START();
  __ Fmov(h0, Float16(2.0));
  __ Fmov(h1, Float16(-1.0));
  __ Fmov(h2, Float16(45.0));
  __ Fmov(h3, kFP16PositiveInfinity);
  __ Fmov(h4, kFP16NegativeInfinity);

  __ Frecps(h5, h0, h2);
  __ Frecps(h6, h1, h2);
  __ Frecps(h7, h0, h3);
  __ Frecps(h8, h0, h4);
  END();

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
  RUN();

  ASSERT_EQUAL_FP16(Float16(-88.0), h5);
  ASSERT_EQUAL_FP16(Float16(47.0), h6);
  ASSERT_EQUAL_FP16(kFP16NegativeInfinity, h7);
  ASSERT_EQUAL_FP16(kFP16PositiveInfinity, h8);

#endif  // VIXL_INCLUDE_SIMULATOR_AARCH64

  TEARDOWN();
}

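// FRSQRTS computes (3.0 - n * m) / 2.0, the Newton-Raphson step used to
// refine a reciprocal square root estimate. For example,
// Frsqrts(2.0, 45.0) = (3.0 - 90.0) / 2.0 = -43.5 in the scalar test below.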
TEST(neon_frsqrts_h)25888 TEST(neon_frsqrts_h) {
25889   SETUP_WITH_FEATURES(CPUFeatures::kNEON,
25890                       CPUFeatures::kFP,
25891                       CPUFeatures::kNEONHalf);
25892 
25893   START();
25894   __ Movi(v0.V2D(), 0x4000400040004000, 0x4000400040004000);
25895   __ Movi(v1.V2D(), 0xbc00bc00bc00bc00, 0xbc00bc00bc00bc00);
25896   __ Movi(v2.V2D(), 0x51a051a051a051a0, 0x51a051a051a051a0);
25897   __ Movi(v3.V2D(), 0x7c007c007c007c00, 0x7c007c007c007c00);
25898   __ Movi(v4.V2D(), 0xfc00fc00fc00fc00, 0xfc00fc00fc00fc00);
25899 
  __ Frsqrts(v5.V8H(), v0.V8H(), v2.V8H());
  __ Frsqrts(v6.V8H(), v1.V8H(), v2.V8H());
  __ Frsqrts(v7.V8H(), v0.V8H(), v3.V8H());
  __ Frsqrts(v8.V8H(), v0.V8H(), v4.V8H());
  __ Frsqrts(v9.V4H(), v0.V4H(), v2.V4H());
  __ Frsqrts(v10.V4H(), v1.V4H(), v2.V4H());
  __ Frsqrts(v11.V4H(), v0.V4H(), v3.V4H());
  __ Frsqrts(v12.V4H(), v0.V4H(), v4.V4H());
  END();

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
  RUN();

  ASSERT_EQUAL_128(0xd170d170d170d170, 0xd170d170d170d170, v5);
  ASSERT_EQUAL_128(0x4e004e004e004e00, 0x4e004e004e004e00, v6);
  ASSERT_EQUAL_128(0xfc00fc00fc00fc00, 0xfc00fc00fc00fc00, v7);
  ASSERT_EQUAL_128(0x7c007c007c007c00, 0x7c007c007c007c00, v8);
  ASSERT_EQUAL_128(0, 0xd170d170d170d170, v9);
  ASSERT_EQUAL_128(0, 0x4e004e004e004e00, v10);
  ASSERT_EQUAL_128(0, 0xfc00fc00fc00fc00, v11);
  ASSERT_EQUAL_128(0, 0x7c007c007c007c00, v12);

#endif  // VIXL_INCLUDE_SIMULATOR_AARCH64

  TEARDOWN();
}


TEST(neon_frsqrts_h_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf,
                      CPUFeatures::kFPHalf);

  START();
  __ Fmov(h0, Float16(2.0));
  __ Fmov(h1, Float16(-1.0));
  __ Fmov(h2, Float16(45.0));
  __ Fmov(h3, kFP16PositiveInfinity);
  __ Fmov(h4, kFP16NegativeInfinity);

  __ Frsqrts(h5, h0, h2);
  __ Frsqrts(h6, h1, h2);
  __ Frsqrts(h7, h0, h3);
  __ Frsqrts(h8, h0, h4);
  END();

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
  RUN();

  ASSERT_EQUAL_FP16(Float16(-43.5), h5);
  ASSERT_EQUAL_FP16(Float16(24.0), h6);
  ASSERT_EQUAL_FP16(kFP16NegativeInfinity, h7);
  ASSERT_EQUAL_FP16(kFP16PositiveInfinity, h8);

#endif  // VIXL_INCLUDE_SIMULATOR_AARCH64

  TEARDOWN();
}


TEST(neon_faddp_h) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();
  __ Movi(v0.V2D(), 0x3c0040003c004000, 0x3c0040003c004000);
  __ Movi(v1.V2D(), 0xfc007c00fc007c00, 0xfc007c00fc007c00);
  __ Movi(v2.V2D(), 0x0000800000008000, 0x0000800000008000);
  __ Movi(v3.V2D(), 0x7e007c017e007c01, 0x7e007c017e007c01);

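  // Faddp adds adjacent element pairs taken from the concatenation of the two
  // sources: 1.0 + 2.0 = 3.0 (0x4200), +infinity + -infinity raises Invalid
  // Operation and gives the default NaN (0x7e00), and the signalling NaN
  // input 0x7c01 is quieted to 0x7e01.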
  __ Faddp(v4.V8H(), v1.V8H(), v0.V8H());
  __ Faddp(v5.V8H(), v3.V8H(), v2.V8H());
  __ Faddp(v6.V4H(), v1.V4H(), v0.V4H());
  __ Faddp(v7.V4H(), v3.V4H(), v2.V4H());
  END();

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
  RUN();

  ASSERT_EQUAL_128(0x4200420042004200, 0x7e007e007e007e00, v4);
  ASSERT_EQUAL_128(0x0000000000000000, 0x7e017e017e017e01, v5);
  ASSERT_EQUAL_128(0, 0x420042007e007e00, v6);
  ASSERT_EQUAL_128(0, 0x000000007e017e01, v7);
#endif  // VIXL_INCLUDE_SIMULATOR_AARCH64

  TEARDOWN();
}


TEST(neon_faddp_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);

  START();
  __ Movi(d0, 0x3f80000040000000);
  __ Movi(d1, 0xff8000007f800000);
  __ Movi(d2, 0x0000000080000000);
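  // The scalar form of Faddp sums the two elements of its source, so s0
  // receives 1.0f + 2.0f = 3.0f, while +infinity + -infinity produces the
  // default NaN and -0.0 + +0.0 produces +0.0.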
  __ Faddp(s0, v0.V2S());
  __ Faddp(s1, v1.V2S());
  __ Faddp(s2, v2.V2S());

  __ Movi(v3.V2D(), 0xc000000000000000, 0x4000000000000000);
  __ Movi(v4.V2D(), 0xfff8000000000000, 0x7ff8000000000000);
  __ Movi(v5.V2D(), 0x0000000000000000, 0x8000000000000000);
  __ Faddp(d3, v3.V2D());
  __ Faddp(d4, v4.V2D());
  __ Faddp(d5, v5.V2D());
  END();

  RUN();

  ASSERT_EQUAL_FP32(3.0, s0);
  ASSERT_EQUAL_FP32(kFP32DefaultNaN, s1);
  ASSERT_EQUAL_FP32(0.0, s2);
  ASSERT_EQUAL_FP64(0.0, d3);
  ASSERT_EQUAL_FP64(kFP64DefaultNaN, d4);
  ASSERT_EQUAL_FP64(0.0, d5);

  TEARDOWN();
}


TEST(neon_faddp_h_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();
  __ Movi(s0, 0x3c004000);
  __ Movi(s1, 0xfc007c00);
  __ Movi(s2, 0x00008000);
  __ Faddp(h0, v0.V2H());
  __ Faddp(h1, v1.V2H());
  __ Faddp(h2, v2.V2H());
  END();

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
  RUN();

  ASSERT_EQUAL_FP16(Float16(3.0), h0);
  ASSERT_EQUAL_FP16(kFP16DefaultNaN, h1);
  ASSERT_EQUAL_FP16(Float16(0.0), h2);
#endif  // VIXL_INCLUDE_SIMULATOR_AARCH64

  TEARDOWN();
}


TEST(neon_fmaxp_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);

  START();
  __ Movi(d0, 0x3f80000040000000);
  __ Movi(d1, 0xff8000007f800000);
  __ Movi(d2, 0x7fc00000ff800000);
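  // Fmaxp returns the larger of the two source elements. Unlike the "NM"
  // variants tested below, it propagates NaN operands, so the pairs
  // containing a NaN in d2 and v5 produce the default NaN rather than the
  // infinity.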
  __ Fmaxp(s0, v0.V2S());
  __ Fmaxp(s1, v1.V2S());
  __ Fmaxp(s2, v2.V2S());

  __ Movi(v3.V2D(), 0x3ff0000000000000, 0x4000000000000000);
  __ Movi(v4.V2D(), 0xfff0000000000000, 0x7ff0000000000000);
  __ Movi(v5.V2D(), 0x7ff0000000000000, 0x7ff8000000000000);
  __ Fmaxp(d3, v3.V2D());
  __ Fmaxp(d4, v4.V2D());
  __ Fmaxp(d5, v5.V2D());
  END();

  RUN();

  ASSERT_EQUAL_FP32(2.0, s0);
  ASSERT_EQUAL_FP32(kFP32PositiveInfinity, s1);
  ASSERT_EQUAL_FP32(kFP32DefaultNaN, s2);
  ASSERT_EQUAL_FP64(2.0, d3);
  ASSERT_EQUAL_FP64(kFP64PositiveInfinity, d4);
  ASSERT_EQUAL_FP64(kFP64DefaultNaN, d5);

  TEARDOWN();
}


TEST(neon_fmaxp_h_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();
  __ Movi(s0, 0x3c004000);
  __ Movi(s1, 0xfc007c00);
  __ Movi(s2, 0x7e00fc00);
  __ Fmaxp(h0, v0.V2H());
  __ Fmaxp(h1, v1.V2H());
  __ Fmaxp(h2, v2.V2H());
  END();

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
  RUN();

  ASSERT_EQUAL_FP16(Float16(2.0), h0);
  ASSERT_EQUAL_FP16(kFP16PositiveInfinity, h1);
  ASSERT_EQUAL_FP16(kFP16DefaultNaN, h2);
#endif  // VIXL_INCLUDE_SIMULATOR_AARCH64

  TEARDOWN();
}


TEST(neon_fmax_h) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();
  __ Movi(v0.V2D(), 0x3c003c003c003c00, 0x3c003c003c003c00);
  __ Movi(v1.V2D(), 0x4000400040004000, 0x4000400040004000);
  __ Movi(v2.V2D(), 0xfc00fc00fc00fc00, 0xfc00fc00fc00fc00);
  __ Movi(v3.V2D(), 0x7c007c007c007c00, 0x7c007c007c007c00);
  __ Movi(v4.V2D(), 0x7e007e007e007e00, 0x7e007e007e007e00);
  __ Movi(v5.V2D(), 0x7c017c017c017c01, 0x7c017c017c017c01);

  __ Fmax(v6.V8H(), v0.V8H(), v1.V8H());
  __ Fmax(v7.V8H(), v2.V8H(), v3.V8H());
  __ Fmax(v8.V8H(), v4.V8H(), v0.V8H());
  __ Fmax(v9.V8H(), v5.V8H(), v1.V8H());
  __ Fmax(v10.V4H(), v0.V4H(), v1.V4H());
  __ Fmax(v11.V4H(), v2.V4H(), v3.V4H());
  __ Fmax(v12.V4H(), v4.V4H(), v0.V4H());
  __ Fmax(v13.V4H(), v5.V4H(), v1.V4H());
  END();

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
  RUN();

  ASSERT_EQUAL_128(0x4000400040004000, 0x4000400040004000, v6);
  ASSERT_EQUAL_128(0x7c007c007c007c00, 0x7c007c007c007c00, v7);
  ASSERT_EQUAL_128(0x7e007e007e007e00, 0x7e007e007e007e00, v8);
  ASSERT_EQUAL_128(0x7e017e017e017e01, 0x7e017e017e017e01, v9);
  ASSERT_EQUAL_128(0, 0x4000400040004000, v10);
  ASSERT_EQUAL_128(0, 0x7c007c007c007c00, v11);
  ASSERT_EQUAL_128(0, 0x7e007e007e007e00, v12);
  ASSERT_EQUAL_128(0, 0x7e017e017e017e01, v13);
#endif  // VIXL_INCLUDE_SIMULATOR_AARCH64

  TEARDOWN();
}


TEST(neon_fmaxp_h) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();
  __ Movi(v0.V2D(), 0x3c0040003c004000, 0x3c0040003c004000);
  __ Movi(v1.V2D(), 0xfc007c00fc007c00, 0xfc007c00fc007c00);
  __ Movi(v2.V2D(), 0x7e003c007e003c00, 0x7e003c007e003c00);
  __ Movi(v3.V2D(), 0x7c0140007c014000, 0x7c0140007c014000);

  __ Fmaxp(v6.V8H(), v0.V8H(), v1.V8H());
  __ Fmaxp(v7.V8H(), v2.V8H(), v3.V8H());
  __ Fmaxp(v8.V4H(), v0.V4H(), v1.V4H());
  __ Fmaxp(v9.V4H(), v2.V4H(), v3.V4H());
  END();

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
  RUN();

  ASSERT_EQUAL_128(0x7c007c007c007c00, 0x4000400040004000, v6);
  ASSERT_EQUAL_128(0x7e017e017e017e01, 0x7e007e007e007e00, v7);
  ASSERT_EQUAL_128(0, 0x7c007c0040004000, v8);
  ASSERT_EQUAL_128(0, 0x7e017e017e007e00, v9);
#endif  // VIXL_INCLUDE_SIMULATOR_AARCH64

  TEARDOWN();
}


TEST(neon_fmaxnm_h) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();
  __ Movi(v0.V2D(), 0x3c003c003c003c00, 0x3c003c003c003c00);
  __ Movi(v1.V2D(), 0x4000400040004000, 0x4000400040004000);
  __ Movi(v2.V2D(), 0xfc00fc00fc00fc00, 0xfc00fc00fc00fc00);
  __ Movi(v3.V2D(), 0x7c007c007c007c00, 0x7c007c007c007c00);
  __ Movi(v4.V2D(), 0x7e007e007e007e00, 0x7e007e007e007e00);
  __ Movi(v5.V2D(), 0x7c017c017c017c01, 0x7c017c017c017c01);

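  // Fmaxnm implements IEEE 754 maxNum: a quiet NaN operand (0x7e00) is
  // treated as missing data, so maxNum(qNaN, 1.0) is 1.0, but a signalling
  // NaN operand (0x7c01) still produces a NaN result, quieted to 0x7e01.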
  __ Fmaxnm(v6.V8H(), v0.V8H(), v1.V8H());
  __ Fmaxnm(v7.V8H(), v2.V8H(), v3.V8H());
  __ Fmaxnm(v8.V8H(), v4.V8H(), v0.V8H());
  __ Fmaxnm(v9.V8H(), v5.V8H(), v1.V8H());
  __ Fmaxnm(v10.V4H(), v0.V4H(), v1.V4H());
  __ Fmaxnm(v11.V4H(), v2.V4H(), v3.V4H());
  __ Fmaxnm(v12.V4H(), v4.V4H(), v0.V4H());
  __ Fmaxnm(v13.V4H(), v5.V4H(), v1.V4H());
  END();

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
  RUN();

  ASSERT_EQUAL_128(0x4000400040004000, 0x4000400040004000, v6);
  ASSERT_EQUAL_128(0x7c007c007c007c00, 0x7c007c007c007c00, v7);
  ASSERT_EQUAL_128(0x3c003c003c003c00, 0x3c003c003c003c00, v8);
  ASSERT_EQUAL_128(0x7e017e017e017e01, 0x7e017e017e017e01, v9);
  ASSERT_EQUAL_128(0, 0x4000400040004000, v10);
  ASSERT_EQUAL_128(0, 0x7c007c007c007c00, v11);
  ASSERT_EQUAL_128(0, 0x3c003c003c003c00, v12);
  ASSERT_EQUAL_128(0, 0x7e017e017e017e01, v13);
#endif  // VIXL_INCLUDE_SIMULATOR_AARCH64

  TEARDOWN();
}


TEST(neon_fmaxnmp_h) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();
  __ Movi(v0.V2D(), 0x3c0040003c004000, 0x3c0040003c004000);
  __ Movi(v1.V2D(), 0xfc007c00fc007c00, 0xfc007c00fc007c00);
  __ Movi(v2.V2D(), 0x7e003c007e003c00, 0x7e003c007e003c00);
  __ Movi(v3.V2D(), 0x7c0140007c014000, 0x7c0140007c014000);

  __ Fmaxnmp(v6.V8H(), v0.V8H(), v1.V8H());
  __ Fmaxnmp(v7.V8H(), v2.V8H(), v3.V8H());
  __ Fmaxnmp(v8.V4H(), v0.V4H(), v1.V4H());
  __ Fmaxnmp(v9.V4H(), v2.V4H(), v3.V4H());
  END();

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
  RUN();

  ASSERT_EQUAL_128(0x7c007c007c007c00, 0x4000400040004000, v6);
  ASSERT_EQUAL_128(0x7e017e017e017e01, 0x3c003c003c003c00, v7);
  ASSERT_EQUAL_128(0, 0x7c007c0040004000, v8);
  ASSERT_EQUAL_128(0, 0x7e017e013c003c00, v9);
#endif  // VIXL_INCLUDE_SIMULATOR_AARCH64

  TEARDOWN();
}


TEST(neon_fmaxnmp_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);

  START();
  __ Movi(d0, 0x3f80000040000000);
  __ Movi(d1, 0xff8000007f800000);
  __ Movi(d2, 0x7fc00000ff800000);
  __ Fmaxnmp(s0, v0.V2S());
  __ Fmaxnmp(s1, v1.V2S());
  __ Fmaxnmp(s2, v2.V2S());

  __ Movi(v3.V2D(), 0x3ff0000000000000, 0x4000000000000000);
  __ Movi(v4.V2D(), 0xfff0000000000000, 0x7ff0000000000000);
  __ Movi(v5.V2D(), 0x7ff8000000000000, 0xfff0000000000000);
  __ Fmaxnmp(d3, v3.V2D());
  __ Fmaxnmp(d4, v4.V2D());
  __ Fmaxnmp(d5, v5.V2D());
  END();

  RUN();

  ASSERT_EQUAL_FP32(2.0, s0);
  ASSERT_EQUAL_FP32(kFP32PositiveInfinity, s1);
  ASSERT_EQUAL_FP32(kFP32NegativeInfinity, s2);
  ASSERT_EQUAL_FP64(2.0, d3);
  ASSERT_EQUAL_FP64(kFP64PositiveInfinity, d4);
  ASSERT_EQUAL_FP64(kFP64NegativeInfinity, d5);

  TEARDOWN();
}


TEST(neon_fmaxnmp_h_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();
  __ Movi(s0, 0x3c004000);
  __ Movi(s1, 0xfc007c00);
  __ Movi(s2, 0x7e00fc00);
  __ Fmaxnmp(h0, v0.V2H());
  __ Fmaxnmp(h1, v1.V2H());
  __ Fmaxnmp(h2, v2.V2H());
  END();

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
  RUN();

  ASSERT_EQUAL_FP16(Float16(2.0), h0);
  ASSERT_EQUAL_FP16(kFP16PositiveInfinity, h1);
  ASSERT_EQUAL_FP16(kFP16NegativeInfinity, h2);
#endif  // VIXL_INCLUDE_SIMULATOR_AARCH64

  TEARDOWN();
}


TEST(neon_fminp_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);

  START();
  __ Movi(d0, 0x3f80000040000000);
  __ Movi(d1, 0xff8000007f800000);
  __ Movi(d2, 0x7fc00000ff800000);
  __ Fminp(s0, v0.V2S());
  __ Fminp(s1, v1.V2S());
  __ Fminp(s2, v2.V2S());

  __ Movi(v3.V2D(), 0x3ff0000000000000, 0x4000000000000000);
  __ Movi(v4.V2D(), 0xfff0000000000000, 0x7ff0000000000000);
  __ Movi(v5.V2D(), 0x7ff0000000000000, 0x7ff8000000000000);
  __ Fminp(d3, v3.V2D());
  __ Fminp(d4, v4.V2D());
  __ Fminp(d5, v5.V2D());
  END();

  RUN();

  ASSERT_EQUAL_FP32(1.0, s0);
  ASSERT_EQUAL_FP32(kFP32NegativeInfinity, s1);
  ASSERT_EQUAL_FP32(kFP32DefaultNaN, s2);
  ASSERT_EQUAL_FP64(1.0, d3);
  ASSERT_EQUAL_FP64(kFP64NegativeInfinity, d4);
  ASSERT_EQUAL_FP64(kFP64DefaultNaN, d5);

  TEARDOWN();
}


TEST(neon_fminp_h_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();
  __ Movi(s0, 0x3c004000);
  __ Movi(s1, 0xfc007c00);
  __ Movi(s2, 0x7e00fc00);
  __ Fminp(h0, v0.V2H());
  __ Fminp(h1, v1.V2H());
  __ Fminp(h2, v2.V2H());
  END();

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
  RUN();

  ASSERT_EQUAL_FP16(Float16(1.0), h0);
  ASSERT_EQUAL_FP16(kFP16NegativeInfinity, h1);
  ASSERT_EQUAL_FP16(kFP16DefaultNaN, h2);
#endif  // VIXL_INCLUDE_SIMULATOR_AARCH64

  TEARDOWN();
}


TEST(neon_fmin_h) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();
  __ Movi(v0.V2D(), 0x3c003c003c003c00, 0x3c003c003c003c00);
  __ Movi(v1.V2D(), 0x4000400040004000, 0x4000400040004000);
  __ Movi(v2.V2D(), 0xfc00fc00fc00fc00, 0xfc00fc00fc00fc00);
  __ Movi(v3.V2D(), 0x7c007c007c007c00, 0x7c007c007c007c00);
  __ Movi(v4.V2D(), 0x7e007e007e007e00, 0x7e007e007e007e00);
  __ Movi(v5.V2D(), 0x7c017c017c017c01, 0x7c017c017c017c01);

  __ Fmin(v6.V8H(), v0.V8H(), v1.V8H());
  __ Fmin(v7.V8H(), v2.V8H(), v3.V8H());
  __ Fmin(v8.V8H(), v4.V8H(), v0.V8H());
  __ Fmin(v9.V8H(), v5.V8H(), v1.V8H());
  __ Fmin(v10.V4H(), v0.V4H(), v1.V4H());
  __ Fmin(v11.V4H(), v2.V4H(), v3.V4H());
  __ Fmin(v12.V4H(), v4.V4H(), v0.V4H());
  __ Fmin(v13.V4H(), v5.V4H(), v1.V4H());
  END();

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
  RUN();

  ASSERT_EQUAL_128(0x3c003c003c003c00, 0x3c003c003c003c00, v6);
  ASSERT_EQUAL_128(0xfc00fc00fc00fc00, 0xfc00fc00fc00fc00, v7);
  ASSERT_EQUAL_128(0x7e007e007e007e00, 0x7e007e007e007e00, v8);
  ASSERT_EQUAL_128(0x7e017e017e017e01, 0x7e017e017e017e01, v9);
  ASSERT_EQUAL_128(0, 0x3c003c003c003c00, v10);
  ASSERT_EQUAL_128(0, 0xfc00fc00fc00fc00, v11);
  ASSERT_EQUAL_128(0, 0x7e007e007e007e00, v12);
  ASSERT_EQUAL_128(0, 0x7e017e017e017e01, v13);
#endif  // VIXL_INCLUDE_SIMULATOR_AARCH64

  TEARDOWN();
}


TEST(neon_fminp_h) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();
  __ Movi(v0.V2D(), 0x3c0040003c004000, 0x3c0040003c004000);
  __ Movi(v1.V2D(), 0xfc007c00fc007c00, 0xfc007c00fc007c00);
  __ Movi(v2.V2D(), 0x7e003c007e003c00, 0x7e003c007e003c00);
  __ Movi(v3.V2D(), 0x7c0140007c014000, 0x7c0140007c014000);

  __ Fminp(v6.V8H(), v0.V8H(), v1.V8H());
  __ Fminp(v7.V8H(), v2.V8H(), v3.V8H());
  __ Fminp(v8.V4H(), v0.V4H(), v1.V4H());
  __ Fminp(v9.V4H(), v2.V4H(), v3.V4H());
  END();

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
  RUN();

  ASSERT_EQUAL_128(0xfc00fc00fc00fc00, 0x3c003c003c003c00, v6);
  ASSERT_EQUAL_128(0x7e017e017e017e01, 0x7e007e007e007e00, v7);
  ASSERT_EQUAL_128(0, 0xfc00fc003c003c00, v8);
  ASSERT_EQUAL_128(0, 0x7e017e017e007e00, v9);
#endif  // VIXL_INCLUDE_SIMULATOR_AARCH64

  TEARDOWN();
}


TEST(neon_fminnm_h) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();
  __ Movi(v0.V2D(), 0x3c003c003c003c00, 0x3c003c003c003c00);
  __ Movi(v1.V2D(), 0x4000400040004000, 0x4000400040004000);
  __ Movi(v2.V2D(), 0xfc00fc00fc00fc00, 0xfc00fc00fc00fc00);
  __ Movi(v3.V2D(), 0x7c007c007c007c00, 0x7c007c007c007c00);
  __ Movi(v4.V2D(), 0x7e007e007e007e00, 0x7e007e007e007e00);
  __ Movi(v5.V2D(), 0x7c017c017c017c01, 0x7c017c017c017c01);

  __ Fminnm(v6.V8H(), v0.V8H(), v1.V8H());
  __ Fminnm(v7.V8H(), v2.V8H(), v3.V8H());
  __ Fminnm(v8.V8H(), v4.V8H(), v0.V8H());
  __ Fminnm(v9.V8H(), v5.V8H(), v1.V8H());
  __ Fminnm(v10.V4H(), v0.V4H(), v1.V4H());
  __ Fminnm(v11.V4H(), v2.V4H(), v3.V4H());
  __ Fminnm(v12.V4H(), v4.V4H(), v0.V4H());
  __ Fminnm(v13.V4H(), v5.V4H(), v1.V4H());
  END();

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
  RUN();

  ASSERT_EQUAL_128(0x3c003c003c003c00, 0x3c003c003c003c00, v6);
  ASSERT_EQUAL_128(0xfc00fc00fc00fc00, 0xfc00fc00fc00fc00, v7);
  ASSERT_EQUAL_128(0x3c003c003c003c00, 0x3c003c003c003c00, v8);
  ASSERT_EQUAL_128(0x7e017e017e017e01, 0x7e017e017e017e01, v9);
  ASSERT_EQUAL_128(0, 0x3c003c003c003c00, v10);
  ASSERT_EQUAL_128(0, 0xfc00fc00fc00fc00, v11);
  ASSERT_EQUAL_128(0, 0x3c003c003c003c00, v12);
  ASSERT_EQUAL_128(0, 0x7e017e017e017e01, v13);
#endif  // VIXL_INCLUDE_SIMULATOR_AARCH64

  TEARDOWN();
}


TEST(neon_fminnmp_h) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();
  __ Movi(v0.V2D(), 0x3c0040003c004000, 0x3c0040003c004000);
  __ Movi(v1.V2D(), 0xfc007c00fc007c00, 0xfc007c00fc007c00);
  __ Movi(v2.V2D(), 0x7e003c007e003c00, 0x7e003c007e003c00);
  __ Movi(v3.V2D(), 0x7c0140007c014000, 0x7c0140007c014000);

  __ Fminnmp(v6.V8H(), v0.V8H(), v1.V8H());
  __ Fminnmp(v7.V8H(), v2.V8H(), v3.V8H());
  __ Fminnmp(v8.V4H(), v0.V4H(), v1.V4H());
  __ Fminnmp(v9.V4H(), v2.V4H(), v3.V4H());
  END();

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
  RUN();

  ASSERT_EQUAL_128(0xfc00fc00fc00fc00, 0x3c003c003c003c00, v6);
  ASSERT_EQUAL_128(0x7e017e017e017e01, 0x3c003c003c003c00, v7);
  ASSERT_EQUAL_128(0, 0xfc00fc003c003c00, v8);
  ASSERT_EQUAL_128(0, 0x7e017e013c003c00, v9);
#endif  // VIXL_INCLUDE_SIMULATOR_AARCH64

  TEARDOWN();
}


TEST(neon_fminnmp_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);

  START();
  __ Movi(d0, 0x3f80000040000000);
  __ Movi(d1, 0xff8000007f800000);
  __ Movi(d2, 0x7fc00000ff800000);
  __ Fminnmp(s0, v0.V2S());
  __ Fminnmp(s1, v1.V2S());
  __ Fminnmp(s2, v2.V2S());

  __ Movi(v3.V2D(), 0x3ff0000000000000, 0x4000000000000000);
  __ Movi(v4.V2D(), 0xfff0000000000000, 0x7ff0000000000000);
  __ Movi(v5.V2D(), 0x7ff8000000000000, 0xfff0000000000000);
  __ Fminnmp(d3, v3.V2D());
  __ Fminnmp(d4, v4.V2D());
  __ Fminnmp(d5, v5.V2D());
  END();

  RUN();

  ASSERT_EQUAL_FP32(1.0, s0);
  ASSERT_EQUAL_FP32(kFP32NegativeInfinity, s1);
  ASSERT_EQUAL_FP32(kFP32NegativeInfinity, s2);
  ASSERT_EQUAL_FP64(1.0, d3);
  ASSERT_EQUAL_FP64(kFP64NegativeInfinity, d4);
  ASSERT_EQUAL_FP64(kFP64NegativeInfinity, d5);

  TEARDOWN();
}


TEST(neon_fminnmp_h_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();
  __ Movi(s0, 0x3c004000);
  __ Movi(s1, 0xfc007c00);
  __ Movi(s2, 0x7e00fc00);
  __ Fminnmp(h0, v0.V2H());
  __ Fminnmp(h1, v1.V2H());
  __ Fminnmp(h2, v2.V2H());
  END();

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
  RUN();

  ASSERT_EQUAL_FP16(Float16(1.0), h0);
  ASSERT_EQUAL_FP16(kFP16NegativeInfinity, h1);
  ASSERT_EQUAL_FP16(kFP16NegativeInfinity, h2);
#endif  // VIXL_INCLUDE_SIMULATOR_AARCH64

  TEARDOWN();
}


TEST(neon_tbl) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();
  __ Movi(v30.V2D(), 0xbf561e188b1280e9, 0xbd542b8cbd24e8e8);
  __ Movi(v31.V2D(), 0xb5e9883d2c88a46d, 0x12276d5b614c915e);
  __ Movi(v0.V2D(), 0xc45b7782bc5ecd72, 0x5dd4fe5a4bc6bf5e);
  __ Movi(v1.V2D(), 0x1e3254094bd1746a, 0xf099ecf50e861c80);

  __ Movi(v4.V2D(), 0xf80c030100031f16, 0x00070504031201ff);
  __ Movi(v5.V2D(), 0x1f01001afc14202a, 0x2a081e1b0c02020c);
  __ Movi(v6.V2D(), 0x353f1a13022a2360, 0x2c464a00203a0a33);
  __ Movi(v7.V2D(), 0x64801a1c054cf30d, 0x793a2c052e213739);

  __ Movi(v8.V2D(), 0xb7f60ad7d7d88f13, 0x13eefc240496e842);
  __ Movi(v9.V2D(), 0x1be199c7c69b47ec, 0x8e4b9919f6eed443);
  __ Movi(v10.V2D(), 0x9bd2e1654c69e48f, 0x2143d089e426c6d2);
  __ Movi(v11.V2D(), 0xc31dbdc4a0393065, 0x1ecc2077caaf64d8);
  __ Movi(v12.V2D(), 0x29b24463967bc6eb, 0xdaf59970df01c93b);
  __ Movi(v13.V2D(), 0x3e20a4a4cb6813f4, 0x20a5832713dae669);
  __ Movi(v14.V2D(), 0xc5ff9a94041b1fdf, 0x2f46cde38cba2682);
  __ Movi(v15.V2D(), 0xd8cc5b0e61f387e6, 0xe69d6d314971e8fd);

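  // Tbl looks up each index byte in the table formed by one to four source
  // registers; an out-of-range index produces 0. Tbx is the same except that
  // out-of-range indices leave the corresponding destination byte unchanged,
  // which is presumably why the destinations are seeded with arbitrary values
  // above and below.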
  __ Tbl(v8.V16B(), v1.V16B(), v4.V16B());
  __ Tbl(v9.V16B(), v0.V16B(), v1.V16B(), v5.V16B());
  __ Tbl(v10.V16B(), v31.V16B(), v0.V16B(), v1.V16B(), v6.V16B());
  __ Tbl(v11.V16B(), v30.V16B(), v31.V16B(), v0.V16B(), v1.V16B(), v7.V16B());
  __ Tbl(v12.V8B(), v1.V16B(), v4.V8B());
  __ Tbl(v13.V8B(), v0.V16B(), v1.V16B(), v5.V8B());
  __ Tbl(v14.V8B(), v31.V16B(), v0.V16B(), v1.V16B(), v6.V8B());
  __ Tbl(v15.V8B(), v30.V16B(), v31.V16B(), v0.V16B(), v1.V16B(), v7.V8B());

  __ Movi(v16.V2D(), 0xb7f60ad7d7d88f13, 0x13eefc240496e842);
  __ Movi(v17.V2D(), 0x1be199c7c69b47ec, 0x8e4b9919f6eed443);
  __ Movi(v18.V2D(), 0x9bd2e1654c69e48f, 0x2143d089e426c6d2);
  __ Movi(v19.V2D(), 0xc31dbdc4a0393065, 0x1ecc2077caaf64d8);
  __ Movi(v20.V2D(), 0x29b24463967bc6eb, 0xdaf59970df01c93b);
  __ Movi(v21.V2D(), 0x3e20a4a4cb6813f4, 0x20a5832713dae669);
  __ Movi(v22.V2D(), 0xc5ff9a94041b1fdf, 0x2f46cde38cba2682);
  __ Movi(v23.V2D(), 0xd8cc5b0e61f387e6, 0xe69d6d314971e8fd);

  __ Tbx(v16.V16B(), v1.V16B(), v4.V16B());
  __ Tbx(v17.V16B(), v0.V16B(), v1.V16B(), v5.V16B());
  __ Tbx(v18.V16B(), v31.V16B(), v0.V16B(), v1.V16B(), v6.V16B());
  __ Tbx(v19.V16B(), v30.V16B(), v31.V16B(), v0.V16B(), v1.V16B(), v7.V16B());
  __ Tbx(v20.V8B(), v1.V16B(), v4.V8B());
  __ Tbx(v21.V8B(), v0.V16B(), v1.V16B(), v5.V8B());
  __ Tbx(v22.V8B(), v31.V16B(), v0.V16B(), v1.V16B(), v6.V8B());
  __ Tbx(v23.V8B(), v30.V16B(), v31.V16B(), v0.V16B(), v1.V16B(), v7.V8B());
  END();

  RUN();

  ASSERT_EQUAL_128(0x00090e1c800e0000, 0x80f0ecf50e001c00, v8);
  ASSERT_EQUAL_128(0x1ebf5ed100f50000, 0x0072324b82c6c682, v9);
  ASSERT_EQUAL_128(0x00005e4b4cd10e00, 0x0900005e80008800, v10);
  ASSERT_EQUAL_128(0x0000883d2b00001e, 0x00d1822b5bbff074, v11);
  ASSERT_EQUAL_128(0x0000000000000000, 0x80f0ecf50e001c00, v12);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0072324b82c6c682, v13);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0900005e80008800, v14);
  ASSERT_EQUAL_128(0x0000000000000000, 0x00d1822b5bbff074, v15);

  ASSERT_EQUAL_128(0xb7090e1c800e8f13, 0x80f0ecf50e961c42, v16);
  ASSERT_EQUAL_128(0x1ebf5ed1c6f547ec, 0x8e72324b82c6c682, v17);
  ASSERT_EQUAL_128(0x9bd25e4b4cd10e8f, 0x0943d05e802688d2, v18);
  ASSERT_EQUAL_128(0xc31d883d2b39301e, 0x1ed1822b5bbff074, v19);
  ASSERT_EQUAL_128(0x0000000000000000, 0x80f0ecf50e011c3b, v20);
  ASSERT_EQUAL_128(0x0000000000000000, 0x2072324b82c6c682, v21);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0946cd5e80ba8882, v22);
  ASSERT_EQUAL_128(0x0000000000000000, 0xe6d1822b5bbff074, v23);

  TEARDOWN();
}


TEST(regress_cmp_shift_imm) {
  SETUP();

  START();

  __ Mov(x0, 0x3d720c8d);
  __ Cmp(x0, Operand(0x3d720c8d));

  END();
  RUN();

  ASSERT_EQUAL_NZCV(ZCFlag);

  TEARDOWN();
}


TEST(compute_address) {
  SETUP();

  START();
  int64_t base_address = INT64_C(0x123000000abc);
  int64_t reg_offset = INT64_C(0x1087654321);
  Register base = x0;
  Register offset = x1;

  __ Mov(base, base_address);
  __ Mov(offset, reg_offset);


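  // ComputeAddress materialises the effective address described by a
  // MemOperand into a register, without accessing memory. For example, x6
  // below should be base + (offset << 2), i.e.
  // 0x123000000abc + 0x421d950c84 = 0x12721d951740.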
  __ ComputeAddress(x2, MemOperand(base, 0));
  __ ComputeAddress(x3, MemOperand(base, 8));
  __ ComputeAddress(x4, MemOperand(base, -100));

  __ ComputeAddress(x5, MemOperand(base, offset));
  __ ComputeAddress(x6, MemOperand(base, offset, LSL, 2));
  __ ComputeAddress(x7, MemOperand(base, offset, LSL, 4));
  __ ComputeAddress(x8, MemOperand(base, offset, LSL, 8));

  __ ComputeAddress(x9, MemOperand(base, offset, SXTW));
  __ ComputeAddress(x10, MemOperand(base, offset, UXTW, 1));
  __ ComputeAddress(x11, MemOperand(base, offset, SXTW, 2));
  __ ComputeAddress(x12, MemOperand(base, offset, UXTW, 3));

  END();

  RUN();

  ASSERT_EQUAL_64(base_address, base);

  ASSERT_EQUAL_64(INT64_C(0x123000000abc), x2);
  ASSERT_EQUAL_64(INT64_C(0x123000000ac4), x3);
  ASSERT_EQUAL_64(INT64_C(0x123000000a58), x4);

  ASSERT_EQUAL_64(INT64_C(0x124087654ddd), x5);
  ASSERT_EQUAL_64(INT64_C(0x12721d951740), x6);
  ASSERT_EQUAL_64(INT64_C(0x133876543ccc), x7);
  ASSERT_EQUAL_64(INT64_C(0x22b765432bbc), x8);

  ASSERT_EQUAL_64(INT64_C(0x122f87654ddd), x9);
  ASSERT_EQUAL_64(INT64_C(0x12310eca90fe), x10);
  ASSERT_EQUAL_64(INT64_C(0x122e1d951740), x11);
  ASSERT_EQUAL_64(INT64_C(0x12343b2a23c4), x12);

  TEARDOWN();
}


TEST(far_branch_backward) {
  // Test that the MacroAssembler correctly resolves backward branches to labels
  // that are outside the immediate range of branch instructions.
  // Take into account that backward branches can reach one instruction further
  // than forward branches.
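  // (For reference, tbz/tbnz encode a 14-bit scaled offset, giving a range of
  // about +/-32KB, while cbz/cbnz and conditional branches encode 19 bits,
  // about +/-1MB.)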
  const int overflow_size =
      kInstructionSize +
      std::max(Instruction::GetImmBranchForwardRange(TestBranchType),
               std::max(Instruction::GetImmBranchForwardRange(
                            CompareBranchType),
                        Instruction::GetImmBranchForwardRange(CondBranchType)));

  SETUP();
  START();

  Label done, fail;
  Label test_tbz, test_cbz, test_bcond;
  Label success_tbz, success_cbz, success_bcond;

  __ Mov(x0, 0);
  __ Mov(x1, 1);
  __ Mov(x10, 0);

  __ B(&test_tbz);
  __ Bind(&success_tbz);
  __ Orr(x0, x0, 1 << 0);
  __ B(&test_cbz);
  __ Bind(&success_cbz);
  __ Orr(x0, x0, 1 << 1);
  __ B(&test_bcond);
  __ Bind(&success_bcond);
  __ Orr(x0, x0, 1 << 2);

  __ B(&done);

  // Generate enough code to overflow the immediate range of the three types of
  // branches below.
  for (unsigned i = 0; i < overflow_size / kInstructionSize; ++i) {
    if (i % 100 == 0) {
      // If we do land in this code, we do not want to execute so many nops
      // before reaching the end of the test (especially if tracing is
      // activated).
      __ B(&fail);
    } else {
      __ Nop();
    }
  }
  __ B(&fail);

  __ Bind(&test_tbz);
  __ Tbz(x10, 7, &success_tbz);
  __ Bind(&test_cbz);
  __ Cbz(x10, &success_cbz);
  __ Bind(&test_bcond);
  __ Cmp(x10, 0);
  __ B(eq, &success_bcond);

  // For each out-of-range branch instruction, at least two instructions should
  // have been generated.
  VIXL_CHECK(masm.GetSizeOfCodeGeneratedSince(&test_tbz) >=
             7 * kInstructionSize);

  __ Bind(&fail);
  __ Mov(x1, 0);
  __ Bind(&done);

  END();
  RUN();

  ASSERT_EQUAL_64(0x7, x0);
  ASSERT_EQUAL_64(0x1, x1);

  TEARDOWN();
}


TEST(single_veneer) {
  SETUP();
  START();

  const int max_range = Instruction::GetImmBranchForwardRange(TestBranchType);

  Label success, fail, done;

  __ Mov(x0, 0);
  __ Mov(x1, 1);
  __ Mov(x10, 0);

  __ Tbz(x10, 7, &success);

  // Generate enough code to overflow the immediate range of the `tbz`.
  for (unsigned i = 0; i < max_range / kInstructionSize + 1; ++i) {
    if (i % 100 == 0) {
      // If we do land in this code, we do not want to execute so many nops
      // before reaching the end of the test (especially if tracing is
      // activated).
      __ B(&fail);
    } else {
      __ Nop();
    }
  }
  __ B(&fail);

  __ Bind(&success);
  __ Mov(x0, 1);

  __ B(&done);
  __ Bind(&fail);
  __ Mov(x1, 0);
  __ Bind(&done);

  END();
  RUN();

  ASSERT_EQUAL_64(1, x0);
  ASSERT_EQUAL_64(1, x1);

  TEARDOWN();
}


TEST(simple_veneers) {
  // Test that the MacroAssembler correctly emits veneers for forward branches
  // to labels that are outside the immediate range of branch instructions.
  const int max_range =
      std::max(Instruction::GetImmBranchForwardRange(TestBranchType),
               std::max(Instruction::GetImmBranchForwardRange(
                            CompareBranchType),
                        Instruction::GetImmBranchForwardRange(CondBranchType)));

  SETUP();
  START();

  Label done, fail;
  Label test_tbz, test_cbz, test_bcond;
  Label success_tbz, success_cbz, success_bcond;

  __ Mov(x0, 0);
  __ Mov(x1, 1);
  __ Mov(x10, 0);

  __ Bind(&test_tbz);
  __ Tbz(x10, 7, &success_tbz);
  __ Bind(&test_cbz);
  __ Cbz(x10, &success_cbz);
  __ Bind(&test_bcond);
  __ Cmp(x10, 0);
  __ B(eq, &success_bcond);

  // Generate enough code to overflow the immediate range of the three types of
  // branches above.
  for (unsigned i = 0; i < max_range / kInstructionSize + 1; ++i) {
    if (i % 100 == 0) {
      // If we do land in this code, we do not want to execute so many nops
      // before reaching the end of the test (especially if tracing is
      // activated).
      __ B(&fail);
    } else {
      __ Nop();
    }
  }
  __ B(&fail);

  __ Bind(&success_tbz);
  __ Orr(x0, x0, 1 << 0);
  __ B(&test_cbz);
  __ Bind(&success_cbz);
  __ Orr(x0, x0, 1 << 1);
  __ B(&test_bcond);
  __ Bind(&success_bcond);
  __ Orr(x0, x0, 1 << 2);

  __ B(&done);
  __ Bind(&fail);
  __ Mov(x1, 0);
  __ Bind(&done);

  END();
  RUN();

  ASSERT_EQUAL_64(0x7, x0);
  ASSERT_EQUAL_64(0x1, x1);

  TEARDOWN();
}


TEST(veneers_stress) {
  SETUP();
  START();

  // This is a code generation test stressing the emission of veneers. The code
  // generated is not executed.

  Label target;
  const unsigned max_range =
      Instruction::GetImmBranchForwardRange(CondBranchType);
  const unsigned iterations =
      (max_range + max_range / 4) / (4 * kInstructionSize);
  for (unsigned i = 0; i < iterations; i++) {
    __ B(&target);
    __ B(eq, &target);
    __ Cbz(x0, &target);
    __ Tbz(x0, 0, &target);
  }
  __ Bind(&target);

  END();
  TEARDOWN();
}


TEST(veneers_two_out_of_range) {
  SETUP();
  START();

  // This is a code generation test. The code generated is not executed.
  // Ensure that the MacroAssembler takes unresolved branches into account when
  // deciding whether a veneer pool should be emitted. We generate two branches
  // that go out of range at the same offset. When the MacroAssembler decides
  // to emit the veneer pool, the emission of the first veneer should not cause
  // the other branch to go out of range.

  int range_cbz = Instruction::GetImmBranchForwardRange(CompareBranchType);
  int range_tbz = Instruction::GetImmBranchForwardRange(TestBranchType);
  int max_target = static_cast<int>(masm.GetCursorOffset()) + range_cbz;

  Label done;

  // We use different labels to prevent the MacroAssembler from sharing veneers.
  Label target_cbz, target_tbz;

  __ Cbz(x0, &target_cbz);
  while (masm.GetCursorOffset() < max_target - range_tbz) {
    __ Nop();
  }
  __ Tbz(x0, 0, &target_tbz);
  while (masm.GetCursorOffset() < max_target) {
    __ Nop();
  }

  // This additional nop makes the branches go out of range.
  __ Nop();

  __ Bind(&target_cbz);
  __ Bind(&target_tbz);

  END();
  TEARDOWN();
}


TEST(veneers_hanging) {
  SETUP();
  START();

  // This is a code generation test. The code generated is not executed.
  // Ensure that the MacroAssembler takes unresolved branches into account when
  // deciding whether a veneer pool should be emitted. This is similar to the
  // 'veneers_two_out_of_range' test. We try to trigger the following situation:
  //   b.eq label
  //   b.eq label
  //   ...
  //   nop
  //   ...
  //   cbz x0, label
  //   cbz x0, label
  //   ...
  //   tbz x0, 0, label
  //   nop
  //   ...
  //   nop    <- From here the `b.eq` and `cbz` instructions run out of range,
  //             so a veneer pool is required.
  //   veneer
  //   veneer
  //   veneer <- The `tbz` runs out of range somewhere in the middle of the
  //   veneer    veneer pool.
  //   veneer

  const int range_bcond = Instruction::GetImmBranchForwardRange(CondBranchType);
  const int range_cbz =
      Instruction::GetImmBranchForwardRange(CompareBranchType);
  const int range_tbz = Instruction::GetImmBranchForwardRange(TestBranchType);
  const int max_target = static_cast<int>(masm.GetCursorOffset()) + range_bcond;

  Label done;
  const int n_bcond = 100;
  const int n_cbz = 100;
  const int n_tbz = 1;
  const int kNTotalBranches = n_bcond + n_cbz + n_tbz;

  // We use different labels to prevent the MacroAssembler from sharing veneers.
  Label labels[kNTotalBranches];
  for (int i = 0; i < kNTotalBranches; i++) {
    new (&labels[i]) Label();
  }

  for (int i = 0; i < n_bcond; i++) {
    __ B(eq, &labels[i]);
  }

  while (masm.GetCursorOffset() < max_target - range_cbz) {
    __ Nop();
  }

  for (int i = 0; i < n_cbz; i++) {
    __ Cbz(x0, &labels[n_bcond + i]);
  }

  // Ensure the 'tbz' will go out of range after some of the previously
  // generated branches.
  int margin = (n_bcond / 2) * kInstructionSize;
  while (masm.GetCursorOffset() < max_target - range_tbz + margin) {
    __ Nop();
  }

  __ Tbz(x0, 0, &labels[n_bcond + n_cbz]);

  while (masm.GetCursorOffset() < max_target) {
    __ Nop();
  }

  // This additional nop makes the 'b.eq' and 'cbz' instructions go out of range
  // and forces the emission of a veneer pool. The 'tbz' is not yet out of
  // range, but will go out of range while veneers are emitted for the other
  // branches.
  // The MacroAssembler should ensure that veneers are correctly emitted for all
  // the branches, including the 'tbz'. Checks will fail if the target of a
  // branch is out of range.
  __ Nop();

  for (int i = 0; i < kNTotalBranches; i++) {
    __ Bind(&labels[i]);
  }

  END();
  TEARDOWN();
}


TEST(collision_literal_veneer_pools) {
  SETUP_WITH_FEATURES(CPUFeatures::kFP);
  START();

  // This is a code generation test. The code generated is not executed.

  // Make sure the literal pool is empty.
  masm.EmitLiteralPool(LiteralPool::kBranchRequired);
  ASSERT_LITERAL_POOL_SIZE(0);

  // We chose the offsets below to (try to) trigger the following situation:
  // buffer offset
  //              0:   tbz x0, 0, target_tbz ----------------------------------.
  //              4:   nop                                                     |
  //                   ...                                                     |
  //                   nop                                                     |
  //    literal gen:   ldr s0, [pc + ...]   ; load from `pool start + 0`       |
  //                   ldr s0, [pc + ...]   ; load from `pool start + 4`       |
  //                   ...                                                     |
  //                   ldr s0, [pc + ...]                                      |
  //     pool start:   floating-point literal (0.1)                            |
  //                   floating-point literal (1.1)                            |
  //                   ...                                                     |
  //                   floating-point literal (<n>.1)     <-----tbz-max-range--'
  //                   floating-point literal (<n+1>.1)
  //                   ...

  const int range_tbz = Instruction::GetImmBranchForwardRange(TestBranchType);
  const int max_target = static_cast<int>(masm.GetCursorOffset()) + range_tbz;

  const size_t target_literal_pool_size = 100 * kInstructionSize;
  const int offset_start_literal_gen =
      target_literal_pool_size + target_literal_pool_size / 2;


  Label target_tbz;

  __ Tbz(x0, 0, &target_tbz);
  VIXL_CHECK(masm.GetNumberOfPotentialVeneers() == 1);
  while (masm.GetCursorOffset() < max_target - offset_start_literal_gen) {
    __ Nop();
  }
  VIXL_CHECK(masm.GetNumberOfPotentialVeneers() == 1);

  for (int i = 0; i < 100; i++) {
    // Use a different value to force one literal pool entry per iteration.
    __ Ldr(s0, i + 0.1);
  }
  VIXL_CHECK(masm.GetLiteralPoolSize() >= target_literal_pool_size);

  // Force emission of a literal pool.
  masm.EmitLiteralPool(LiteralPool::kBranchRequired);
  ASSERT_LITERAL_POOL_SIZE(0);

  // The branch should not have gone out of range during the emission of the
  // literal pool.
  __ Bind(&target_tbz);

  VIXL_CHECK(masm.GetNumberOfPotentialVeneers() == 0);

  END();
  TEARDOWN();
}


TEST(ldr_literal_explicit) {
  SETUP();

  START();
  Literal<int64_t> automatically_placed_literal(1, masm.GetLiteralPool());
  Literal<int64_t> manually_placed_literal(2);
  {
    ExactAssemblyScope scope(&masm, kInstructionSize + sizeof(int64_t));
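    // The scope reserves exactly enough space for the branch and the manually
    // placed 8-byte literal, so nothing else (such as a pool) can be emitted
    // in between.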
    Label over_literal;
    __ b(&over_literal);
    __ place(&manually_placed_literal);
    __ bind(&over_literal);
  }
  __ Ldr(x1, &manually_placed_literal);
  __ Ldr(x2, &automatically_placed_literal);
  __ Add(x0, x1, x2);
  END();

  RUN();

  ASSERT_EQUAL_64(3, x0);

  TEARDOWN();
}


TEST(ldr_literal_automatically_placed) {
  SETUP_WITH_FEATURES(CPUFeatures::kFP);

  START();

  // We start with an empty literal pool.
  ASSERT_LITERAL_POOL_SIZE(0);

  // Create a literal that should be placed by the literal pool.
  Literal<int64_t> explicit_literal(2, masm.GetLiteralPool());
  // It should not appear in the literal pool until its first use.
  ASSERT_LITERAL_POOL_SIZE(0);

  // Check that using standard literals does not break the use of explicitly
  // created literals.
  __ Ldr(d1, 1.1);
  ASSERT_LITERAL_POOL_SIZE(8);
  masm.EmitLiteralPool(LiteralPool::kBranchRequired);
  ASSERT_LITERAL_POOL_SIZE(0);

  __ Ldr(x2, &explicit_literal);
  ASSERT_LITERAL_POOL_SIZE(8);
  masm.EmitLiteralPool(LiteralPool::kBranchRequired);
  ASSERT_LITERAL_POOL_SIZE(0);

  __ Ldr(d3, 3.3);
  ASSERT_LITERAL_POOL_SIZE(8);
  masm.EmitLiteralPool(LiteralPool::kBranchRequired);
  ASSERT_LITERAL_POOL_SIZE(0);

  // Re-use our explicitly created literal. It has already been placed, so it
  // should not impact the literal pool.
  __ Ldr(x4, &explicit_literal);
  ASSERT_LITERAL_POOL_SIZE(0);

  END();

  RUN();

  ASSERT_EQUAL_FP64(1.1, d1);
  ASSERT_EQUAL_64(2, x2);
  ASSERT_EQUAL_FP64(3.3, d3);
  ASSERT_EQUAL_64(2, x4);

  TEARDOWN();
}


TEST(literal_update_overwrite) {
  SETUP();

  START();

  ASSERT_LITERAL_POOL_SIZE(0);
  LiteralPool* literal_pool = masm.GetLiteralPool();

  Literal<int32_t> lit_32_update_before_pool(0xbad, literal_pool);
  Literal<int32_t> lit_32_update_after_pool(0xbad, literal_pool);
  Literal<int64_t> lit_64_update_before_pool(0xbad, literal_pool);
  Literal<int64_t> lit_64_update_after_pool(0xbad, literal_pool);

  ASSERT_LITERAL_POOL_SIZE(0);

  lit_32_update_before_pool.UpdateValue(32);
  lit_64_update_before_pool.UpdateValue(64);

  __ Ldr(w1, &lit_32_update_before_pool);
  __ Ldr(x2, &lit_64_update_before_pool);
  __ Ldr(w3, &lit_32_update_after_pool);
  __ Ldr(x4, &lit_64_update_after_pool);

  masm.EmitLiteralPool(LiteralPool::kBranchRequired);

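  // Updating a literal before the pool is emitted simply changes the value
  // that will be placed; once the literal has been placed, UpdateValue needs
  // the MacroAssembler so that it can rewrite the value in the code buffer.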
  VIXL_ASSERT(lit_32_update_after_pool.IsPlaced());
  VIXL_ASSERT(lit_64_update_after_pool.IsPlaced());
  lit_32_update_after_pool.UpdateValue(128, &masm);
  lit_64_update_after_pool.UpdateValue(256, &masm);

  END();

  RUN();

  ASSERT_EQUAL_64(32, x1);
  ASSERT_EQUAL_64(64, x2);
  ASSERT_EQUAL_64(128, x3);
  ASSERT_EQUAL_64(256, x4);

  TEARDOWN();
}


TEST(literal_deletion_policies) {
  SETUP();

  START();

  // We cannot check exactly when the deletion of the literals occurs, but we
  // can check that usage of the deletion policies is not broken.

  ASSERT_LITERAL_POOL_SIZE(0);
  LiteralPool* literal_pool = masm.GetLiteralPool();

  Literal<int32_t> lit_manual(0xbad, literal_pool);
  Literal<int32_t>* lit_deleted_on_placement =
      new Literal<int32_t>(0xbad,
                           literal_pool,
                           RawLiteral::kDeletedOnPlacementByPool);
  Literal<int32_t>* lit_deleted_on_pool_destruction =
      new Literal<int32_t>(0xbad,
                           literal_pool,
                           RawLiteral::kDeletedOnPoolDestruction);

  ASSERT_LITERAL_POOL_SIZE(0);

  lit_manual.UpdateValue(32);
  lit_deleted_on_placement->UpdateValue(64);

  __ Ldr(w1, &lit_manual);
  __ Ldr(w2, lit_deleted_on_placement);
  __ Ldr(w3, lit_deleted_on_pool_destruction);

  masm.EmitLiteralPool(LiteralPool::kBranchRequired);

  VIXL_ASSERT(lit_manual.IsPlaced());
  VIXL_ASSERT(lit_deleted_on_pool_destruction->IsPlaced());
  lit_deleted_on_pool_destruction->UpdateValue(128, &masm);

  END();

  RUN();

  ASSERT_EQUAL_64(32, x1);
  ASSERT_EQUAL_64(64, x2);
  ASSERT_EQUAL_64(128, x3);

  TEARDOWN();
}


TEST(generic_operand) {
  SETUP_WITH_FEATURES(CPUFeatures::kFP);

  int32_t data_32_array[5] = {0xbadbeef,
                              0x11111111,
                              0xbadbeef,
                              0x33333333,
                              0xbadbeef};
  int64_t data_64_array[5] = {INT64_C(0xbadbadbadbeef),
                              INT64_C(0x1111111111111111),
                              INT64_C(0xbadbadbadbeef),
                              INT64_C(0x3333333333333333),
                              INT64_C(0xbadbadbadbeef)};
  size_t size_32 = sizeof(data_32_array[0]);
  size_t size_64 = sizeof(data_64_array[0]);

  START();

  intptr_t data_32_address = reinterpret_cast<intptr_t>(&data_32_array[0]);
  intptr_t data_64_address = reinterpret_cast<intptr_t>(&data_64_array[0]);
  Register data_32 = x27;
  Register data_64 = x28;
  __ Mov(data_32, data_32_address);
  __ Mov(data_64, data_64_address);

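  // GenericOperand wraps either a register (core or FP) or a memory location
  // of a given size, and Move picks the appropriate mov, fmov, load or store
  // for each source/destination combination.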
  __ Move(GenericOperand(w0),
          GenericOperand(MemOperand(data_32, 1 * size_32), size_32));
  __ Move(GenericOperand(s0),
          GenericOperand(MemOperand(data_32, 3 * size_32), size_32));
  __ Move(GenericOperand(x10),
          GenericOperand(MemOperand(data_64, 1 * size_64), size_64));
  __ Move(GenericOperand(d10),
          GenericOperand(MemOperand(data_64, 3 * size_64), size_64));

  __ Move(GenericOperand(w1), GenericOperand(w0));
  __ Move(GenericOperand(s1), GenericOperand(s0));
  __ Move(GenericOperand(x11), GenericOperand(x10));
  __ Move(GenericOperand(d11), GenericOperand(d10));

  __ Move(GenericOperand(MemOperand(data_32, 0 * size_32), size_32),
          GenericOperand(w1));
  __ Move(GenericOperand(MemOperand(data_32, 2 * size_32), size_32),
          GenericOperand(s1));
  __ Move(GenericOperand(MemOperand(data_64, 0 * size_64), size_64),
          GenericOperand(x11));
  __ Move(GenericOperand(MemOperand(data_64, 2 * size_64), size_64),
          GenericOperand(d11));

  __ Move(GenericOperand(MemOperand(data_32, 4 * size_32), size_32),
          GenericOperand(MemOperand(data_32, 0 * size_32), size_32));
  __ Move(GenericOperand(MemOperand(data_64, 4 * size_64), size_64),
          GenericOperand(MemOperand(data_64, 0 * size_64), size_64));
  END();

  RUN();

  ASSERT_EQUAL_64(data_32_address, data_32);
  ASSERT_EQUAL_64(data_64_address, data_64);

  ASSERT_EQUAL_32(0x11111111, w0);
  ASSERT_EQUAL_32(0x33333333, core.sreg_bits(0));
  ASSERT_EQUAL_64(INT64_C(0x1111111111111111), x10);
  ASSERT_EQUAL_64(INT64_C(0x3333333333333333), core.dreg_bits(10));

  ASSERT_EQUAL_32(0x11111111, w1);
  ASSERT_EQUAL_32(0x33333333, core.sreg_bits(1));
  ASSERT_EQUAL_64(INT64_C(0x1111111111111111), x11);
  ASSERT_EQUAL_64(INT64_C(0x3333333333333333), core.dreg_bits(11));

  VIXL_CHECK(data_32_array[0] == 0x11111111);
  VIXL_CHECK(data_32_array[1] == 0x11111111);
  VIXL_CHECK(data_32_array[2] == 0x33333333);
  VIXL_CHECK(data_32_array[3] == 0x33333333);
  VIXL_CHECK(data_32_array[4] == 0x11111111);

  VIXL_CHECK(data_64_array[0] == INT64_C(0x1111111111111111));
  VIXL_CHECK(data_64_array[1] == INT64_C(0x1111111111111111));
  VIXL_CHECK(data_64_array[2] == INT64_C(0x3333333333333333));
  VIXL_CHECK(data_64_array[3] == INT64_C(0x3333333333333333));
  VIXL_CHECK(data_64_array[4] == INT64_C(0x1111111111111111));

  TEARDOWN();
}


// Test feature detection of calls to runtime functions.

// C++11 should be sufficient to provide simulated runtime calls, except for a
// GCC bug before 4.9.1.
#if defined(VIXL_INCLUDE_SIMULATOR_AARCH64) && (__cplusplus >= 201103L) && \
    (defined(__clang__) || GCC_VERSION_OR_NEWER(4, 9, 1)) &&               \
    !defined(VIXL_HAS_SIMULATED_RUNTIME_CALL_SUPPORT)
#error \
    "C++11 should be sufficient to provide support for simulated runtime calls."
#endif  // #if defined(VIXL_INCLUDE_SIMULATOR_AARCH64) && ...

#if (__cplusplus >= 201103L) && \
    !defined(VIXL_HAS_MACROASSEMBLER_RUNTIME_CALL_SUPPORT)
#error \
    "C++11 should be sufficient to provide support for `MacroAssembler::CallRuntime()`."
#endif  // #if (__cplusplus >= 201103L) && ...

#ifdef VIXL_HAS_MACROASSEMBLER_RUNTIME_CALL_SUPPORT
int32_t runtime_call_add_one(int32_t a) { return a + 1; }

double runtime_call_add_doubles(double a, double b, double c) {
  return a + b + c;
}

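// Under AAPCS64, the first eight integer arguments are passed in x0-x7 and the
// first eight FP arguments in d0-d7, so arg9 (and arg10 below) must be passed
// on the stack. These helpers check that stacked arguments are marshalled
// correctly.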
int64_t runtime_call_one_argument_on_stack(int64_t arg1 __attribute__((unused)),
                                           int64_t arg2 __attribute__((unused)),
                                           int64_t arg3 __attribute__((unused)),
                                           int64_t arg4 __attribute__((unused)),
                                           int64_t arg5 __attribute__((unused)),
                                           int64_t arg6 __attribute__((unused)),
                                           int64_t arg7 __attribute__((unused)),
                                           int64_t arg8 __attribute__((unused)),
                                           int64_t arg9) {
  return arg9;
}

double runtime_call_two_arguments_on_stack(int64_t arg1 __attribute__((unused)),
                                           int64_t arg2 __attribute__((unused)),
                                           int64_t arg3 __attribute__((unused)),
                                           int64_t arg4 __attribute__((unused)),
                                           int64_t arg5 __attribute__((unused)),
                                           int64_t arg6 __attribute__((unused)),
                                           int64_t arg7 __attribute__((unused)),
                                           int64_t arg8 __attribute__((unused)),
                                           double arg9,
                                           double arg10) {
  return arg9 - arg10;
}

void runtime_call_store_at_address(int64_t* address) { *address = 0xf00d; }

enum RuntimeCallTestEnum { Enum0 };

RuntimeCallTestEnum runtime_call_enum(RuntimeCallTestEnum e) { return e; }

enum class RuntimeCallTestEnumClass { Enum0 };

RuntimeCallTestEnumClass runtime_call_enum_class(RuntimeCallTestEnumClass e) {
  return e;
}

int8_t test_int8_t(int8_t x) { return x; }
uint8_t test_uint8_t(uint8_t x) { return x; }
int16_t test_int16_t(int16_t x) { return x; }
uint16_t test_uint16_t(uint16_t x) { return x; }

TEST(runtime_calls)27422 TEST(runtime_calls) {
27423   SETUP_WITH_FEATURES(CPUFeatures::kFP);
27424 
27425 #ifndef VIXL_HAS_SIMULATED_RUNTIME_CALL_SUPPORT
27426   if (masm.GenerateSimulatorCode()) {
27427     // This configuration is unsupported and a `VIXL_UNREACHABLE()` would fire
27428     // while trying to generate `CallRuntime`. This configuration should only be
27429     // reachable with C++11 and a (buggy) version of GCC pre-4.9.1.
27430     TEARDOWN();
27431     return;
27432   }
27433 #endif

  START();

  // Test `CallRuntime`.

  __ Mov(w0, 0);
  __ CallRuntime(runtime_call_add_one);
  __ Mov(w20, w0);

  __ Fmov(d0, 0.0);
  __ Fmov(d1, 1.5);
  __ Fmov(d2, 2.5);
  __ CallRuntime(runtime_call_add_doubles);
  __ Fmov(d20, d0);

  __ Mov(x0, 0x123);
  __ Push(x0, x0);
  __ CallRuntime(runtime_call_one_argument_on_stack);
  __ Mov(x21, x0);
  __ Pop(x0, x1);

  __ Fmov(d0, 314.0);
  __ Fmov(d1, 4.0);
  __ Push(d1, d0);
  __ CallRuntime(runtime_call_two_arguments_on_stack);
  __ Fmov(d21, d0);
  __ Pop(d1, d0);

  // Test that the template mechanisms don't break with enums.
  __ Mov(w0, 0);
  __ CallRuntime(runtime_call_enum);
  __ Mov(w0, 0);
  __ CallRuntime(runtime_call_enum_class);

  // Test `TailCallRuntime`.

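  // The code between `function` and `after_function` acts as a small callable
  // routine. `TailCallRuntime` branches to its target without linking, so
  // `runtime_call_add_one` returns directly to the `Bl` call site below; the
  // instructions following the tail call must never execute.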
  Label function, after_function;
  __ B(&after_function);
  __ Bind(&function);
  __ Mov(x22, 0);
  __ Mov(w0, 123);
  __ TailCallRuntime(runtime_call_add_one);
  // Control should not fall through.
  __ Mov(x22, 0xbad);
  __ Ret();
  __ Bind(&after_function);

  // Call our dummy function, taking care to preserve the link register.
  __ Push(ip0, lr);
  __ Bl(&function);
  __ Pop(lr, ip0);
  // Save the result.
  __ Mov(w23, w0);

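  // x24 counts comparison failures in the loop below; it must remain zero.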
  __ Mov(x24, 0);
  int test_values[] = {static_cast<int8_t>(-1),
                       static_cast<uint8_t>(-1),
                       static_cast<int16_t>(-1),
                       static_cast<uint16_t>(-1),
                       -256,
                       -1,
                       0,
                       1,
                       256};
  for (size_t i = 0; i < sizeof(test_values) / sizeof(test_values[0]); ++i) {
    Label pass_int8, pass_uint8, pass_int16, pass_uint16;
    int x = test_values[i];
    __ Mov(w0, static_cast<int8_t>(x));
    __ CallRuntime(test_int8_t);
    __ Cmp(w0, static_cast<int8_t>(x));
    __ Cinc(x24, x24, ne);
    __ Mov(w0, static_cast<uint8_t>(x));
    __ CallRuntime(test_uint8_t);
    __ Cmp(w0, static_cast<uint8_t>(x));
    __ Cinc(x24, x24, ne);
    __ Mov(w0, static_cast<int16_t>(x));
    __ CallRuntime(test_int16_t);
    __ Cmp(w0, static_cast<int16_t>(x));
    __ Cinc(x24, x24, ne);
    __ Mov(w0, static_cast<uint16_t>(x));
    __ CallRuntime(test_uint16_t);
    __ Cmp(w0, static_cast<uint16_t>(x));
    __ Cinc(x24, x24, ne);
  }


  int64_t value = 0xbadbeef;
  __ Mov(x0, reinterpret_cast<uint64_t>(&value));
  __ CallRuntime(runtime_call_store_at_address);

  END();

#if defined(VIXL_HAS_SIMULATED_RUNTIME_CALL_SUPPORT) || \
    !defined(VIXL_INCLUDE_SIMULATOR_AARCH64)
  RUN();

  ASSERT_EQUAL_32(1, w20);
  ASSERT_EQUAL_FP64(4.0, d20);
  ASSERT_EQUAL_64(0x123, x21);
  ASSERT_EQUAL_FP64(310.0, d21);
  VIXL_CHECK(value == 0xf00d);
  ASSERT_EQUAL_64(0, x22);
  ASSERT_EQUAL_32(124, w23);
  ASSERT_EQUAL_64(0, x24);
#endif  // #if defined(VIXL_HAS_SIMULATED_RUNTIME_CALL_SUPPORT) || ...

  TEARDOWN();
}
#endif  // #ifdef VIXL_HAS_MACROASSEMBLER_RUNTIME_CALL_SUPPORT


TEST(optimised_mov_register) {
  SETUP();

  START();
  Label start;
  __ Bind(&start);
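  // `Mov(x0, x0)` has no architectural effect, so it can be elided entirely.
  // A same-register W move is different: writing a W register zeroes the top
  // 32 bits of the corresponding X register, so `Mov(w0, w0)` is discarded
  // only when the caller explicitly allows it with `kDiscardForSameWReg`.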
  __ Mov(x0, x0);
  VIXL_CHECK(masm.GetSizeOfCodeGeneratedSince(&start) == 0);
  __ Mov(w0, w0, kDiscardForSameWReg);
  VIXL_CHECK(masm.GetSizeOfCodeGeneratedSince(&start) == 0);
  __ Mov(w0, w0);
  VIXL_CHECK(masm.GetSizeOfCodeGeneratedSince(&start) == kInstructionSize);

  END();

  RUN();

  TEARDOWN();
}


TEST(nop) {
  MacroAssembler masm;

  Label start;
  __ Bind(&start);
  __ Nop();
  // `MacroAssembler::Nop` must generate at least one nop.
  VIXL_CHECK(masm.GetSizeOfCodeGeneratedSince(&start) >= kInstructionSize);

  masm.FinalizeCode();
}

TEST(scratch_scope_basic_v) {
  MacroAssembler masm;

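  // By default, the scratch V register list contains only v31, so acquiring
  // a scratch register at Q, D or S size should always alias it.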
  {
    UseScratchRegisterScope temps(&masm);
    VRegister temp = temps.AcquireVRegisterOfSize(kQRegSize);
    VIXL_CHECK(temp.Aliases(v31));
  }
  {
    UseScratchRegisterScope temps(&masm);
    VRegister temp = temps.AcquireVRegisterOfSize(kDRegSize);
    VIXL_CHECK(temp.Aliases(v31));
  }
  {
    UseScratchRegisterScope temps(&masm);
    VRegister temp = temps.AcquireVRegisterOfSize(kSRegSize);
    VIXL_CHECK(temp.Aliases(v31));
  }
}

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
// Test the pseudo-instructions that control CPUFeatures dynamically in the
// Simulator. These are used by the test infrastructure itself, but in a
// fairly limited way.
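//
// A minimal usage sketch (the same pattern generated by the helpers below):
//
//   {
//     SimulationCPUFeaturesScope scope(&masm, CPUFeatures::kNEON);
//     // Code generated here may use NEON when simulated, regardless of the
//     // simulator's initial feature set.
//   }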

static void RunHelperWithFeatureCombinations(
    void (*helper)(const CPUFeatures& base, const CPUFeatures& f)) {
  // Iterate, testing the first n features in this list, for each n.
  CPUFeatures::Feature features[] = {
      // Put kNone first, so that the first iteration uses an empty feature set.
      CPUFeatures::kNone,
      // The remaining features used are arbitrary.
      CPUFeatures::kIDRegisterEmulation,
      CPUFeatures::kDCPoP,
      CPUFeatures::kPAuth,
      CPUFeatures::kFcma,
      CPUFeatures::kAES,
      CPUFeatures::kNEON,
      CPUFeatures::kCRC32,
      CPUFeatures::kFP,
      CPUFeatures::kPmull1Q,
      CPUFeatures::kSM4,
      CPUFeatures::kSM3,
      CPUFeatures::kDotProduct,
  };
  VIXL_ASSERT(CPUFeatures(CPUFeatures::kNone) == CPUFeatures::None());
  // The features are not necessarily encoded in kInstructionSize-sized slots,
  // so the MacroAssembler must pad the list to align the following
  // instruction. Ensure that we have enough features in the list to cover all
  // interesting alignment cases, even if the highest common factor of
  // kInstructionSize and an encoded feature is one.
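  // For example, if each feature were encoded in a single byte (with
  // kInstructionSize == 4 bytes), list lengths from one to five would
  // exercise every possible padding amount.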
  VIXL_STATIC_ASSERT(ARRAY_SIZE(features) > kInstructionSize);

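  // Invoke `helper` once for every ordered pair of list prefixes (base, f),
  // so every combination of initial and requested feature sets is covered.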
  CPUFeatures base = CPUFeatures::None();
  for (size_t i = 0; i < ARRAY_SIZE(features); i++) {
    base.Combine(features[i]);
    CPUFeatures f = CPUFeatures::None();
    for (size_t j = 0; j < ARRAY_SIZE(features); j++) {
      f.Combine(features[j]);
      helper(base, f);
    }
  }
}

static void SetSimulatorCPUFeaturesHelper(const CPUFeatures& base,
                                          const CPUFeatures& f) {
  SETUP_WITH_FEATURES(base);
  START();

  __ SetSimulatorCPUFeatures(f);

  END();
  RUN_WITHOUT_SEEN_FEATURE_CHECK();
  VIXL_CHECK(*(simulator.GetCPUFeatures()) == f);
  TEARDOWN();
}

TEST(configure_cpu_features_set) {
  RunHelperWithFeatureCombinations(SetSimulatorCPUFeaturesHelper);
}

static void EnableSimulatorCPUFeaturesHelper(const CPUFeatures& base,
                                             const CPUFeatures& f) {
  SETUP_WITH_FEATURES(base);
  START();

  __ EnableSimulatorCPUFeatures(f);

  END();
  RUN_WITHOUT_SEEN_FEATURE_CHECK();
  VIXL_CHECK(*(simulator.GetCPUFeatures()) == base.With(f));
  TEARDOWN();
}

TEST(configure_cpu_features_enable) {
  RunHelperWithFeatureCombinations(EnableSimulatorCPUFeaturesHelper);
}

static void DisableSimulatorCPUFeaturesHelper(const CPUFeatures& base,
                                              const CPUFeatures& f) {
  SETUP_WITH_FEATURES(base);
  START();

  __ DisableSimulatorCPUFeatures(f);

  END();
  RUN_WITHOUT_SEEN_FEATURE_CHECK();
  VIXL_CHECK(*(simulator.GetCPUFeatures()) == base.Without(f));
  TEARDOWN();
}

TEST(configure_cpu_features_disable) {
  RunHelperWithFeatureCombinations(DisableSimulatorCPUFeaturesHelper);
}

static void SaveRestoreSimulatorCPUFeaturesHelper(const CPUFeatures& base,
                                                  const CPUFeatures& f) {
  SETUP_WITH_FEATURES(base);
  START();

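  // Saves and restores nest in LIFO order: each Restore reverts the
  // simulator's feature set to its state at the matching Save.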
  {
    __ SaveSimulatorCPUFeatures();
    __ SetSimulatorCPUFeatures(f);
    {
      __ SaveSimulatorCPUFeatures();
      __ SetSimulatorCPUFeatures(CPUFeatures::All());
      __ RestoreSimulatorCPUFeatures();
    }
    __ RestoreSimulatorCPUFeatures();
  }

  END();
  RUN_WITHOUT_SEEN_FEATURE_CHECK();
  VIXL_CHECK(*(simulator.GetCPUFeatures()) == base);
  TEARDOWN();
}

TEST(configure_cpu_features_save_restore) {
  RunHelperWithFeatureCombinations(SaveRestoreSimulatorCPUFeaturesHelper);
}

static void SimulationCPUFeaturesScopeHelper(const CPUFeatures& base,
                                             const CPUFeatures& f) {
  SETUP_WITH_FEATURES(base);
  START();

  {
    SimulationCPUFeaturesScope scope_a(&masm, f);
    {
      SimulationCPUFeaturesScope scope_b(&masm, CPUFeatures::All());
      {
        SimulationCPUFeaturesScope scope_c(&masm, CPUFeatures::None());
        // The scope arguments combine with 'Enable' semantics, so any
        // CPUFeatures can be used here, regardless of the `base` feature set.
        __ Fadd(v0.V4S(), v1.V4S(), v2.V4S());  // Requires {FP, NEON}.
      }
    }
  }

  END();
  RUN_WITHOUT_SEEN_FEATURE_CHECK();
  VIXL_CHECK(*(simulator.GetCPUFeatures()) == base);
  TEARDOWN();
}

TEST(configure_cpu_features_scope) {
  RunHelperWithFeatureCombinations(SimulationCPUFeaturesScopeHelper);
}

#endif  // #ifdef VIXL_INCLUDE_SIMULATOR_AARCH64

}  // namespace aarch64
}  // namespace vixl