// Copyright 2015, VIXL authors
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
//   * Redistributions of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//   * Redistributions in binary form must reproduce the above copyright
//     notice, this list of conditions and the following disclaimer in the
//     documentation and/or other materials provided with the distribution.
//   * Neither the name of ARM Limited nor the names of its contributors may
//     be used to endorse or promote products derived from this software
//     without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.

#include <sys/mman.h>

#include <cfloat>
#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <cstring>

#include "test-runner.h"
#include "test-utils.h"
#include "aarch64/test-utils-aarch64.h"

#include "aarch64/cpu-aarch64.h"
#include "aarch64/disasm-aarch64.h"
#include "aarch64/macro-assembler-aarch64.h"
#include "aarch64/simulator-aarch64.h"

namespace vixl {
namespace aarch64 {

// Test infrastructure.
//
// Tests are functions which accept no parameters and have no return values.
// The testing code should not perform an explicit return once completed. For
// example, to test the mov immediate instruction a very simple test would be:
//
//   TEST(mov_x0_one) {
//     SETUP();
//
//     START();
//     __ mov(x0, Operand(1));
//     END();
//
//     RUN();
//
//     ASSERT_EQUAL_64(1, x0);
//
//     TEARDOWN();
//   }
//
// Within a START ... END block all registers but sp can be modified. sp has
// to be explicitly saved/restored. The END() macro replaces the function
// return, so it may appear multiple times in a test if the test has multiple
// exit points.
//
// Once the test has been run, all integer and floating point registers, as
// well as flags, are accessible through a RegisterDump instance; see
// utils-aarch64.cc for more info on RegisterDump.
//
// We provide some helper asserts to handle common cases:
//
//   ASSERT_EQUAL_32(int32_t, int32_t)
//   ASSERT_EQUAL_FP32(float, float)
//   ASSERT_EQUAL_32(int32_t, W register)
//   ASSERT_EQUAL_FP32(float, S register)
//   ASSERT_EQUAL_64(int64_t, int64_t)
//   ASSERT_EQUAL_FP64(double, double)
//   ASSERT_EQUAL_64(int64_t, X register)
//   ASSERT_EQUAL_64(X register, X register)
//   ASSERT_EQUAL_FP64(double, D register)
//
// e.g. ASSERT_EQUAL_FP64(0.5, d30);
//
// If more advanced computation is required before the assert, then access the
// RegisterDump named core directly:
//
//   ASSERT_EQUAL_64(0x1234, core->reg_x0() & 0xffff);
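//
// Tests that exercise optional CPU features declare them up front with
// SETUP_WITH_FEATURES, so that both the assembler and the simulator know
// which instructions to accept. As an illustrative sketch (the test name and
// body here are hypothetical, not an existing test), a PAuth test might look
// like:
//
//   TEST(pauth_example) {
//     SETUP_WITH_FEATURES(CPUFeatures::kPAuth);
//
//     START();
//     __ Paciza(x0);
//     END();
//
//     RUN();
//
//     TEARDOWN();
//   }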


#define __ masm.
#define TEST(name) TEST_(AARCH64_ASM_##name)

// PushCalleeSavedRegisters(), PopCalleeSavedRegisters() and Dump() use NEON,
// so we need to enable it in the infrastructure code for each test.
const CPUFeatures kInfrastructureCPUFeatures(CPUFeatures::kNEON);

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
// Run tests with the simulator.

#define SETUP() \
  MacroAssembler masm; \
  SETUP_COMMON()

#define SETUP_WITH_FEATURES(...) \
  MacroAssembler masm; \
  SETUP_COMMON(); \
  masm.SetCPUFeatures(CPUFeatures(__VA_ARGS__)); \
  simulator.SetCPUFeatures(CPUFeatures(__VA_ARGS__))

#define SETUP_CUSTOM(size, pic) \
  byte* buf = new byte[size + CodeBuffer::kDefaultCapacity]; \
  MacroAssembler masm(buf, size + CodeBuffer::kDefaultCapacity, pic); \
  SETUP_COMMON()

#define SETUP_COMMON() \
  masm.SetCPUFeatures(CPUFeatures::None()); \
  masm.SetGenerateSimulatorCode(true); \
  Decoder simulator_decoder; \
  Simulator simulator(&simulator_decoder); \
  simulator.SetColouredTrace(Test::coloured_trace()); \
  simulator.SetInstructionStats(Test::instruction_stats()); \
  simulator.SetCPUFeatures(CPUFeatures::None()); \
  RegisterDump core; \
  ptrdiff_t offset_after_infrastructure_start; \
  ptrdiff_t offset_before_infrastructure_end

#define START() \
  masm.Reset(); \
  simulator.ResetState(); \
  { \
    SimulationCPUFeaturesScope cpu(&masm, kInfrastructureCPUFeatures); \
    __ PushCalleeSavedRegisters(); \
  } \
  { \
    int trace_parameters = 0; \
    if (Test::trace_reg()) trace_parameters |= LOG_STATE; \
    if (Test::trace_write()) trace_parameters |= LOG_WRITE; \
    if (Test::trace_sim()) trace_parameters |= LOG_DISASM; \
    if (Test::trace_branch()) trace_parameters |= LOG_BRANCH; \
    if (trace_parameters != 0) { \
      __ Trace(static_cast<TraceParameters>(trace_parameters), TRACE_ENABLE); \
    } \
  } \
  if (Test::instruction_stats()) { \
    __ EnableInstrumentation(); \
  } \
  offset_after_infrastructure_start = masm.GetCursorOffset(); \
  /* Avoid unused-variable warnings in case a test never calls RUN(). */ \
  USE(offset_after_infrastructure_start)

#define END() \
  offset_before_infrastructure_end = masm.GetCursorOffset(); \
  /* Avoid unused-variable warnings in case a test never calls RUN(). */ \
  USE(offset_before_infrastructure_end); \
  if (Test::instruction_stats()) { \
    __ DisableInstrumentation(); \
  } \
  __ Trace(LOG_ALL, TRACE_DISABLE); \
  { \
    SimulationCPUFeaturesScope cpu(&masm, kInfrastructureCPUFeatures); \
    core.Dump(&masm); \
    __ PopCalleeSavedRegisters(); \
  } \
  __ Ret(); \
  masm.FinalizeCode()

#define RUN() \
  RUN_WITHOUT_SEEN_FEATURE_CHECK(); \
  { \
    /* We expect the test to use all of the features it requested, plus the */ \
    /* features that the infrastructure code requires. */ \
    CPUFeatures const& expected = \
        simulator.GetCPUFeatures()->With(CPUFeatures::kNEON); \
    CPUFeatures const& seen = simulator.GetSeenFeatures(); \
    /* This gives three broad categories of features that we care about: */ \
    /*  1. Things both expected and seen. */ \
    /*  2. Things seen, but not expected. The simulator catches these. */ \
    /*  3. Things expected, but not seen. We check these here. */ \
    /* In a valid, passing test, categories 2 and 3 should be empty. */ \
    if (seen != expected) { \
      /* The Simulator should have caught anything in category 2 already. */ \
      VIXL_ASSERT(expected.Has(seen)); \
      /* Anything left is category 3: things expected, but not seen. This */ \
      /* is not necessarily a bug in VIXL itself, but indicates that the */ \
      /* test is less strict than it could be. */ \
      CPUFeatures missing = expected.Without(seen); \
      VIXL_ASSERT(missing.Count() > 0); \
      std::cout << "Error: expected to see CPUFeatures { " << missing \
                << " }\n"; \
      VIXL_ABORT(); \
    } \
  }

#define RUN_WITHOUT_SEEN_FEATURE_CHECK() \
  DISASSEMBLE(); \
  simulator.RunFrom(masm.GetBuffer()->GetStartAddress<Instruction*>())

#define RUN_CUSTOM() RUN()

#define TEARDOWN()

#define TEARDOWN_CUSTOM() delete[] buf;

#else  // ifdef VIXL_INCLUDE_SIMULATOR_AARCH64.
#define SETUP() \
  MacroAssembler masm; \
  SETUP_COMMON()

#define SETUP_WITH_FEATURES(...) \
  MacroAssembler masm; \
  SETUP_COMMON(); \
  masm.SetCPUFeatures(CPUFeatures(__VA_ARGS__))

#define SETUP_CUSTOM(size, pic) \
  byte* buffer = \
      reinterpret_cast<byte*>(mmap(NULL, \
                                   size + CodeBuffer::kDefaultCapacity, \
                                   PROT_READ | PROT_WRITE, \
                                   MAP_PRIVATE | MAP_ANONYMOUS, \
                                   -1, \
                                   0)); \
  size_t buffer_size = size + CodeBuffer::kDefaultCapacity; \
  MacroAssembler masm(buffer, buffer_size, pic); \
  SETUP_COMMON()

#define SETUP_COMMON() \
  masm.GetCPUFeatures()->Remove(CPUFeatures::All()); \
  masm.SetGenerateSimulatorCode(false); \
  RegisterDump core; \
  CPU::SetUp(); \
  ptrdiff_t offset_after_infrastructure_start; \
  ptrdiff_t offset_before_infrastructure_end

#define START() \
  masm.Reset(); \
  { \
    CPUFeaturesScope cpu(&masm, kInfrastructureCPUFeatures); \
    __ PushCalleeSavedRegisters(); \
  } \
  offset_after_infrastructure_start = masm.GetCursorOffset(); \
  /* Avoid unused-variable warnings in case a test never calls RUN(). */ \
  USE(offset_after_infrastructure_start)

#define END() \
  offset_before_infrastructure_end = masm.GetCursorOffset(); \
  /* Avoid unused-variable warnings in case a test never calls RUN(). */ \
  USE(offset_before_infrastructure_end); \
  { \
    CPUFeaturesScope cpu(&masm, kInfrastructureCPUFeatures); \
    core.Dump(&masm); \
    __ PopCalleeSavedRegisters(); \
  } \
  __ Ret(); \
  masm.FinalizeCode()

// Execute the generated code from the memory area.
#define RUN() \
  DISASSEMBLE(); \
  masm.GetBuffer()->SetExecutable(); \
  ExecuteMemory(masm.GetBuffer()->GetStartAddress<byte*>(), \
                masm.GetSizeOfCodeGenerated()); \
  masm.GetBuffer()->SetWritable()

// The generated code was written directly into `buffer`; execute it directly.
#define RUN_CUSTOM() \
  DISASSEMBLE(); \
  mprotect(buffer, buffer_size, PROT_READ | PROT_EXEC); \
  ExecuteMemory(buffer, buffer_size); \
  mprotect(buffer, buffer_size, PROT_READ | PROT_WRITE)

#define TEARDOWN()

#define TEARDOWN_CUSTOM()

#endif  // ifdef VIXL_INCLUDE_SIMULATOR_AARCH64.

#define DISASSEMBLE() \
  if (Test::disassemble()) { \
    PrintDisassembler disasm(stdout); \
    CodeBuffer* buffer = masm.GetBuffer(); \
    Instruction* start = buffer->GetOffsetAddress<Instruction*>( \
        offset_after_infrastructure_start); \
    Instruction* end = buffer->GetOffsetAddress<Instruction*>( \
        offset_before_infrastructure_end); \
    \
    if (Test::disassemble_infrastructure()) { \
      Instruction* infra_start = buffer->GetStartAddress<Instruction*>(); \
      printf("# Infrastructure code (prologue)\n"); \
      disasm.DisassembleBuffer(infra_start, start); \
      printf("# Test code\n"); \
    } else { \
      printf( \
          "# Warning: Omitting infrastructure code. " \
          "Use --disassemble to see it.\n"); \
    } \
    \
    disasm.DisassembleBuffer(start, end); \
    \
    if (Test::disassemble_infrastructure()) { \
      printf("# Infrastructure code (epilogue)\n"); \
      Instruction* infra_end = buffer->GetEndAddress<Instruction*>(); \
      disasm.DisassembleBuffer(end, infra_end); \
    } \
  }
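
// For example, running a test binary with --disassemble (the exact invocation
// depends on the local build and test runner) prints the generated code,
// including the infrastructure prologue and epilogue emitted by START() and
// END().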

#define ASSERT_EQUAL_NZCV(expected) \
  VIXL_CHECK(EqualNzcv(expected, core.flags_nzcv()))

#define ASSERT_EQUAL_REGISTERS(expected) \
  VIXL_CHECK(EqualRegisters(&expected, &core))

#define ASSERT_EQUAL_FP16(expected, result) \
  VIXL_CHECK(EqualFP16(expected, &core, result))

#define ASSERT_EQUAL_32(expected, result) \
  VIXL_CHECK(Equal32(static_cast<uint32_t>(expected), &core, result))

#define ASSERT_EQUAL_FP32(expected, result) \
  VIXL_CHECK(EqualFP32(expected, &core, result))

#define ASSERT_EQUAL_64(expected, result) \
  VIXL_CHECK(Equal64(expected, &core, result))

#define ASSERT_NOT_EQUAL_64(expected, result) \
  VIXL_CHECK(!Equal64(expected, &core, result))

#define ASSERT_EQUAL_FP64(expected, result) \
  VIXL_CHECK(EqualFP64(expected, &core, result))

#define ASSERT_EQUAL_128(expected_h, expected_l, result) \
  VIXL_CHECK(Equal128(expected_h, expected_l, &core, result))

#define ASSERT_LITERAL_POOL_SIZE(expected) \
  VIXL_CHECK((expected + kInstructionSize) == (masm.GetLiteralPoolSize()))

#define MUST_FAIL_WITH_MESSAGE(code, message) \
  { \
    bool aborted = false; \
    try { \
      code; \
    } catch (const std::runtime_error& e) { \
      const char* expected_error = message; \
      size_t error_length = strlen(expected_error); \
      VIXL_CHECK(strncmp(expected_error, e.what(), error_length) == 0); \
      aborted = true; \
    } \
    VIXL_CHECK(aborted); \
  }
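
// MUST_FAIL_WITH_MESSAGE relies on negative-testing builds, where VIXL aborts
// are turned into std::runtime_error exceptions instead of terminating the
// process. As an illustrative sketch (the statement and message below are
// hypothetical, not taken from a real test), a negative test checks that an
// invalid request aborts with the expected diagnostic:
//
//   MUST_FAIL_WITH_MESSAGE(__ Mov(x0, Operand(x1, LSL, 64)),
//                          "expected prefix of the abort message");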


TEST(preshift_immediates) {
  SETUP();

  START();
  // Test operations involving immediates that could be generated using a
  // pre-shifted encodable immediate followed by a post-shift applied to
  // the arithmetic or logical operation.

  // Save sp.
  __ Mov(x29, sp);

  // Set the registers to known values.
  __ Mov(x0, 0x1000);
  __ Mov(sp, 0x1004);

  // Arithmetic ops.
  __ Add(x1, x0, 0x1f7de);
  __ Add(w2, w0, 0xffffff1);
  __ Adds(x3, x0, 0x18001);
  __ Adds(w4, w0, 0xffffff1);
  __ Sub(x5, x0, 0x1f7de);
  __ Sub(w6, w0, 0xffffff1);
  __ Subs(x7, x0, 0x18001);
  __ Subs(w8, w0, 0xffffff1);

  // Logical ops.
  __ And(x9, x0, 0x1f7de);
  __ Orr(w10, w0, 0xffffff1);
  __ Eor(x11, x0, 0x18001);

  // Ops using the stack pointer.
  __ Add(sp, sp, 0x18001);
  __ Mov(x12, sp);
  __ Mov(sp, 0x1004);

  __ Add(sp, sp, 0x1f7de);
  __ Mov(x13, sp);
  __ Mov(sp, 0x1004);

  __ Adds(x14, sp, 0x1f7de);

  __ Orr(sp, x0, 0x1f7de);
  __ Mov(x15, sp);

  // Restore sp.
  __ Mov(sp, x29);
  END();

  RUN();

  ASSERT_EQUAL_64(0x1000, x0);
  ASSERT_EQUAL_64(0x207de, x1);
  ASSERT_EQUAL_64(0x10000ff1, x2);
  ASSERT_EQUAL_64(0x19001, x3);
  ASSERT_EQUAL_64(0x10000ff1, x4);
  ASSERT_EQUAL_64(0xfffffffffffe1822, x5);
  ASSERT_EQUAL_64(0xf000100f, x6);
  ASSERT_EQUAL_64(0xfffffffffffe8fff, x7);
  ASSERT_EQUAL_64(0xf000100f, x8);
  ASSERT_EQUAL_64(0x1000, x9);
  ASSERT_EQUAL_64(0xffffff1, x10);
  ASSERT_EQUAL_64(0x19001, x11);
  ASSERT_EQUAL_64(0x19005, x12);
  ASSERT_EQUAL_64(0x207e2, x13);
  ASSERT_EQUAL_64(0x207e2, x14);
  ASSERT_EQUAL_64(0x1f7de, x15);

  TEARDOWN();
}


TEST(stack_ops) {
  SETUP();

  START();
  // Save sp.
  __ Mov(x29, sp);

  // Set the sp to a known value.
  __ Mov(sp, 0x1004);
  __ Mov(x0, sp);

  // Add immediate to the sp, and move the result to a normal register.
  __ Add(sp, sp, 0x50);
  __ Mov(x1, sp);

  // Add extended to the sp, and move the result to a normal register.
  __ Mov(x17, 0xfff);
  __ Add(sp, sp, Operand(x17, SXTB));
  __ Mov(x2, sp);

  // Create an sp using a logical instruction, and move to normal register.
  __ Orr(sp, xzr, 0x1fff);
  __ Mov(x3, sp);

  // Write wsp using a logical instruction.
  __ Orr(wsp, wzr, 0xfffffff8);
  __ Mov(x4, sp);

  // Write sp, and read back wsp.
  __ Orr(sp, xzr, 0xfffffff8);
  __ Mov(w5, wsp);

  // Restore sp.
  __ Mov(sp, x29);
  END();

  RUN();

  ASSERT_EQUAL_64(0x1004, x0);
  ASSERT_EQUAL_64(0x1054, x1);
  ASSERT_EQUAL_64(0x1053, x2);
  ASSERT_EQUAL_64(0x1fff, x3);
  ASSERT_EQUAL_64(0xfffffff8, x4);
  ASSERT_EQUAL_64(0xfffffff8, x5);

  TEARDOWN();
}


TEST(mvn) {
  SETUP();

  START();
  __ Mvn(w0, 0xfff);
  __ Mvn(x1, 0xfff);
  __ Mvn(w2, Operand(w0, LSL, 1));
  __ Mvn(x3, Operand(x1, LSL, 2));
  __ Mvn(w4, Operand(w0, LSR, 3));
  __ Mvn(x5, Operand(x1, LSR, 4));
  __ Mvn(w6, Operand(w0, ASR, 11));
  __ Mvn(x7, Operand(x1, ASR, 12));
  __ Mvn(w8, Operand(w0, ROR, 13));
  __ Mvn(x9, Operand(x1, ROR, 14));
  __ Mvn(w10, Operand(w2, UXTB));
  __ Mvn(x11, Operand(x2, SXTB, 1));
  __ Mvn(w12, Operand(w2, UXTH, 2));
  __ Mvn(x13, Operand(x2, SXTH, 3));
  __ Mvn(x14, Operand(w2, UXTW, 4));
  __ Mvn(x15, Operand(w2, SXTW, 4));
  END();

  RUN();

  ASSERT_EQUAL_64(0xfffff000, x0);
  ASSERT_EQUAL_64(0xfffffffffffff000, x1);
  ASSERT_EQUAL_64(0x00001fff, x2);
  ASSERT_EQUAL_64(0x0000000000003fff, x3);
  ASSERT_EQUAL_64(0xe00001ff, x4);
  ASSERT_EQUAL_64(0xf0000000000000ff, x5);
  ASSERT_EQUAL_64(0x00000001, x6);
  ASSERT_EQUAL_64(0x0000000000000000, x7);
  ASSERT_EQUAL_64(0x7ff80000, x8);
  ASSERT_EQUAL_64(0x3ffc000000000000, x9);
  ASSERT_EQUAL_64(0xffffff00, x10);
  ASSERT_EQUAL_64(0x0000000000000001, x11);
  ASSERT_EQUAL_64(0xffff8003, x12);
  ASSERT_EQUAL_64(0xffffffffffff0007, x13);
  ASSERT_EQUAL_64(0xfffffffffffe000f, x14);
  ASSERT_EQUAL_64(0xfffffffffffe000f, x15);

  TEARDOWN();
}


TEST(mov_imm_w) {
  SETUP();

  START();
  __ Mov(w0, 0xffffffff);
  __ Mov(w1, 0xffff1234);
  __ Mov(w2, 0x1234ffff);
  __ Mov(w3, 0x00000000);
  __ Mov(w4, 0x00001234);
  __ Mov(w5, 0x12340000);
  __ Mov(w6, 0x12345678);
  __ Mov(w7, (int32_t)0x80000000);
  __ Mov(w8, (int32_t)0xffff0000);
  __ Mov(w9, kWMinInt);
  END();

  RUN();

  ASSERT_EQUAL_64(0xffffffff, x0);
  ASSERT_EQUAL_64(0xffff1234, x1);
  ASSERT_EQUAL_64(0x1234ffff, x2);
  ASSERT_EQUAL_64(0x00000000, x3);
  ASSERT_EQUAL_64(0x00001234, x4);
  ASSERT_EQUAL_64(0x12340000, x5);
  ASSERT_EQUAL_64(0x12345678, x6);
  ASSERT_EQUAL_64(0x80000000, x7);
  ASSERT_EQUAL_64(0xffff0000, x8);
  ASSERT_EQUAL_32(kWMinInt, w9);

  TEARDOWN();
}


TEST(mov_imm_x) {
  SETUP();

  START();
  __ Mov(x0, 0xffffffffffffffff);
  __ Mov(x1, 0xffffffffffff1234);
  __ Mov(x2, 0xffffffff12345678);
  __ Mov(x3, 0xffff1234ffff5678);
  __ Mov(x4, 0x1234ffffffff5678);
  __ Mov(x5, 0x1234ffff5678ffff);
  __ Mov(x6, 0x12345678ffffffff);
  __ Mov(x7, 0x1234ffffffffffff);
  __ Mov(x8, 0x123456789abcffff);
  __ Mov(x9, 0x12345678ffff9abc);
  __ Mov(x10, 0x1234ffff56789abc);
  __ Mov(x11, 0xffff123456789abc);
  __ Mov(x12, 0x0000000000000000);
  __ Mov(x13, 0x0000000000001234);
  __ Mov(x14, 0x0000000012345678);
  __ Mov(x15, 0x0000123400005678);
  __ Mov(x18, 0x1234000000005678);
  __ Mov(x19, 0x1234000056780000);
  __ Mov(x20, 0x1234567800000000);
  __ Mov(x21, 0x1234000000000000);
  __ Mov(x22, 0x123456789abc0000);
  __ Mov(x23, 0x1234567800009abc);
  __ Mov(x24, 0x1234000056789abc);
  __ Mov(x25, 0x0000123456789abc);
  __ Mov(x26, 0x123456789abcdef0);
  __ Mov(x27, 0xffff000000000001);
  __ Mov(x28, 0x8000ffff00000000);
  END();

  RUN();

  ASSERT_EQUAL_64(0xffffffffffff1234, x1);
  ASSERT_EQUAL_64(0xffffffff12345678, x2);
  ASSERT_EQUAL_64(0xffff1234ffff5678, x3);
  ASSERT_EQUAL_64(0x1234ffffffff5678, x4);
  ASSERT_EQUAL_64(0x1234ffff5678ffff, x5);
  ASSERT_EQUAL_64(0x12345678ffffffff, x6);
  ASSERT_EQUAL_64(0x1234ffffffffffff, x7);
  ASSERT_EQUAL_64(0x123456789abcffff, x8);
  ASSERT_EQUAL_64(0x12345678ffff9abc, x9);
  ASSERT_EQUAL_64(0x1234ffff56789abc, x10);
  ASSERT_EQUAL_64(0xffff123456789abc, x11);
  ASSERT_EQUAL_64(0x0000000000000000, x12);
  ASSERT_EQUAL_64(0x0000000000001234, x13);
  ASSERT_EQUAL_64(0x0000000012345678, x14);
  ASSERT_EQUAL_64(0x0000123400005678, x15);
  ASSERT_EQUAL_64(0x1234000000005678, x18);
  ASSERT_EQUAL_64(0x1234000056780000, x19);
  ASSERT_EQUAL_64(0x1234567800000000, x20);
  ASSERT_EQUAL_64(0x1234000000000000, x21);
  ASSERT_EQUAL_64(0x123456789abc0000, x22);
  ASSERT_EQUAL_64(0x1234567800009abc, x23);
  ASSERT_EQUAL_64(0x1234000056789abc, x24);
  ASSERT_EQUAL_64(0x0000123456789abc, x25);
  ASSERT_EQUAL_64(0x123456789abcdef0, x26);
  ASSERT_EQUAL_64(0xffff000000000001, x27);
  ASSERT_EQUAL_64(0x8000ffff00000000, x28);

  TEARDOWN();
}


TEST(mov) {
  SETUP();

  START();
  __ Mov(x0, 0xffffffffffffffff);
  __ Mov(x1, 0xffffffffffffffff);
  __ Mov(x2, 0xffffffffffffffff);
  __ Mov(x3, 0xffffffffffffffff);

  __ Mov(x0, 0x0123456789abcdef);

  {
    ExactAssemblyScope scope(&masm, 3 * kInstructionSize);
    __ movz(x1, UINT64_C(0xabcd) << 16);
    __ movk(x2, UINT64_C(0xabcd) << 32);
    __ movn(x3, UINT64_C(0xabcd) << 48);
  }

  __ Mov(x4, 0x0123456789abcdef);
  __ Mov(x5, x4);

  __ Mov(w6, -1);

  // Test that moves back to the same register have the desired effect. This
  // is a no-op for X registers, and a truncation for W registers.
  __ Mov(x7, 0x0123456789abcdef);
  __ Mov(x7, x7);
  __ Mov(x8, 0x0123456789abcdef);
  __ Mov(w8, w8);
  __ Mov(x9, 0x0123456789abcdef);
  __ Mov(x9, Operand(x9));
  __ Mov(x10, 0x0123456789abcdef);
  __ Mov(w10, Operand(w10));

  __ Mov(w11, 0xfff);
  __ Mov(x12, 0xfff);
  __ Mov(w13, Operand(w11, LSL, 1));
  __ Mov(x14, Operand(x12, LSL, 2));
  __ Mov(w15, Operand(w11, LSR, 3));
  __ Mov(x18, Operand(x12, LSR, 4));
  __ Mov(w19, Operand(w11, ASR, 11));
  __ Mov(x20, Operand(x12, ASR, 12));
  __ Mov(w21, Operand(w11, ROR, 13));
  __ Mov(x22, Operand(x12, ROR, 14));
  __ Mov(w23, Operand(w13, UXTB));
  __ Mov(x24, Operand(x13, SXTB, 1));
  __ Mov(w25, Operand(w13, UXTH, 2));
  __ Mov(x26, Operand(x13, SXTH, 3));
  __ Mov(x27, Operand(w13, UXTW, 4));

  __ Mov(x28, 0x0123456789abcdef);
  __ Mov(w28, w28, kDiscardForSameWReg);
  END();

  RUN();

  ASSERT_EQUAL_64(0x0123456789abcdef, x0);
  ASSERT_EQUAL_64(0x00000000abcd0000, x1);
  ASSERT_EQUAL_64(0xffffabcdffffffff, x2);
  ASSERT_EQUAL_64(0x5432ffffffffffff, x3);
  ASSERT_EQUAL_64(x4, x5);
  ASSERT_EQUAL_32(-1, w6);
  ASSERT_EQUAL_64(0x0123456789abcdef, x7);
  ASSERT_EQUAL_32(0x89abcdef, w8);
  ASSERT_EQUAL_64(0x0123456789abcdef, x9);
  ASSERT_EQUAL_32(0x89abcdef, w10);
  ASSERT_EQUAL_64(0x00000fff, x11);
  ASSERT_EQUAL_64(0x0000000000000fff, x12);
  ASSERT_EQUAL_64(0x00001ffe, x13);
  ASSERT_EQUAL_64(0x0000000000003ffc, x14);
  ASSERT_EQUAL_64(0x000001ff, x15);
  ASSERT_EQUAL_64(0x00000000000000ff, x18);
  ASSERT_EQUAL_64(0x00000001, x19);
  ASSERT_EQUAL_64(0x0000000000000000, x20);
  ASSERT_EQUAL_64(0x7ff80000, x21);
  ASSERT_EQUAL_64(0x3ffc000000000000, x22);
  ASSERT_EQUAL_64(0x000000fe, x23);
  ASSERT_EQUAL_64(0xfffffffffffffffc, x24);
  ASSERT_EQUAL_64(0x00007ff8, x25);
  ASSERT_EQUAL_64(0x000000000000fff0, x26);
  ASSERT_EQUAL_64(0x000000000001ffe0, x27);
  ASSERT_EQUAL_64(0x0123456789abcdef, x28);

  TEARDOWN();
}


TEST(mov_negative) {
  SETUP();

  START();
  __ Mov(w11, 0xffffffff);
  __ Mov(x12, 0xffffffffffffffff);

  __ Mov(w13, Operand(w11, LSL, 1));
  __ Mov(w14, Operand(w11, LSR, 1));
  __ Mov(w15, Operand(w11, ASR, 1));
  __ Mov(w18, Operand(w11, ROR, 1));
  __ Mov(w19, Operand(w11, UXTB, 1));
  __ Mov(w20, Operand(w11, SXTB, 1));
  __ Mov(w21, Operand(w11, UXTH, 1));
  __ Mov(w22, Operand(w11, SXTH, 1));

  __ Mov(x23, Operand(x12, LSL, 1));
  __ Mov(x24, Operand(x12, LSR, 1));
  __ Mov(x25, Operand(x12, ASR, 1));
  __ Mov(x26, Operand(x12, ROR, 1));
  __ Mov(x27, Operand(x12, UXTH, 1));
  __ Mov(x28, Operand(x12, SXTH, 1));
  __ Mov(x29, Operand(x12, UXTW, 1));
  __ Mov(x30, Operand(x12, SXTW, 1));
  END();

  RUN();

  ASSERT_EQUAL_64(0xfffffffe, x13);
  ASSERT_EQUAL_64(0x7fffffff, x14);
  ASSERT_EQUAL_64(0xffffffff, x15);
  ASSERT_EQUAL_64(0xffffffff, x18);
  ASSERT_EQUAL_64(0x000001fe, x19);
  ASSERT_EQUAL_64(0xfffffffe, x20);
  ASSERT_EQUAL_64(0x0001fffe, x21);
  ASSERT_EQUAL_64(0xfffffffe, x22);

  ASSERT_EQUAL_64(0xfffffffffffffffe, x23);
  ASSERT_EQUAL_64(0x7fffffffffffffff, x24);
  ASSERT_EQUAL_64(0xffffffffffffffff, x25);
  ASSERT_EQUAL_64(0xffffffffffffffff, x26);
  ASSERT_EQUAL_64(0x000000000001fffe, x27);
  ASSERT_EQUAL_64(0xfffffffffffffffe, x28);
  ASSERT_EQUAL_64(0x00000001fffffffe, x29);
  ASSERT_EQUAL_64(0xfffffffffffffffe, x30);

  TEARDOWN();
}


TEST(orr) {
  SETUP();

  START();
  __ Mov(x0, 0xf0f0);
  __ Mov(x1, 0xf00000ff);

  __ Orr(x2, x0, Operand(x1));
  __ Orr(w3, w0, Operand(w1, LSL, 28));
  __ Orr(x4, x0, Operand(x1, LSL, 32));
  __ Orr(x5, x0, Operand(x1, LSR, 4));
  __ Orr(w6, w0, Operand(w1, ASR, 4));
  __ Orr(x7, x0, Operand(x1, ASR, 4));
  __ Orr(w8, w0, Operand(w1, ROR, 12));
  __ Orr(x9, x0, Operand(x1, ROR, 12));
  __ Orr(w10, w0, 0xf);
  __ Orr(x11, x0, 0xf0000000f0000000);
  END();

  RUN();

  ASSERT_EQUAL_64(0x00000000f000f0ff, x2);
  ASSERT_EQUAL_64(0xf000f0f0, x3);
  ASSERT_EQUAL_64(0xf00000ff0000f0f0, x4);
  ASSERT_EQUAL_64(0x000000000f00f0ff, x5);
  ASSERT_EQUAL_64(0xff00f0ff, x6);
  ASSERT_EQUAL_64(0x000000000f00f0ff, x7);
  ASSERT_EQUAL_64(0x0ffff0f0, x8);
  ASSERT_EQUAL_64(0x0ff00000000ff0f0, x9);
  ASSERT_EQUAL_64(0x0000f0ff, x10);
  ASSERT_EQUAL_64(0xf0000000f000f0f0, x11);

  TEARDOWN();
}


TEST(orr_extend) {
  SETUP();

  START();
  __ Mov(x0, 1);
  __ Mov(x1, 0x8000000080008080);
  __ Orr(w6, w0, Operand(w1, UXTB));
  __ Orr(x7, x0, Operand(x1, UXTH, 1));
  __ Orr(w8, w0, Operand(w1, UXTW, 2));
  __ Orr(x9, x0, Operand(x1, UXTX, 3));
  __ Orr(w10, w0, Operand(w1, SXTB));
  __ Orr(x11, x0, Operand(x1, SXTH, 1));
  __ Orr(x12, x0, Operand(x1, SXTW, 2));
  __ Orr(x13, x0, Operand(x1, SXTX, 3));
  END();

  RUN();

  ASSERT_EQUAL_64(0x00000081, x6);
  ASSERT_EQUAL_64(0x0000000000010101, x7);
  ASSERT_EQUAL_64(0x00020201, x8);
  ASSERT_EQUAL_64(0x0000000400040401, x9);
  ASSERT_EQUAL_64(0xffffff81, x10);
  ASSERT_EQUAL_64(0xffffffffffff0101, x11);
  ASSERT_EQUAL_64(0xfffffffe00020201, x12);
  ASSERT_EQUAL_64(0x0000000400040401, x13);

  TEARDOWN();
}


TEST(bitwise_wide_imm) {
  SETUP();

  START();
  __ Mov(x0, 0);
  __ Mov(x1, 0xf0f0f0f0f0f0f0f0);

  __ Orr(x10, x0, 0x1234567890abcdef);
  __ Orr(w11, w1, 0x90abcdef);

  __ Orr(w12, w0, kWMinInt);
  __ Eor(w13, w0, kWMinInt);
  END();

  RUN();

  ASSERT_EQUAL_64(0, x0);
  ASSERT_EQUAL_64(0xf0f0f0f0f0f0f0f0, x1);
  ASSERT_EQUAL_64(0x1234567890abcdef, x10);
  ASSERT_EQUAL_64(0x00000000f0fbfdff, x11);
  ASSERT_EQUAL_32(kWMinInt, w12);
  ASSERT_EQUAL_32(kWMinInt, w13);

  TEARDOWN();
}


TEST(orn) {
  SETUP();

  START();
  __ Mov(x0, 0xf0f0);
  __ Mov(x1, 0xf00000ff);

  __ Orn(x2, x0, Operand(x1));
  __ Orn(w3, w0, Operand(w1, LSL, 4));
  __ Orn(x4, x0, Operand(x1, LSL, 4));
  __ Orn(x5, x0, Operand(x1, LSR, 1));
  __ Orn(w6, w0, Operand(w1, ASR, 1));
  __ Orn(x7, x0, Operand(x1, ASR, 1));
  __ Orn(w8, w0, Operand(w1, ROR, 16));
  __ Orn(x9, x0, Operand(x1, ROR, 16));
  __ Orn(w10, w0, 0x0000ffff);
  __ Orn(x11, x0, 0x0000ffff0000ffff);
  END();

  RUN();

  ASSERT_EQUAL_64(0xffffffff0ffffff0, x2);
  ASSERT_EQUAL_64(0xfffff0ff, x3);
  ASSERT_EQUAL_64(0xfffffff0fffff0ff, x4);
  ASSERT_EQUAL_64(0xffffffff87fffff0, x5);
  ASSERT_EQUAL_64(0x07fffff0, x6);
  ASSERT_EQUAL_64(0xffffffff87fffff0, x7);
  ASSERT_EQUAL_64(0xff00ffff, x8);
  ASSERT_EQUAL_64(0xff00ffffffffffff, x9);
  ASSERT_EQUAL_64(0xfffff0f0, x10);
  ASSERT_EQUAL_64(0xffff0000fffff0f0, x11);

  TEARDOWN();
}


TEST(orn_extend) {
  SETUP();

  START();
  __ Mov(x0, 1);
  __ Mov(x1, 0x8000000080008081);
  __ Orn(w6, w0, Operand(w1, UXTB));
  __ Orn(x7, x0, Operand(x1, UXTH, 1));
  __ Orn(w8, w0, Operand(w1, UXTW, 2));
  __ Orn(x9, x0, Operand(x1, UXTX, 3));
  __ Orn(w10, w0, Operand(w1, SXTB));
  __ Orn(x11, x0, Operand(x1, SXTH, 1));
  __ Orn(x12, x0, Operand(x1, SXTW, 2));
  __ Orn(x13, x0, Operand(x1, SXTX, 3));
  END();

  RUN();

  ASSERT_EQUAL_64(0xffffff7f, x6);
  ASSERT_EQUAL_64(0xfffffffffffefefd, x7);
  ASSERT_EQUAL_64(0xfffdfdfb, x8);
  ASSERT_EQUAL_64(0xfffffffbfffbfbf7, x9);
  ASSERT_EQUAL_64(0x0000007f, x10);
  ASSERT_EQUAL_64(0x000000000000fefd, x11);
  ASSERT_EQUAL_64(0x00000001fffdfdfb, x12);
  ASSERT_EQUAL_64(0xfffffffbfffbfbf7, x13);

  TEARDOWN();
}


TEST(and_) {
  SETUP();

  START();
  __ Mov(x0, 0xfff0);
  __ Mov(x1, 0xf00000ff);

  __ And(x2, x0, Operand(x1));
  __ And(w3, w0, Operand(w1, LSL, 4));
  __ And(x4, x0, Operand(x1, LSL, 4));
  __ And(x5, x0, Operand(x1, LSR, 1));
  __ And(w6, w0, Operand(w1, ASR, 20));
  __ And(x7, x0, Operand(x1, ASR, 20));
  __ And(w8, w0, Operand(w1, ROR, 28));
  __ And(x9, x0, Operand(x1, ROR, 28));
  __ And(w10, w0, Operand(0xff00));
  __ And(x11, x0, Operand(0xff));
  END();

  RUN();

  ASSERT_EQUAL_64(0x000000f0, x2);
  ASSERT_EQUAL_64(0x00000ff0, x3);
  ASSERT_EQUAL_64(0x00000ff0, x4);
  ASSERT_EQUAL_64(0x00000070, x5);
  ASSERT_EQUAL_64(0x0000ff00, x6);
  ASSERT_EQUAL_64(0x00000f00, x7);
  ASSERT_EQUAL_64(0x00000ff0, x8);
  ASSERT_EQUAL_64(0x00000000, x9);
  ASSERT_EQUAL_64(0x0000ff00, x10);
  ASSERT_EQUAL_64(0x000000f0, x11);

  TEARDOWN();
}


TEST(and_extend) {
  SETUP();

  START();
  __ Mov(x0, 0xffffffffffffffff);
  __ Mov(x1, 0x8000000080008081);
  __ And(w6, w0, Operand(w1, UXTB));
  __ And(x7, x0, Operand(x1, UXTH, 1));
  __ And(w8, w0, Operand(w1, UXTW, 2));
  __ And(x9, x0, Operand(x1, UXTX, 3));
  __ And(w10, w0, Operand(w1, SXTB));
  __ And(x11, x0, Operand(x1, SXTH, 1));
  __ And(x12, x0, Operand(x1, SXTW, 2));
  __ And(x13, x0, Operand(x1, SXTX, 3));
  END();

  RUN();

  ASSERT_EQUAL_64(0x00000081, x6);
  ASSERT_EQUAL_64(0x0000000000010102, x7);
  ASSERT_EQUAL_64(0x00020204, x8);
  ASSERT_EQUAL_64(0x0000000400040408, x9);
  ASSERT_EQUAL_64(0xffffff81, x10);
  ASSERT_EQUAL_64(0xffffffffffff0102, x11);
  ASSERT_EQUAL_64(0xfffffffe00020204, x12);
  ASSERT_EQUAL_64(0x0000000400040408, x13);

  TEARDOWN();
}


TEST(ands) {
  SETUP();

  START();
  __ Mov(x1, 0xf00000ff);
  __ Ands(w0, w1, Operand(w1));
  END();

  RUN();

  ASSERT_EQUAL_NZCV(NFlag);
  ASSERT_EQUAL_64(0xf00000ff, x0);

  START();
  __ Mov(x0, 0xfff0);
  __ Mov(x1, 0xf00000ff);
  __ Ands(w0, w0, Operand(w1, LSR, 4));
  END();

  RUN();

  ASSERT_EQUAL_NZCV(ZFlag);
  ASSERT_EQUAL_64(0x00000000, x0);

  START();
  __ Mov(x0, 0x8000000000000000);
  __ Mov(x1, 0x00000001);
  __ Ands(x0, x0, Operand(x1, ROR, 1));
  END();

  RUN();

  ASSERT_EQUAL_NZCV(NFlag);
  ASSERT_EQUAL_64(0x8000000000000000, x0);

  START();
  __ Mov(x0, 0xfff0);
  __ Ands(w0, w0, Operand(0xf));
  END();

  RUN();

  ASSERT_EQUAL_NZCV(ZFlag);
  ASSERT_EQUAL_64(0x00000000, x0);

  START();
  __ Mov(x0, 0xff000000);
  __ Ands(w0, w0, Operand(0x80000000));
  END();

  RUN();

  ASSERT_EQUAL_NZCV(NFlag);
  ASSERT_EQUAL_64(0x80000000, x0);

  TEARDOWN();
}


TEST(bic) {
  SETUP();

  START();
  __ Mov(x0, 0xfff0);
  __ Mov(x1, 0xf00000ff);

  __ Bic(x2, x0, Operand(x1));
  __ Bic(w3, w0, Operand(w1, LSL, 4));
  __ Bic(x4, x0, Operand(x1, LSL, 4));
  __ Bic(x5, x0, Operand(x1, LSR, 1));
  __ Bic(w6, w0, Operand(w1, ASR, 20));
  __ Bic(x7, x0, Operand(x1, ASR, 20));
  __ Bic(w8, w0, Operand(w1, ROR, 28));
  __ Bic(x9, x0, Operand(x1, ROR, 24));
  __ Bic(x10, x0, Operand(0x1f));
  __ Bic(x11, x0, Operand(0x100));

  // Test bic into sp when the constant cannot be encoded in the immediate
  // field.
  // Use x20 to preserve sp. We check the result via x21 because the test
  // infrastructure requires that sp be restored to its original value.
  __ Mov(x20, sp);
  __ Mov(x0, 0xffffff);
  __ Bic(sp, x0, Operand(0xabcdef));
  __ Mov(x21, sp);
  __ Mov(sp, x20);
  END();

  RUN();

  ASSERT_EQUAL_64(0x0000ff00, x2);
  ASSERT_EQUAL_64(0x0000f000, x3);
  ASSERT_EQUAL_64(0x0000f000, x4);
  ASSERT_EQUAL_64(0x0000ff80, x5);
  ASSERT_EQUAL_64(0x000000f0, x6);
  ASSERT_EQUAL_64(0x0000f0f0, x7);
  ASSERT_EQUAL_64(0x0000f000, x8);
  ASSERT_EQUAL_64(0x0000ff00, x9);
  ASSERT_EQUAL_64(0x0000ffe0, x10);
  ASSERT_EQUAL_64(0x0000fef0, x11);

  ASSERT_EQUAL_64(0x543210, x21);

  TEARDOWN();
}


TEST(bic_extend) {
  SETUP();

  START();
  __ Mov(x0, 0xffffffffffffffff);
  __ Mov(x1, 0x8000000080008081);
  __ Bic(w6, w0, Operand(w1, UXTB));
  __ Bic(x7, x0, Operand(x1, UXTH, 1));
  __ Bic(w8, w0, Operand(w1, UXTW, 2));
  __ Bic(x9, x0, Operand(x1, UXTX, 3));
  __ Bic(w10, w0, Operand(w1, SXTB));
  __ Bic(x11, x0, Operand(x1, SXTH, 1));
  __ Bic(x12, x0, Operand(x1, SXTW, 2));
  __ Bic(x13, x0, Operand(x1, SXTX, 3));
  END();

  RUN();

  ASSERT_EQUAL_64(0xffffff7e, x6);
  ASSERT_EQUAL_64(0xfffffffffffefefd, x7);
  ASSERT_EQUAL_64(0xfffdfdfb, x8);
  ASSERT_EQUAL_64(0xfffffffbfffbfbf7, x9);
  ASSERT_EQUAL_64(0x0000007e, x10);
  ASSERT_EQUAL_64(0x000000000000fefd, x11);
  ASSERT_EQUAL_64(0x00000001fffdfdfb, x12);
  ASSERT_EQUAL_64(0xfffffffbfffbfbf7, x13);

  TEARDOWN();
}


TEST(bics) {
  SETUP();

  START();
  __ Mov(x1, 0xffff);
  __ Bics(w0, w1, Operand(w1));
  END();

  RUN();

  ASSERT_EQUAL_NZCV(ZFlag);
  ASSERT_EQUAL_64(0x00000000, x0);

  START();
  __ Mov(x0, 0xffffffff);
  __ Bics(w0, w0, Operand(w0, LSR, 1));
  END();

  RUN();

  ASSERT_EQUAL_NZCV(NFlag);
  ASSERT_EQUAL_64(0x80000000, x0);

  START();
  __ Mov(x0, 0x8000000000000000);
  __ Mov(x1, 0x00000001);
  __ Bics(x0, x0, Operand(x1, ROR, 1));
  END();

  RUN();

  ASSERT_EQUAL_NZCV(ZFlag);
  ASSERT_EQUAL_64(0x00000000, x0);

  START();
  __ Mov(x0, 0xffffffffffffffff);
  __ Bics(x0, x0, 0x7fffffffffffffff);
  END();

  RUN();

  ASSERT_EQUAL_NZCV(NFlag);
  ASSERT_EQUAL_64(0x8000000000000000, x0);

  START();
  __ Mov(w0, 0xffff0000);
  __ Bics(w0, w0, 0xfffffff0);
  END();

  RUN();

  ASSERT_EQUAL_NZCV(ZFlag);
  ASSERT_EQUAL_64(0x00000000, x0);

  TEARDOWN();
}


TEST(eor) {
  SETUP();

  START();
  __ Mov(x0, 0xfff0);
  __ Mov(x1, 0xf00000ff);

  __ Eor(x2, x0, Operand(x1));
  __ Eor(w3, w0, Operand(w1, LSL, 4));
  __ Eor(x4, x0, Operand(x1, LSL, 4));
  __ Eor(x5, x0, Operand(x1, LSR, 1));
  __ Eor(w6, w0, Operand(w1, ASR, 20));
  __ Eor(x7, x0, Operand(x1, ASR, 20));
  __ Eor(w8, w0, Operand(w1, ROR, 28));
  __ Eor(x9, x0, Operand(x1, ROR, 28));
  __ Eor(w10, w0, 0xff00ff00);
  __ Eor(x11, x0, 0xff00ff00ff00ff00);
  END();

  RUN();

  ASSERT_EQUAL_64(0x00000000f000ff0f, x2);
  ASSERT_EQUAL_64(0x0000f000, x3);
  ASSERT_EQUAL_64(0x0000000f0000f000, x4);
  ASSERT_EQUAL_64(0x000000007800ff8f, x5);
  ASSERT_EQUAL_64(0xffff00f0, x6);
  ASSERT_EQUAL_64(0x000000000000f0f0, x7);
  ASSERT_EQUAL_64(0x0000f00f, x8);
  ASSERT_EQUAL_64(0x00000ff00000ffff, x9);
  ASSERT_EQUAL_64(0xff0000f0, x10);
  ASSERT_EQUAL_64(0xff00ff00ff0000f0, x11);

  TEARDOWN();
}

TEST(eor_extend) {
  SETUP();

  START();
  __ Mov(x0, 0x1111111111111111);
  __ Mov(x1, 0x8000000080008081);
  __ Eor(w6, w0, Operand(w1, UXTB));
  __ Eor(x7, x0, Operand(x1, UXTH, 1));
  __ Eor(w8, w0, Operand(w1, UXTW, 2));
  __ Eor(x9, x0, Operand(x1, UXTX, 3));
  __ Eor(w10, w0, Operand(w1, SXTB));
  __ Eor(x11, x0, Operand(x1, SXTH, 1));
  __ Eor(x12, x0, Operand(x1, SXTW, 2));
  __ Eor(x13, x0, Operand(x1, SXTX, 3));
  END();

  RUN();

  ASSERT_EQUAL_64(0x11111190, x6);
  ASSERT_EQUAL_64(0x1111111111101013, x7);
  ASSERT_EQUAL_64(0x11131315, x8);
  ASSERT_EQUAL_64(0x1111111511151519, x9);
  ASSERT_EQUAL_64(0xeeeeee90, x10);
  ASSERT_EQUAL_64(0xeeeeeeeeeeee1013, x11);
  ASSERT_EQUAL_64(0xeeeeeeef11131315, x12);
  ASSERT_EQUAL_64(0x1111111511151519, x13);

  TEARDOWN();
}


TEST(eon) {
  SETUP();

  START();
  __ Mov(x0, 0xfff0);
  __ Mov(x1, 0xf00000ff);

  __ Eon(x2, x0, Operand(x1));
  __ Eon(w3, w0, Operand(w1, LSL, 4));
  __ Eon(x4, x0, Operand(x1, LSL, 4));
  __ Eon(x5, x0, Operand(x1, LSR, 1));
  __ Eon(w6, w0, Operand(w1, ASR, 20));
  __ Eon(x7, x0, Operand(x1, ASR, 20));
  __ Eon(w8, w0, Operand(w1, ROR, 28));
  __ Eon(x9, x0, Operand(x1, ROR, 28));
  __ Eon(w10, w0, 0x03c003c0);
  __ Eon(x11, x0, 0x0000100000001000);
  END();

  RUN();

  ASSERT_EQUAL_64(0xffffffff0fff00f0, x2);
  ASSERT_EQUAL_64(0xffff0fff, x3);
  ASSERT_EQUAL_64(0xfffffff0ffff0fff, x4);
  ASSERT_EQUAL_64(0xffffffff87ff0070, x5);
  ASSERT_EQUAL_64(0x0000ff0f, x6);
  ASSERT_EQUAL_64(0xffffffffffff0f0f, x7);
  ASSERT_EQUAL_64(0xffff0ff0, x8);
  ASSERT_EQUAL_64(0xfffff00fffff0000, x9);
  ASSERT_EQUAL_64(0xfc3f03cf, x10);
  ASSERT_EQUAL_64(0xffffefffffff100f, x11);

  TEARDOWN();
}


TEST(eon_extend) {
  SETUP();

  START();
  __ Mov(x0, 0x1111111111111111);
  __ Mov(x1, 0x8000000080008081);
  __ Eon(w6, w0, Operand(w1, UXTB));
  __ Eon(x7, x0, Operand(x1, UXTH, 1));
  __ Eon(w8, w0, Operand(w1, UXTW, 2));
  __ Eon(x9, x0, Operand(x1, UXTX, 3));
  __ Eon(w10, w0, Operand(w1, SXTB));
  __ Eon(x11, x0, Operand(x1, SXTH, 1));
  __ Eon(x12, x0, Operand(x1, SXTW, 2));
  __ Eon(x13, x0, Operand(x1, SXTX, 3));
  END();

  RUN();

  ASSERT_EQUAL_64(0xeeeeee6f, x6);
  ASSERT_EQUAL_64(0xeeeeeeeeeeefefec, x7);
  ASSERT_EQUAL_64(0xeeececea, x8);
  ASSERT_EQUAL_64(0xeeeeeeeaeeeaeae6, x9);
  ASSERT_EQUAL_64(0x1111116f, x10);
  ASSERT_EQUAL_64(0x111111111111efec, x11);
  ASSERT_EQUAL_64(0x11111110eeececea, x12);
  ASSERT_EQUAL_64(0xeeeeeeeaeeeaeae6, x13);

  TEARDOWN();
}


TEST(mul) {
  SETUP();

  START();
  __ Mov(x25, 0);
  __ Mov(x26, 1);
  __ Mov(x18, 0xffffffff);
  __ Mov(x19, 0xffffffffffffffff);

  __ Mul(w0, w25, w25);
  __ Mul(w1, w25, w26);
  __ Mul(w2, w26, w18);
  __ Mul(w3, w18, w19);
  __ Mul(x4, x25, x25);
  __ Mul(x5, x26, x18);
  __ Mul(x6, x18, x19);
  __ Mul(x7, x19, x19);
  __ Smull(x8, w26, w18);
  __ Smull(x9, w18, w18);
  __ Smull(x10, w19, w19);
  __ Mneg(w11, w25, w25);
  __ Mneg(w12, w25, w26);
  __ Mneg(w13, w26, w18);
  __ Mneg(w14, w18, w19);
  __ Mneg(x20, x25, x25);
  __ Mneg(x21, x26, x18);
  __ Mneg(x22, x18, x19);
  __ Mneg(x23, x19, x19);
  END();

  RUN();

  ASSERT_EQUAL_64(0, x0);
  ASSERT_EQUAL_64(0, x1);
  ASSERT_EQUAL_64(0xffffffff, x2);
  ASSERT_EQUAL_64(1, x3);
  ASSERT_EQUAL_64(0, x4);
  ASSERT_EQUAL_64(0xffffffff, x5);
  ASSERT_EQUAL_64(0xffffffff00000001, x6);
  ASSERT_EQUAL_64(1, x7);
  ASSERT_EQUAL_64(0xffffffffffffffff, x8);
  ASSERT_EQUAL_64(1, x9);
  ASSERT_EQUAL_64(1, x10);
  ASSERT_EQUAL_64(0, x11);
  ASSERT_EQUAL_64(0, x12);
  ASSERT_EQUAL_64(1, x13);
  ASSERT_EQUAL_64(0xffffffff, x14);
  ASSERT_EQUAL_64(0, x20);
  ASSERT_EQUAL_64(0xffffffff00000001, x21);
  ASSERT_EQUAL_64(0xffffffff, x22);
  ASSERT_EQUAL_64(0xffffffffffffffff, x23);

  TEARDOWN();
}


static void SmullHelper(int64_t expected, int64_t a, int64_t b) {
  SETUP();
  START();
  __ Mov(w0, a);
  __ Mov(w1, b);
  __ Smull(x2, w0, w1);
  END();
  RUN();
  ASSERT_EQUAL_64(expected, x2);
  TEARDOWN();
}


TEST(smull) {
  SmullHelper(0, 0, 0);
  SmullHelper(1, 1, 1);
  SmullHelper(-1, -1, 1);
  SmullHelper(1, -1, -1);
  SmullHelper(0xffffffff80000000, 0x80000000, 1);
  SmullHelper(0x0000000080000000, 0x00010000, 0x00008000);
}


TEST(madd) {
  SETUP();

  START();
  __ Mov(x16, 0);
  __ Mov(x17, 1);
  __ Mov(x18, 0xffffffff);
  __ Mov(x19, 0xffffffffffffffff);

  __ Madd(w0, w16, w16, w16);
  __ Madd(w1, w16, w16, w17);
  __ Madd(w2, w16, w16, w18);
  __ Madd(w3, w16, w16, w19);
  __ Madd(w4, w16, w17, w17);
  __ Madd(w5, w17, w17, w18);
  __ Madd(w6, w17, w17, w19);
  __ Madd(w7, w17, w18, w16);
  __ Madd(w8, w17, w18, w18);
  __ Madd(w9, w18, w18, w17);
  __ Madd(w10, w18, w19, w18);
  __ Madd(w11, w19, w19, w19);

  __ Madd(x12, x16, x16, x16);
  __ Madd(x13, x16, x16, x17);
  __ Madd(x14, x16, x16, x18);
  __ Madd(x15, x16, x16, x19);
  __ Madd(x20, x16, x17, x17);
  __ Madd(x21, x17, x17, x18);
  __ Madd(x22, x17, x17, x19);
  __ Madd(x23, x17, x18, x16);
  __ Madd(x24, x17, x18, x18);
  __ Madd(x25, x18, x18, x17);
  __ Madd(x26, x18, x19, x18);
  __ Madd(x27, x19, x19, x19);

  END();

  RUN();

  ASSERT_EQUAL_64(0, x0);
  ASSERT_EQUAL_64(1, x1);
  ASSERT_EQUAL_64(0xffffffff, x2);
  ASSERT_EQUAL_64(0xffffffff, x3);
  ASSERT_EQUAL_64(1, x4);
  ASSERT_EQUAL_64(0, x5);
  ASSERT_EQUAL_64(0, x6);
  ASSERT_EQUAL_64(0xffffffff, x7);
  ASSERT_EQUAL_64(0xfffffffe, x8);
  ASSERT_EQUAL_64(2, x9);
  ASSERT_EQUAL_64(0, x10);
  ASSERT_EQUAL_64(0, x11);

  ASSERT_EQUAL_64(0, x12);
  ASSERT_EQUAL_64(1, x13);
  ASSERT_EQUAL_64(0x00000000ffffffff, x14);
  ASSERT_EQUAL_64(0xffffffffffffffff, x15);
  ASSERT_EQUAL_64(1, x20);
  ASSERT_EQUAL_64(0x0000000100000000, x21);
  ASSERT_EQUAL_64(0, x22);
  ASSERT_EQUAL_64(0x00000000ffffffff, x23);
  ASSERT_EQUAL_64(0x00000001fffffffe, x24);
  ASSERT_EQUAL_64(0xfffffffe00000002, x25);
  ASSERT_EQUAL_64(0, x26);
  ASSERT_EQUAL_64(0, x27);

  TEARDOWN();
}


TEST(msub) {
  SETUP();

  START();
  __ Mov(x16, 0);
  __ Mov(x17, 1);
  __ Mov(x18, 0xffffffff);
  __ Mov(x19, 0xffffffffffffffff);

  __ Msub(w0, w16, w16, w16);
  __ Msub(w1, w16, w16, w17);
  __ Msub(w2, w16, w16, w18);
  __ Msub(w3, w16, w16, w19);
  __ Msub(w4, w16, w17, w17);
  __ Msub(w5, w17, w17, w18);
  __ Msub(w6, w17, w17, w19);
  __ Msub(w7, w17, w18, w16);
  __ Msub(w8, w17, w18, w18);
  __ Msub(w9, w18, w18, w17);
  __ Msub(w10, w18, w19, w18);
  __ Msub(w11, w19, w19, w19);

  __ Msub(x12, x16, x16, x16);
  __ Msub(x13, x16, x16, x17);
  __ Msub(x14, x16, x16, x18);
  __ Msub(x15, x16, x16, x19);
  __ Msub(x20, x16, x17, x17);
  __ Msub(x21, x17, x17, x18);
  __ Msub(x22, x17, x17, x19);
  __ Msub(x23, x17, x18, x16);
  __ Msub(x24, x17, x18, x18);
  __ Msub(x25, x18, x18, x17);
  __ Msub(x26, x18, x19, x18);
  __ Msub(x27, x19, x19, x19);

  END();

  RUN();

  ASSERT_EQUAL_64(0, x0);
  ASSERT_EQUAL_64(1, x1);
  ASSERT_EQUAL_64(0xffffffff, x2);
  ASSERT_EQUAL_64(0xffffffff, x3);
  ASSERT_EQUAL_64(1, x4);
  ASSERT_EQUAL_64(0xfffffffe, x5);
  ASSERT_EQUAL_64(0xfffffffe, x6);
  ASSERT_EQUAL_64(1, x7);
  ASSERT_EQUAL_64(0, x8);
  ASSERT_EQUAL_64(0, x9);
  ASSERT_EQUAL_64(0xfffffffe, x10);
  ASSERT_EQUAL_64(0xfffffffe, x11);

  ASSERT_EQUAL_64(0, x12);
  ASSERT_EQUAL_64(1, x13);
  ASSERT_EQUAL_64(0x00000000ffffffff, x14);
  ASSERT_EQUAL_64(0xffffffffffffffff, x15);
  ASSERT_EQUAL_64(1, x20);
  ASSERT_EQUAL_64(0x00000000fffffffe, x21);
  ASSERT_EQUAL_64(0xfffffffffffffffe, x22);
  ASSERT_EQUAL_64(0xffffffff00000001, x23);
  ASSERT_EQUAL_64(0, x24);
  ASSERT_EQUAL_64(0x0000000200000000, x25);
  ASSERT_EQUAL_64(0x00000001fffffffe, x26);
  ASSERT_EQUAL_64(0xfffffffffffffffe, x27);

  TEARDOWN();
}


TEST(smulh) {
  SETUP();

  START();
  __ Mov(x20, 0);
  __ Mov(x21, 1);
  __ Mov(x22, 0x0000000100000000);
  __ Mov(x23, 0x0000000012345678);
  __ Mov(x24, 0x0123456789abcdef);
  __ Mov(x25, 0x0000000200000000);
  __ Mov(x26, 0x8000000000000000);
  __ Mov(x27, 0xffffffffffffffff);
  __ Mov(x28, 0x5555555555555555);
  __ Mov(x29, 0xaaaaaaaaaaaaaaaa);

  __ Smulh(x0, x20, x24);
  __ Smulh(x1, x21, x24);
  __ Smulh(x2, x22, x23);
  __ Smulh(x3, x22, x24);
  __ Smulh(x4, x24, x25);
  __ Smulh(x5, x23, x27);
  __ Smulh(x6, x26, x26);
  __ Smulh(x7, x26, x27);
  __ Smulh(x8, x27, x27);
  __ Smulh(x9, x28, x28);
  __ Smulh(x10, x28, x29);
  __ Smulh(x11, x29, x29);
  END();

  RUN();

  ASSERT_EQUAL_64(0, x0);
  ASSERT_EQUAL_64(0, x1);
  ASSERT_EQUAL_64(0, x2);
  ASSERT_EQUAL_64(0x0000000001234567, x3);
  ASSERT_EQUAL_64(0x0000000002468acf, x4);
  ASSERT_EQUAL_64(0xffffffffffffffff, x5);
  ASSERT_EQUAL_64(0x4000000000000000, x6);
  ASSERT_EQUAL_64(0, x7);
  ASSERT_EQUAL_64(0, x8);
  ASSERT_EQUAL_64(0x1c71c71c71c71c71, x9);
  ASSERT_EQUAL_64(0xe38e38e38e38e38e, x10);
  ASSERT_EQUAL_64(0x1c71c71c71c71c72, x11);

  TEARDOWN();
}


TEST(umulh) {
  SETUP();

  START();
  __ Mov(x20, 0);
  __ Mov(x21, 1);
  __ Mov(x22, 0x0000000100000000);
  __ Mov(x23, 0x0000000012345678);
  __ Mov(x24, 0x0123456789abcdef);
  __ Mov(x25, 0x0000000200000000);
  __ Mov(x26, 0x8000000000000000);
  __ Mov(x27, 0xffffffffffffffff);
  __ Mov(x28, 0x5555555555555555);
  __ Mov(x29, 0xaaaaaaaaaaaaaaaa);

  __ Umulh(x0, x20, x24);
  __ Umulh(x1, x21, x24);
  __ Umulh(x2, x22, x23);
  __ Umulh(x3, x22, x24);
  __ Umulh(x4, x24, x25);
  __ Umulh(x5, x23, x27);
  __ Umulh(x6, x26, x26);
  __ Umulh(x7, x26, x27);
  __ Umulh(x8, x27, x27);
  __ Umulh(x9, x28, x28);
  __ Umulh(x10, x28, x29);
  __ Umulh(x11, x29, x29);
  END();

  RUN();

  ASSERT_EQUAL_64(0, x0);
  ASSERT_EQUAL_64(0, x1);
  ASSERT_EQUAL_64(0, x2);
  ASSERT_EQUAL_64(0x0000000001234567, x3);
  ASSERT_EQUAL_64(0x0000000002468acf, x4);
  ASSERT_EQUAL_64(0x0000000012345677, x5);
  ASSERT_EQUAL_64(0x4000000000000000, x6);
  ASSERT_EQUAL_64(0x7fffffffffffffff, x7);
  ASSERT_EQUAL_64(0xfffffffffffffffe, x8);
  ASSERT_EQUAL_64(0x1c71c71c71c71c71, x9);
  ASSERT_EQUAL_64(0x38e38e38e38e38e3, x10);
  ASSERT_EQUAL_64(0x71c71c71c71c71c6, x11);

  TEARDOWN();
}


TEST(smaddl_umaddl_umull) {
  SETUP();

  START();
  __ Mov(x17, 1);
  __ Mov(x18, 0x00000000ffffffff);
  __ Mov(x19, 0xffffffffffffffff);
  __ Mov(x20, 4);
  __ Mov(x21, 0x0000000200000000);

  __ Smaddl(x9, w17, w18, x20);
  __ Smaddl(x10, w18, w18, x20);
  __ Smaddl(x11, w19, w19, x20);
  __ Smaddl(x12, w19, w19, x21);
  __ Umaddl(x13, w17, w18, x20);
  __ Umaddl(x14, w18, w18, x20);
  __ Umaddl(x15, w19, w19, x20);
  __ Umaddl(x22, w19, w19, x21);
  __ Umull(x24, w19, w19);
  __ Umull(x25, w17, w18);
  END();

  RUN();

  ASSERT_EQUAL_64(3, x9);
  ASSERT_EQUAL_64(5, x10);
  ASSERT_EQUAL_64(5, x11);
  ASSERT_EQUAL_64(0x0000000200000001, x12);
  ASSERT_EQUAL_64(0x0000000100000003, x13);
  ASSERT_EQUAL_64(0xfffffffe00000005, x14);
  ASSERT_EQUAL_64(0xfffffffe00000005, x15);
  ASSERT_EQUAL_64(1, x22);
  ASSERT_EQUAL_64(0xfffffffe00000001, x24);
  ASSERT_EQUAL_64(0x00000000ffffffff, x25);

  TEARDOWN();
}


TEST(smsubl_umsubl) {
  SETUP();

  START();
  __ Mov(x17, 1);
  __ Mov(x18, 0x00000000ffffffff);
  __ Mov(x19, 0xffffffffffffffff);
  __ Mov(x20, 4);
  __ Mov(x21, 0x0000000200000000);

  __ Smsubl(x9, w17, w18, x20);
  __ Smsubl(x10, w18, w18, x20);
  __ Smsubl(x11, w19, w19, x20);
  __ Smsubl(x12, w19, w19, x21);
  __ Umsubl(x13, w17, w18, x20);
  __ Umsubl(x14, w18, w18, x20);
  __ Umsubl(x15, w19, w19, x20);
  __ Umsubl(x22, w19, w19, x21);
  END();

  RUN();

  ASSERT_EQUAL_64(5, x9);
  ASSERT_EQUAL_64(3, x10);
  ASSERT_EQUAL_64(3, x11);
  ASSERT_EQUAL_64(0x00000001ffffffff, x12);
  ASSERT_EQUAL_64(0xffffffff00000005, x13);
  ASSERT_EQUAL_64(0x0000000200000003, x14);
  ASSERT_EQUAL_64(0x0000000200000003, x15);
  ASSERT_EQUAL_64(0x00000003ffffffff, x22);

  TEARDOWN();
}


TEST(div) {
  SETUP();

  START();
  __ Mov(x16, 1);
  __ Mov(x17, 0xffffffff);
  __ Mov(x18, 0xffffffffffffffff);
  __ Mov(x19, 0x80000000);
  __ Mov(x20, 0x8000000000000000);
  __ Mov(x21, 2);

  __ Udiv(w0, w16, w16);
  __ Udiv(w1, w17, w16);
  __ Sdiv(w2, w16, w16);
  __ Sdiv(w3, w16, w17);
  __ Sdiv(w4, w17, w18);

  __ Udiv(x5, x16, x16);
  __ Udiv(x6, x17, x18);
  __ Sdiv(x7, x16, x16);
  __ Sdiv(x8, x16, x17);
  __ Sdiv(x9, x17, x18);

  __ Udiv(w10, w19, w21);
  __ Sdiv(w11, w19, w21);
  __ Udiv(x12, x19, x21);
  __ Sdiv(x13, x19, x21);
  __ Udiv(x14, x20, x21);
  __ Sdiv(x15, x20, x21);

  __ Udiv(w22, w19, w17);
  __ Sdiv(w23, w19, w17);
  __ Udiv(x24, x20, x18);
  __ Sdiv(x25, x20, x18);

  __ Udiv(x26, x16, x21);
  __ Sdiv(x27, x16, x21);
  __ Udiv(x28, x18, x21);
  __ Sdiv(x29, x18, x21);

  __ Mov(x17, 0);
  __ Udiv(w18, w16, w17);
  __ Sdiv(w19, w16, w17);
  __ Udiv(x20, x16, x17);
  __ Sdiv(x21, x16, x17);
  END();

  RUN();

  ASSERT_EQUAL_64(1, x0);
  ASSERT_EQUAL_64(0xffffffff, x1);
  ASSERT_EQUAL_64(1, x2);
  ASSERT_EQUAL_64(0xffffffff, x3);
  ASSERT_EQUAL_64(1, x4);
  ASSERT_EQUAL_64(1, x5);
  ASSERT_EQUAL_64(0, x6);
  ASSERT_EQUAL_64(1, x7);
  ASSERT_EQUAL_64(0, x8);
  ASSERT_EQUAL_64(0xffffffff00000001, x9);
  ASSERT_EQUAL_64(0x40000000, x10);
  ASSERT_EQUAL_64(0xc0000000, x11);
  ASSERT_EQUAL_64(0x0000000040000000, x12);
  ASSERT_EQUAL_64(0x0000000040000000, x13);
  ASSERT_EQUAL_64(0x4000000000000000, x14);
  ASSERT_EQUAL_64(0xc000000000000000, x15);
  ASSERT_EQUAL_64(0, x22);
  ASSERT_EQUAL_64(0x80000000, x23);
  ASSERT_EQUAL_64(0, x24);
  ASSERT_EQUAL_64(0x8000000000000000, x25);
  ASSERT_EQUAL_64(0, x26);
  ASSERT_EQUAL_64(0, x27);
  ASSERT_EQUAL_64(0x7fffffffffffffff, x28);
  ASSERT_EQUAL_64(0, x29);
  ASSERT_EQUAL_64(0, x18);
  ASSERT_EQUAL_64(0, x19);
  ASSERT_EQUAL_64(0, x20);
  ASSERT_EQUAL_64(0, x21);

  TEARDOWN();
}


TEST(rbit_rev) {
  SETUP();

  START();
  __ Mov(x24, 0xfedcba9876543210);
  __ Rbit(w0, w24);
  __ Rbit(x1, x24);
  __ Rev16(w2, w24);
  __ Rev16(x3, x24);
  __ Rev(w4, w24);
  __ Rev32(x5, x24);
  __ Rev64(x6, x24);
  __ Rev(x7, x24);
  END();

  RUN();

  ASSERT_EQUAL_64(0x084c2a6e, x0);
  ASSERT_EQUAL_64(0x084c2a6e195d3b7f, x1);
  ASSERT_EQUAL_64(0x54761032, x2);
  ASSERT_EQUAL_64(0xdcfe98ba54761032, x3);
  ASSERT_EQUAL_64(0x10325476, x4);
  ASSERT_EQUAL_64(0x98badcfe10325476, x5);
  ASSERT_EQUAL_64(0x1032547698badcfe, x6);
  ASSERT_EQUAL_64(0x1032547698badcfe, x7);

  TEARDOWN();
}

typedef void (MacroAssembler::*TestBranchSignature)(const Register& rt,
                                                    unsigned bit_pos,
                                                    Label* label);

static void TbzRangePoolLimitHelper(TestBranchSignature test_branch) {
  const int kTbzRange = 32768;
  const int kNumLdrLiteral = kTbzRange / 4;
  const int kFuzzRange = 2;
  for (int n = kNumLdrLiteral - kFuzzRange; n <= kNumLdrLiteral + kFuzzRange;
       ++n) {
    for (int margin = -32; margin < 32; margin += 4) {
      SETUP();

      START();

      // Emit 32KB of literals (equal to the range of TBZ).
      for (int i = 0; i < n; ++i) {
        __ Ldr(w0, 0x12345678);
      }

      const int kLiteralMargin = 128 * KBytes;

      // Emit enough NOPs to be just about to emit the literal pool.
      ptrdiff_t end =
          masm.GetCursorOffset() + (kLiteralMargin - n * 4 + margin);
      while (masm.GetCursorOffset() < end) {
        __ Nop();
      }

      // Add a TBZ instruction.
      Label label;

      (masm.*test_branch)(x0, 2, &label);

      // Add enough NOPs to surpass its range, to make sure we can encode the
      // veneer.
      end = masm.GetCursorOffset() + (kTbzRange - 4);
      {
        ExactAssemblyScope scope(&masm,
                                 kTbzRange,
                                 ExactAssemblyScope::kMaximumSize);
        while (masm.GetCursorOffset() < end) __ nop();
      }

      // Finally, bind the label.
      __ Bind(&label);

      END();

      RUN();

      TEARDOWN();
    }
  }
}

TEST(test_branch_limits_literal_pool_size) {
  TbzRangePoolLimitHelper(&MacroAssembler::Tbz);
  TbzRangePoolLimitHelper(&MacroAssembler::Tbnz);
}

TEST(clz_cls) {
  SETUP();

  START();
  __ Mov(x24, 0x0008000000800000);
  __ Mov(x25, 0xff800000fff80000);
  __ Mov(x26, 0);
  __ Clz(w0, w24);
  __ Clz(x1, x24);
  __ Clz(w2, w25);
  __ Clz(x3, x25);
  __ Clz(w4, w26);
  __ Clz(x5, x26);
  __ Cls(w6, w24);
  __ Cls(x7, x24);
  __ Cls(w8, w25);
  __ Cls(x9, x25);
  __ Cls(w10, w26);
  __ Cls(x11, x26);
  END();

  RUN();

  ASSERT_EQUAL_64(8, x0);
  ASSERT_EQUAL_64(12, x1);
  ASSERT_EQUAL_64(0, x2);
  ASSERT_EQUAL_64(0, x3);
  ASSERT_EQUAL_64(32, x4);
  ASSERT_EQUAL_64(64, x5);
  ASSERT_EQUAL_64(7, x6);
  ASSERT_EQUAL_64(11, x7);
  ASSERT_EQUAL_64(12, x8);
  ASSERT_EQUAL_64(8, x9);
  ASSERT_EQUAL_64(31, x10);
  ASSERT_EQUAL_64(63, x11);

  TEARDOWN();
}


TEST(pacia_pacib_autia_autib) {
  SETUP_WITH_FEATURES(CPUFeatures::kPAuth);

  START();

  Register pointer = x24;
  Register modifier = x25;

  __ Mov(pointer, 0x0000000012345678);
  __ Mov(modifier, 0x477d469dec0b8760);

  // Generate PACs using keys A and B.
  __ Mov(x0, pointer);
  __ Pacia(x0, modifier);

  __ Mov(x1, pointer);
  __ Pacib(x1, modifier);

  // Authenticate the pointers above.
  __ Mov(x2, x0);
  __ Autia(x2, modifier);

  __ Mov(x3, x1);
  __ Autib(x3, modifier);

  // Attempt to authenticate incorrect pointers.
  __ Mov(x4, x1);
  __ Autia(x4, modifier);

  __ Mov(x5, x0);
  __ Autib(x5, modifier);

  // Mask out just the PAC code bits.
  // TODO: use Simulator::CalculatePACMask in a nice way.
  __ And(x0, x0, 0x007f000000000000);
  __ And(x1, x1, 0x007f000000000000);

  END();

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
  RUN();

  // Check PAC codes have been generated and aren't equal.
  // NOTE: with a different ComputePAC implementation, there may be a collision.
  ASSERT_NOT_EQUAL_64(0, x0);
  ASSERT_NOT_EQUAL_64(0, x1);
  ASSERT_NOT_EQUAL_64(x0, x1);

  // Pointers correctly authenticated.
  ASSERT_EQUAL_64(pointer, x2);
  ASSERT_EQUAL_64(pointer, x3);

  // Pointers corrupted after failing to authenticate.
  ASSERT_EQUAL_64(0x0020000012345678, x4);
  ASSERT_EQUAL_64(0x0040000012345678, x5);
#endif

  TEARDOWN();
}


TEST(paciza_pacizb_autiza_autizb) {
  SETUP_WITH_FEATURES(CPUFeatures::kPAuth);

  START();

  Register pointer = x24;

  __ Mov(pointer, 0x0000000012345678);

  // Generate PACs using keys A and B.
  __ Mov(x0, pointer);
  __ Paciza(x0);

  __ Mov(x1, pointer);
  __ Pacizb(x1);

  // Authenticate the pointers above.
  __ Mov(x2, x0);
  __ Autiza(x2);

  __ Mov(x3, x1);
  __ Autizb(x3);

  // Attempt to authenticate incorrect pointers.
  __ Mov(x4, x1);
  __ Autiza(x4);

  __ Mov(x5, x0);
  __ Autizb(x5);

  // Mask out just the PAC code bits.
  // TODO: use Simulator::CalculatePACMask in a nice way.
  __ And(x0, x0, 0x007f000000000000);
  __ And(x1, x1, 0x007f000000000000);

  END();

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
  RUN();

  // Check PAC codes have been generated and aren't equal.
  // NOTE: with a different ComputePAC implementation, there may be a collision.
  ASSERT_NOT_EQUAL_64(0, x0);
  ASSERT_NOT_EQUAL_64(0, x1);
  ASSERT_NOT_EQUAL_64(x0, x1);

  // Pointers correctly authenticated.
  ASSERT_EQUAL_64(pointer, x2);
  ASSERT_EQUAL_64(pointer, x3);

  // Pointers corrupted after failing to authenticate.
  ASSERT_EQUAL_64(0x0020000012345678, x4);
  ASSERT_EQUAL_64(0x0040000012345678, x5);
#endif

  TEARDOWN();
}
2033
2034
TEST(pacda_pacdb_autda_autdb)2035 TEST(pacda_pacdb_autda_autdb) {
2036 SETUP_WITH_FEATURES(CPUFeatures::kPAuth);
2037
2038 START();
2039
2040 Register pointer = x24;
2041 Register modifier = x25;
2042
2043 __ Mov(pointer, 0x0000000012345678);
2044 __ Mov(modifier, 0x477d469dec0b8760);
2045
2046 // Generate PACs using keys A and B.
2047 __ Mov(x0, pointer);
2048 __ Pacda(x0, modifier);
2049
2050 __ Mov(x1, pointer);
2051 __ Pacdb(x1, modifier);
2052
2053 // Authenticate the pointers above.
2054 __ Mov(x2, x0);
2055 __ Autda(x2, modifier);
2056
2057 __ Mov(x3, x1);
2058 __ Autdb(x3, modifier);
2059
2060 // Attempt to authenticate incorrect pointers.
2061 __ Mov(x4, x1);
2062 __ Autda(x4, modifier);
2063
2064 __ Mov(x5, x0);
2065 __ Autdb(x5, modifier);
2066
2067 // Mask out just the PAC code bits.
2068 // TODO: use Simulator::CalculatePACMask in a nice way.
2069 __ And(x0, x0, 0x007f000000000000);
2070 __ And(x1, x1, 0x007f000000000000);
2071
2072 END();
2073
2074 #ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
2075 RUN();
2076
2077 // Check PAC codes have been generated and aren't equal.
2078 // NOTE: with a different ComputePAC implementation, there may be a collision.
2079 ASSERT_NOT_EQUAL_64(0, x0);
2080 ASSERT_NOT_EQUAL_64(0, x1);
2081 ASSERT_NOT_EQUAL_64(x0, x1);
2082
2083 // Pointers correctly authenticated.
2084 ASSERT_EQUAL_64(pointer, x2);
2085 ASSERT_EQUAL_64(pointer, x3);
2086
2087 // Pointers corrupted after failing to authenticate.
2088 ASSERT_EQUAL_64(0x0020000012345678, x4);
2089 ASSERT_EQUAL_64(0x0040000012345678, x5);
2090 #endif
2091
2092 TEARDOWN();
2093 }
2094
2095
TEST(pacdza_pacdzb_autdza_autdzb)2096 TEST(pacdza_pacdzb_autdza_autdzb) {
2097 SETUP_WITH_FEATURES(CPUFeatures::kPAuth);
2098
2099 START();
2100
2101 Register pointer = x24;
2102
2103 __ Mov(pointer, 0x0000000012345678);
2104
2105 // Generate PACs using keys A and B.
2106 __ Mov(x0, pointer);
2107 __ Pacdza(x0);
2108
2109 __ Mov(x1, pointer);
2110 __ Pacdzb(x1);
2111
2112 // Authenticate the pointers above.
2113 __ Mov(x2, x0);
2114 __ Autdza(x2);
2115
2116 __ Mov(x3, x1);
2117 __ Autdzb(x3);
2118
2119 // Attempt to authenticate incorrect pointers.
2120 __ Mov(x4, x1);
2121 __ Autdza(x4);
2122
2123 __ Mov(x5, x0);
2124 __ Autdzb(x5);
2125
2126 // Mask out just the PAC code bits.
2127 // TODO: use Simulator::CalculatePACMask in a nice way.
2128 __ And(x0, x0, 0x007f000000000000);
2129 __ And(x1, x1, 0x007f000000000000);
2130
2131 END();
2132
2133 #ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
2134 RUN();
2135
2136 // Check PAC codes have been generated and aren't equal.
2137 // NOTE: with a different ComputePAC implementation, there may be a collision.
2138 ASSERT_NOT_EQUAL_64(0, x0);
2139 ASSERT_NOT_EQUAL_64(0, x1);
2140 ASSERT_NOT_EQUAL_64(x0, x1);
2141
2142 // Pointers correctly authenticated.
2143 ASSERT_EQUAL_64(pointer, x2);
2144 ASSERT_EQUAL_64(pointer, x3);
2145
2146 // Pointers corrupted after failing to authenticate.
2147 ASSERT_EQUAL_64(0x0020000012345678, x4);
2148 ASSERT_EQUAL_64(0x0040000012345678, x5);
2149 #endif
2150
2151 TEARDOWN();
2152 }
2153
2154
TEST(pacga_xpaci_xpacd)2155 TEST(pacga_xpaci_xpacd) {
2156 SETUP_WITH_FEATURES(CPUFeatures::kPAuth, CPUFeatures::kPAuthGeneric);
2157
2158 START();
2159
2160 Register pointer = x24;
2161 Register modifier = x25;
2162
2163 __ Mov(pointer, 0x0000000012345678);
2164 __ Mov(modifier, 0x477d469dec0b8760);
2165
2166 // Generate generic PAC.
2167 __ Pacga(x0, pointer, modifier);
2168
2169 // Generate PACs using key A.
2170 __ Mov(x1, pointer);
2171 __ Mov(x2, pointer);
2172 __ Pacia(x1, modifier);
2173 __ Pacda(x2, modifier);
2174
2175 // Strip PACs.
2176 __ Mov(x3, x1);
2177 __ Mov(x4, x2);
2178 __ Xpaci(x3);
2179 __ Xpacd(x4);
2180
2181 // Mask out just the PAC code bits.
2182 // TODO: use Simulator::CalculatePACMask in a nice way.
2183 __ And(x0, x0, 0xffffffff00000000);
2184 __ And(x1, x1, 0x007f000000000000);
2185 __ And(x2, x2, 0x007f000000000000);
2186
2187 END();
2188
2189 #ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
2190 RUN();
2191
2192
2193 // Check PAC codes have been generated and aren't equal.
2194 // NOTE: with a different ComputePAC implementation, there may be a collision.
2195 ASSERT_NOT_EQUAL_64(0, x0);
2196
2197 ASSERT_NOT_EQUAL_64(0, x1);
2198 ASSERT_NOT_EQUAL_64(0, x2);
2199 ASSERT_NOT_EQUAL_64(x1, x2);
2200
2201 ASSERT_EQUAL_64(pointer, x3);
2202 ASSERT_EQUAL_64(pointer, x4);
2203 #endif
2204
2205 TEARDOWN();
2206 }
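
// Unlike the combined PAC instructions above, Pacga does not insert a code
// into a pointer: it writes a 32-bit authentication code for the
// pointer/modifier pair into the top half of its destination, with the
// bottom 32 bits set to zero; hence the 0xffffffff00000000 mask applied to
// x0 in this test.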


TEST(label) {
  SETUP();

  Label label_1, label_2, label_3, label_4;

  START();
  __ Mov(x0, 0x1);
  __ Mov(x1, 0x0);
  __ Mov(x22, lr);  // Save lr.

  __ B(&label_1);
  __ B(&label_1);
  __ B(&label_1);  // Multiple branches to the same label.
  __ Mov(x0, 0x0);
  __ Bind(&label_2);
  __ B(&label_3);  // Forward branch.
  __ Mov(x0, 0x0);
  __ Bind(&label_1);
  __ B(&label_2);  // Backward branch.
  __ Mov(x0, 0x0);
  __ Bind(&label_3);
  __ Bl(&label_4);
  END();

  __ Bind(&label_4);
  __ Mov(x1, 0x1);
  __ Mov(lr, x22);
  END();

  RUN();

  ASSERT_EQUAL_64(0x1, x0);
  ASSERT_EQUAL_64(0x1, x1);

  TEARDOWN();
}


TEST(label_2) {
  SETUP();

  Label label_1, label_2, label_3;
  Label first_jump_to_3;

  START();
  __ Mov(x0, 0x0);

  __ B(&label_1);
  ptrdiff_t offset_2 = masm.GetCursorOffset();
  __ Orr(x0, x0, 1 << 1);
  __ B(&label_3);
  ptrdiff_t offset_1 = masm.GetCursorOffset();
  __ Orr(x0, x0, 1 << 0);
  __ B(&label_2);
  ptrdiff_t offset_3 = masm.GetCursorOffset();
  __ Tbz(x0, 2, &first_jump_to_3);
  __ Orr(x0, x0, 1 << 3);
  __ Bind(&first_jump_to_3);
  __ Orr(x0, x0, 1 << 2);
  __ Tbz(x0, 3, &label_3);

  // Labels 1, 2, and 3 are bound before the current buffer offset. Branches
  // to label_1 and label_2 branch respectively forward and backward. Branches
  // to label_3 include both forward and backward branches.
  masm.BindToOffset(&label_1, offset_1);
  masm.BindToOffset(&label_2, offset_2);
  masm.BindToOffset(&label_3, offset_3);

  END();

  RUN();

  ASSERT_EQUAL_64(0xf, x0);

  TEARDOWN();
}


TEST(adr) {
  SETUP();

  Label label_1, label_2, label_3, label_4;

  START();
  __ Mov(x0, 0x0);       // Set to non-zero to indicate failure.
  __ Adr(x1, &label_3);  // Set to zero to indicate success.

  __ Adr(x2, &label_1);  // Multiple forward references to the same label.
  __ Adr(x3, &label_1);
  __ Adr(x4, &label_1);

  __ Bind(&label_2);
  __ Eor(x5, x2, Operand(x3));  // Ensure that x2, x3 and x4 are identical.
  __ Eor(x6, x2, Operand(x4));
  __ Orr(x0, x0, Operand(x5));
  __ Orr(x0, x0, Operand(x6));
  __ Br(x2);  // label_1, label_3

  __ Bind(&label_3);
  __ Adr(x2, &label_3);  // Self-reference (offset 0).
  __ Eor(x1, x1, Operand(x2));
  __ Adr(x2, &label_4);  // Simple forward reference.
  __ Br(x2);  // label_4

  __ Bind(&label_1);
  __ Adr(x2, &label_3);  // Multiple reverse references to the same label.
  __ Adr(x3, &label_3);
  __ Adr(x4, &label_3);
  __ Adr(x5, &label_2);  // Simple reverse reference.
  __ Br(x5);  // label_2

  __ Bind(&label_4);
  END();

  RUN();

  ASSERT_EQUAL_64(0x0, x0);
  ASSERT_EQUAL_64(0x0, x1);

  TEARDOWN();
}


// Simple adrp tests: check that labels are linked and handled properly.
// This is similar to the adr test, but all the adrp instructions are put on
// the same page so that they return the same value.
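//
// As a sketch of the semantics relied on here, adrp computes a page-aligned,
// PC-relative address:
//
//   result = AlignDown(pc, kPageSize) + (SignExtend(imm21) << 12)
//
// The low 12 bits of the result are always zero, so every adrp on a given
// 4KB page that targets the same label must produce the same value.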
TEST(adrp) {
  Label start;
  Label label_1, label_2, label_3;

  SETUP_CUSTOM(2 * kPageSize, PageOffsetDependentCode);
  START();

  // Waste space until the start of a page.
  {
    ExactAssemblyScope scope(&masm,
                             kPageSize,
                             ExactAssemblyScope::kMaximumSize);
    const uintptr_t kPageOffsetMask = kPageSize - 1;
    while ((masm.GetCursorAddress<uintptr_t>() & kPageOffsetMask) != 0) {
      __ b(&start);
    }
    __ bind(&start);
  }

  // Simple forward reference.
  __ Adrp(x0, &label_2);

  __ Bind(&label_1);

  // Multiple forward references to the same label.
  __ Adrp(x1, &label_3);
  __ Adrp(x2, &label_3);
  __ Adrp(x3, &label_3);

  __ Bind(&label_2);

  // Self-reference (offset 0).
  __ Adrp(x4, &label_2);

  __ Bind(&label_3);

  // Simple reverse reference.
  __ Adrp(x5, &label_1);

  // Multiple reverse references to the same label.
  __ Adrp(x6, &label_2);
  __ Adrp(x7, &label_2);
  __ Adrp(x8, &label_2);

  VIXL_ASSERT(masm.GetSizeOfCodeGeneratedSince(&start) < kPageSize);
  END();
  RUN_CUSTOM();

  uint64_t expected = reinterpret_cast<uint64_t>(
      AlignDown(masm.GetLabelAddress<uint64_t*>(&start), kPageSize));
  ASSERT_EQUAL_64(expected, x0);
  ASSERT_EQUAL_64(expected, x1);
  ASSERT_EQUAL_64(expected, x2);
  ASSERT_EQUAL_64(expected, x3);
  ASSERT_EQUAL_64(expected, x4);
  ASSERT_EQUAL_64(expected, x5);
  ASSERT_EQUAL_64(expected, x6);
  ASSERT_EQUAL_64(expected, x7);
  ASSERT_EQUAL_64(expected, x8);

  TEARDOWN_CUSTOM();
}


static void AdrpPageBoundaryHelper(unsigned offset_into_page) {
  VIXL_ASSERT(offset_into_page < kPageSize);
  VIXL_ASSERT((offset_into_page % kInstructionSize) == 0);

  const uintptr_t kPageOffsetMask = kPageSize - 1;

  // The test label is always bound on page 0. Adrp instructions are generated
  // on pages from kStartPage to kEndPage (inclusive).
  const int kStartPage = -16;
  const int kEndPage = 16;
  const int kMaxCodeSize = (kEndPage - kStartPage + 2) * kPageSize;

  SETUP_CUSTOM(kMaxCodeSize, PageOffsetDependentCode);
  START();

  Label test;
  Label start;

  {
    ExactAssemblyScope scope(&masm,
                             kMaxCodeSize,
                             ExactAssemblyScope::kMaximumSize);
    // Initialize NZCV with `eq` flags.
    __ cmp(wzr, wzr);
    // Waste space until the start of a page.
    while ((masm.GetCursorAddress<uintptr_t>() & kPageOffsetMask) != 0) {
      __ b(&start);
    }

    // The first page.
    VIXL_STATIC_ASSERT(kStartPage < 0);
    {
      ExactAssemblyScope scope_page(&masm, kPageSize);
      __ bind(&start);
      __ adrp(x0, &test);
      __ adrp(x1, &test);
      for (size_t i = 2; i < (kPageSize / kInstructionSize); i += 2) {
        __ ccmp(x0, x1, NoFlag, eq);
        __ adrp(x1, &test);
      }
    }

    // Subsequent pages.
    VIXL_STATIC_ASSERT(kEndPage >= 0);
    for (int page = (kStartPage + 1); page <= kEndPage; page++) {
      ExactAssemblyScope scope_page(&masm, kPageSize);
      if (page == 0) {
        for (size_t i = 0; i < (kPageSize / kInstructionSize);) {
          if (i++ == (offset_into_page / kInstructionSize)) __ bind(&test);
          __ ccmp(x0, x1, NoFlag, eq);
          if (i++ == (offset_into_page / kInstructionSize)) __ bind(&test);
          __ adrp(x1, &test);
        }
      } else {
        for (size_t i = 0; i < (kPageSize / kInstructionSize); i += 2) {
          __ ccmp(x0, x1, NoFlag, eq);
          __ adrp(x1, &test);
        }
      }
    }
  }

  // Every adrp instruction pointed to the same label (`test`), so they should
  // all have produced the same result.

  END();
  RUN_CUSTOM();

  uintptr_t expected =
      AlignDown(masm.GetLabelAddress<uintptr_t>(&test), kPageSize);
  ASSERT_EQUAL_64(expected, x0);
  ASSERT_EQUAL_64(expected, x1);
  ASSERT_EQUAL_NZCV(ZCFlag);

  TEARDOWN_CUSTOM();
}


// Test that labels are correctly referenced by adrp across page boundaries.
TEST(adrp_page_boundaries) {
  VIXL_STATIC_ASSERT(kPageSize == 4096);
  AdrpPageBoundaryHelper(kInstructionSize * 0);
  AdrpPageBoundaryHelper(kInstructionSize * 1);
  AdrpPageBoundaryHelper(kInstructionSize * 512);
  AdrpPageBoundaryHelper(kInstructionSize * 1022);
  AdrpPageBoundaryHelper(kInstructionSize * 1023);
}


static void AdrpOffsetHelper(int64_t offset) {
  const size_t kPageOffsetMask = kPageSize - 1;
  const int kMaxCodeSize = 2 * kPageSize;

  SETUP_CUSTOM(kMaxCodeSize, PageOffsetDependentCode);
  START();

  Label page;

  {
    ExactAssemblyScope scope(&masm,
                             kMaxCodeSize,
                             ExactAssemblyScope::kMaximumSize);
    // Initialize NZCV with `eq` flags.
    __ cmp(wzr, wzr);
    // Waste space until the start of a page.
    while ((masm.GetCursorAddress<uintptr_t>() & kPageOffsetMask) != 0) {
      __ b(&page);
    }
    __ bind(&page);

    {
      ExactAssemblyScope scope_page(&masm, kPageSize);
      // Every adrp instruction on this page should return the same value.
      __ adrp(x0, offset);
      __ adrp(x1, offset);
      for (size_t i = 2; i < kPageSize / kInstructionSize; i += 2) {
        __ ccmp(x0, x1, NoFlag, eq);
        __ adrp(x1, offset);
      }
    }
  }

  END();
  RUN_CUSTOM();

  uintptr_t expected =
      masm.GetLabelAddress<uintptr_t>(&page) + (kPageSize * offset);
  ASSERT_EQUAL_64(expected, x0);
  ASSERT_EQUAL_64(expected, x1);
  ASSERT_EQUAL_NZCV(ZCFlag);

  TEARDOWN_CUSTOM();
}


// Check that adrp produces the correct result for a specific offset.
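// The adrp immediate is a 21-bit signed page count, so the representable
// offsets run from -0x100000 to 0x000fffff pages; the helper calls below
// probe both ends of that range.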
TEST(adrp_offset) {
  AdrpOffsetHelper(0);
  AdrpOffsetHelper(1);
  AdrpOffsetHelper(-1);
  AdrpOffsetHelper(4);
  AdrpOffsetHelper(-4);
  AdrpOffsetHelper(0x000fffff);
  AdrpOffsetHelper(-0x000fffff);
  AdrpOffsetHelper(-0x00100000);
}


TEST(branch_cond) {
  SETUP();

  Label done, wrong;

  START();
  __ Mov(x0, 0x1);
  __ Mov(x1, 0x1);
  __ Mov(x2, 0x8000000000000000);

  // For each 'cmp' instruction below, condition codes other than the ones
  // following it would branch.
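  //
  // As a reminder, the condition codes test the NZCV flags as follows:
  //
  //   eq: Z          ne: !Z         hs/cs: C        lo/cc: !C
  //   mi: N          pl: !N         vs: V           vc: !V
  //   hi: C && !Z    ls: !C || Z    ge: N == V      lt: N != V
  //   gt: !Z && (N == V)            le: Z || (N != V)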

  __ Cmp(x1, 0);
  __ B(&wrong, eq);
  __ B(&wrong, lo);
  __ B(&wrong, mi);
  __ B(&wrong, vs);
  __ B(&wrong, ls);
  __ B(&wrong, lt);
  __ B(&wrong, le);
  Label ok_1;
  __ B(&ok_1, ne);
  __ Mov(x0, 0x0);
  __ Bind(&ok_1);

  __ Cmp(x1, 1);
  __ B(&wrong, ne);
  __ B(&wrong, lo);
  __ B(&wrong, mi);
  __ B(&wrong, vs);
  __ B(&wrong, hi);
  __ B(&wrong, lt);
  __ B(&wrong, gt);
  Label ok_2;
  __ B(&ok_2, pl);
  __ Mov(x0, 0x0);
  __ Bind(&ok_2);

  __ Cmp(x1, 2);
  __ B(&wrong, eq);
  __ B(&wrong, hs);
  __ B(&wrong, pl);
  __ B(&wrong, vs);
  __ B(&wrong, hi);
  __ B(&wrong, ge);
  __ B(&wrong, gt);
  Label ok_3;
  __ B(&ok_3, vc);
  __ Mov(x0, 0x0);
  __ Bind(&ok_3);

  __ Cmp(x2, 1);
  __ B(&wrong, eq);
  __ B(&wrong, lo);
  __ B(&wrong, mi);
  __ B(&wrong, vc);
  __ B(&wrong, ls);
  __ B(&wrong, ge);
  __ B(&wrong, gt);
  Label ok_4;
  __ B(&ok_4, le);
  __ Mov(x0, 0x0);
  __ Bind(&ok_4);

  // The MacroAssembler does not allow al as a branch condition.
  Label ok_5;
  {
    ExactAssemblyScope scope(&masm, kInstructionSize);
    __ b(&ok_5, al);
  }
  __ Mov(x0, 0x0);
  __ Bind(&ok_5);

  // The MacroAssembler does not allow nv as a branch condition.
  Label ok_6;
  {
    ExactAssemblyScope scope(&masm, kInstructionSize);
    __ b(&ok_6, nv);
  }
  __ Mov(x0, 0x0);
  __ Bind(&ok_6);

  __ B(&done);

  __ Bind(&wrong);
  __ Mov(x0, 0x0);

  __ Bind(&done);
  END();

  RUN();

  ASSERT_EQUAL_64(0x1, x0);

  TEARDOWN();
}


TEST(branch_to_reg) {
  SETUP();

  // Test br.
  Label fn1, after_fn1;

  START();
  __ Mov(x29, lr);

  __ Mov(x1, 0);
  __ B(&after_fn1);

  __ Bind(&fn1);
  __ Mov(x0, lr);
  __ Mov(x1, 42);
  __ Br(x0);

  __ Bind(&after_fn1);
  __ Bl(&fn1);

  // Test blr.
  Label fn2, after_fn2, after_bl2;

  __ Mov(x2, 0);
  __ B(&after_fn2);

  __ Bind(&fn2);
  __ Mov(x0, lr);
  __ Mov(x2, 84);
  __ Blr(x0);

  __ Bind(&after_fn2);
  __ Bl(&fn2);
  __ Bind(&after_bl2);
  __ Mov(x3, lr);
  __ Adr(x4, &after_bl2);
  __ Adr(x5, &after_fn2);

  __ Mov(lr, x29);
  END();

  RUN();

  ASSERT_EQUAL_64(x4, x0);
  ASSERT_EQUAL_64(x5, x3);
  ASSERT_EQUAL_64(42, x1);
  ASSERT_EQUAL_64(84, x2);

  TEARDOWN();
}

TEST(branch_to_reg_auth_a) {
  SETUP_WITH_FEATURES(CPUFeatures::kPAuth);

  START();

  Label fn1, after_fn1;

  __ Mov(x28, 0x477d469dec0b8760);
  __ Mov(x29, lr);

  __ Mov(x1, 0);
  __ B(&after_fn1);

  __ Bind(&fn1);
  __ Mov(x0, lr);
  __ Mov(x1, 42);
  __ Pacia(x0, x28);
  __ Braa(x0, x28);

  __ Bind(&after_fn1);
  __ Bl(&fn1);

  Label fn2, after_fn2, after_bl2;

  __ Mov(x2, 0);
  __ B(&after_fn2);

  __ Bind(&fn2);
  __ Mov(x0, lr);
  __ Mov(x2, 84);
  __ Pacia(x0, x28);
  __ Blraa(x0, x28);

  __ Bind(&after_fn2);
  __ Bl(&fn2);
  __ Bind(&after_bl2);
  __ Mov(x3, lr);
  __ Adr(x4, &after_bl2);
  __ Adr(x5, &after_fn2);

  __ Xpaci(x0);
  __ Mov(lr, x29);
  END();

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
  RUN();

  ASSERT_EQUAL_64(x4, x0);
  ASSERT_EQUAL_64(x5, x3);
  ASSERT_EQUAL_64(42, x1);
  ASSERT_EQUAL_64(84, x2);
#endif

  TEARDOWN();
}

TEST(return_to_reg_auth) {
  SETUP_WITH_FEATURES(CPUFeatures::kPAuth);

  START();

  Label fn1, after_fn1;

  __ Mov(x28, sp);
  __ Mov(x29, lr);
  __ Mov(sp, 0x477d469dec0b8760);

  __ Mov(x0, 0);
  __ B(&after_fn1);

  __ Bind(&fn1);
  __ Mov(x0, 42);
  __ Paciasp();
  __ Retaa();

  __ Bind(&after_fn1);
  __ Bl(&fn1);

  Label fn2, after_fn2;

  __ Mov(x1, 0);
  __ B(&after_fn2);

  __ Bind(&fn2);
  __ Mov(x1, 84);
  __ Pacibsp();
  __ Retab();

  __ Bind(&after_fn2);
  __ Bl(&fn2);

  __ Mov(sp, x28);
  __ Mov(lr, x29);
  END();

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
  RUN();

  ASSERT_EQUAL_64(42, x0);
  ASSERT_EQUAL_64(84, x1);
#endif

  TEARDOWN();
}

#ifdef VIXL_NEGATIVE_TESTING
TEST(branch_to_reg_auth_fail) {
  SETUP_WITH_FEATURES(CPUFeatures::kPAuth);

  START();

  Label fn1, after_fn1;

  __ Mov(x29, lr);

  __ B(&after_fn1);

  __ Bind(&fn1);
  __ Mov(x0, lr);
  __ Pacizb(x0);
  __ Blraaz(x0);

  __ Bind(&after_fn1);
  __ Bl(&fn1);

  __ Mov(lr, x29);
  END();

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
  MUST_FAIL_WITH_MESSAGE(RUN(), "Failed to authenticate pointer.");
#endif // VIXL_INCLUDE_SIMULATOR_AARCH64

  TEARDOWN();
}
#endif // VIXL_NEGATIVE_TESTING

#ifdef VIXL_NEGATIVE_TESTING
TEST(return_to_reg_auth_fail) {
  SETUP_WITH_FEATURES(CPUFeatures::kPAuth);

  START();

  Label fn1, after_fn1;

  __ Mov(x28, sp);
  __ Mov(x29, lr);
  __ Mov(sp, 0x477d469dec0b8760);

  __ B(&after_fn1);

  __ Bind(&fn1);
  __ Paciasp();
  __ Retab();

  __ Bind(&after_fn1);
  __ Bl(&fn1);

  __ Mov(sp, x28);
  __ Mov(lr, x29);
  END();

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
  MUST_FAIL_WITH_MESSAGE(RUN(), "Failed to authenticate pointer.");
#endif // VIXL_INCLUDE_SIMULATOR_AARCH64

  TEARDOWN();
}
#endif // VIXL_NEGATIVE_TESTING

TEST(branch_to_reg_auth_a_zero) {
  SETUP_WITH_FEATURES(CPUFeatures::kPAuth);

  START();

  Label fn1, after_fn1;

  __ Mov(x29, lr);

  __ Mov(x1, 0);
  __ B(&after_fn1);

  __ Bind(&fn1);
  __ Mov(x0, lr);
  __ Mov(x1, 42);
  __ Paciza(x0);
  __ Braaz(x0);

  __ Bind(&after_fn1);
  __ Bl(&fn1);

  Label fn2, after_fn2, after_bl2;

  __ Mov(x2, 0);
  __ B(&after_fn2);

  __ Bind(&fn2);
  __ Mov(x0, lr);
  __ Mov(x2, 84);
  __ Paciza(x0);
  __ Blraaz(x0);

  __ Bind(&after_fn2);
  __ Bl(&fn2);
  __ Bind(&after_bl2);
  __ Mov(x3, lr);
  __ Adr(x4, &after_bl2);
  __ Adr(x5, &after_fn2);

  __ Xpaci(x0);
  __ Mov(lr, x29);
  END();

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
  RUN();

  ASSERT_EQUAL_64(x4, x0);
  ASSERT_EQUAL_64(x5, x3);
  ASSERT_EQUAL_64(42, x1);
  ASSERT_EQUAL_64(84, x2);
#endif

  TEARDOWN();
}


TEST(compare_branch) {
  SETUP();

  START();
  __ Mov(x0, 0);
  __ Mov(x1, 0);
  __ Mov(x2, 0);
  __ Mov(x3, 0);
  __ Mov(x4, 0);
  __ Mov(x5, 0);
  __ Mov(x16, 0);
  __ Mov(x17, 42);

  Label zt, zt_end;
  __ Cbz(w16, &zt);
  __ B(&zt_end);
  __ Bind(&zt);
  __ Mov(x0, 1);
  __ Bind(&zt_end);

  Label zf, zf_end;
  __ Cbz(x17, &zf);
  __ B(&zf_end);
  __ Bind(&zf);
  __ Mov(x1, 1);
  __ Bind(&zf_end);

  Label nzt, nzt_end;
  __ Cbnz(w17, &nzt);
  __ B(&nzt_end);
  __ Bind(&nzt);
  __ Mov(x2, 1);
  __ Bind(&nzt_end);

  Label nzf, nzf_end;
  __ Cbnz(x16, &nzf);
  __ B(&nzf_end);
  __ Bind(&nzf);
  __ Mov(x3, 1);
  __ Bind(&nzf_end);

  __ Mov(x18, 0xffffffff00000000);

  Label a, a_end;
  __ Cbz(w18, &a);
  __ B(&a_end);
  __ Bind(&a);
  __ Mov(x4, 1);
  __ Bind(&a_end);

  Label b, b_end;
  __ Cbnz(w18, &b);
  __ B(&b_end);
  __ Bind(&b);
  __ Mov(x5, 1);
  __ Bind(&b_end);

  END();

  RUN();

  ASSERT_EQUAL_64(1, x0);
  ASSERT_EQUAL_64(0, x1);
  ASSERT_EQUAL_64(1, x2);
  ASSERT_EQUAL_64(0, x3);
  ASSERT_EQUAL_64(1, x4);
  ASSERT_EQUAL_64(0, x5);

  TEARDOWN();
}


TEST(test_branch) {
  SETUP();

  START();
  __ Mov(x0, 0);
  __ Mov(x1, 0);
  __ Mov(x2, 0);
  __ Mov(x3, 0);
  __ Mov(x16, 0xaaaaaaaaaaaaaaaa);

  Label bz, bz_end;
  __ Tbz(w16, 0, &bz);
  __ B(&bz_end);
  __ Bind(&bz);
  __ Mov(x0, 1);
  __ Bind(&bz_end);

  Label bo, bo_end;
  __ Tbz(x16, 63, &bo);
  __ B(&bo_end);
  __ Bind(&bo);
  __ Mov(x1, 1);
  __ Bind(&bo_end);

  Label nbz, nbz_end;
  __ Tbnz(x16, 61, &nbz);
  __ B(&nbz_end);
  __ Bind(&nbz);
  __ Mov(x2, 1);
  __ Bind(&nbz_end);

  Label nbo, nbo_end;
  __ Tbnz(w16, 2, &nbo);
  __ B(&nbo_end);
  __ Bind(&nbo);
  __ Mov(x3, 1);
  __ Bind(&nbo_end);
  END();

  RUN();

  ASSERT_EQUAL_64(1, x0);
  ASSERT_EQUAL_64(0, x1);
  ASSERT_EQUAL_64(1, x2);
  ASSERT_EQUAL_64(0, x3);

  TEARDOWN();
}


TEST(branch_type) {
  SETUP();

  Label fail, done;

  START();
  __ Mov(x0, 0x0);
  __ Mov(x10, 0x7);
  __ Mov(x11, 0x0);

  // Test non-taken branches.
  __ Cmp(x10, 0x7);
  __ B(&fail, ne);
  __ B(&fail, never);
  __ B(&fail, reg_zero, x10);
  __ B(&fail, reg_not_zero, x11);
  __ B(&fail, reg_bit_clear, x10, 0);
  __ B(&fail, reg_bit_set, x10, 3);

  // Test taken branches.
  Label l1, l2, l3, l4, l5;
  __ Cmp(x10, 0x7);
  __ B(&l1, eq);
  __ B(&fail);
  __ Bind(&l1);
  __ B(&l2, always);
  __ B(&fail);
  __ Bind(&l2);
  __ B(&l3, reg_not_zero, x10);
  __ B(&fail);
  __ Bind(&l3);
  __ B(&l4, reg_bit_clear, x10, 15);
  __ B(&fail);
  __ Bind(&l4);
  __ B(&l5, reg_bit_set, x10, 1);
  __ B(&fail);
  __ Bind(&l5);

  __ B(&done);

  __ Bind(&fail);
  __ Mov(x0, 0x1);

  __ Bind(&done);

  END();

  RUN();

  ASSERT_EQUAL_64(0x0, x0);

  TEARDOWN();
}


TEST(ldr_str_offset) {
  SETUP();

  uint64_t src[2] = {0xfedcba9876543210, 0x0123456789abcdef};
  uint64_t dst[5] = {0, 0, 0, 0, 0};
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
  uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);

  START();
  __ Mov(x17, src_base);
  __ Mov(x18, dst_base);
  __ Ldr(w0, MemOperand(x17));
  __ Str(w0, MemOperand(x18));
  __ Ldr(w1, MemOperand(x17, 4));
  __ Str(w1, MemOperand(x18, 12));
  __ Ldr(x2, MemOperand(x17, 8));
  __ Str(x2, MemOperand(x18, 16));
  __ Ldrb(w3, MemOperand(x17, 1));
  __ Strb(w3, MemOperand(x18, 25));
  __ Ldrh(w4, MemOperand(x17, 2));
  __ Strh(w4, MemOperand(x18, 33));
  END();

  RUN();

  ASSERT_EQUAL_64(0x76543210, x0);
  ASSERT_EQUAL_64(0x76543210, dst[0]);
  ASSERT_EQUAL_64(0xfedcba98, x1);
  ASSERT_EQUAL_64(0xfedcba9800000000, dst[1]);
  ASSERT_EQUAL_64(0x0123456789abcdef, x2);
  ASSERT_EQUAL_64(0x0123456789abcdef, dst[2]);
  ASSERT_EQUAL_64(0x32, x3);
  ASSERT_EQUAL_64(0x3200, dst[3]);
  ASSERT_EQUAL_64(0x7654, x4);
  ASSERT_EQUAL_64(0x765400, dst[4]);
  ASSERT_EQUAL_64(src_base, x17);
  ASSERT_EQUAL_64(dst_base, x18);

  TEARDOWN();
}
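
// Note that the 33-byte offset used with Strh above is not a multiple of the
// access size, so it cannot be encoded in the scaled, 12-bit unsigned
// immediate form; for such offsets the assembler selects the unscaled form
// (sturh here), which takes a 9-bit signed byte offset instead.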


TEST(ldr_str_wide) {
  SETUP();

  uint32_t src[8192];
  uint32_t dst[8192];
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
  uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);
  memset(src, 0xaa, 8192 * sizeof(src[0]));
  memset(dst, 0xaa, 8192 * sizeof(dst[0]));
  src[0] = 0;
  src[6144] = 6144;
  src[8191] = 8191;

  START();
  __ Mov(x22, src_base);
  __ Mov(x23, dst_base);
  __ Mov(x24, src_base);
  __ Mov(x25, dst_base);
  __ Mov(x26, src_base);
  __ Mov(x27, dst_base);

  __ Ldr(w0, MemOperand(x22, 8191 * sizeof(src[0])));
  __ Str(w0, MemOperand(x23, 8191 * sizeof(dst[0])));
  __ Ldr(w1, MemOperand(x24, 4096 * sizeof(src[0]), PostIndex));
  __ Str(w1, MemOperand(x25, 4096 * sizeof(dst[0]), PostIndex));
  __ Ldr(w2, MemOperand(x26, 6144 * sizeof(src[0]), PreIndex));
  __ Str(w2, MemOperand(x27, 6144 * sizeof(dst[0]), PreIndex));
  END();

  RUN();

  ASSERT_EQUAL_32(8191, w0);
  ASSERT_EQUAL_32(8191, dst[8191]);
  ASSERT_EQUAL_64(src_base, x22);
  ASSERT_EQUAL_64(dst_base, x23);
  ASSERT_EQUAL_32(0, w1);
  ASSERT_EQUAL_32(0, dst[0]);
  ASSERT_EQUAL_64(src_base + 4096 * sizeof(src[0]), x24);
  ASSERT_EQUAL_64(dst_base + 4096 * sizeof(dst[0]), x25);
  ASSERT_EQUAL_32(6144, w2);
  ASSERT_EQUAL_32(6144, dst[6144]);
  ASSERT_EQUAL_64(src_base + 6144 * sizeof(src[0]), x26);
  ASSERT_EQUAL_64(dst_base + 6144 * sizeof(dst[0]), x27);

  TEARDOWN();
}


TEST(ldr_str_preindex) {
  SETUP();

  uint64_t src[2] = {0xfedcba9876543210, 0x0123456789abcdef};
  uint64_t dst[6] = {0, 0, 0, 0, 0, 0};
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
  uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);

  START();
  __ Mov(x17, src_base);
  __ Mov(x18, dst_base);
  __ Mov(x19, src_base);
  __ Mov(x20, dst_base);
  __ Mov(x21, src_base + 16);
  __ Mov(x22, dst_base + 40);
  __ Mov(x23, src_base);
  __ Mov(x24, dst_base);
  __ Mov(x25, src_base);
  __ Mov(x26, dst_base);
  __ Ldr(w0, MemOperand(x17, 4, PreIndex));
  __ Str(w0, MemOperand(x18, 12, PreIndex));
  __ Ldr(x1, MemOperand(x19, 8, PreIndex));
  __ Str(x1, MemOperand(x20, 16, PreIndex));
  __ Ldr(w2, MemOperand(x21, -4, PreIndex));
  __ Str(w2, MemOperand(x22, -4, PreIndex));
  __ Ldrb(w3, MemOperand(x23, 1, PreIndex));
  __ Strb(w3, MemOperand(x24, 25, PreIndex));
  __ Ldrh(w4, MemOperand(x25, 3, PreIndex));
  __ Strh(w4, MemOperand(x26, 41, PreIndex));
  END();

  RUN();

  ASSERT_EQUAL_64(0xfedcba98, x0);
  ASSERT_EQUAL_64(0xfedcba9800000000, dst[1]);
  ASSERT_EQUAL_64(0x0123456789abcdef, x1);
  ASSERT_EQUAL_64(0x0123456789abcdef, dst[2]);
  ASSERT_EQUAL_64(0x01234567, x2);
  ASSERT_EQUAL_64(0x0123456700000000, dst[4]);
  ASSERT_EQUAL_64(0x32, x3);
  ASSERT_EQUAL_64(0x3200, dst[3]);
  ASSERT_EQUAL_64(0x9876, x4);
  ASSERT_EQUAL_64(0x987600, dst[5]);
  ASSERT_EQUAL_64(src_base + 4, x17);
  ASSERT_EQUAL_64(dst_base + 12, x18);
  ASSERT_EQUAL_64(src_base + 8, x19);
  ASSERT_EQUAL_64(dst_base + 16, x20);
  ASSERT_EQUAL_64(src_base + 12, x21);
  ASSERT_EQUAL_64(dst_base + 36, x22);
  ASSERT_EQUAL_64(src_base + 1, x23);
  ASSERT_EQUAL_64(dst_base + 25, x24);
  ASSERT_EQUAL_64(src_base + 3, x25);
  ASSERT_EQUAL_64(dst_base + 41, x26);

  TEARDOWN();
}


TEST(ldr_str_postindex) {
  SETUP();

  uint64_t src[2] = {0xfedcba9876543210, 0x0123456789abcdef};
  uint64_t dst[6] = {0, 0, 0, 0, 0, 0};
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
  uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);

  START();
  __ Mov(x17, src_base + 4);
  __ Mov(x18, dst_base + 12);
  __ Mov(x19, src_base + 8);
  __ Mov(x20, dst_base + 16);
  __ Mov(x21, src_base + 8);
  __ Mov(x22, dst_base + 32);
  __ Mov(x23, src_base + 1);
  __ Mov(x24, dst_base + 25);
  __ Mov(x25, src_base + 3);
  __ Mov(x26, dst_base + 41);
  __ Ldr(w0, MemOperand(x17, 4, PostIndex));
  __ Str(w0, MemOperand(x18, 12, PostIndex));
  __ Ldr(x1, MemOperand(x19, 8, PostIndex));
  __ Str(x1, MemOperand(x20, 16, PostIndex));
  __ Ldr(x2, MemOperand(x21, -8, PostIndex));
  __ Str(x2, MemOperand(x22, -32, PostIndex));
  __ Ldrb(w3, MemOperand(x23, 1, PostIndex));
  __ Strb(w3, MemOperand(x24, 5, PostIndex));
  __ Ldrh(w4, MemOperand(x25, -3, PostIndex));
  __ Strh(w4, MemOperand(x26, -41, PostIndex));
  END();

  RUN();

  ASSERT_EQUAL_64(0xfedcba98, x0);
  ASSERT_EQUAL_64(0xfedcba9800000000, dst[1]);
  ASSERT_EQUAL_64(0x0123456789abcdef, x1);
  ASSERT_EQUAL_64(0x0123456789abcdef, dst[2]);
  ASSERT_EQUAL_64(0x0123456789abcdef, x2);
  ASSERT_EQUAL_64(0x0123456789abcdef, dst[4]);
  ASSERT_EQUAL_64(0x32, x3);
  ASSERT_EQUAL_64(0x3200, dst[3]);
  ASSERT_EQUAL_64(0x9876, x4);
  ASSERT_EQUAL_64(0x987600, dst[5]);
  ASSERT_EQUAL_64(src_base + 8, x17);
  ASSERT_EQUAL_64(dst_base + 24, x18);
  ASSERT_EQUAL_64(src_base + 16, x19);
  ASSERT_EQUAL_64(dst_base + 32, x20);
  ASSERT_EQUAL_64(src_base, x21);
  ASSERT_EQUAL_64(dst_base, x22);
  ASSERT_EQUAL_64(src_base + 2, x23);
  ASSERT_EQUAL_64(dst_base + 30, x24);
  ASSERT_EQUAL_64(src_base, x25);
  ASSERT_EQUAL_64(dst_base, x26);

  TEARDOWN();
}


TEST(ldr_str_largeindex) {
  SETUP();

  // This value won't fit in the immediate offset field of ldr/str instructions.
  int largeoffset = 0xabcdef;

  int64_t data[3] = {0x1122334455667788, 0, 0};
  uint64_t base_addr = reinterpret_cast<uintptr_t>(data);
  uint64_t drifted_addr = base_addr - largeoffset;

  // This test checks that we can use large immediate offsets when using the
  // PreIndex or PostIndex addressing modes of the MacroAssembler Ldr/Str
  // instructions.
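  //
  // When an offset does not fit in the 9-bit signed immediate of the
  // pre/post-indexed encodings, the MacroAssembler updates the base register
  // with separate arithmetic. Conceptually (a sketch, not necessarily the
  // exact expansion emitted):
  //
  //   Ldr(x0, MemOperand(x19, largeoffset, PreIndex))
  //     ~>  Add(x19, x19, largeoffset);  Ldr(x0, MemOperand(x19))
  //   Ldr(x1, MemOperand(x20, largeoffset, PostIndex))
  //     ~>  Ldr(x1, MemOperand(x20));  Add(x20, x20, largeoffset)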

  START();
  __ Mov(x19, drifted_addr);
  __ Ldr(x0, MemOperand(x19, largeoffset, PreIndex));

  __ Mov(x20, base_addr);
  __ Ldr(x1, MemOperand(x20, largeoffset, PostIndex));

  __ Mov(x21, drifted_addr);
  __ Str(x0, MemOperand(x21, largeoffset + 8, PreIndex));

  __ Mov(x22, base_addr + 16);
  __ Str(x0, MemOperand(x22, largeoffset, PostIndex));
  END();

  RUN();

  ASSERT_EQUAL_64(0x1122334455667788, data[0]);
  ASSERT_EQUAL_64(0x1122334455667788, data[1]);
  ASSERT_EQUAL_64(0x1122334455667788, data[2]);
  ASSERT_EQUAL_64(0x1122334455667788, x0);
  ASSERT_EQUAL_64(0x1122334455667788, x1);

  ASSERT_EQUAL_64(base_addr, x19);
  ASSERT_EQUAL_64(base_addr + largeoffset, x20);
  ASSERT_EQUAL_64(base_addr + 8, x21);
  ASSERT_EQUAL_64(base_addr + 16 + largeoffset, x22);

  TEARDOWN();
}


TEST(load_signed) {
  SETUP();

  uint32_t src[2] = {0x80008080, 0x7fff7f7f};
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x24, src_base);
  __ Ldrsb(w0, MemOperand(x24));
  __ Ldrsb(w1, MemOperand(x24, 4));
  __ Ldrsh(w2, MemOperand(x24));
  __ Ldrsh(w3, MemOperand(x24, 4));
  __ Ldrsb(x4, MemOperand(x24));
  __ Ldrsb(x5, MemOperand(x24, 4));
  __ Ldrsh(x6, MemOperand(x24));
  __ Ldrsh(x7, MemOperand(x24, 4));
  __ Ldrsw(x8, MemOperand(x24));
  __ Ldrsw(x9, MemOperand(x24, 4));
  END();

  RUN();

  ASSERT_EQUAL_64(0xffffff80, x0);
  ASSERT_EQUAL_64(0x0000007f, x1);
  ASSERT_EQUAL_64(0xffff8080, x2);
  ASSERT_EQUAL_64(0x00007f7f, x3);
  ASSERT_EQUAL_64(0xffffffffffffff80, x4);
  ASSERT_EQUAL_64(0x000000000000007f, x5);
  ASSERT_EQUAL_64(0xffffffffffff8080, x6);
  ASSERT_EQUAL_64(0x0000000000007f7f, x7);
  ASSERT_EQUAL_64(0xffffffff80008080, x8);
  ASSERT_EQUAL_64(0x000000007fff7f7f, x9);

  TEARDOWN();
}
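
// Note the pattern in the expected values above: loads into a W register
// sign-extend to 32 bits and zero the upper 32 bits of the X register, while
// loads into an X register sign-extend all the way to 64 bits.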


TEST(load_store_regoffset) {
  SETUP();

  uint32_t src[3] = {1, 2, 3};
  uint32_t dst[4] = {0, 0, 0, 0};
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
  uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);

  START();
  __ Mov(x16, src_base);
  __ Mov(x17, dst_base);
  __ Mov(x18, src_base + 3 * sizeof(src[0]));
  __ Mov(x19, dst_base + 3 * sizeof(dst[0]));
  __ Mov(x20, dst_base + 4 * sizeof(dst[0]));
  __ Mov(x24, 0);
  __ Mov(x25, 4);
  __ Mov(x26, -4);
  __ Mov(x27, 0xfffffffc);  // 32-bit -4.
  __ Mov(x28, 0xfffffffe);  // 32-bit -2.
  __ Mov(x29, 0xffffffff);  // 32-bit -1.
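
  // With an extended register offset, the address is computed as
  // base + (extend(Wm) << shift). For example, MemOperand(x18, x27, SXTW)
  // addresses x18 - 4, because 0xfffffffc sign-extends to -4, and
  // MemOperand(x18, x28, SXTW, 2) addresses x18 + (-2 << 2) = x18 - 8.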

  __ Ldr(w0, MemOperand(x16, x24));
  __ Ldr(x1, MemOperand(x16, x25));
  __ Ldr(w2, MemOperand(x18, x26));
  __ Ldr(w3, MemOperand(x18, x27, SXTW));
  __ Ldr(w4, MemOperand(x18, x28, SXTW, 2));
  __ Str(w0, MemOperand(x17, x24));
  __ Str(x1, MemOperand(x17, x25));
  __ Str(w2, MemOperand(x20, x29, SXTW, 2));
  END();

  RUN();

  ASSERT_EQUAL_64(1, x0);
  ASSERT_EQUAL_64(0x0000000300000002, x1);
  ASSERT_EQUAL_64(3, x2);
  ASSERT_EQUAL_64(3, x3);
  ASSERT_EQUAL_64(2, x4);
  ASSERT_EQUAL_32(1, dst[0]);
  ASSERT_EQUAL_32(2, dst[1]);
  ASSERT_EQUAL_32(3, dst[2]);
  ASSERT_EQUAL_32(3, dst[3]);

  TEARDOWN();
}


TEST(load_store_float) {
  SETUP_WITH_FEATURES(CPUFeatures::kFP);

  float src[3] = {1.0, 2.0, 3.0};
  float dst[3] = {0.0, 0.0, 0.0};
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
  uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);

  START();
  __ Mov(x17, src_base);
  __ Mov(x18, dst_base);
  __ Mov(x19, src_base);
  __ Mov(x20, dst_base);
  __ Mov(x21, src_base);
  __ Mov(x22, dst_base);
  __ Ldr(s0, MemOperand(x17, sizeof(src[0])));
  __ Str(s0, MemOperand(x18, sizeof(dst[0]), PostIndex));
  __ Ldr(s1, MemOperand(x19, sizeof(src[0]), PostIndex));
  __ Str(s1, MemOperand(x20, 2 * sizeof(dst[0]), PreIndex));
  __ Ldr(s2, MemOperand(x21, 2 * sizeof(src[0]), PreIndex));
  __ Str(s2, MemOperand(x22, sizeof(dst[0])));
  END();

  RUN();

  ASSERT_EQUAL_FP32(2.0, s0);
  ASSERT_EQUAL_FP32(2.0, dst[0]);
  ASSERT_EQUAL_FP32(1.0, s1);
  ASSERT_EQUAL_FP32(1.0, dst[2]);
  ASSERT_EQUAL_FP32(3.0, s2);
  ASSERT_EQUAL_FP32(3.0, dst[1]);
  ASSERT_EQUAL_64(src_base, x17);
  ASSERT_EQUAL_64(dst_base + sizeof(dst[0]), x18);
  ASSERT_EQUAL_64(src_base + sizeof(src[0]), x19);
  ASSERT_EQUAL_64(dst_base + 2 * sizeof(dst[0]), x20);
  ASSERT_EQUAL_64(src_base + 2 * sizeof(src[0]), x21);
  ASSERT_EQUAL_64(dst_base, x22);

  TEARDOWN();
}


TEST(load_store_double) {
  SETUP_WITH_FEATURES(CPUFeatures::kFP);

  double src[3] = {1.0, 2.0, 3.0};
  double dst[3] = {0.0, 0.0, 0.0};
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
  uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);

  START();
  __ Mov(x17, src_base);
  __ Mov(x18, dst_base);
  __ Mov(x19, src_base);
  __ Mov(x20, dst_base);
  __ Mov(x21, src_base);
  __ Mov(x22, dst_base);
  __ Ldr(d0, MemOperand(x17, sizeof(src[0])));
  __ Str(d0, MemOperand(x18, sizeof(dst[0]), PostIndex));
  __ Ldr(d1, MemOperand(x19, sizeof(src[0]), PostIndex));
  __ Str(d1, MemOperand(x20, 2 * sizeof(dst[0]), PreIndex));
  __ Ldr(d2, MemOperand(x21, 2 * sizeof(src[0]), PreIndex));
  __ Str(d2, MemOperand(x22, sizeof(dst[0])));
  END();

  RUN();

  ASSERT_EQUAL_FP64(2.0, d0);
  ASSERT_EQUAL_FP64(2.0, dst[0]);
  ASSERT_EQUAL_FP64(1.0, d1);
  ASSERT_EQUAL_FP64(1.0, dst[2]);
  ASSERT_EQUAL_FP64(3.0, d2);
  ASSERT_EQUAL_FP64(3.0, dst[1]);
  ASSERT_EQUAL_64(src_base, x17);
  ASSERT_EQUAL_64(dst_base + sizeof(dst[0]), x18);
  ASSERT_EQUAL_64(src_base + sizeof(src[0]), x19);
  ASSERT_EQUAL_64(dst_base + 2 * sizeof(dst[0]), x20);
  ASSERT_EQUAL_64(src_base + 2 * sizeof(src[0]), x21);
  ASSERT_EQUAL_64(dst_base, x22);

  TEARDOWN();
}


TEST(load_store_b) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint8_t src[3] = {0x12, 0x23, 0x34};
  uint8_t dst[3] = {0, 0, 0};
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
  uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);

  START();
  __ Mov(x17, src_base);
  __ Mov(x18, dst_base);
  __ Mov(x19, src_base);
  __ Mov(x20, dst_base);
  __ Mov(x21, src_base);
  __ Mov(x22, dst_base);
  __ Ldr(b0, MemOperand(x17, sizeof(src[0])));
  __ Str(b0, MemOperand(x18, sizeof(dst[0]), PostIndex));
  __ Ldr(b1, MemOperand(x19, sizeof(src[0]), PostIndex));
  __ Str(b1, MemOperand(x20, 2 * sizeof(dst[0]), PreIndex));
  __ Ldr(b2, MemOperand(x21, 2 * sizeof(src[0]), PreIndex));
  __ Str(b2, MemOperand(x22, sizeof(dst[0])));
  END();

  RUN();

  ASSERT_EQUAL_128(0, 0x23, q0);
  ASSERT_EQUAL_64(0x23, dst[0]);
  ASSERT_EQUAL_128(0, 0x12, q1);
  ASSERT_EQUAL_64(0x12, dst[2]);
  ASSERT_EQUAL_128(0, 0x34, q2);
  ASSERT_EQUAL_64(0x34, dst[1]);
  ASSERT_EQUAL_64(src_base, x17);
  ASSERT_EQUAL_64(dst_base + sizeof(dst[0]), x18);
  ASSERT_EQUAL_64(src_base + sizeof(src[0]), x19);
  ASSERT_EQUAL_64(dst_base + 2 * sizeof(dst[0]), x20);
  ASSERT_EQUAL_64(src_base + 2 * sizeof(src[0]), x21);
  ASSERT_EQUAL_64(dst_base, x22);

  TEARDOWN();
}


TEST(load_store_h) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint16_t src[3] = {0x1234, 0x2345, 0x3456};
  uint16_t dst[3] = {0, 0, 0};
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
  uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);

  START();
  __ Mov(x17, src_base);
  __ Mov(x18, dst_base);
  __ Mov(x19, src_base);
  __ Mov(x20, dst_base);
  __ Mov(x21, src_base);
  __ Mov(x22, dst_base);
  __ Ldr(h0, MemOperand(x17, sizeof(src[0])));
  __ Str(h0, MemOperand(x18, sizeof(dst[0]), PostIndex));
  __ Ldr(h1, MemOperand(x19, sizeof(src[0]), PostIndex));
  __ Str(h1, MemOperand(x20, 2 * sizeof(dst[0]), PreIndex));
  __ Ldr(h2, MemOperand(x21, 2 * sizeof(src[0]), PreIndex));
  __ Str(h2, MemOperand(x22, sizeof(dst[0])));
  END();

  RUN();

  ASSERT_EQUAL_128(0, 0x2345, q0);
  ASSERT_EQUAL_64(0x2345, dst[0]);
  ASSERT_EQUAL_128(0, 0x1234, q1);
  ASSERT_EQUAL_64(0x1234, dst[2]);
  ASSERT_EQUAL_128(0, 0x3456, q2);
  ASSERT_EQUAL_64(0x3456, dst[1]);
  ASSERT_EQUAL_64(src_base, x17);
  ASSERT_EQUAL_64(dst_base + sizeof(dst[0]), x18);
  ASSERT_EQUAL_64(src_base + sizeof(src[0]), x19);
  ASSERT_EQUAL_64(dst_base + 2 * sizeof(dst[0]), x20);
  ASSERT_EQUAL_64(src_base + 2 * sizeof(src[0]), x21);
  ASSERT_EQUAL_64(dst_base, x22);

  TEARDOWN();
}


TEST(load_store_q) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint8_t src[48] = {0x10, 0x32, 0x54, 0x76, 0x98, 0xba, 0xdc, 0xfe, 0x01, 0x23,
                     0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, 0x21, 0x43, 0x65, 0x87,
                     0xa9, 0xcb, 0xed, 0x0f, 0x12, 0x34, 0x56, 0x78, 0x9a, 0xbc,
                     0xde, 0xf0, 0x24, 0x46, 0x68, 0x8a, 0xac, 0xce, 0xe0, 0x02,
                     0x42, 0x64, 0x86, 0xa8, 0xca, 0xec, 0x0e, 0x20};

  uint64_t dst[6] = {0, 0, 0, 0, 0, 0};
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
  uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);

  START();
  __ Mov(x17, src_base);
  __ Mov(x18, dst_base);
  __ Mov(x19, src_base);
  __ Mov(x20, dst_base);
  __ Mov(x21, src_base);
  __ Mov(x22, dst_base);
  __ Ldr(q0, MemOperand(x17, 16));
  __ Str(q0, MemOperand(x18, 16, PostIndex));
  __ Ldr(q1, MemOperand(x19, 16, PostIndex));
  __ Str(q1, MemOperand(x20, 32, PreIndex));
  __ Ldr(q2, MemOperand(x21, 32, PreIndex));
  __ Str(q2, MemOperand(x22, 16));
  END();

  RUN();

  ASSERT_EQUAL_128(0xf0debc9a78563412, 0x0fedcba987654321, q0);
  ASSERT_EQUAL_64(0x0fedcba987654321, dst[0]);
  ASSERT_EQUAL_64(0xf0debc9a78563412, dst[1]);
  ASSERT_EQUAL_128(0xefcdab8967452301, 0xfedcba9876543210, q1);
  ASSERT_EQUAL_64(0xfedcba9876543210, dst[4]);
  ASSERT_EQUAL_64(0xefcdab8967452301, dst[5]);
  ASSERT_EQUAL_128(0x200eeccaa8866442, 0x02e0ceac8a684624, q2);
  ASSERT_EQUAL_64(0x02e0ceac8a684624, dst[2]);
  ASSERT_EQUAL_64(0x200eeccaa8866442, dst[3]);
  ASSERT_EQUAL_64(src_base, x17);
  ASSERT_EQUAL_64(dst_base + 16, x18);
  ASSERT_EQUAL_64(src_base + 16, x19);
  ASSERT_EQUAL_64(dst_base + 32, x20);
  ASSERT_EQUAL_64(src_base + 32, x21);
  ASSERT_EQUAL_64(dst_base, x22);

  TEARDOWN();
}


TEST(load_store_v_regoffset) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint8_t src[64];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uint8_t dst[64];
  memset(dst, 0, sizeof(dst));

  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
  uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);

  START();
  __ Mov(x17, src_base + 16);
  __ Mov(x18, 1);
  __ Mov(w19, -1);
  __ Mov(x20, dst_base - 1);

  __ Ldr(b0, MemOperand(x17, x18));
  __ Ldr(b1, MemOperand(x17, x19, SXTW));

  __ Ldr(h2, MemOperand(x17, x18));
  __ Ldr(h3, MemOperand(x17, x18, UXTW, 1));
  __ Ldr(h4, MemOperand(x17, x19, SXTW, 1));
  __ Ldr(h5, MemOperand(x17, x18, LSL, 1));

  __ Ldr(s16, MemOperand(x17, x18));
  __ Ldr(s17, MemOperand(x17, x18, UXTW, 2));
  __ Ldr(s18, MemOperand(x17, x19, SXTW, 2));
  __ Ldr(s19, MemOperand(x17, x18, LSL, 2));

  __ Ldr(d20, MemOperand(x17, x18));
  __ Ldr(d21, MemOperand(x17, x18, UXTW, 3));
  __ Ldr(d22, MemOperand(x17, x19, SXTW, 3));
  __ Ldr(d23, MemOperand(x17, x18, LSL, 3));

  __ Ldr(q24, MemOperand(x17, x18));
  __ Ldr(q25, MemOperand(x17, x18, UXTW, 4));
  __ Ldr(q26, MemOperand(x17, x19, SXTW, 4));
  __ Ldr(q27, MemOperand(x17, x18, LSL, 4));

  // Store [bhsdq]27 to adjacent memory locations, then load again to check.
  __ Str(b27, MemOperand(x20, x18));
  __ Str(h27, MemOperand(x20, x18, UXTW, 1));
  __ Add(x20, x20, 8);
  __ Str(s27, MemOperand(x20, x19, SXTW, 2));
  __ Sub(x20, x20, 8);
  __ Str(d27, MemOperand(x20, x18, LSL, 3));
  __ Add(x20, x20, 32);
  __ Str(q27, MemOperand(x20, x19, SXTW, 4));

  __ Sub(x20, x20, 32);
  __ Ldr(q6, MemOperand(x20, x18));
  __ Ldr(q7, MemOperand(x20, x18, LSL, 4));

  END();

  RUN();

  ASSERT_EQUAL_128(0, 0x11, q0);
  ASSERT_EQUAL_128(0, 0x0f, q1);
  ASSERT_EQUAL_128(0, 0x1211, q2);
  ASSERT_EQUAL_128(0, 0x1312, q3);
  ASSERT_EQUAL_128(0, 0x0f0e, q4);
  ASSERT_EQUAL_128(0, 0x1312, q5);
  ASSERT_EQUAL_128(0, 0x14131211, q16);
  ASSERT_EQUAL_128(0, 0x17161514, q17);
  ASSERT_EQUAL_128(0, 0x0f0e0d0c, q18);
  ASSERT_EQUAL_128(0, 0x17161514, q19);
  ASSERT_EQUAL_128(0, 0x1817161514131211, q20);
  ASSERT_EQUAL_128(0, 0x1f1e1d1c1b1a1918, q21);
  ASSERT_EQUAL_128(0, 0x0f0e0d0c0b0a0908, q22);
  ASSERT_EQUAL_128(0, 0x1f1e1d1c1b1a1918, q23);
  ASSERT_EQUAL_128(0x201f1e1d1c1b1a19, 0x1817161514131211, q24);
  ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726252423222120, q25);
  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q26);
  ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726252423222120, q27);
  ASSERT_EQUAL_128(0x2027262524232221, 0x2023222120212020, q6);
  ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726252423222120, q7);

  TEARDOWN();
}


TEST(neon_ld1_d) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint8_t src[32 + 5];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x17, src_base);
  __ Ldr(q2, MemOperand(x17));  // Initialise the top 64 bits of the Q register.
  __ Ld1(v2.V8B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld1(v3.V8B(), v4.V8B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld1(v5.V4H(), v6.V4H(), v7.V4H(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld1(v16.V2S(), v17.V2S(), v18.V2S(), v19.V2S(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld1(v30.V2S(), v31.V2S(), v0.V2S(), v1.V2S(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld1(v20.V1D(), v21.V1D(), v22.V1D(), v23.V1D(), MemOperand(x17));
  END();

  RUN();

  ASSERT_EQUAL_128(0, 0x0706050403020100, q2);
  ASSERT_EQUAL_128(0, 0x0807060504030201, q3);
  ASSERT_EQUAL_128(0, 0x100f0e0d0c0b0a09, q4);
  ASSERT_EQUAL_128(0, 0x0908070605040302, q5);
  ASSERT_EQUAL_128(0, 0x11100f0e0d0c0b0a, q6);
  ASSERT_EQUAL_128(0, 0x1918171615141312, q7);
  ASSERT_EQUAL_128(0, 0x0a09080706050403, q16);
  ASSERT_EQUAL_128(0, 0x1211100f0e0d0c0b, q17);
  ASSERT_EQUAL_128(0, 0x1a19181716151413, q18);
  ASSERT_EQUAL_128(0, 0x2221201f1e1d1c1b, q19);
  ASSERT_EQUAL_128(0, 0x0b0a090807060504, q30);
  ASSERT_EQUAL_128(0, 0x131211100f0e0d0c, q31);
  ASSERT_EQUAL_128(0, 0x1b1a191817161514, q0);
  ASSERT_EQUAL_128(0, 0x232221201f1e1d1c, q1);
  ASSERT_EQUAL_128(0, 0x0c0b0a0908070605, q20);
  ASSERT_EQUAL_128(0, 0x14131211100f0e0d, q21);
  ASSERT_EQUAL_128(0, 0x1c1b1a1918171615, q22);
  ASSERT_EQUAL_128(0, 0x24232221201f1e1d, q23);

  TEARDOWN();
}
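
// Note that, unlike Ld2/Ld3/Ld4, Ld1 performs no de-interleaving: with
// multiple registers it simply fills consecutive registers from consecutive
// memory.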
3785
3786
TEST(neon_ld1_d_postindex)3787 TEST(neon_ld1_d_postindex) {
3788 SETUP_WITH_FEATURES(CPUFeatures::kNEON);
3789
3790 uint8_t src[32 + 5];
3791 for (unsigned i = 0; i < sizeof(src); i++) {
3792 src[i] = i;
3793 }
3794 uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
3795
3796 START();
3797 __ Mov(x17, src_base);
3798 __ Mov(x18, src_base + 1);
3799 __ Mov(x19, src_base + 2);
3800 __ Mov(x20, src_base + 3);
3801 __ Mov(x21, src_base + 4);
3802 __ Mov(x22, src_base + 5);
3803 __ Mov(x23, 1);
3804 __ Ldr(q2, MemOperand(x17)); // Initialise top 64-bits of Q register.
3805 __ Ld1(v2.V8B(), MemOperand(x17, x23, PostIndex));
3806 __ Ld1(v3.V8B(), v4.V8B(), MemOperand(x18, 16, PostIndex));
3807 __ Ld1(v5.V4H(), v6.V4H(), v7.V4H(), MemOperand(x19, 24, PostIndex));
3808 __ Ld1(v16.V2S(),
3809 v17.V2S(),
3810 v18.V2S(),
3811 v19.V2S(),
3812 MemOperand(x20, 32, PostIndex));
3813 __ Ld1(v30.V2S(),
3814 v31.V2S(),
3815 v0.V2S(),
3816 v1.V2S(),
3817 MemOperand(x21, 32, PostIndex));
3818 __ Ld1(v20.V1D(),
3819 v21.V1D(),
3820 v22.V1D(),
3821 v23.V1D(),
3822 MemOperand(x22, 32, PostIndex));
3823 END();
3824
3825 RUN();
3826
3827 ASSERT_EQUAL_128(0, 0x0706050403020100, q2);
3828 ASSERT_EQUAL_128(0, 0x0807060504030201, q3);
3829 ASSERT_EQUAL_128(0, 0x100f0e0d0c0b0a09, q4);
3830 ASSERT_EQUAL_128(0, 0x0908070605040302, q5);
3831 ASSERT_EQUAL_128(0, 0x11100f0e0d0c0b0a, q6);
3832 ASSERT_EQUAL_128(0, 0x1918171615141312, q7);
3833 ASSERT_EQUAL_128(0, 0x0a09080706050403, q16);
3834 ASSERT_EQUAL_128(0, 0x1211100f0e0d0c0b, q17);
3835 ASSERT_EQUAL_128(0, 0x1a19181716151413, q18);
3836 ASSERT_EQUAL_128(0, 0x2221201f1e1d1c1b, q19);
3837 ASSERT_EQUAL_128(0, 0x0b0a090807060504, q30);
3838 ASSERT_EQUAL_128(0, 0x131211100f0e0d0c, q31);
3839 ASSERT_EQUAL_128(0, 0x1b1a191817161514, q0);
3840 ASSERT_EQUAL_128(0, 0x232221201f1e1d1c, q1);
3841 ASSERT_EQUAL_128(0, 0x0c0b0a0908070605, q20);
3842 ASSERT_EQUAL_128(0, 0x14131211100f0e0d, q21);
3843 ASSERT_EQUAL_128(0, 0x1c1b1a1918171615, q22);
3844 ASSERT_EQUAL_128(0, 0x24232221201f1e1d, q23);
3845 ASSERT_EQUAL_64(src_base + 1, x17);
3846 ASSERT_EQUAL_64(src_base + 1 + 16, x18);
3847 ASSERT_EQUAL_64(src_base + 2 + 24, x19);
3848 ASSERT_EQUAL_64(src_base + 3 + 32, x20);
3849 ASSERT_EQUAL_64(src_base + 4 + 32, x21);
3850 ASSERT_EQUAL_64(src_base + 5 + 32, x22);
3851
3852 TEARDOWN();
3853 }
3854
3855
TEST(neon_ld1_q)3856 TEST(neon_ld1_q) {
3857 SETUP_WITH_FEATURES(CPUFeatures::kNEON);
3858
3859 uint8_t src[64 + 4];
3860 for (unsigned i = 0; i < sizeof(src); i++) {
3861 src[i] = i;
3862 }
3863 uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
3864
3865 START();
3866 __ Mov(x17, src_base);
3867 __ Ld1(v2.V16B(), MemOperand(x17));
3868 __ Add(x17, x17, 1);
3869 __ Ld1(v3.V16B(), v4.V16B(), MemOperand(x17));
3870 __ Add(x17, x17, 1);
3871 __ Ld1(v5.V8H(), v6.V8H(), v7.V8H(), MemOperand(x17));
3872 __ Add(x17, x17, 1);
3873 __ Ld1(v16.V4S(), v17.V4S(), v18.V4S(), v19.V4S(), MemOperand(x17));
3874 __ Add(x17, x17, 1);
3875 __ Ld1(v30.V2D(), v31.V2D(), v0.V2D(), v1.V2D(), MemOperand(x17));
3876 END();
3877
3878 RUN();
3879
3880 ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q2);
3881 ASSERT_EQUAL_128(0x100f0e0d0c0b0a09, 0x0807060504030201, q3);
3882 ASSERT_EQUAL_128(0x201f1e1d1c1b1a19, 0x1817161514131211, q4);
3883 ASSERT_EQUAL_128(0x11100f0e0d0c0b0a, 0x0908070605040302, q5);
3884 ASSERT_EQUAL_128(0x21201f1e1d1c1b1a, 0x1918171615141312, q6);
3885 ASSERT_EQUAL_128(0x31302f2e2d2c2b2a, 0x2928272625242322, q7);
3886 ASSERT_EQUAL_128(0x1211100f0e0d0c0b, 0x0a09080706050403, q16);
3887 ASSERT_EQUAL_128(0x2221201f1e1d1c1b, 0x1a19181716151413, q17);
3888 ASSERT_EQUAL_128(0x3231302f2e2d2c2b, 0x2a29282726252423, q18);
3889 ASSERT_EQUAL_128(0x4241403f3e3d3c3b, 0x3a39383736353433, q19);
3890 ASSERT_EQUAL_128(0x131211100f0e0d0c, 0x0b0a090807060504, q30);
3891 ASSERT_EQUAL_128(0x232221201f1e1d1c, 0x1b1a191817161514, q31);
3892 ASSERT_EQUAL_128(0x333231302f2e2d2c, 0x2b2a292827262524, q0);
3893 ASSERT_EQUAL_128(0x434241403f3e3d3c, 0x3b3a393837363534, q1);
3894
3895 TEARDOWN();
3896 }
3897
3898
TEST(neon_ld1_q_postindex)3899 TEST(neon_ld1_q_postindex) {
3900 SETUP_WITH_FEATURES(CPUFeatures::kNEON);
3901
3902 uint8_t src[64 + 4];
3903 for (unsigned i = 0; i < sizeof(src); i++) {
3904 src[i] = i;
3905 }
3906 uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
3907
3908 START();
3909 __ Mov(x17, src_base);
3910 __ Mov(x18, src_base + 1);
3911 __ Mov(x19, src_base + 2);
3912 __ Mov(x20, src_base + 3);
3913 __ Mov(x21, src_base + 4);
3914 __ Mov(x22, 1);
3915 __ Ld1(v2.V16B(), MemOperand(x17, x22, PostIndex));
3916 __ Ld1(v3.V16B(), v4.V16B(), MemOperand(x18, 32, PostIndex));
3917 __ Ld1(v5.V8H(), v6.V8H(), v7.V8H(), MemOperand(x19, 48, PostIndex));
3918 __ Ld1(v16.V4S(),
3919 v17.V4S(),
3920 v18.V4S(),
3921 v19.V4S(),
3922 MemOperand(x20, 64, PostIndex));
3923 __ Ld1(v30.V2D(),
3924 v31.V2D(),
3925 v0.V2D(),
3926 v1.V2D(),
3927 MemOperand(x21, 64, PostIndex));
3928 END();
3929
3930 RUN();
3931
3932 ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q2);
3933 ASSERT_EQUAL_128(0x100f0e0d0c0b0a09, 0x0807060504030201, q3);
3934 ASSERT_EQUAL_128(0x201f1e1d1c1b1a19, 0x1817161514131211, q4);
3935 ASSERT_EQUAL_128(0x11100f0e0d0c0b0a, 0x0908070605040302, q5);
3936 ASSERT_EQUAL_128(0x21201f1e1d1c1b1a, 0x1918171615141312, q6);
3937 ASSERT_EQUAL_128(0x31302f2e2d2c2b2a, 0x2928272625242322, q7);
3938 ASSERT_EQUAL_128(0x1211100f0e0d0c0b, 0x0a09080706050403, q16);
3939 ASSERT_EQUAL_128(0x2221201f1e1d1c1b, 0x1a19181716151413, q17);
3940 ASSERT_EQUAL_128(0x3231302f2e2d2c2b, 0x2a29282726252423, q18);
3941 ASSERT_EQUAL_128(0x4241403f3e3d3c3b, 0x3a39383736353433, q19);
3942 ASSERT_EQUAL_128(0x131211100f0e0d0c, 0x0b0a090807060504, q30);
3943 ASSERT_EQUAL_128(0x232221201f1e1d1c, 0x1b1a191817161514, q31);
3944 ASSERT_EQUAL_128(0x333231302f2e2d2c, 0x2b2a292827262524, q0);
3945 ASSERT_EQUAL_128(0x434241403f3e3d3c, 0x3b3a393837363534, q1);
3946 ASSERT_EQUAL_64(src_base + 1, x17);
3947 ASSERT_EQUAL_64(src_base + 1 + 32, x18);
3948 ASSERT_EQUAL_64(src_base + 2 + 48, x19);
3949 ASSERT_EQUAL_64(src_base + 3 + 64, x20);
3950 ASSERT_EQUAL_64(src_base + 4 + 64, x21);
3951
3952 TEARDOWN();
3953 }
3954
3955
TEST(neon_ld1_lane) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint8_t src[64];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();

  // Test loading whole register by element.
  __ Mov(x17, src_base);
  for (int i = 15; i >= 0; i--) {
    __ Ld1(v0.B(), i, MemOperand(x17));
    __ Add(x17, x17, 1);
  }

  __ Mov(x17, src_base);
  for (int i = 7; i >= 0; i--) {
    __ Ld1(v1.H(), i, MemOperand(x17));
    __ Add(x17, x17, 1);
  }

  __ Mov(x17, src_base);
  for (int i = 3; i >= 0; i--) {
    __ Ld1(v2.S(), i, MemOperand(x17));
    __ Add(x17, x17, 1);
  }

  __ Mov(x17, src_base);
  for (int i = 1; i >= 0; i--) {
    __ Ld1(v3.D(), i, MemOperand(x17));
    __ Add(x17, x17, 1);
  }

  // Test loading a single element into an initialised register.
  __ Mov(x17, src_base);
  __ Ldr(q4, MemOperand(x17));
  __ Ld1(v4.B(), 4, MemOperand(x17));
  __ Ldr(q5, MemOperand(x17));
  __ Ld1(v5.H(), 3, MemOperand(x17));
  __ Ldr(q6, MemOperand(x17));
  __ Ld1(v6.S(), 2, MemOperand(x17));
  __ Ldr(q7, MemOperand(x17));
  __ Ld1(v7.D(), 1, MemOperand(x17));

  END();

  RUN();

  ASSERT_EQUAL_128(0x0001020304050607, 0x08090a0b0c0d0e0f, q0);
  ASSERT_EQUAL_128(0x0100020103020403, 0x0504060507060807, q1);
  ASSERT_EQUAL_128(0x0302010004030201, 0x0504030206050403, q2);
  ASSERT_EQUAL_128(0x0706050403020100, 0x0807060504030201, q3);
  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050003020100, q4);
  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0100050403020100, q5);
  ASSERT_EQUAL_128(0x0f0e0d0c03020100, 0x0706050403020100, q6);
  ASSERT_EQUAL_128(0x0706050403020100, 0x0706050403020100, q7);

  TEARDOWN();
}

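// Ld2 de-interleaves consecutive element pairs. A worked example matching the
// first two assertions of the test below: from the byte stream
// {0x00, 0x01, 0x02, ...},
//   __ Ld2(v2.V8B(), v3.V8B(), MemOperand(x17));
// leaves the even-indexed bytes {0x00, 0x02, ..., 0x0e} in v2
// (0x0e0c0a0806040200) and the odd-indexed bytes {0x01, 0x03, ..., 0x0f} in
// v3 (0x0f0d0b0907050301).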
TEST(neon_ld2_d) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint8_t src[64 + 4];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x17, src_base);
  __ Ld2(v2.V8B(), v3.V8B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld2(v4.V8B(), v5.V8B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld2(v6.V4H(), v7.V4H(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld2(v31.V2S(), v0.V2S(), MemOperand(x17));
  END();

  RUN();

  ASSERT_EQUAL_128(0, 0x0e0c0a0806040200, q2);
  ASSERT_EQUAL_128(0, 0x0f0d0b0907050301, q3);
  ASSERT_EQUAL_128(0, 0x0f0d0b0907050301, q4);
  ASSERT_EQUAL_128(0, 0x100e0c0a08060402, q5);
  ASSERT_EQUAL_128(0, 0x0f0e0b0a07060302, q6);
  ASSERT_EQUAL_128(0, 0x11100d0c09080504, q7);
  ASSERT_EQUAL_128(0, 0x0e0d0c0b06050403, q31);
  ASSERT_EQUAL_128(0, 0x1211100f0a090807, q0);

  TEARDOWN();
}


TEST(neon_ld2_d_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint8_t src[32 + 4];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x17, src_base);
  __ Mov(x18, src_base + 1);
  __ Mov(x19, src_base + 2);
  __ Mov(x20, src_base + 3);
  __ Mov(x21, src_base + 4);
  __ Mov(x22, 1);
  __ Ld2(v2.V8B(), v3.V8B(), MemOperand(x17, x22, PostIndex));
  __ Ld2(v4.V8B(), v5.V8B(), MemOperand(x18, 16, PostIndex));
  __ Ld2(v5.V4H(), v6.V4H(), MemOperand(x19, 16, PostIndex));
  __ Ld2(v16.V2S(), v17.V2S(), MemOperand(x20, 16, PostIndex));
  __ Ld2(v31.V2S(), v0.V2S(), MemOperand(x21, 16, PostIndex));
  END();

  RUN();

  ASSERT_EQUAL_128(0, 0x0e0c0a0806040200, q2);
  ASSERT_EQUAL_128(0, 0x0f0d0b0907050301, q3);
  ASSERT_EQUAL_128(0, 0x0f0d0b0907050301, q4);
  ASSERT_EQUAL_128(0, 0x0f0e0b0a07060302, q5);
  ASSERT_EQUAL_128(0, 0x11100d0c09080504, q6);
  ASSERT_EQUAL_128(0, 0x0e0d0c0b06050403, q16);
  ASSERT_EQUAL_128(0, 0x1211100f0a090807, q17);
  ASSERT_EQUAL_128(0, 0x0f0e0d0c07060504, q31);
  ASSERT_EQUAL_128(0, 0x131211100b0a0908, q0);

  ASSERT_EQUAL_64(src_base + 1, x17);
  ASSERT_EQUAL_64(src_base + 1 + 16, x18);
  ASSERT_EQUAL_64(src_base + 2 + 16, x19);
  ASSERT_EQUAL_64(src_base + 3 + 16, x20);
  ASSERT_EQUAL_64(src_base + 4 + 16, x21);

  TEARDOWN();
}


TEST(neon_ld2_q) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint8_t src[64 + 4];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x17, src_base);
  __ Ld2(v2.V16B(), v3.V16B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld2(v4.V16B(), v5.V16B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld2(v6.V8H(), v7.V8H(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld2(v16.V4S(), v17.V4S(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld2(v31.V2D(), v0.V2D(), MemOperand(x17));
  END();

  RUN();

  ASSERT_EQUAL_128(0x1e1c1a1816141210, 0x0e0c0a0806040200, q2);
  ASSERT_EQUAL_128(0x1f1d1b1917151311, 0x0f0d0b0907050301, q3);
  ASSERT_EQUAL_128(0x1f1d1b1917151311, 0x0f0d0b0907050301, q4);
  ASSERT_EQUAL_128(0x201e1c1a18161412, 0x100e0c0a08060402, q5);
  ASSERT_EQUAL_128(0x1f1e1b1a17161312, 0x0f0e0b0a07060302, q6);
  ASSERT_EQUAL_128(0x21201d1c19181514, 0x11100d0c09080504, q7);
  ASSERT_EQUAL_128(0x1e1d1c1b16151413, 0x0e0d0c0b06050403, q16);
  ASSERT_EQUAL_128(0x2221201f1a191817, 0x1211100f0a090807, q17);
  ASSERT_EQUAL_128(0x1b1a191817161514, 0x0b0a090807060504, q31);
  ASSERT_EQUAL_128(0x232221201f1e1d1c, 0x131211100f0e0d0c, q0);

  TEARDOWN();
}


TEST(neon_ld2_q_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint8_t src[64 + 4];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x17, src_base);
  __ Mov(x18, src_base + 1);
  __ Mov(x19, src_base + 2);
  __ Mov(x20, src_base + 3);
  __ Mov(x21, src_base + 4);
  __ Mov(x22, 1);
  __ Ld2(v2.V16B(), v3.V16B(), MemOperand(x17, x22, PostIndex));
  __ Ld2(v4.V16B(), v5.V16B(), MemOperand(x18, 32, PostIndex));
  __ Ld2(v6.V8H(), v7.V8H(), MemOperand(x19, 32, PostIndex));
  __ Ld2(v16.V4S(), v17.V4S(), MemOperand(x20, 32, PostIndex));
  __ Ld2(v31.V2D(), v0.V2D(), MemOperand(x21, 32, PostIndex));
  END();

  RUN();

  ASSERT_EQUAL_128(0x1e1c1a1816141210, 0x0e0c0a0806040200, q2);
  ASSERT_EQUAL_128(0x1f1d1b1917151311, 0x0f0d0b0907050301, q3);
  ASSERT_EQUAL_128(0x1f1d1b1917151311, 0x0f0d0b0907050301, q4);
  ASSERT_EQUAL_128(0x201e1c1a18161412, 0x100e0c0a08060402, q5);
  ASSERT_EQUAL_128(0x1f1e1b1a17161312, 0x0f0e0b0a07060302, q6);
  ASSERT_EQUAL_128(0x21201d1c19181514, 0x11100d0c09080504, q7);
  ASSERT_EQUAL_128(0x1e1d1c1b16151413, 0x0e0d0c0b06050403, q16);
  ASSERT_EQUAL_128(0x2221201f1a191817, 0x1211100f0a090807, q17);
  ASSERT_EQUAL_128(0x1b1a191817161514, 0x0b0a090807060504, q31);
  ASSERT_EQUAL_128(0x232221201f1e1d1c, 0x131211100f0e0d0c, q0);

  ASSERT_EQUAL_64(src_base + 1, x17);
  ASSERT_EQUAL_64(src_base + 1 + 32, x18);
  ASSERT_EQUAL_64(src_base + 2 + 32, x19);
  ASSERT_EQUAL_64(src_base + 3 + 32, x20);
  ASSERT_EQUAL_64(src_base + 4 + 32, x21);

  TEARDOWN();
}

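// The lane-indexed Ld2 form loads two consecutive elements from memory into
// the same lane of two registers. A sketch based on the q8/q9 assertions in
// the test below:
//   __ Ld2(v8.B(), v9.B(), 4, MemOperand(x17));
// reads bytes [x17] and [x17 + 1] into byte lane 4 of v8 and v9 respectively,
// leaving every other lane of both registers unchanged.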
TEST(neon_ld2_lane) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint8_t src[64];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();

  // Test loading whole register by element.
  __ Mov(x17, src_base);
  for (int i = 15; i >= 0; i--) {
    __ Ld2(v0.B(), v1.B(), i, MemOperand(x17));
    __ Add(x17, x17, 1);
  }

  __ Mov(x17, src_base);
  for (int i = 7; i >= 0; i--) {
    __ Ld2(v2.H(), v3.H(), i, MemOperand(x17));
    __ Add(x17, x17, 1);
  }

  __ Mov(x17, src_base);
  for (int i = 3; i >= 0; i--) {
    __ Ld2(v4.S(), v5.S(), i, MemOperand(x17));
    __ Add(x17, x17, 1);
  }

  __ Mov(x17, src_base);
  for (int i = 1; i >= 0; i--) {
    __ Ld2(v6.D(), v7.D(), i, MemOperand(x17));
    __ Add(x17, x17, 1);
  }

  // Test loading a single element into an initialised register.
  __ Mov(x17, src_base);
  __ Mov(x4, x17);
  __ Ldr(q8, MemOperand(x4, 16, PostIndex));
  __ Ldr(q9, MemOperand(x4));
  __ Ld2(v8.B(), v9.B(), 4, MemOperand(x17));
  __ Mov(x5, x17);
  __ Ldr(q10, MemOperand(x5, 16, PostIndex));
  __ Ldr(q11, MemOperand(x5));
  __ Ld2(v10.H(), v11.H(), 3, MemOperand(x17));
  __ Mov(x6, x17);
  __ Ldr(q12, MemOperand(x6, 16, PostIndex));
  __ Ldr(q13, MemOperand(x6));
  __ Ld2(v12.S(), v13.S(), 2, MemOperand(x17));
  __ Mov(x7, x17);
  __ Ldr(q14, MemOperand(x7, 16, PostIndex));
  __ Ldr(q15, MemOperand(x7));
  __ Ld2(v14.D(), v15.D(), 1, MemOperand(x17));

  END();

  RUN();

  ASSERT_EQUAL_128(0x0001020304050607, 0x08090a0b0c0d0e0f, q0);
  ASSERT_EQUAL_128(0x0102030405060708, 0x090a0b0c0d0e0f10, q1);
  ASSERT_EQUAL_128(0x0100020103020403, 0x0504060507060807, q2);
  ASSERT_EQUAL_128(0x0302040305040605, 0x0706080709080a09, q3);
  ASSERT_EQUAL_128(0x0302010004030201, 0x0504030206050403, q4);
  ASSERT_EQUAL_128(0x0706050408070605, 0x090807060a090807, q5);
  ASSERT_EQUAL_128(0x0706050403020100, 0x0807060504030201, q6);
  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x100f0e0d0c0b0a09, q7);
  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050003020100, q8);
  ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716150113121110, q9);
  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0100050403020100, q10);
  ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x0302151413121110, q11);
  ASSERT_EQUAL_128(0x0f0e0d0c03020100, 0x0706050403020100, q12);
  ASSERT_EQUAL_128(0x1f1e1d1c07060504, 0x1716151413121110, q13);
  ASSERT_EQUAL_128(0x0706050403020100, 0x0706050403020100, q14);
  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x1716151413121110, q15);

  TEARDOWN();
}


TEST(neon_ld2_lane_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint8_t src[64];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x17, src_base);
  __ Mov(x18, src_base);
  __ Mov(x19, src_base);
  __ Mov(x20, src_base);
  __ Mov(x21, src_base);
  __ Mov(x22, src_base);
  __ Mov(x23, src_base);
  __ Mov(x24, src_base);

  // Test loading whole register by element.
  for (int i = 15; i >= 0; i--) {
    __ Ld2(v0.B(), v1.B(), i, MemOperand(x17, 2, PostIndex));
  }

  for (int i = 7; i >= 0; i--) {
    __ Ld2(v2.H(), v3.H(), i, MemOperand(x18, 4, PostIndex));
  }

  for (int i = 3; i >= 0; i--) {
    __ Ld2(v4.S(), v5.S(), i, MemOperand(x19, 8, PostIndex));
  }

  for (int i = 1; i >= 0; i--) {
    __ Ld2(v6.D(), v7.D(), i, MemOperand(x20, 16, PostIndex));
  }

  // Test loading a single element into an initialised register.
  __ Mov(x25, 1);
  __ Mov(x4, x21);
  __ Ldr(q8, MemOperand(x4, 16, PostIndex));
  __ Ldr(q9, MemOperand(x4));
  __ Ld2(v8.B(), v9.B(), 4, MemOperand(x21, x25, PostIndex));
  __ Add(x25, x25, 1);

  __ Mov(x5, x22);
  __ Ldr(q10, MemOperand(x5, 16, PostIndex));
  __ Ldr(q11, MemOperand(x5));
  __ Ld2(v10.H(), v11.H(), 3, MemOperand(x22, x25, PostIndex));
  __ Add(x25, x25, 1);

  __ Mov(x6, x23);
  __ Ldr(q12, MemOperand(x6, 16, PostIndex));
  __ Ldr(q13, MemOperand(x6));
  __ Ld2(v12.S(), v13.S(), 2, MemOperand(x23, x25, PostIndex));
  __ Add(x25, x25, 1);

  __ Mov(x7, x24);
  __ Ldr(q14, MemOperand(x7, 16, PostIndex));
  __ Ldr(q15, MemOperand(x7));
  __ Ld2(v14.D(), v15.D(), 1, MemOperand(x24, x25, PostIndex));

  END();

  RUN();

  ASSERT_EQUAL_128(0x00020406080a0c0e, 0x10121416181a1c1e, q0);
  ASSERT_EQUAL_128(0x01030507090b0d0f, 0x11131517191b1d1f, q1);
  ASSERT_EQUAL_128(0x0100050409080d0c, 0x1110151419181d1c, q2);
  ASSERT_EQUAL_128(0x030207060b0a0f0e, 0x131217161b1a1f1e, q3);
  ASSERT_EQUAL_128(0x030201000b0a0908, 0x131211101b1a1918, q4);
  ASSERT_EQUAL_128(0x070605040f0e0d0c, 0x171615141f1e1d1c, q5);
  ASSERT_EQUAL_128(0x0706050403020100, 0x1716151413121110, q6);
  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x1f1e1d1c1b1a1918, q7);
  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050003020100, q8);
  ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716150113121110, q9);
  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0100050403020100, q10);
  ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x0302151413121110, q11);
  ASSERT_EQUAL_128(0x0f0e0d0c03020100, 0x0706050403020100, q12);
  ASSERT_EQUAL_128(0x1f1e1d1c07060504, 0x1716151413121110, q13);
  ASSERT_EQUAL_128(0x0706050403020100, 0x0706050403020100, q14);
  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x1716151413121110, q15);

  ASSERT_EQUAL_64(src_base + 32, x17);
  ASSERT_EQUAL_64(src_base + 32, x18);
  ASSERT_EQUAL_64(src_base + 32, x19);
  ASSERT_EQUAL_64(src_base + 32, x20);
  ASSERT_EQUAL_64(src_base + 1, x21);
  ASSERT_EQUAL_64(src_base + 2, x22);
  ASSERT_EQUAL_64(src_base + 3, x23);
  ASSERT_EQUAL_64(src_base + 4, x24);

  TEARDOWN();
}

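// Ld2r ("load and replicate") reads one two-element structure and broadcasts
// each element across every lane of its destination. A sketch from the first
// two assertions below: with [x17] = {0x01, 0x02},
//   __ Ld2r(v0.V8B(), v1.V8B(), MemOperand(x17));
// sets every byte of v0 to 0x01 and every byte of v1 to 0x02.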
TEST(neon_ld2_alllanes) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint8_t src[64];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x17, src_base + 1);
  __ Mov(x18, 1);
  __ Ld2r(v0.V8B(), v1.V8B(), MemOperand(x17));
  __ Add(x17, x17, 2);
  __ Ld2r(v2.V16B(), v3.V16B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld2r(v4.V4H(), v5.V4H(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld2r(v6.V8H(), v7.V8H(), MemOperand(x17));
  __ Add(x17, x17, 4);
  __ Ld2r(v8.V2S(), v9.V2S(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld2r(v10.V4S(), v11.V4S(), MemOperand(x17));
  __ Add(x17, x17, 8);
  __ Ld2r(v12.V2D(), v13.V2D(), MemOperand(x17));
  END();

  RUN();

  ASSERT_EQUAL_128(0x0000000000000000, 0x0101010101010101, q0);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0202020202020202, q1);
  ASSERT_EQUAL_128(0x0303030303030303, 0x0303030303030303, q2);
  ASSERT_EQUAL_128(0x0404040404040404, 0x0404040404040404, q3);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0504050405040504, q4);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0706070607060706, q5);
  ASSERT_EQUAL_128(0x0605060506050605, 0x0605060506050605, q6);
  ASSERT_EQUAL_128(0x0807080708070807, 0x0807080708070807, q7);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0c0b0a090c0b0a09, q8);
  ASSERT_EQUAL_128(0x0000000000000000, 0x100f0e0d100f0e0d, q9);
  ASSERT_EQUAL_128(0x0d0c0b0a0d0c0b0a, 0x0d0c0b0a0d0c0b0a, q10);
  ASSERT_EQUAL_128(0x11100f0e11100f0e, 0x11100f0e11100f0e, q11);
  ASSERT_EQUAL_128(0x1918171615141312, 0x1918171615141312, q12);
  ASSERT_EQUAL_128(0x21201f1e1d1c1b1a, 0x21201f1e1d1c1b1a, q13);

  TEARDOWN();
}


TEST(neon_ld2_alllanes_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint8_t src[64];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x17, src_base + 1);
  __ Mov(x18, 1);
  __ Ld2r(v0.V8B(), v1.V8B(), MemOperand(x17, 2, PostIndex));
  __ Ld2r(v2.V16B(), v3.V16B(), MemOperand(x17, x18, PostIndex));
  __ Ld2r(v4.V4H(), v5.V4H(), MemOperand(x17, x18, PostIndex));
  __ Ld2r(v6.V8H(), v7.V8H(), MemOperand(x17, 4, PostIndex));
  __ Ld2r(v8.V2S(), v9.V2S(), MemOperand(x17, x18, PostIndex));
  __ Ld2r(v10.V4S(), v11.V4S(), MemOperand(x17, 8, PostIndex));
  __ Ld2r(v12.V2D(), v13.V2D(), MemOperand(x17, 16, PostIndex));
  END();

  RUN();

  ASSERT_EQUAL_128(0x0000000000000000, 0x0101010101010101, q0);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0202020202020202, q1);
  ASSERT_EQUAL_128(0x0303030303030303, 0x0303030303030303, q2);
  ASSERT_EQUAL_128(0x0404040404040404, 0x0404040404040404, q3);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0504050405040504, q4);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0706070607060706, q5);
  ASSERT_EQUAL_128(0x0605060506050605, 0x0605060506050605, q6);
  ASSERT_EQUAL_128(0x0807080708070807, 0x0807080708070807, q7);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0c0b0a090c0b0a09, q8);
  ASSERT_EQUAL_128(0x0000000000000000, 0x100f0e0d100f0e0d, q9);
  ASSERT_EQUAL_128(0x0d0c0b0a0d0c0b0a, 0x0d0c0b0a0d0c0b0a, q10);
  ASSERT_EQUAL_128(0x11100f0e11100f0e, 0x11100f0e11100f0e, q11);
  ASSERT_EQUAL_128(0x1918171615141312, 0x1918171615141312, q12);
  ASSERT_EQUAL_128(0x21201f1e1d1c1b1a, 0x21201f1e1d1c1b1a, q13);
  ASSERT_EQUAL_64(src_base + 34, x17);

  TEARDOWN();
}

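// Ld3 de-interleaves element triples: destination register n receives the
// elements at stream indices n, n + 3, n + 6, and so on. From the first
// assertion of the test below, bytes {0x00, 0x03, 0x06, ..., 0x15} land in
// v2 as 0x15120f0c09060300.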
TEST(neon_ld3_d) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint8_t src[64 + 4];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x17, src_base);
  __ Ld3(v2.V8B(), v3.V8B(), v4.V8B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld3(v5.V8B(), v6.V8B(), v7.V8B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld3(v8.V4H(), v9.V4H(), v10.V4H(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld3(v31.V2S(), v0.V2S(), v1.V2S(), MemOperand(x17));
  END();

  RUN();

  ASSERT_EQUAL_128(0, 0x15120f0c09060300, q2);
  ASSERT_EQUAL_128(0, 0x1613100d0a070401, q3);
  ASSERT_EQUAL_128(0, 0x1714110e0b080502, q4);
  ASSERT_EQUAL_128(0, 0x1613100d0a070401, q5);
  ASSERT_EQUAL_128(0, 0x1714110e0b080502, q6);
  ASSERT_EQUAL_128(0, 0x1815120f0c090603, q7);
  ASSERT_EQUAL_128(0, 0x15140f0e09080302, q8);
  ASSERT_EQUAL_128(0, 0x171611100b0a0504, q9);
  ASSERT_EQUAL_128(0, 0x191813120d0c0706, q10);
  ASSERT_EQUAL_128(0, 0x1211100f06050403, q31);
  ASSERT_EQUAL_128(0, 0x161514130a090807, q0);
  ASSERT_EQUAL_128(0, 0x1a1918170e0d0c0b, q1);

  TEARDOWN();
}


TEST(neon_ld3_d_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint8_t src[32 + 4];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x17, src_base);
  __ Mov(x18, src_base + 1);
  __ Mov(x19, src_base + 2);
  __ Mov(x20, src_base + 3);
  __ Mov(x21, src_base + 4);
  __ Mov(x22, 1);
  __ Ld3(v2.V8B(), v3.V8B(), v4.V8B(), MemOperand(x17, x22, PostIndex));
  __ Ld3(v5.V8B(), v6.V8B(), v7.V8B(), MemOperand(x18, 24, PostIndex));
  __ Ld3(v8.V4H(), v9.V4H(), v10.V4H(), MemOperand(x19, 24, PostIndex));
  __ Ld3(v11.V2S(), v12.V2S(), v13.V2S(), MemOperand(x20, 24, PostIndex));
  __ Ld3(v31.V2S(), v0.V2S(), v1.V2S(), MemOperand(x21, 24, PostIndex));
  END();

  RUN();

  ASSERT_EQUAL_128(0, 0x15120f0c09060300, q2);
  ASSERT_EQUAL_128(0, 0x1613100d0a070401, q3);
  ASSERT_EQUAL_128(0, 0x1714110e0b080502, q4);
  ASSERT_EQUAL_128(0, 0x1613100d0a070401, q5);
  ASSERT_EQUAL_128(0, 0x1714110e0b080502, q6);
  ASSERT_EQUAL_128(0, 0x1815120f0c090603, q7);
  ASSERT_EQUAL_128(0, 0x15140f0e09080302, q8);
  ASSERT_EQUAL_128(0, 0x171611100b0a0504, q9);
  ASSERT_EQUAL_128(0, 0x191813120d0c0706, q10);
  ASSERT_EQUAL_128(0, 0x1211100f06050403, q11);
  ASSERT_EQUAL_128(0, 0x161514130a090807, q12);
  ASSERT_EQUAL_128(0, 0x1a1918170e0d0c0b, q13);
  ASSERT_EQUAL_128(0, 0x1312111007060504, q31);
  ASSERT_EQUAL_128(0, 0x171615140b0a0908, q0);
  ASSERT_EQUAL_128(0, 0x1b1a19180f0e0d0c, q1);

  ASSERT_EQUAL_64(src_base + 1, x17);
  ASSERT_EQUAL_64(src_base + 1 + 24, x18);
  ASSERT_EQUAL_64(src_base + 2 + 24, x19);
  ASSERT_EQUAL_64(src_base + 3 + 24, x20);
  ASSERT_EQUAL_64(src_base + 4 + 24, x21);

  TEARDOWN();
}


TEST(neon_ld3_q) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint8_t src[64 + 4];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x17, src_base);
  __ Ld3(v2.V16B(), v3.V16B(), v4.V16B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld3(v5.V16B(), v6.V16B(), v7.V16B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld3(v8.V8H(), v9.V8H(), v10.V8H(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld3(v11.V4S(), v12.V4S(), v13.V4S(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld3(v31.V2D(), v0.V2D(), v1.V2D(), MemOperand(x17));
  END();

  RUN();

  ASSERT_EQUAL_128(0x2d2a2724211e1b18, 0x15120f0c09060300, q2);
  ASSERT_EQUAL_128(0x2e2b2825221f1c19, 0x1613100d0a070401, q3);
  ASSERT_EQUAL_128(0x2f2c292623201d1a, 0x1714110e0b080502, q4);
  ASSERT_EQUAL_128(0x2e2b2825221f1c19, 0x1613100d0a070401, q5);
  ASSERT_EQUAL_128(0x2f2c292623201d1a, 0x1714110e0b080502, q6);
  ASSERT_EQUAL_128(0x302d2a2724211e1b, 0x1815120f0c090603, q7);
  ASSERT_EQUAL_128(0x2d2c272621201b1a, 0x15140f0e09080302, q8);
  ASSERT_EQUAL_128(0x2f2e292823221d1c, 0x171611100b0a0504, q9);
  ASSERT_EQUAL_128(0x31302b2a25241f1e, 0x191813120d0c0706, q10);
  ASSERT_EQUAL_128(0x2a2928271e1d1c1b, 0x1211100f06050403, q11);
  ASSERT_EQUAL_128(0x2e2d2c2b2221201f, 0x161514130a090807, q12);
  ASSERT_EQUAL_128(0x3231302f26252423, 0x1a1918170e0d0c0b, q13);
  ASSERT_EQUAL_128(0x232221201f1e1d1c, 0x0b0a090807060504, q31);
  ASSERT_EQUAL_128(0x2b2a292827262524, 0x131211100f0e0d0c, q0);
  ASSERT_EQUAL_128(0x333231302f2e2d2c, 0x1b1a191817161514, q1);

  TEARDOWN();
}


TEST(neon_ld3_q_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint8_t src[64 + 4];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x17, src_base);
  __ Mov(x18, src_base + 1);
  __ Mov(x19, src_base + 2);
  __ Mov(x20, src_base + 3);
  __ Mov(x21, src_base + 4);
  __ Mov(x22, 1);

  __ Ld3(v2.V16B(), v3.V16B(), v4.V16B(), MemOperand(x17, x22, PostIndex));
  __ Ld3(v5.V16B(), v6.V16B(), v7.V16B(), MemOperand(x18, 48, PostIndex));
  __ Ld3(v8.V8H(), v9.V8H(), v10.V8H(), MemOperand(x19, 48, PostIndex));
  __ Ld3(v11.V4S(), v12.V4S(), v13.V4S(), MemOperand(x20, 48, PostIndex));
  __ Ld3(v31.V2D(), v0.V2D(), v1.V2D(), MemOperand(x21, 48, PostIndex));
  END();

  RUN();

  ASSERT_EQUAL_128(0x2d2a2724211e1b18, 0x15120f0c09060300, q2);
  ASSERT_EQUAL_128(0x2e2b2825221f1c19, 0x1613100d0a070401, q3);
  ASSERT_EQUAL_128(0x2f2c292623201d1a, 0x1714110e0b080502, q4);
  ASSERT_EQUAL_128(0x2e2b2825221f1c19, 0x1613100d0a070401, q5);
  ASSERT_EQUAL_128(0x2f2c292623201d1a, 0x1714110e0b080502, q6);
  ASSERT_EQUAL_128(0x302d2a2724211e1b, 0x1815120f0c090603, q7);
  ASSERT_EQUAL_128(0x2d2c272621201b1a, 0x15140f0e09080302, q8);
  ASSERT_EQUAL_128(0x2f2e292823221d1c, 0x171611100b0a0504, q9);
  ASSERT_EQUAL_128(0x31302b2a25241f1e, 0x191813120d0c0706, q10);
  ASSERT_EQUAL_128(0x2a2928271e1d1c1b, 0x1211100f06050403, q11);
  ASSERT_EQUAL_128(0x2e2d2c2b2221201f, 0x161514130a090807, q12);
  ASSERT_EQUAL_128(0x3231302f26252423, 0x1a1918170e0d0c0b, q13);
  ASSERT_EQUAL_128(0x232221201f1e1d1c, 0x0b0a090807060504, q31);
  ASSERT_EQUAL_128(0x2b2a292827262524, 0x131211100f0e0d0c, q0);
  ASSERT_EQUAL_128(0x333231302f2e2d2c, 0x1b1a191817161514, q1);

  ASSERT_EQUAL_64(src_base + 1, x17);
  ASSERT_EQUAL_64(src_base + 1 + 48, x18);
  ASSERT_EQUAL_64(src_base + 2 + 48, x19);
  ASSERT_EQUAL_64(src_base + 3 + 48, x20);
  ASSERT_EQUAL_64(src_base + 4 + 48, x21);

  TEARDOWN();
}

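// The lane-indexed Ld3 form loads three consecutive elements into the same
// lane of three registers, preserving all other lanes. This mirrors the
// lane-indexed Ld2 case above, extended to a third destination register.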
TEST(neon_ld3_lane) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint8_t src[64];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();

  // Test loading whole register by element.
  __ Mov(x17, src_base);
  for (int i = 15; i >= 0; i--) {
    __ Ld3(v0.B(), v1.B(), v2.B(), i, MemOperand(x17));
    __ Add(x17, x17, 1);
  }

  __ Mov(x17, src_base);
  for (int i = 7; i >= 0; i--) {
    __ Ld3(v3.H(), v4.H(), v5.H(), i, MemOperand(x17));
    __ Add(x17, x17, 1);
  }

  __ Mov(x17, src_base);
  for (int i = 3; i >= 0; i--) {
    __ Ld3(v6.S(), v7.S(), v8.S(), i, MemOperand(x17));
    __ Add(x17, x17, 1);
  }

  __ Mov(x17, src_base);
  for (int i = 1; i >= 0; i--) {
    __ Ld3(v9.D(), v10.D(), v11.D(), i, MemOperand(x17));
    __ Add(x17, x17, 1);
  }

  // Test loading a single element into an initialised register.
  __ Mov(x17, src_base);
  __ Mov(x4, x17);
  __ Ldr(q12, MemOperand(x4, 16, PostIndex));
  __ Ldr(q13, MemOperand(x4, 16, PostIndex));
  __ Ldr(q14, MemOperand(x4));
  __ Ld3(v12.B(), v13.B(), v14.B(), 4, MemOperand(x17));
  __ Mov(x5, x17);
  __ Ldr(q15, MemOperand(x5, 16, PostIndex));
  __ Ldr(q16, MemOperand(x5, 16, PostIndex));
  __ Ldr(q17, MemOperand(x5));
  __ Ld3(v15.H(), v16.H(), v17.H(), 3, MemOperand(x17));
  __ Mov(x6, x17);
  __ Ldr(q18, MemOperand(x6, 16, PostIndex));
  __ Ldr(q19, MemOperand(x6, 16, PostIndex));
  __ Ldr(q20, MemOperand(x6));
  __ Ld3(v18.S(), v19.S(), v20.S(), 2, MemOperand(x17));
  __ Mov(x7, x17);
  __ Ldr(q21, MemOperand(x7, 16, PostIndex));
  __ Ldr(q22, MemOperand(x7, 16, PostIndex));
  __ Ldr(q23, MemOperand(x7));
  __ Ld3(v21.D(), v22.D(), v23.D(), 1, MemOperand(x17));

  END();

  RUN();

  ASSERT_EQUAL_128(0x0001020304050607, 0x08090a0b0c0d0e0f, q0);
  ASSERT_EQUAL_128(0x0102030405060708, 0x090a0b0c0d0e0f10, q1);
  ASSERT_EQUAL_128(0x0203040506070809, 0x0a0b0c0d0e0f1011, q2);
  ASSERT_EQUAL_128(0x0100020103020403, 0x0504060507060807, q3);
  ASSERT_EQUAL_128(0x0302040305040605, 0x0706080709080a09, q4);
  ASSERT_EQUAL_128(0x0504060507060807, 0x09080a090b0a0c0b, q5);
  ASSERT_EQUAL_128(0x0302010004030201, 0x0504030206050403, q6);
  ASSERT_EQUAL_128(0x0706050408070605, 0x090807060a090807, q7);
  ASSERT_EQUAL_128(0x0b0a09080c0b0a09, 0x0d0c0b0a0e0d0c0b, q8);
  ASSERT_EQUAL_128(0x0706050403020100, 0x0807060504030201, q9);
  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x100f0e0d0c0b0a09, q10);
  ASSERT_EQUAL_128(0x1716151413121110, 0x1817161514131211, q11);
  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050003020100, q12);
  ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716150113121110, q13);
  ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726250223222120, q14);
  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0100050403020100, q15);
  ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x0302151413121110, q16);
  ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x0504252423222120, q17);

  TEARDOWN();
}


TEST(neon_ld3_lane_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint8_t src[64];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();

  // Test loading whole register by element.
  __ Mov(x17, src_base);
  __ Mov(x18, src_base);
  __ Mov(x19, src_base);
  __ Mov(x20, src_base);
  __ Mov(x21, src_base);
  __ Mov(x22, src_base);
  __ Mov(x23, src_base);
  __ Mov(x24, src_base);
  for (int i = 15; i >= 0; i--) {
    __ Ld3(v0.B(), v1.B(), v2.B(), i, MemOperand(x17, 3, PostIndex));
  }

  for (int i = 7; i >= 0; i--) {
    __ Ld3(v3.H(), v4.H(), v5.H(), i, MemOperand(x18, 6, PostIndex));
  }

  for (int i = 3; i >= 0; i--) {
    __ Ld3(v6.S(), v7.S(), v8.S(), i, MemOperand(x19, 12, PostIndex));
  }

  for (int i = 1; i >= 0; i--) {
    __ Ld3(v9.D(), v10.D(), v11.D(), i, MemOperand(x20, 24, PostIndex));
  }

  // Test loading a single element into an initialised register.
  __ Mov(x25, 1);
  __ Mov(x4, x21);
  __ Ldr(q12, MemOperand(x4, 16, PostIndex));
  __ Ldr(q13, MemOperand(x4, 16, PostIndex));
  __ Ldr(q14, MemOperand(x4));
  __ Ld3(v12.B(), v13.B(), v14.B(), 4, MemOperand(x21, x25, PostIndex));
  __ Add(x25, x25, 1);

  __ Mov(x5, x22);
  __ Ldr(q15, MemOperand(x5, 16, PostIndex));
  __ Ldr(q16, MemOperand(x5, 16, PostIndex));
  __ Ldr(q17, MemOperand(x5));
  __ Ld3(v15.H(), v16.H(), v17.H(), 3, MemOperand(x22, x25, PostIndex));
  __ Add(x25, x25, 1);

  __ Mov(x6, x23);
  __ Ldr(q18, MemOperand(x6, 16, PostIndex));
  __ Ldr(q19, MemOperand(x6, 16, PostIndex));
  __ Ldr(q20, MemOperand(x6));
  __ Ld3(v18.S(), v19.S(), v20.S(), 2, MemOperand(x23, x25, PostIndex));
  __ Add(x25, x25, 1);

  __ Mov(x7, x24);
  __ Ldr(q21, MemOperand(x7, 16, PostIndex));
  __ Ldr(q22, MemOperand(x7, 16, PostIndex));
  __ Ldr(q23, MemOperand(x7));
  __ Ld3(v21.D(), v22.D(), v23.D(), 1, MemOperand(x24, x25, PostIndex));

  END();

  RUN();

  ASSERT_EQUAL_128(0x000306090c0f1215, 0x181b1e2124272a2d, q0);
  ASSERT_EQUAL_128(0x0104070a0d101316, 0x191c1f2225282b2e, q1);
  ASSERT_EQUAL_128(0x0205080b0e111417, 0x1a1d202326292c2f, q2);
  ASSERT_EQUAL_128(0x010007060d0c1312, 0x19181f1e25242b2a, q3);
  ASSERT_EQUAL_128(0x030209080f0e1514, 0x1b1a212027262d2c, q4);
  ASSERT_EQUAL_128(0x05040b0a11101716, 0x1d1c232229282f2e, q5);
  ASSERT_EQUAL_128(0x030201000f0e0d0c, 0x1b1a191827262524, q6);
  ASSERT_EQUAL_128(0x0706050413121110, 0x1f1e1d1c2b2a2928, q7);
  ASSERT_EQUAL_128(0x0b0a090817161514, 0x232221202f2e2d2c, q8);
  ASSERT_EQUAL_128(0x0706050403020100, 0x1f1e1d1c1b1a1918, q9);
  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x2726252423222120, q10);
  ASSERT_EQUAL_128(0x1716151413121110, 0x2f2e2d2c2b2a2928, q11);
  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050003020100, q12);
  ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716150113121110, q13);
  ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726250223222120, q14);
  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0100050403020100, q15);
  ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x0302151413121110, q16);
  ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x0504252423222120, q17);
  ASSERT_EQUAL_128(0x0f0e0d0c03020100, 0x0706050403020100, q18);
  ASSERT_EQUAL_128(0x1f1e1d1c07060504, 0x1716151413121110, q19);
  ASSERT_EQUAL_128(0x2f2e2d2c0b0a0908, 0x2726252423222120, q20);
  ASSERT_EQUAL_128(0x0706050403020100, 0x0706050403020100, q21);
  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x1716151413121110, q22);
  ASSERT_EQUAL_128(0x1716151413121110, 0x2726252423222120, q23);

  ASSERT_EQUAL_64(src_base + 48, x17);
  ASSERT_EQUAL_64(src_base + 48, x18);
  ASSERT_EQUAL_64(src_base + 48, x19);
  ASSERT_EQUAL_64(src_base + 48, x20);
  ASSERT_EQUAL_64(src_base + 1, x21);
  ASSERT_EQUAL_64(src_base + 2, x22);
  ASSERT_EQUAL_64(src_base + 3, x23);
  ASSERT_EQUAL_64(src_base + 4, x24);

  TEARDOWN();
}

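// Ld3r broadcasts a three-element structure: with [x17] = {0x01, 0x02, 0x03},
// every lane of v0, v1 and v2 becomes 0x01, 0x02 and 0x03 respectively, as
// the first three assertions of the test below check.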
TEST(neon_ld3_alllanes) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint8_t src[64];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x17, src_base + 1);
  __ Mov(x18, 1);
  __ Ld3r(v0.V8B(), v1.V8B(), v2.V8B(), MemOperand(x17));
  __ Add(x17, x17, 3);
  __ Ld3r(v3.V16B(), v4.V16B(), v5.V16B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld3r(v6.V4H(), v7.V4H(), v8.V4H(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld3r(v9.V8H(), v10.V8H(), v11.V8H(), MemOperand(x17));
  __ Add(x17, x17, 6);
  __ Ld3r(v12.V2S(), v13.V2S(), v14.V2S(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld3r(v15.V4S(), v16.V4S(), v17.V4S(), MemOperand(x17));
  __ Add(x17, x17, 12);
  __ Ld3r(v18.V2D(), v19.V2D(), v20.V2D(), MemOperand(x17));
  END();

  RUN();

  ASSERT_EQUAL_128(0x0000000000000000, 0x0101010101010101, q0);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0202020202020202, q1);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0303030303030303, q2);
  ASSERT_EQUAL_128(0x0404040404040404, 0x0404040404040404, q3);
  ASSERT_EQUAL_128(0x0505050505050505, 0x0505050505050505, q4);
  ASSERT_EQUAL_128(0x0606060606060606, 0x0606060606060606, q5);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0605060506050605, q6);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0807080708070807, q7);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0a090a090a090a09, q8);
  ASSERT_EQUAL_128(0x0706070607060706, 0x0706070607060706, q9);
  ASSERT_EQUAL_128(0x0908090809080908, 0x0908090809080908, q10);
  ASSERT_EQUAL_128(0x0b0a0b0a0b0a0b0a, 0x0b0a0b0a0b0a0b0a, q11);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0f0e0d0c0f0e0d0c, q12);
  ASSERT_EQUAL_128(0x0000000000000000, 0x1312111013121110, q13);
  ASSERT_EQUAL_128(0x0000000000000000, 0x1716151417161514, q14);
  ASSERT_EQUAL_128(0x100f0e0d100f0e0d, 0x100f0e0d100f0e0d, q15);
  ASSERT_EQUAL_128(0x1413121114131211, 0x1413121114131211, q16);
  ASSERT_EQUAL_128(0x1817161518171615, 0x1817161518171615, q17);
  ASSERT_EQUAL_128(0x201f1e1d1c1b1a19, 0x201f1e1d1c1b1a19, q18);
  ASSERT_EQUAL_128(0x2827262524232221, 0x2827262524232221, q19);
  ASSERT_EQUAL_128(0x302f2e2d2c2b2a29, 0x302f2e2d2c2b2a29, q20);

  TEARDOWN();
}


TEST(neon_ld3_alllanes_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint8_t src[64];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x17, src_base + 1);
  __ Mov(x18, 1);
  __ Ld3r(v0.V8B(), v1.V8B(), v2.V8B(), MemOperand(x17, 3, PostIndex));
  __ Ld3r(v3.V16B(), v4.V16B(), v5.V16B(), MemOperand(x17, x18, PostIndex));
  __ Ld3r(v6.V4H(), v7.V4H(), v8.V4H(), MemOperand(x17, x18, PostIndex));
  __ Ld3r(v9.V8H(), v10.V8H(), v11.V8H(), MemOperand(x17, 6, PostIndex));
  __ Ld3r(v12.V2S(), v13.V2S(), v14.V2S(), MemOperand(x17, x18, PostIndex));
  __ Ld3r(v15.V4S(), v16.V4S(), v17.V4S(), MemOperand(x17, 12, PostIndex));
  __ Ld3r(v18.V2D(), v19.V2D(), v20.V2D(), MemOperand(x17, 24, PostIndex));
  END();

  RUN();

  ASSERT_EQUAL_128(0x0000000000000000, 0x0101010101010101, q0);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0202020202020202, q1);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0303030303030303, q2);
  ASSERT_EQUAL_128(0x0404040404040404, 0x0404040404040404, q3);
  ASSERT_EQUAL_128(0x0505050505050505, 0x0505050505050505, q4);
  ASSERT_EQUAL_128(0x0606060606060606, 0x0606060606060606, q5);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0605060506050605, q6);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0807080708070807, q7);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0a090a090a090a09, q8);
  ASSERT_EQUAL_128(0x0706070607060706, 0x0706070607060706, q9);
  ASSERT_EQUAL_128(0x0908090809080908, 0x0908090809080908, q10);
  ASSERT_EQUAL_128(0x0b0a0b0a0b0a0b0a, 0x0b0a0b0a0b0a0b0a, q11);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0f0e0d0c0f0e0d0c, q12);
  ASSERT_EQUAL_128(0x0000000000000000, 0x1312111013121110, q13);
  ASSERT_EQUAL_128(0x0000000000000000, 0x1716151417161514, q14);
  ASSERT_EQUAL_128(0x100f0e0d100f0e0d, 0x100f0e0d100f0e0d, q15);
  ASSERT_EQUAL_128(0x1413121114131211, 0x1413121114131211, q16);
  ASSERT_EQUAL_128(0x1817161518171615, 0x1817161518171615, q17);
  ASSERT_EQUAL_128(0x201f1e1d1c1b1a19, 0x201f1e1d1c1b1a19, q18);
  ASSERT_EQUAL_128(0x2827262524232221, 0x2827262524232221, q19);
  ASSERT_EQUAL_128(0x302f2e2d2c2b2a29, 0x302f2e2d2c2b2a29, q20);

  TEARDOWN();
}

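// Ld4 de-interleaves element quadruples: destination register n receives the
// elements at stream indices n, n + 4, n + 8, and so on. From the first
// assertion of the test below, bytes {0x00, 0x04, 0x08, ..., 0x1c} land in
// v2 as 0x1c1814100c080400.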
TEST(neon_ld4_d) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint8_t src[64 + 4];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x17, src_base);
  __ Ld4(v2.V8B(), v3.V8B(), v4.V8B(), v5.V8B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld4(v6.V8B(), v7.V8B(), v8.V8B(), v9.V8B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld4(v10.V4H(), v11.V4H(), v12.V4H(), v13.V4H(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld4(v30.V2S(), v31.V2S(), v0.V2S(), v1.V2S(), MemOperand(x17));
  END();

  RUN();

  ASSERT_EQUAL_128(0, 0x1c1814100c080400, q2);
  ASSERT_EQUAL_128(0, 0x1d1915110d090501, q3);
  ASSERT_EQUAL_128(0, 0x1e1a16120e0a0602, q4);
  ASSERT_EQUAL_128(0, 0x1f1b17130f0b0703, q5);
  ASSERT_EQUAL_128(0, 0x1d1915110d090501, q6);
  ASSERT_EQUAL_128(0, 0x1e1a16120e0a0602, q7);
  ASSERT_EQUAL_128(0, 0x1f1b17130f0b0703, q8);
  ASSERT_EQUAL_128(0, 0x201c1814100c0804, q9);
  ASSERT_EQUAL_128(0, 0x1b1a13120b0a0302, q10);
  ASSERT_EQUAL_128(0, 0x1d1c15140d0c0504, q11);
  ASSERT_EQUAL_128(0, 0x1f1e17160f0e0706, q12);
  ASSERT_EQUAL_128(0, 0x2120191811100908, q13);
  ASSERT_EQUAL_128(0, 0x1615141306050403, q30);
  ASSERT_EQUAL_128(0, 0x1a1918170a090807, q31);
  ASSERT_EQUAL_128(0, 0x1e1d1c1b0e0d0c0b, q0);
  ASSERT_EQUAL_128(0, 0x2221201f1211100f, q1);

  TEARDOWN();
}


TEST(neon_ld4_d_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint8_t src[32 + 4];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x17, src_base);
  __ Mov(x18, src_base + 1);
  __ Mov(x19, src_base + 2);
  __ Mov(x20, src_base + 3);
  __ Mov(x21, src_base + 4);
  __ Mov(x22, 1);
  __ Ld4(v2.V8B(),
         v3.V8B(),
         v4.V8B(),
         v5.V8B(),
         MemOperand(x17, x22, PostIndex));
  __ Ld4(v6.V8B(),
         v7.V8B(),
         v8.V8B(),
         v9.V8B(),
         MemOperand(x18, 32, PostIndex));
  __ Ld4(v10.V4H(),
         v11.V4H(),
         v12.V4H(),
         v13.V4H(),
         MemOperand(x19, 32, PostIndex));
  __ Ld4(v14.V2S(),
         v15.V2S(),
         v16.V2S(),
         v17.V2S(),
         MemOperand(x20, 32, PostIndex));
  __ Ld4(v30.V2S(),
         v31.V2S(),
         v0.V2S(),
         v1.V2S(),
         MemOperand(x21, 32, PostIndex));
  END();

  RUN();

  ASSERT_EQUAL_128(0, 0x1c1814100c080400, q2);
  ASSERT_EQUAL_128(0, 0x1d1915110d090501, q3);
  ASSERT_EQUAL_128(0, 0x1e1a16120e0a0602, q4);
  ASSERT_EQUAL_128(0, 0x1f1b17130f0b0703, q5);
  ASSERT_EQUAL_128(0, 0x1d1915110d090501, q6);
  ASSERT_EQUAL_128(0, 0x1e1a16120e0a0602, q7);
  ASSERT_EQUAL_128(0, 0x1f1b17130f0b0703, q8);
  ASSERT_EQUAL_128(0, 0x201c1814100c0804, q9);
  ASSERT_EQUAL_128(0, 0x1b1a13120b0a0302, q10);
  ASSERT_EQUAL_128(0, 0x1d1c15140d0c0504, q11);
  ASSERT_EQUAL_128(0, 0x1f1e17160f0e0706, q12);
  ASSERT_EQUAL_128(0, 0x2120191811100908, q13);
  ASSERT_EQUAL_128(0, 0x1615141306050403, q14);
  ASSERT_EQUAL_128(0, 0x1a1918170a090807, q15);
  ASSERT_EQUAL_128(0, 0x1e1d1c1b0e0d0c0b, q16);
  ASSERT_EQUAL_128(0, 0x2221201f1211100f, q17);
  ASSERT_EQUAL_128(0, 0x1716151407060504, q30);
  ASSERT_EQUAL_128(0, 0x1b1a19180b0a0908, q31);
  ASSERT_EQUAL_128(0, 0x1f1e1d1c0f0e0d0c, q0);
  ASSERT_EQUAL_128(0, 0x2322212013121110, q1);

  ASSERT_EQUAL_64(src_base + 1, x17);
  ASSERT_EQUAL_64(src_base + 1 + 32, x18);
  ASSERT_EQUAL_64(src_base + 2 + 32, x19);
  ASSERT_EQUAL_64(src_base + 3 + 32, x20);
  ASSERT_EQUAL_64(src_base + 4 + 32, x21);

  TEARDOWN();
}


TEST(neon_ld4_q) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint8_t src[64 + 4];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x17, src_base);
  __ Ld4(v2.V16B(), v3.V16B(), v4.V16B(), v5.V16B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld4(v6.V16B(), v7.V16B(), v8.V16B(), v9.V16B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld4(v10.V8H(), v11.V8H(), v12.V8H(), v13.V8H(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld4(v14.V4S(), v15.V4S(), v16.V4S(), v17.V4S(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld4(v18.V2D(), v19.V2D(), v20.V2D(), v21.V2D(), MemOperand(x17));
  END();

  RUN();

  ASSERT_EQUAL_128(0x3c3834302c282420, 0x1c1814100c080400, q2);
  ASSERT_EQUAL_128(0x3d3935312d292521, 0x1d1915110d090501, q3);
  ASSERT_EQUAL_128(0x3e3a36322e2a2622, 0x1e1a16120e0a0602, q4);
  ASSERT_EQUAL_128(0x3f3b37332f2b2723, 0x1f1b17130f0b0703, q5);
  ASSERT_EQUAL_128(0x3d3935312d292521, 0x1d1915110d090501, q6);
  ASSERT_EQUAL_128(0x3e3a36322e2a2622, 0x1e1a16120e0a0602, q7);
  ASSERT_EQUAL_128(0x3f3b37332f2b2723, 0x1f1b17130f0b0703, q8);
  ASSERT_EQUAL_128(0x403c3834302c2824, 0x201c1814100c0804, q9);
  ASSERT_EQUAL_128(0x3b3a33322b2a2322, 0x1b1a13120b0a0302, q10);
  ASSERT_EQUAL_128(0x3d3c35342d2c2524, 0x1d1c15140d0c0504, q11);
  ASSERT_EQUAL_128(0x3f3e37362f2e2726, 0x1f1e17160f0e0706, q12);
  ASSERT_EQUAL_128(0x4140393831302928, 0x2120191811100908, q13);
  ASSERT_EQUAL_128(0x3635343326252423, 0x1615141306050403, q14);
  ASSERT_EQUAL_128(0x3a3938372a292827, 0x1a1918170a090807, q15);
  ASSERT_EQUAL_128(0x3e3d3c3b2e2d2c2b, 0x1e1d1c1b0e0d0c0b, q16);
  ASSERT_EQUAL_128(0x4241403f3231302f, 0x2221201f1211100f, q17);
  ASSERT_EQUAL_128(0x2b2a292827262524, 0x0b0a090807060504, q18);
  ASSERT_EQUAL_128(0x333231302f2e2d2c, 0x131211100f0e0d0c, q19);
  ASSERT_EQUAL_128(0x3b3a393837363534, 0x1b1a191817161514, q20);
  ASSERT_EQUAL_128(0x434241403f3e3d3c, 0x232221201f1e1d1c, q21);

  TEARDOWN();
}


TEST(neon_ld4_q_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint8_t src[64 + 4];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x17, src_base);
  __ Mov(x18, src_base + 1);
  __ Mov(x19, src_base + 2);
  __ Mov(x20, src_base + 3);
  __ Mov(x21, src_base + 4);
  __ Mov(x22, 1);

  __ Ld4(v2.V16B(),
         v3.V16B(),
         v4.V16B(),
         v5.V16B(),
         MemOperand(x17, x22, PostIndex));
  __ Ld4(v6.V16B(),
         v7.V16B(),
         v8.V16B(),
         v9.V16B(),
         MemOperand(x18, 64, PostIndex));
  __ Ld4(v10.V8H(),
         v11.V8H(),
         v12.V8H(),
         v13.V8H(),
         MemOperand(x19, 64, PostIndex));
  __ Ld4(v14.V4S(),
         v15.V4S(),
         v16.V4S(),
         v17.V4S(),
         MemOperand(x20, 64, PostIndex));
  __ Ld4(v30.V2D(),
         v31.V2D(),
         v0.V2D(),
         v1.V2D(),
         MemOperand(x21, 64, PostIndex));
  END();

  RUN();

  ASSERT_EQUAL_128(0x3c3834302c282420, 0x1c1814100c080400, q2);
  ASSERT_EQUAL_128(0x3d3935312d292521, 0x1d1915110d090501, q3);
  ASSERT_EQUAL_128(0x3e3a36322e2a2622, 0x1e1a16120e0a0602, q4);
  ASSERT_EQUAL_128(0x3f3b37332f2b2723, 0x1f1b17130f0b0703, q5);
  ASSERT_EQUAL_128(0x3d3935312d292521, 0x1d1915110d090501, q6);
  ASSERT_EQUAL_128(0x3e3a36322e2a2622, 0x1e1a16120e0a0602, q7);
  ASSERT_EQUAL_128(0x3f3b37332f2b2723, 0x1f1b17130f0b0703, q8);
  ASSERT_EQUAL_128(0x403c3834302c2824, 0x201c1814100c0804, q9);
  ASSERT_EQUAL_128(0x3b3a33322b2a2322, 0x1b1a13120b0a0302, q10);
  ASSERT_EQUAL_128(0x3d3c35342d2c2524, 0x1d1c15140d0c0504, q11);
  ASSERT_EQUAL_128(0x3f3e37362f2e2726, 0x1f1e17160f0e0706, q12);
  ASSERT_EQUAL_128(0x4140393831302928, 0x2120191811100908, q13);
  ASSERT_EQUAL_128(0x3635343326252423, 0x1615141306050403, q14);
  ASSERT_EQUAL_128(0x3a3938372a292827, 0x1a1918170a090807, q15);
  ASSERT_EQUAL_128(0x3e3d3c3b2e2d2c2b, 0x1e1d1c1b0e0d0c0b, q16);
  ASSERT_EQUAL_128(0x4241403f3231302f, 0x2221201f1211100f, q17);
  ASSERT_EQUAL_128(0x2b2a292827262524, 0x0b0a090807060504, q30);
  ASSERT_EQUAL_128(0x333231302f2e2d2c, 0x131211100f0e0d0c, q31);
  ASSERT_EQUAL_128(0x3b3a393837363534, 0x1b1a191817161514, q0);
  ASSERT_EQUAL_128(0x434241403f3e3d3c, 0x232221201f1e1d1c, q1);

  ASSERT_EQUAL_64(src_base + 1, x17);
  ASSERT_EQUAL_64(src_base + 1 + 64, x18);
  ASSERT_EQUAL_64(src_base + 2 + 64, x19);
  ASSERT_EQUAL_64(src_base + 3 + 64, x20);
  ASSERT_EQUAL_64(src_base + 4 + 64, x21);

  TEARDOWN();
}

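// The lane-indexed Ld4 form loads four consecutive elements into the same
// lane of four registers. As with the Ld2/Ld3 lane variants above, only the
// addressed lane of each destination register changes.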
TEST(neon_ld4_lane) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint8_t src[64];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();

  // Test loading whole register by element.
  __ Mov(x17, src_base);
  for (int i = 15; i >= 0; i--) {
    __ Ld4(v0.B(), v1.B(), v2.B(), v3.B(), i, MemOperand(x17));
    __ Add(x17, x17, 1);
  }

  __ Mov(x17, src_base);
  for (int i = 7; i >= 0; i--) {
    __ Ld4(v4.H(), v5.H(), v6.H(), v7.H(), i, MemOperand(x17));
    __ Add(x17, x17, 1);
  }

  __ Mov(x17, src_base);
  for (int i = 3; i >= 0; i--) {
    __ Ld4(v8.S(), v9.S(), v10.S(), v11.S(), i, MemOperand(x17));
    __ Add(x17, x17, 1);
  }

  __ Mov(x17, src_base);
  for (int i = 1; i >= 0; i--) {
    __ Ld4(v12.D(), v13.D(), v14.D(), v15.D(), i, MemOperand(x17));
    __ Add(x17, x17, 1);
  }

  // Test loading a single element into an initialised register.
  __ Mov(x17, src_base);
  __ Mov(x4, x17);
  __ Ldr(q16, MemOperand(x4, 16, PostIndex));
  __ Ldr(q17, MemOperand(x4, 16, PostIndex));
  __ Ldr(q18, MemOperand(x4, 16, PostIndex));
  __ Ldr(q19, MemOperand(x4));
  __ Ld4(v16.B(), v17.B(), v18.B(), v19.B(), 4, MemOperand(x17));

  __ Mov(x5, x17);
  __ Ldr(q20, MemOperand(x5, 16, PostIndex));
  __ Ldr(q21, MemOperand(x5, 16, PostIndex));
  __ Ldr(q22, MemOperand(x5, 16, PostIndex));
  __ Ldr(q23, MemOperand(x5));
  __ Ld4(v20.H(), v21.H(), v22.H(), v23.H(), 3, MemOperand(x17));

  __ Mov(x6, x17);
  __ Ldr(q24, MemOperand(x6, 16, PostIndex));
  __ Ldr(q25, MemOperand(x6, 16, PostIndex));
  __ Ldr(q26, MemOperand(x6, 16, PostIndex));
  __ Ldr(q27, MemOperand(x6));
  __ Ld4(v24.S(), v25.S(), v26.S(), v27.S(), 2, MemOperand(x17));

  __ Mov(x7, x17);
  __ Ldr(q28, MemOperand(x7, 16, PostIndex));
  __ Ldr(q29, MemOperand(x7, 16, PostIndex));
  __ Ldr(q30, MemOperand(x7, 16, PostIndex));
  __ Ldr(q31, MemOperand(x7));
  __ Ld4(v28.D(), v29.D(), v30.D(), v31.D(), 1, MemOperand(x17));

  END();

  RUN();

  ASSERT_EQUAL_128(0x0001020304050607, 0x08090a0b0c0d0e0f, q0);
  ASSERT_EQUAL_128(0x0102030405060708, 0x090a0b0c0d0e0f10, q1);
  ASSERT_EQUAL_128(0x0203040506070809, 0x0a0b0c0d0e0f1011, q2);
  ASSERT_EQUAL_128(0x030405060708090a, 0x0b0c0d0e0f101112, q3);
  ASSERT_EQUAL_128(0x0100020103020403, 0x0504060507060807, q4);
  ASSERT_EQUAL_128(0x0302040305040605, 0x0706080709080a09, q5);
  ASSERT_EQUAL_128(0x0504060507060807, 0x09080a090b0a0c0b, q6);
  ASSERT_EQUAL_128(0x0706080709080a09, 0x0b0a0c0b0d0c0e0d, q7);
  ASSERT_EQUAL_128(0x0302010004030201, 0x0504030206050403, q8);
  ASSERT_EQUAL_128(0x0706050408070605, 0x090807060a090807, q9);
  ASSERT_EQUAL_128(0x0b0a09080c0b0a09, 0x0d0c0b0a0e0d0c0b, q10);
  ASSERT_EQUAL_128(0x0f0e0d0c100f0e0d, 0x11100f0e1211100f, q11);
  ASSERT_EQUAL_128(0x0706050403020100, 0x0807060504030201, q12);
  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x100f0e0d0c0b0a09, q13);
  ASSERT_EQUAL_128(0x1716151413121110, 0x1817161514131211, q14);
  ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x201f1e1d1c1b1a19, q15);
  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050003020100, q16);
  ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716150113121110, q17);
  ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726250223222120, q18);
  ASSERT_EQUAL_128(0x3f3e3d3c3b3a3938, 0x3736350333323130, q19);
  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0100050403020100, q20);
  ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x0302151413121110, q21);
  ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x0504252423222120, q22);
  ASSERT_EQUAL_128(0x3f3e3d3c3b3a3938, 0x0706353433323130, q23);
  ASSERT_EQUAL_128(0x0f0e0d0c03020100, 0x0706050403020100, q24);
  ASSERT_EQUAL_128(0x1f1e1d1c07060504, 0x1716151413121110, q25);
  ASSERT_EQUAL_128(0x2f2e2d2c0b0a0908, 0x2726252423222120, q26);
  ASSERT_EQUAL_128(0x3f3e3d3c0f0e0d0c, 0x3736353433323130, q27);
  ASSERT_EQUAL_128(0x0706050403020100, 0x0706050403020100, q28);
  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x1716151413121110, q29);
  ASSERT_EQUAL_128(0x1716151413121110, 0x2726252423222120, q30);
  ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x3736353433323130, q31);

  TEARDOWN();
}


TEST(neon_ld4_lane_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint8_t src[64];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();

  // Test loading whole register by element.
  __ Mov(x17, src_base);
  for (int i = 15; i >= 0; i--) {
    __ Ld4(v0.B(), v1.B(), v2.B(), v3.B(), i, MemOperand(x17, 4, PostIndex));
  }

  __ Mov(x18, src_base);
  for (int i = 7; i >= 0; i--) {
    __ Ld4(v4.H(), v5.H(), v6.H(), v7.H(), i, MemOperand(x18, 8, PostIndex));
  }

  __ Mov(x19, src_base);
  for (int i = 3; i >= 0; i--) {
    __ Ld4(v8.S(), v9.S(), v10.S(), v11.S(), i, MemOperand(x19, 16, PostIndex));
  }

  __ Mov(x20, src_base);
  for (int i = 1; i >= 0; i--) {
    __ Ld4(v12.D(),
           v13.D(),
           v14.D(),
           v15.D(),
           i,
           MemOperand(x20, 32, PostIndex));
  }

  // Test loading a single element into an initialised register.
  __ Mov(x25, 1);
  __ Mov(x21, src_base);
  __ Mov(x22, src_base);
  __ Mov(x23, src_base);
  __ Mov(x24, src_base);

  __ Mov(x4, x21);
  __ Ldr(q16, MemOperand(x4, 16, PostIndex));
  __ Ldr(q17, MemOperand(x4, 16, PostIndex));
  __ Ldr(q18, MemOperand(x4, 16, PostIndex));
  __ Ldr(q19, MemOperand(x4));
  __ Ld4(v16.B(),
         v17.B(),
         v18.B(),
         v19.B(),
         4,
         MemOperand(x21, x25, PostIndex));
  __ Add(x25, x25, 1);

  __ Mov(x5, x22);
  __ Ldr(q20, MemOperand(x5, 16, PostIndex));
  __ Ldr(q21, MemOperand(x5, 16, PostIndex));
  __ Ldr(q22, MemOperand(x5, 16, PostIndex));
  __ Ldr(q23, MemOperand(x5));
  __ Ld4(v20.H(),
         v21.H(),
         v22.H(),
         v23.H(),
         3,
         MemOperand(x22, x25, PostIndex));
  __ Add(x25, x25, 1);

  __ Mov(x6, x23);
  __ Ldr(q24, MemOperand(x6, 16, PostIndex));
  __ Ldr(q25, MemOperand(x6, 16, PostIndex));
  __ Ldr(q26, MemOperand(x6, 16, PostIndex));
  __ Ldr(q27, MemOperand(x6));
  __ Ld4(v24.S(),
         v25.S(),
         v26.S(),
         v27.S(),
         2,
         MemOperand(x23, x25, PostIndex));
  __ Add(x25, x25, 1);

  __ Mov(x7, x24);
  __ Ldr(q28, MemOperand(x7, 16, PostIndex));
  __ Ldr(q29, MemOperand(x7, 16, PostIndex));
  __ Ldr(q30, MemOperand(x7, 16, PostIndex));
  __ Ldr(q31, MemOperand(x7));
  __ Ld4(v28.D(),
         v29.D(),
         v30.D(),
         v31.D(),
         1,
         MemOperand(x24, x25, PostIndex));

  END();

  RUN();

  ASSERT_EQUAL_128(0x0004080c1014181c, 0x2024282c3034383c, q0);
  ASSERT_EQUAL_128(0x0105090d1115191d, 0x2125292d3135393d, q1);
  ASSERT_EQUAL_128(0x02060a0e12161a1e, 0x22262a2e32363a3e, q2);
  ASSERT_EQUAL_128(0x03070b0f13171b1f, 0x23272b2f33373b3f, q3);
  ASSERT_EQUAL_128(0x0100090811101918, 0x2120292831303938, q4);
  ASSERT_EQUAL_128(0x03020b0a13121b1a, 0x23222b2a33323b3a, q5);
  ASSERT_EQUAL_128(0x05040d0c15141d1c, 0x25242d2c35343d3c, q6);
  ASSERT_EQUAL_128(0x07060f0e17161f1e, 0x27262f2e37363f3e, q7);
  ASSERT_EQUAL_128(0x0302010013121110, 0x2322212033323130, q8);
  ASSERT_EQUAL_128(0x0706050417161514, 0x2726252437363534, q9);
  ASSERT_EQUAL_128(0x0b0a09081b1a1918, 0x2b2a29283b3a3938, q10);
  ASSERT_EQUAL_128(0x0f0e0d0c1f1e1d1c, 0x2f2e2d2c3f3e3d3c, q11);
  ASSERT_EQUAL_128(0x0706050403020100, 0x2726252423222120, q12);
  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x2f2e2d2c2b2a2928, q13);
  ASSERT_EQUAL_128(0x1716151413121110, 0x3736353433323130, q14);
  ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x3f3e3d3c3b3a3938, q15);
  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050003020100, q16);
  ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716150113121110, q17);
  ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726250223222120, q18);
  ASSERT_EQUAL_128(0x3f3e3d3c3b3a3938, 0x3736350333323130, q19);
  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0100050403020100, q20);
  ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x0302151413121110, q21);
  ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x0504252423222120, q22);
  ASSERT_EQUAL_128(0x3f3e3d3c3b3a3938, 0x0706353433323130, q23);
  ASSERT_EQUAL_128(0x0f0e0d0c03020100, 0x0706050403020100, q24);
  ASSERT_EQUAL_128(0x1f1e1d1c07060504, 0x1716151413121110, q25);
  ASSERT_EQUAL_128(0x2f2e2d2c0b0a0908, 0x2726252423222120, q26);
  ASSERT_EQUAL_128(0x3f3e3d3c0f0e0d0c, 0x3736353433323130, q27);
  ASSERT_EQUAL_128(0x0706050403020100, 0x0706050403020100, q28);
  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x1716151413121110, q29);
  ASSERT_EQUAL_128(0x1716151413121110, 0x2726252423222120, q30);
  ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x3736353433323130, q31);

  ASSERT_EQUAL_64(src_base + 64, x17);
  ASSERT_EQUAL_64(src_base + 64, x18);
  ASSERT_EQUAL_64(src_base + 64, x19);
  ASSERT_EQUAL_64(src_base + 64, x20);
  ASSERT_EQUAL_64(src_base + 1, x21);
  ASSERT_EQUAL_64(src_base + 2, x22);
  ASSERT_EQUAL_64(src_base + 3, x23);
  ASSERT_EQUAL_64(src_base + 4, x24);

  TEARDOWN();
}

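// Ld4r broadcasts a four-element structure: with [x17] = {0x01, 0x02, 0x03,
// 0x04}, every lane of v0..v3 becomes 0x01..0x04 respectively, per the first
// four assertions of the test below.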
TEST(neon_ld4_alllanes) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint8_t src[64];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x17, src_base + 1);
  __ Mov(x18, 1);
  __ Ld4r(v0.V8B(), v1.V8B(), v2.V8B(), v3.V8B(), MemOperand(x17));
  __ Add(x17, x17, 4);
  __ Ld4r(v4.V16B(), v5.V16B(), v6.V16B(), v7.V16B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld4r(v8.V4H(), v9.V4H(), v10.V4H(), v11.V4H(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld4r(v12.V8H(), v13.V8H(), v14.V8H(), v15.V8H(), MemOperand(x17));
  __ Add(x17, x17, 8);
  __ Ld4r(v16.V2S(), v17.V2S(), v18.V2S(), v19.V2S(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld4r(v20.V4S(), v21.V4S(), v22.V4S(), v23.V4S(), MemOperand(x17));
  __ Add(x17, x17, 16);
  __ Ld4r(v24.V2D(), v25.V2D(), v26.V2D(), v27.V2D(), MemOperand(x17));
  END();

  RUN();

  ASSERT_EQUAL_128(0x0000000000000000, 0x0101010101010101, q0);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0202020202020202, q1);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0303030303030303, q2);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0404040404040404, q3);
  ASSERT_EQUAL_128(0x0505050505050505, 0x0505050505050505, q4);
  ASSERT_EQUAL_128(0x0606060606060606, 0x0606060606060606, q5);
  ASSERT_EQUAL_128(0x0707070707070707, 0x0707070707070707, q6);
  ASSERT_EQUAL_128(0x0808080808080808, 0x0808080808080808, q7);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0706070607060706, q8);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0908090809080908, q9);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0b0a0b0a0b0a0b0a, q10);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0d0c0d0c0d0c0d0c, q11);
  ASSERT_EQUAL_128(0x0807080708070807, 0x0807080708070807, q12);
  ASSERT_EQUAL_128(0x0a090a090a090a09, 0x0a090a090a090a09, q13);
  ASSERT_EQUAL_128(0x0c0b0c0b0c0b0c0b, 0x0c0b0c0b0c0b0c0b, q14);
  ASSERT_EQUAL_128(0x0e0d0e0d0e0d0e0d, 0x0e0d0e0d0e0d0e0d, q15);
  ASSERT_EQUAL_128(0x0000000000000000, 0x1211100f1211100f, q16);
  ASSERT_EQUAL_128(0x0000000000000000, 0x1615141316151413, q17);
  ASSERT_EQUAL_128(0x0000000000000000, 0x1a1918171a191817, q18);
  ASSERT_EQUAL_128(0x0000000000000000, 0x1e1d1c1b1e1d1c1b, q19);
  ASSERT_EQUAL_128(0x1312111013121110, 0x1312111013121110, q20);
  ASSERT_EQUAL_128(0x1716151417161514, 0x1716151417161514, q21);
  ASSERT_EQUAL_128(0x1b1a19181b1a1918, 0x1b1a19181b1a1918, q22);
  ASSERT_EQUAL_128(0x1f1e1d1c1f1e1d1c, 0x1f1e1d1c1f1e1d1c, q23);
  ASSERT_EQUAL_128(0x2726252423222120, 0x2726252423222120, q24);
  ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2f2e2d2c2b2a2928, q25);
  ASSERT_EQUAL_128(0x3736353433323130, 0x3736353433323130, q26);
  ASSERT_EQUAL_128(0x3f3e3d3c3b3a3938, 0x3f3e3d3c3b3a3938, q27);

  TEARDOWN();
}


TEST(neon_ld4_alllanes_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint8_t src[64];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x17, src_base + 1);
  __ Mov(x18, 1);
  __ Ld4r(v0.V8B(),
          v1.V8B(),
          v2.V8B(),
          v3.V8B(),
          MemOperand(x17, 4, PostIndex));
  __ Ld4r(v4.V16B(),
          v5.V16B(),
          v6.V16B(),
          v7.V16B(),
          MemOperand(x17, x18, PostIndex));
  __ Ld4r(v8.V4H(),
          v9.V4H(),
          v10.V4H(),
          v11.V4H(),
          MemOperand(x17, x18, PostIndex));
  __ Ld4r(v12.V8H(),
          v13.V8H(),
          v14.V8H(),
          v15.V8H(),
          MemOperand(x17, 8, PostIndex));
  __ Ld4r(v16.V2S(),
          v17.V2S(),
          v18.V2S(),
          v19.V2S(),
          MemOperand(x17, x18, PostIndex));
  __ Ld4r(v20.V4S(),
          v21.V4S(),
          v22.V4S(),
          v23.V4S(),
          MemOperand(x17, 16, PostIndex));
  __ Ld4r(v24.V2D(),
          v25.V2D(),
          v26.V2D(),
          v27.V2D(),
          MemOperand(x17, 32, PostIndex));
  END();

  RUN();

  ASSERT_EQUAL_128(0x0000000000000000, 0x0101010101010101, q0);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0202020202020202, q1);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0303030303030303, q2);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0404040404040404, q3);
  ASSERT_EQUAL_128(0x0505050505050505, 0x0505050505050505, q4);
  ASSERT_EQUAL_128(0x0606060606060606, 0x0606060606060606, q5);
  ASSERT_EQUAL_128(0x0707070707070707, 0x0707070707070707, q6);
  ASSERT_EQUAL_128(0x0808080808080808, 0x0808080808080808, q7);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0706070607060706, q8);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0908090809080908, q9);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0b0a0b0a0b0a0b0a, q10);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0d0c0d0c0d0c0d0c, q11);
  ASSERT_EQUAL_128(0x0807080708070807, 0x0807080708070807, q12);
  ASSERT_EQUAL_128(0x0a090a090a090a09, 0x0a090a090a090a09, q13);
  ASSERT_EQUAL_128(0x0c0b0c0b0c0b0c0b, 0x0c0b0c0b0c0b0c0b, q14);
  ASSERT_EQUAL_128(0x0e0d0e0d0e0d0e0d, 0x0e0d0e0d0e0d0e0d, q15);
  ASSERT_EQUAL_128(0x0000000000000000, 0x1211100f1211100f, q16);
  ASSERT_EQUAL_128(0x0000000000000000, 0x1615141316151413, q17);
  ASSERT_EQUAL_128(0x0000000000000000, 0x1a1918171a191817, q18);
  ASSERT_EQUAL_128(0x0000000000000000, 0x1e1d1c1b1e1d1c1b, q19);
  ASSERT_EQUAL_128(0x1312111013121110, 0x1312111013121110, q20);
  ASSERT_EQUAL_128(0x1716151417161514, 0x1716151417161514, q21);
  ASSERT_EQUAL_128(0x1b1a19181b1a1918, 0x1b1a19181b1a1918, q22);
  ASSERT_EQUAL_128(0x1f1e1d1c1f1e1d1c, 0x1f1e1d1c1f1e1d1c, q23);
  ASSERT_EQUAL_128(0x2726252423222120, 0x2726252423222120, q24);
  ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2f2e2d2c2b2a2928, q25);
  ASSERT_EQUAL_128(0x3736353433323130, 0x3736353433323130, q26);
  ASSERT_EQUAL_128(0x3f3e3d3c3b3a3938, 0x3f3e3d3c3b3a3938, q27);
  ASSERT_EQUAL_64(src_base + 64, x17);

  TEARDOWN();
}

TEST(neon_st1_lane)5585 TEST(neon_st1_lane) {
5586 SETUP_WITH_FEATURES(CPUFeatures::kNEON);
5587
5588 uint8_t src[64];
5589 for (unsigned i = 0; i < sizeof(src); i++) {
5590 src[i] = i;
5591 }
5592 uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
5593
5594 START();
5595 __ Mov(x17, src_base);
5596 __ Mov(x18, -16);
5597 __ Ldr(q0, MemOperand(x17));
5598
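  // Store the lanes of q0 from highest to lowest at successive addresses, so
  // each reload below sees the vector with its elements reversed.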
  for (int i = 15; i >= 0; i--) {
    __ St1(v0.B(), i, MemOperand(x17));
    __ Add(x17, x17, 1);
  }
  __ Ldr(q1, MemOperand(x17, x18));

  for (int i = 7; i >= 0; i--) {
    __ St1(v0.H(), i, MemOperand(x17));
    __ Add(x17, x17, 2);
  }
  __ Ldr(q2, MemOperand(x17, x18));

  for (int i = 3; i >= 0; i--) {
    __ St1(v0.S(), i, MemOperand(x17));
    __ Add(x17, x17, 4);
  }
  __ Ldr(q3, MemOperand(x17, x18));

  for (int i = 1; i >= 0; i--) {
    __ St1(v0.D(), i, MemOperand(x17));
    __ Add(x17, x17, 8);
  }
  __ Ldr(q4, MemOperand(x17, x18));

  END();

  RUN();

  ASSERT_EQUAL_128(0x0001020304050607, 0x08090a0b0c0d0e0f, q1);
  ASSERT_EQUAL_128(0x0100030205040706, 0x09080b0a0d0c0f0e, q2);
  ASSERT_EQUAL_128(0x0302010007060504, 0x0b0a09080f0e0d0c, q3);
  ASSERT_EQUAL_128(0x0706050403020100, 0x0f0e0d0c0b0a0908, q4);

  TEARDOWN();
}


TEST(neon_st2_lane) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Struct size * addressing modes * element sizes * vector size.
  uint8_t dst[2 * 2 * 4 * 16];
  memset(dst, 0, sizeof(dst));
  uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);

  START();
  __ Mov(x17, dst_base);
  __ Mov(x18, dst_base);
  __ Movi(v0.V2D(), 0x0001020304050607, 0x08090a0b0c0d0e0f);
  __ Movi(v1.V2D(), 0x1011121314151617, 0x18191a1b1c1d1e1f);

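  // St2 stores lane i of both source registers as one two-element structure,
  // interleaving the two registers in memory.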
  // Test B stores with and without post index.
  for (int i = 15; i >= 0; i--) {
    __ St2(v0.B(), v1.B(), i, MemOperand(x18));
    __ Add(x18, x18, 2);
  }
  for (int i = 15; i >= 0; i--) {
    __ St2(v0.B(), v1.B(), i, MemOperand(x18, 2, PostIndex));
  }
  __ Ldr(q2, MemOperand(x17, 0 * 16));
  __ Ldr(q3, MemOperand(x17, 1 * 16));
  __ Ldr(q4, MemOperand(x17, 2 * 16));
  __ Ldr(q5, MemOperand(x17, 3 * 16));

  // Test H stores with and without post index.
  __ Mov(x0, 4);
  for (int i = 7; i >= 0; i--) {
    __ St2(v0.H(), v1.H(), i, MemOperand(x18));
    __ Add(x18, x18, 4);
  }
  for (int i = 7; i >= 0; i--) {
    __ St2(v0.H(), v1.H(), i, MemOperand(x18, x0, PostIndex));
  }
  __ Ldr(q6, MemOperand(x17, 4 * 16));
  __ Ldr(q7, MemOperand(x17, 5 * 16));
  __ Ldr(q16, MemOperand(x17, 6 * 16));
  __ Ldr(q17, MemOperand(x17, 7 * 16));

  // Test S stores with and without post index.
  for (int i = 3; i >= 0; i--) {
    __ St2(v0.S(), v1.S(), i, MemOperand(x18));
    __ Add(x18, x18, 8);
  }
  for (int i = 3; i >= 0; i--) {
    __ St2(v0.S(), v1.S(), i, MemOperand(x18, 8, PostIndex));
  }
  __ Ldr(q18, MemOperand(x17, 8 * 16));
  __ Ldr(q19, MemOperand(x17, 9 * 16));
  __ Ldr(q20, MemOperand(x17, 10 * 16));
  __ Ldr(q21, MemOperand(x17, 11 * 16));

  // Test D stores with and without post index.
  __ Mov(x0, 16);
  __ St2(v0.D(), v1.D(), 1, MemOperand(x18));
  __ Add(x18, x18, 16);
  __ St2(v0.D(), v1.D(), 0, MemOperand(x18, 16, PostIndex));
  __ St2(v0.D(), v1.D(), 1, MemOperand(x18, x0, PostIndex));
  __ St2(v0.D(), v1.D(), 0, MemOperand(x18, x0, PostIndex));
  __ Ldr(q22, MemOperand(x17, 12 * 16));
  __ Ldr(q23, MemOperand(x17, 13 * 16));
  __ Ldr(q24, MemOperand(x17, 14 * 16));
  __ Ldr(q25, MemOperand(x17, 15 * 16));
  END();

  RUN();

  ASSERT_EQUAL_128(0x1707160615051404, 0x1303120211011000, q2);
  ASSERT_EQUAL_128(0x1f0f1e0e1d0d1c0c, 0x1b0b1a0a19091808, q3);
  ASSERT_EQUAL_128(0x1707160615051404, 0x1303120211011000, q4);
  ASSERT_EQUAL_128(0x1f0f1e0e1d0d1c0c, 0x1b0b1a0a19091808, q5);

  ASSERT_EQUAL_128(0x1617060714150405, 0x1213020310110001, q6);
  ASSERT_EQUAL_128(0x1e1f0e0f1c1d0c0d, 0x1a1b0a0b18190809, q7);
  ASSERT_EQUAL_128(0x1617060714150405, 0x1213020310110001, q16);
  ASSERT_EQUAL_128(0x1e1f0e0f1c1d0c0d, 0x1a1b0a0b18190809, q17);

  ASSERT_EQUAL_128(0x1415161704050607, 0x1011121300010203, q18);
  ASSERT_EQUAL_128(0x1c1d1e1f0c0d0e0f, 0x18191a1b08090a0b, q19);
  ASSERT_EQUAL_128(0x1415161704050607, 0x1011121300010203, q20);
  ASSERT_EQUAL_128(0x1c1d1e1f0c0d0e0f, 0x18191a1b08090a0b, q21);

  ASSERT_EQUAL_128(0x1011121314151617, 0x0001020304050607, q22);
  ASSERT_EQUAL_128(0x18191a1b1c1d1e1f, 0x08090a0b0c0d0e0f, q23);
  ASSERT_EQUAL_128(0x1011121314151617, 0x0001020304050607, q24);
  ASSERT_EQUAL_128(0x18191a1b1c1d1e1f, 0x08090a0b0c0d0e0f, q25);

  TEARDOWN();
}


TEST(neon_st3_lane) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Struct size * addressing modes * element sizes * vector size.
  uint8_t dst[3 * 2 * 4 * 16];
  memset(dst, 0, sizeof(dst));
  uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);

  START();
  __ Mov(x17, dst_base);
  __ Mov(x18, dst_base);
  __ Movi(v0.V2D(), 0x0001020304050607, 0x08090a0b0c0d0e0f);
  __ Movi(v1.V2D(), 0x1011121314151617, 0x18191a1b1c1d1e1f);
  __ Movi(v2.V2D(), 0x2021222324252627, 0x28292a2b2c2d2e2f);

  // Test B stores with and without post index.
  for (int i = 15; i >= 0; i--) {
    __ St3(v0.B(), v1.B(), v2.B(), i, MemOperand(x18));
    __ Add(x18, x18, 3);
  }
  for (int i = 15; i >= 0; i--) {
    __ St3(v0.B(), v1.B(), v2.B(), i, MemOperand(x18, 3, PostIndex));
  }
  __ Ldr(q3, MemOperand(x17, 0 * 16));
  __ Ldr(q4, MemOperand(x17, 1 * 16));
  __ Ldr(q5, MemOperand(x17, 2 * 16));
  __ Ldr(q6, MemOperand(x17, 3 * 16));
  __ Ldr(q7, MemOperand(x17, 4 * 16));
  __ Ldr(q16, MemOperand(x17, 5 * 16));

  // Test H stores with and without post index.
  __ Mov(x0, 6);
  for (int i = 7; i >= 0; i--) {
    __ St3(v0.H(), v1.H(), v2.H(), i, MemOperand(x18));
    __ Add(x18, x18, 6);
  }
  for (int i = 7; i >= 0; i--) {
    __ St3(v0.H(), v1.H(), v2.H(), i, MemOperand(x18, x0, PostIndex));
  }
  __ Ldr(q17, MemOperand(x17, 6 * 16));
  __ Ldr(q18, MemOperand(x17, 7 * 16));
  __ Ldr(q19, MemOperand(x17, 8 * 16));
  __ Ldr(q20, MemOperand(x17, 9 * 16));
  __ Ldr(q21, MemOperand(x17, 10 * 16));
  __ Ldr(q22, MemOperand(x17, 11 * 16));

  // Test S stores with and without post index.
  for (int i = 3; i >= 0; i--) {
    __ St3(v0.S(), v1.S(), v2.S(), i, MemOperand(x18));
    __ Add(x18, x18, 12);
  }
  for (int i = 3; i >= 0; i--) {
    __ St3(v0.S(), v1.S(), v2.S(), i, MemOperand(x18, 12, PostIndex));
  }
  __ Ldr(q23, MemOperand(x17, 12 * 16));
  __ Ldr(q24, MemOperand(x17, 13 * 16));
  __ Ldr(q25, MemOperand(x17, 14 * 16));
  __ Ldr(q26, MemOperand(x17, 15 * 16));
  __ Ldr(q27, MemOperand(x17, 16 * 16));
  __ Ldr(q28, MemOperand(x17, 17 * 16));

  // Test D stores with and without post index.
  __ Mov(x0, 24);
  __ St3(v0.D(), v1.D(), v2.D(), 1, MemOperand(x18));
  __ Add(x18, x18, 24);
  __ St3(v0.D(), v1.D(), v2.D(), 0, MemOperand(x18, 24, PostIndex));
  __ St3(v0.D(), v1.D(), v2.D(), 1, MemOperand(x18, x0, PostIndex));
  __ Ldr(q29, MemOperand(x17, 18 * 16));
  __ Ldr(q30, MemOperand(x17, 19 * 16));
  __ Ldr(q31, MemOperand(x17, 20 * 16));
  END();

  RUN();

  ASSERT_EQUAL_128(0x0524140423130322, 0x1202211101201000, q3);
  ASSERT_EQUAL_128(0x1a0a291909281808, 0x2717072616062515, q4);
  ASSERT_EQUAL_128(0x2f1f0f2e1e0e2d1d, 0x0d2c1c0c2b1b0b2a, q5);
  ASSERT_EQUAL_128(0x0524140423130322, 0x1202211101201000, q6);
  ASSERT_EQUAL_128(0x1a0a291909281808, 0x2717072616062515, q7);
  ASSERT_EQUAL_128(0x2f1f0f2e1e0e2d1d, 0x0d2c1c0c2b1b0b2a, q16);

  ASSERT_EQUAL_128(0x1415040522231213, 0x0203202110110001, q17);
  ASSERT_EQUAL_128(0x0a0b282918190809, 0x2627161706072425, q18);
  ASSERT_EQUAL_128(0x2e2f1e1f0e0f2c2d, 0x1c1d0c0d2a2b1a1b, q19);
  ASSERT_EQUAL_128(0x1415040522231213, 0x0203202110110001, q20);
  ASSERT_EQUAL_128(0x0a0b282918190809, 0x2627161706072425, q21);
  ASSERT_EQUAL_128(0x2e2f1e1f0e0f2c2d, 0x1c1d0c0d2a2b1a1b, q22);

  ASSERT_EQUAL_128(0x0405060720212223, 0x1011121300010203, q23);
  ASSERT_EQUAL_128(0x18191a1b08090a0b, 0x2425262714151617, q24);
  ASSERT_EQUAL_128(0x2c2d2e2f1c1d1e1f, 0x0c0d0e0f28292a2b, q25);
  ASSERT_EQUAL_128(0x0405060720212223, 0x1011121300010203, q26);
  ASSERT_EQUAL_128(0x18191a1b08090a0b, 0x2425262714151617, q27);
  ASSERT_EQUAL_128(0x2c2d2e2f1c1d1e1f, 0x0c0d0e0f28292a2b, q28);
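
  // The D-lane results were loaded into q29-q31 above but were not checked in
  // the original test. The expected values below are derived by hand from the
  // same interleaving pattern as the D-lane checks in neon_st2_lane.
  ASSERT_EQUAL_128(0x1011121314151617, 0x0001020304050607, q29);
  ASSERT_EQUAL_128(0x08090a0b0c0d0e0f, 0x2021222324252627, q30);
  ASSERT_EQUAL_128(0x28292a2b2c2d2e2f, 0x18191a1b1c1d1e1f, q31);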

  TEARDOWN();
}


TEST(neon_st4_lane) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  // Struct size * element sizes * vector size.
  uint8_t dst[4 * 4 * 16];
  memset(dst, 0, sizeof(dst));
  uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);

  START();
  __ Mov(x17, dst_base);
  __ Mov(x18, dst_base);
  __ Movi(v0.V2D(), 0x0001020304050607, 0x08090a0b0c0d0e0f);
  __ Movi(v1.V2D(), 0x1011121314151617, 0x18191a1b1c1d1e1f);
  __ Movi(v2.V2D(), 0x2021222324252627, 0x28292a2b2c2d2e2f);
  __ Movi(v3.V2D(), 0x2021222324252627, 0x28292a2b2c2d2e2f);
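  // Note that v3 repeats v2's byte pattern; the expected values below rely on
  // this duplication.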

  // Test B stores without post index.
  for (int i = 15; i >= 0; i--) {
    __ St4(v0.B(), v1.B(), v2.B(), v3.B(), i, MemOperand(x18));
    __ Add(x18, x18, 4);
  }
  __ Ldr(q4, MemOperand(x17, 0 * 16));
  __ Ldr(q5, MemOperand(x17, 1 * 16));
  __ Ldr(q6, MemOperand(x17, 2 * 16));
  __ Ldr(q7, MemOperand(x17, 3 * 16));

  // Test H stores with post index.
  __ Mov(x0, 8);
  for (int i = 7; i >= 0; i--) {
    __ St4(v0.H(), v1.H(), v2.H(), v3.H(), i, MemOperand(x18, x0, PostIndex));
  }
  __ Ldr(q16, MemOperand(x17, 4 * 16));
  __ Ldr(q17, MemOperand(x17, 5 * 16));
  __ Ldr(q18, MemOperand(x17, 6 * 16));
  __ Ldr(q19, MemOperand(x17, 7 * 16));

  // Test S stores without post index.
  for (int i = 3; i >= 0; i--) {
    __ St4(v0.S(), v1.S(), v2.S(), v3.S(), i, MemOperand(x18));
    __ Add(x18, x18, 16);
  }
  __ Ldr(q20, MemOperand(x17, 8 * 16));
  __ Ldr(q21, MemOperand(x17, 9 * 16));
  __ Ldr(q22, MemOperand(x17, 10 * 16));
  __ Ldr(q23, MemOperand(x17, 11 * 16));

  // Test D stores with post index.
  __ Mov(x0, 32);
  __ St4(v0.D(), v1.D(), v2.D(), v3.D(), 0, MemOperand(x18, 32, PostIndex));
  __ St4(v0.D(), v1.D(), v2.D(), v3.D(), 1, MemOperand(x18, x0, PostIndex));

  __ Ldr(q24, MemOperand(x17, 12 * 16));
  __ Ldr(q25, MemOperand(x17, 13 * 16));
  __ Ldr(q26, MemOperand(x17, 14 * 16));
  __ Ldr(q27, MemOperand(x17, 15 * 16));
  END();

  RUN();

  ASSERT_EQUAL_128(0x2323130322221202, 0x2121110120201000, q4);
  ASSERT_EQUAL_128(0x2727170726261606, 0x2525150524241404, q5);
  ASSERT_EQUAL_128(0x2b2b1b0b2a2a1a0a, 0x2929190928281808, q6);
  ASSERT_EQUAL_128(0x2f2f1f0f2e2e1e0e, 0x2d2d1d0d2c2c1c0c, q7);

  ASSERT_EQUAL_128(0x2223222312130203, 0x2021202110110001, q16);
  ASSERT_EQUAL_128(0x2627262716170607, 0x2425242514150405, q17);
  ASSERT_EQUAL_128(0x2a2b2a2b1a1b0a0b, 0x2829282918190809, q18);
  ASSERT_EQUAL_128(0x2e2f2e2f1e1f0e0f, 0x2c2d2c2d1c1d0c0d, q19);

  ASSERT_EQUAL_128(0x2021222320212223, 0x1011121300010203, q20);
  ASSERT_EQUAL_128(0x2425262724252627, 0x1415161704050607, q21);
  ASSERT_EQUAL_128(0x28292a2b28292a2b, 0x18191a1b08090a0b, q22);
  ASSERT_EQUAL_128(0x2c2d2e2f2c2d2e2f, 0x1c1d1e1f0c0d0e0f, q23);

  ASSERT_EQUAL_128(0x18191a1b1c1d1e1f, 0x08090a0b0c0d0e0f, q24);
  ASSERT_EQUAL_128(0x28292a2b2c2d2e2f, 0x28292a2b2c2d2e2f, q25);
  ASSERT_EQUAL_128(0x1011121314151617, 0x0001020304050607, q26);
  ASSERT_EQUAL_128(0x2021222324252627, 0x2021222324252627, q27);

  TEARDOWN();
}


TEST(neon_ld1_lane_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint8_t src[64];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x17, src_base);
  __ Mov(x18, src_base);
  __ Mov(x19, src_base);
  __ Mov(x20, src_base);
  __ Mov(x21, src_base);
  __ Mov(x22, src_base);
  __ Mov(x23, src_base);
  __ Mov(x24, src_base);

  // Test loading whole register by element.
  for (int i = 15; i >= 0; i--) {
    __ Ld1(v0.B(), i, MemOperand(x17, 1, PostIndex));
  }

  for (int i = 7; i >= 0; i--) {
    __ Ld1(v1.H(), i, MemOperand(x18, 2, PostIndex));
  }

  for (int i = 3; i >= 0; i--) {
    __ Ld1(v2.S(), i, MemOperand(x19, 4, PostIndex));
  }

  for (int i = 1; i >= 0; i--) {
    __ Ld1(v3.D(), i, MemOperand(x20, 8, PostIndex));
  }

  // Test loading a single element into an initialised register.
  __ Mov(x25, 1);
  __ Ldr(q4, MemOperand(x21));
  __ Ld1(v4.B(), 4, MemOperand(x21, x25, PostIndex));
  __ Add(x25, x25, 1);

  __ Ldr(q5, MemOperand(x22));
  __ Ld1(v5.H(), 3, MemOperand(x22, x25, PostIndex));
  __ Add(x25, x25, 1);

  __ Ldr(q6, MemOperand(x23));
  __ Ld1(v6.S(), 2, MemOperand(x23, x25, PostIndex));
  __ Add(x25, x25, 1);

  __ Ldr(q7, MemOperand(x24));
  __ Ld1(v7.D(), 1, MemOperand(x24, x25, PostIndex));

  END();

  RUN();

  ASSERT_EQUAL_128(0x0001020304050607, 0x08090a0b0c0d0e0f, q0);
  ASSERT_EQUAL_128(0x0100030205040706, 0x09080b0a0d0c0f0e, q1);
  ASSERT_EQUAL_128(0x0302010007060504, 0x0b0a09080f0e0d0c, q2);
  ASSERT_EQUAL_128(0x0706050403020100, 0x0f0e0d0c0b0a0908, q3);
  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050003020100, q4);
  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0100050403020100, q5);
  ASSERT_EQUAL_128(0x0f0e0d0c03020100, 0x0706050403020100, q6);
  ASSERT_EQUAL_128(0x0706050403020100, 0x0706050403020100, q7);
  ASSERT_EQUAL_64(src_base + 16, x17);
  ASSERT_EQUAL_64(src_base + 16, x18);
  ASSERT_EQUAL_64(src_base + 16, x19);
  ASSERT_EQUAL_64(src_base + 16, x20);
  ASSERT_EQUAL_64(src_base + 1, x21);
  ASSERT_EQUAL_64(src_base + 2, x22);
  ASSERT_EQUAL_64(src_base + 3, x23);
  ASSERT_EQUAL_64(src_base + 4, x24);

  TEARDOWN();
}


TEST(neon_st1_lane_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint8_t src[64];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x17, src_base);
  __ Mov(x18, -16);
  __ Ldr(q0, MemOperand(x17));
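  // As in neon_st1_lane, but using the post-index form to advance x17.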

  for (int i = 15; i >= 0; i--) {
    __ St1(v0.B(), i, MemOperand(x17, 1, PostIndex));
  }
  __ Ldr(q1, MemOperand(x17, x18));

  for (int i = 7; i >= 0; i--) {
    __ St1(v0.H(), i, MemOperand(x17, 2, PostIndex));
  }
  __ Ldr(q2, MemOperand(x17, x18));

  for (int i = 3; i >= 0; i--) {
    __ St1(v0.S(), i, MemOperand(x17, 4, PostIndex));
  }
  __ Ldr(q3, MemOperand(x17, x18));

  for (int i = 1; i >= 0; i--) {
    __ St1(v0.D(), i, MemOperand(x17, 8, PostIndex));
  }
  __ Ldr(q4, MemOperand(x17, x18));

  END();

  RUN();

  ASSERT_EQUAL_128(0x0001020304050607, 0x08090a0b0c0d0e0f, q1);
  ASSERT_EQUAL_128(0x0100030205040706, 0x09080b0a0d0c0f0e, q2);
  ASSERT_EQUAL_128(0x0302010007060504, 0x0b0a09080f0e0d0c, q3);
  ASSERT_EQUAL_128(0x0706050403020100, 0x0f0e0d0c0b0a0908, q4);

  TEARDOWN();
}


TEST(neon_ld1_alllanes) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint8_t src[64];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x17, src_base + 1);
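  // Ld1r loads one element and replicates it into every lane of the
  // destination register.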
  __ Ld1r(v0.V8B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld1r(v1.V16B(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld1r(v2.V4H(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld1r(v3.V8H(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld1r(v4.V2S(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld1r(v5.V4S(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld1r(v6.V1D(), MemOperand(x17));
  __ Add(x17, x17, 1);
  __ Ld1r(v7.V2D(), MemOperand(x17));
  END();

  RUN();

  ASSERT_EQUAL_128(0, 0x0101010101010101, q0);
  ASSERT_EQUAL_128(0x0202020202020202, 0x0202020202020202, q1);
  ASSERT_EQUAL_128(0, 0x0403040304030403, q2);
  ASSERT_EQUAL_128(0x0504050405040504, 0x0504050405040504, q3);
  ASSERT_EQUAL_128(0, 0x0807060508070605, q4);
  ASSERT_EQUAL_128(0x0908070609080706, 0x0908070609080706, q5);
  ASSERT_EQUAL_128(0, 0x0e0d0c0b0a090807, q6);
  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0f0e0d0c0b0a0908, q7);

  TEARDOWN();
}


TEST(neon_ld1_alllanes_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint8_t src[64];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x17, src_base + 1);
  __ Mov(x18, 1);
  __ Ld1r(v0.V8B(), MemOperand(x17, 1, PostIndex));
  __ Ld1r(v1.V16B(), MemOperand(x17, x18, PostIndex));
  __ Ld1r(v2.V4H(), MemOperand(x17, x18, PostIndex));
  __ Ld1r(v3.V8H(), MemOperand(x17, 2, PostIndex));
  __ Ld1r(v4.V2S(), MemOperand(x17, x18, PostIndex));
  __ Ld1r(v5.V4S(), MemOperand(x17, 4, PostIndex));
  __ Ld1r(v6.V2D(), MemOperand(x17, 8, PostIndex));
  END();

  RUN();

  ASSERT_EQUAL_128(0, 0x0101010101010101, q0);
  ASSERT_EQUAL_128(0x0202020202020202, 0x0202020202020202, q1);
  ASSERT_EQUAL_128(0, 0x0403040304030403, q2);
  ASSERT_EQUAL_128(0x0504050405040504, 0x0504050405040504, q3);
  ASSERT_EQUAL_128(0, 0x0908070609080706, q4);
  ASSERT_EQUAL_128(0x0a0908070a090807, 0x0a0908070a090807, q5);
  ASSERT_EQUAL_128(0x1211100f0e0d0c0b, 0x1211100f0e0d0c0b, q6);
  ASSERT_EQUAL_64(src_base + 19, x17);

  TEARDOWN();
}


TEST(neon_st1_d) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint8_t src[14 * kDRegSizeInBytes];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x17, src_base);
  __ Ldr(q0, MemOperand(x17, 16, PostIndex));
  __ Ldr(q1, MemOperand(x17, 16, PostIndex));
  __ Ldr(q2, MemOperand(x17, 16, PostIndex));
  __ Ldr(q3, MemOperand(x17, 16, PostIndex));
  __ Mov(x17, src_base);

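  // St1 with multiple registers stores one-element structures from up to four
  // consecutive registers; each store is reloaded straight away for checking.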
  __ St1(v0.V8B(), MemOperand(x17));
  __ Ldr(d16, MemOperand(x17, 8, PostIndex));

  __ St1(v0.V8B(), v1.V8B(), MemOperand(x17));
  __ Ldr(q17, MemOperand(x17, 16, PostIndex));

  __ St1(v0.V4H(), v1.V4H(), v2.V4H(), MemOperand(x17));
  __ Ldr(d18, MemOperand(x17, 8, PostIndex));
  __ Ldr(d19, MemOperand(x17, 8, PostIndex));
  __ Ldr(d20, MemOperand(x17, 8, PostIndex));

  __ St1(v0.V2S(), v1.V2S(), v2.V2S(), v3.V2S(), MemOperand(x17));
  __ Ldr(q21, MemOperand(x17, 16, PostIndex));
  __ Ldr(q22, MemOperand(x17, 16, PostIndex));

  __ St1(v0.V1D(), v1.V1D(), v2.V1D(), v3.V1D(), MemOperand(x17));
  __ Ldr(q23, MemOperand(x17, 16, PostIndex));
  __ Ldr(q24, MemOperand(x17));
  END();

  RUN();

  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q0);
  ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716151413121110, q1);
  ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726252423222120, q2);
  ASSERT_EQUAL_128(0x3f3e3d3c3b3a3938, 0x3736353433323130, q3);
  ASSERT_EQUAL_128(0, 0x0706050403020100, q16);
  ASSERT_EQUAL_128(0x1716151413121110, 0x0706050403020100, q17);
  ASSERT_EQUAL_128(0, 0x0706050403020100, q18);
  ASSERT_EQUAL_128(0, 0x1716151413121110, q19);
  ASSERT_EQUAL_128(0, 0x2726252423222120, q20);
  ASSERT_EQUAL_128(0x1716151413121110, 0x0706050403020100, q21);
  ASSERT_EQUAL_128(0x3736353433323130, 0x2726252423222120, q22);
  ASSERT_EQUAL_128(0x1716151413121110, 0x0706050403020100, q23);
  ASSERT_EQUAL_128(0x3736353433323130, 0x2726252423222120, q24);

  TEARDOWN();
}


TEST(neon_st1_d_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint8_t src[64 + 14 * kDRegSizeInBytes];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x17, src_base);
  __ Mov(x18, -8);
  __ Mov(x19, -16);
  __ Mov(x20, -24);
  __ Mov(x21, -32);
  __ Ldr(q0, MemOperand(x17, 16, PostIndex));
  __ Ldr(q1, MemOperand(x17, 16, PostIndex));
  __ Ldr(q2, MemOperand(x17, 16, PostIndex));
  __ Ldr(q3, MemOperand(x17, 16, PostIndex));
  __ Mov(x17, src_base);

  __ St1(v0.V8B(), MemOperand(x17, 8, PostIndex));
  __ Ldr(d16, MemOperand(x17, x18));

  __ St1(v0.V8B(), v1.V8B(), MemOperand(x17, 16, PostIndex));
  __ Ldr(q17, MemOperand(x17, x19));

  __ St1(v0.V4H(), v1.V4H(), v2.V4H(), MemOperand(x17, 24, PostIndex));
  __ Ldr(d18, MemOperand(x17, x20));
  __ Ldr(d19, MemOperand(x17, x19));
  __ Ldr(d20, MemOperand(x17, x18));

  __ St1(v0.V2S(),
         v1.V2S(),
         v2.V2S(),
         v3.V2S(),
         MemOperand(x17, 32, PostIndex));
  __ Ldr(q21, MemOperand(x17, x21));
  __ Ldr(q22, MemOperand(x17, x19));

  __ St1(v0.V1D(),
         v1.V1D(),
         v2.V1D(),
         v3.V1D(),
         MemOperand(x17, 32, PostIndex));
  __ Ldr(q23, MemOperand(x17, x21));
  __ Ldr(q24, MemOperand(x17, x19));
  END();

  RUN();

  ASSERT_EQUAL_128(0, 0x0706050403020100, q16);
  ASSERT_EQUAL_128(0x1716151413121110, 0x0706050403020100, q17);
  ASSERT_EQUAL_128(0, 0x0706050403020100, q18);
  ASSERT_EQUAL_128(0, 0x1716151413121110, q19);
  ASSERT_EQUAL_128(0, 0x2726252423222120, q20);
  ASSERT_EQUAL_128(0x1716151413121110, 0x0706050403020100, q21);
  ASSERT_EQUAL_128(0x3736353433323130, 0x2726252423222120, q22);
  ASSERT_EQUAL_128(0x1716151413121110, 0x0706050403020100, q23);
  ASSERT_EQUAL_128(0x3736353433323130, 0x2726252423222120, q24);

  TEARDOWN();
}


TEST(neon_st1_q) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint8_t src[64 + 160];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x17, src_base);
  __ Ldr(q0, MemOperand(x17, 16, PostIndex));
  __ Ldr(q1, MemOperand(x17, 16, PostIndex));
  __ Ldr(q2, MemOperand(x17, 16, PostIndex));
  __ Ldr(q3, MemOperand(x17, 16, PostIndex));

  __ St1(v0.V16B(), MemOperand(x17));
  __ Ldr(q16, MemOperand(x17, 16, PostIndex));

  __ St1(v0.V8H(), v1.V8H(), MemOperand(x17));
  __ Ldr(q17, MemOperand(x17, 16, PostIndex));
  __ Ldr(q18, MemOperand(x17, 16, PostIndex));

  __ St1(v0.V4S(), v1.V4S(), v2.V4S(), MemOperand(x17));
  __ Ldr(q19, MemOperand(x17, 16, PostIndex));
  __ Ldr(q20, MemOperand(x17, 16, PostIndex));
  __ Ldr(q21, MemOperand(x17, 16, PostIndex));

  __ St1(v0.V2D(), v1.V2D(), v2.V2D(), v3.V2D(), MemOperand(x17));
  __ Ldr(q22, MemOperand(x17, 16, PostIndex));
  __ Ldr(q23, MemOperand(x17, 16, PostIndex));
  __ Ldr(q24, MemOperand(x17, 16, PostIndex));
  __ Ldr(q25, MemOperand(x17));
  END();

  RUN();

  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q16);
  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q17);
  ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716151413121110, q18);
  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q19);
  ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716151413121110, q20);
  ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726252423222120, q21);
  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q22);
  ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716151413121110, q23);
  ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726252423222120, q24);
  ASSERT_EQUAL_128(0x3f3e3d3c3b3a3938, 0x3736353433323130, q25);

  TEARDOWN();
}


TEST(neon_st1_q_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint8_t src[64 + 160];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x17, src_base);
  __ Mov(x18, -16);
  __ Mov(x19, -32);
  __ Mov(x20, -48);
  __ Mov(x21, -64);
  __ Ldr(q0, MemOperand(x17, 16, PostIndex));
  __ Ldr(q1, MemOperand(x17, 16, PostIndex));
  __ Ldr(q2, MemOperand(x17, 16, PostIndex));
  __ Ldr(q3, MemOperand(x17, 16, PostIndex));

  __ St1(v0.V16B(), MemOperand(x17, 16, PostIndex));
  __ Ldr(q16, MemOperand(x17, x18));

  __ St1(v0.V8H(), v1.V8H(), MemOperand(x17, 32, PostIndex));
  __ Ldr(q17, MemOperand(x17, x19));
  __ Ldr(q18, MemOperand(x17, x18));

  __ St1(v0.V4S(), v1.V4S(), v2.V4S(), MemOperand(x17, 48, PostIndex));
  __ Ldr(q19, MemOperand(x17, x20));
  __ Ldr(q20, MemOperand(x17, x19));
  __ Ldr(q21, MemOperand(x17, x18));

  __ St1(v0.V2D(),
         v1.V2D(),
         v2.V2D(),
         v3.V2D(),
         MemOperand(x17, 64, PostIndex));
  __ Ldr(q22, MemOperand(x17, x21));
  __ Ldr(q23, MemOperand(x17, x20));
  __ Ldr(q24, MemOperand(x17, x19));
  __ Ldr(q25, MemOperand(x17, x18));

  END();

  RUN();

  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q16);
  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q17);
  ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716151413121110, q18);
  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q19);
  ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716151413121110, q20);
  ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726252423222120, q21);
  ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q22);
  ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716151413121110, q23);
  ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726252423222120, q24);
  ASSERT_EQUAL_128(0x3f3e3d3c3b3a3938, 0x3736353433323130, q25);

  TEARDOWN();
}


TEST(neon_st2_d) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint8_t src[4 * 16];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x17, src_base);
  __ Mov(x18, src_base);
  __ Ldr(q0, MemOperand(x17, 16, PostIndex));
  __ Ldr(q1, MemOperand(x17, 16, PostIndex));

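  // St2 interleaves the elements of the two source registers; the odd offsets
  // (+22, then +11) exercise unaligned store addresses.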
  __ St2(v0.V8B(), v1.V8B(), MemOperand(x18));
  __ Add(x18, x18, 22);
  __ St2(v0.V4H(), v1.V4H(), MemOperand(x18));
  __ Add(x18, x18, 11);
  __ St2(v0.V2S(), v1.V2S(), MemOperand(x18));

  __ Mov(x19, src_base);
  __ Ldr(q0, MemOperand(x19, 16, PostIndex));
  __ Ldr(q1, MemOperand(x19, 16, PostIndex));
  __ Ldr(q2, MemOperand(x19, 16, PostIndex));
  __ Ldr(q3, MemOperand(x19, 16, PostIndex));

  END();

  RUN();

  ASSERT_EQUAL_128(0x1707160615051404, 0x1303120211011000, q0);
  ASSERT_EQUAL_128(0x0504131203021110, 0x0100151413121110, q1);
  ASSERT_EQUAL_128(0x1615140706050413, 0x1211100302010014, q2);
  ASSERT_EQUAL_128(0x3f3e3d3c3b3a3938, 0x3736353433323117, q3);

  TEARDOWN();
}


TEST(neon_st2_d_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint8_t src[4 * 16];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x22, 5);
  __ Mov(x17, src_base);
  __ Mov(x18, src_base);
  __ Ldr(q0, MemOperand(x17, 16, PostIndex));
  __ Ldr(q1, MemOperand(x17, 16, PostIndex));

  __ St2(v0.V8B(), v1.V8B(), MemOperand(x18, x22, PostIndex));
  __ St2(v0.V4H(), v1.V4H(), MemOperand(x18, 16, PostIndex));
  __ St2(v0.V2S(), v1.V2S(), MemOperand(x18));

  __ Mov(x19, src_base);
  __ Ldr(q0, MemOperand(x19, 16, PostIndex));
  __ Ldr(q1, MemOperand(x19, 16, PostIndex));
  __ Ldr(q2, MemOperand(x19, 16, PostIndex));

  END();

  RUN();

  ASSERT_EQUAL_128(0x1405041312030211, 0x1001000211011000, q0);
  ASSERT_EQUAL_128(0x0605041312111003, 0x0201001716070615, q1);
  ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726251716151407, q2);

  TEARDOWN();
}


TEST(neon_st2_q) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint8_t src[5 * 16];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x17, src_base);
  __ Mov(x18, src_base);
  __ Ldr(q0, MemOperand(x17, 16, PostIndex));
  __ Ldr(q1, MemOperand(x17, 16, PostIndex));

  __ St2(v0.V16B(), v1.V16B(), MemOperand(x18));
  __ Add(x18, x18, 8);
  __ St2(v0.V8H(), v1.V8H(), MemOperand(x18));
  __ Add(x18, x18, 22);
  __ St2(v0.V4S(), v1.V4S(), MemOperand(x18));
  __ Add(x18, x18, 2);
  __ St2(v0.V2D(), v1.V2D(), MemOperand(x18));

  __ Mov(x19, src_base);
  __ Ldr(q0, MemOperand(x19, 16, PostIndex));
  __ Ldr(q1, MemOperand(x19, 16, PostIndex));
  __ Ldr(q2, MemOperand(x19, 16, PostIndex));
  __ Ldr(q3, MemOperand(x19, 16, PostIndex));

  END();

  RUN();

  ASSERT_EQUAL_128(0x1312030211100100, 0x1303120211011000, q0);
  ASSERT_EQUAL_128(0x01000b0a19180908, 0x1716070615140504, q1);
  ASSERT_EQUAL_128(0x1716151413121110, 0x0706050403020100, q2);
  ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x0f0e0d0c0b0a0908, q3);

  TEARDOWN();
}


TEST(neon_st2_q_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint8_t src[5 * 16];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x22, 5);
  __ Mov(x17, src_base);
  __ Mov(x18, src_base);
  __ Ldr(q0, MemOperand(x17, 16, PostIndex));
  __ Ldr(q1, MemOperand(x17, 16, PostIndex));

  __ St2(v0.V16B(), v1.V16B(), MemOperand(x18, x22, PostIndex));
  __ St2(v0.V8H(), v1.V8H(), MemOperand(x18, 32, PostIndex));
  __ St2(v0.V4S(), v1.V4S(), MemOperand(x18, x22, PostIndex));
  __ St2(v0.V2D(), v1.V2D(), MemOperand(x18));

  __ Mov(x19, src_base);
  __ Ldr(q0, MemOperand(x19, 16, PostIndex));
  __ Ldr(q1, MemOperand(x19, 16, PostIndex));
  __ Ldr(q2, MemOperand(x19, 16, PostIndex));
  __ Ldr(q3, MemOperand(x19, 16, PostIndex));
  __ Ldr(q4, MemOperand(x19, 16, PostIndex));

  END();

  RUN();

  ASSERT_EQUAL_128(0x1405041312030211, 0x1001000211011000, q0);
  ASSERT_EQUAL_128(0x1c0d0c1b1a0b0a19, 0x1809081716070615, q1);
  ASSERT_EQUAL_128(0x0504030201001003, 0x0201001f1e0f0e1d, q2);
  ASSERT_EQUAL_128(0x0d0c0b0a09081716, 0x1514131211100706, q3);
  ASSERT_EQUAL_128(0x4f4e4d4c4b4a1f1e, 0x1d1c1b1a19180f0e, q4);

  TEARDOWN();
}


TEST(neon_st3_d) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint8_t src[3 * 16];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x17, src_base);
  __ Mov(x18, src_base);
  __ Ldr(q0, MemOperand(x17, 16, PostIndex));
  __ Ldr(q1, MemOperand(x17, 16, PostIndex));
  __ Ldr(q2, MemOperand(x17, 16, PostIndex));

  __ St3(v0.V8B(), v1.V8B(), v2.V8B(), MemOperand(x18));
  __ Add(x18, x18, 3);
  __ St3(v0.V4H(), v1.V4H(), v2.V4H(), MemOperand(x18));
  __ Add(x18, x18, 2);
  __ St3(v0.V2S(), v1.V2S(), v2.V2S(), MemOperand(x18));

  __ Mov(x19, src_base);
  __ Ldr(q0, MemOperand(x19, 16, PostIndex));
  __ Ldr(q1, MemOperand(x19, 16, PostIndex));

  END();

  RUN();

  ASSERT_EQUAL_128(0x2221201312111003, 0x0201000100201000, q0);
  ASSERT_EQUAL_128(0x1f1e1d2726252417, 0x1615140706050423, q1);

  TEARDOWN();
}


TEST(neon_st3_d_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint8_t src[4 * 16];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x22, 5);
  __ Mov(x17, src_base);
  __ Mov(x18, src_base);
  __ Ldr(q0, MemOperand(x17, 16, PostIndex));
  __ Ldr(q1, MemOperand(x17, 16, PostIndex));
  __ Ldr(q2, MemOperand(x17, 16, PostIndex));

  __ St3(v0.V8B(), v1.V8B(), v2.V8B(), MemOperand(x18, x22, PostIndex));
  __ St3(v0.V4H(), v1.V4H(), v2.V4H(), MemOperand(x18, 24, PostIndex));
  __ St3(v0.V2S(), v1.V2S(), v2.V2S(), MemOperand(x18));

  __ Mov(x19, src_base);
  __ Ldr(q0, MemOperand(x19, 16, PostIndex));
  __ Ldr(q1, MemOperand(x19, 16, PostIndex));
  __ Ldr(q2, MemOperand(x19, 16, PostIndex));
  __ Ldr(q3, MemOperand(x19, 16, PostIndex));

  END();

  RUN();

  ASSERT_EQUAL_128(0x2213120302212011, 0x1001001101201000, q0);
  ASSERT_EQUAL_128(0x0201002726171607, 0x0625241514050423, q1);
  ASSERT_EQUAL_128(0x1615140706050423, 0x2221201312111003, q2);
  ASSERT_EQUAL_128(0x3f3e3d3c3b3a3938, 0x3736352726252417, q3);

  TEARDOWN();
}


TEST(neon_st3_q) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint8_t src[6 * 16];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x17, src_base);
  __ Mov(x18, src_base);
  __ Ldr(q0, MemOperand(x17, 16, PostIndex));
  __ Ldr(q1, MemOperand(x17, 16, PostIndex));
  __ Ldr(q2, MemOperand(x17, 16, PostIndex));

  __ St3(v0.V16B(), v1.V16B(), v2.V16B(), MemOperand(x18));
  __ Add(x18, x18, 5);
  __ St3(v0.V8H(), v1.V8H(), v2.V8H(), MemOperand(x18));
  __ Add(x18, x18, 12);
  __ St3(v0.V4S(), v1.V4S(), v2.V4S(), MemOperand(x18));
  __ Add(x18, x18, 22);
  __ St3(v0.V2D(), v1.V2D(), v2.V2D(), MemOperand(x18));

  __ Mov(x19, src_base);
  __ Ldr(q0, MemOperand(x19, 16, PostIndex));
  __ Ldr(q1, MemOperand(x19, 16, PostIndex));
  __ Ldr(q2, MemOperand(x19, 16, PostIndex));
  __ Ldr(q3, MemOperand(x19, 16, PostIndex));
  __ Ldr(q4, MemOperand(x19, 16, PostIndex));
  __ Ldr(q5, MemOperand(x19, 16, PostIndex));

  END();

  RUN();

  ASSERT_EQUAL_128(0x2213120302212011, 0x1001001101201000, q0);
  ASSERT_EQUAL_128(0x0605042322212013, 0x1211100302010023, q1);
  ASSERT_EQUAL_128(0x1007060504030201, 0x0025241716151407, q2);
  ASSERT_EQUAL_128(0x0827262524232221, 0x2017161514131211, q3);
  ASSERT_EQUAL_128(0x281f1e1d1c1b1a19, 0x180f0e0d0c0b0a09, q4);
  ASSERT_EQUAL_128(0x5f5e5d5c5b5a5958, 0x572f2e2d2c2b2a29, q5);

  TEARDOWN();
}


TEST(neon_st3_q_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint8_t src[7 * 16];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x22, 5);
  __ Mov(x17, src_base);
  __ Mov(x18, src_base);
  __ Ldr(q0, MemOperand(x17, 16, PostIndex));
  __ Ldr(q1, MemOperand(x17, 16, PostIndex));
  __ Ldr(q2, MemOperand(x17, 16, PostIndex));

  __ St3(v0.V16B(), v1.V16B(), v2.V16B(), MemOperand(x18, x22, PostIndex));
  __ St3(v0.V8H(), v1.V8H(), v2.V8H(), MemOperand(x18, 48, PostIndex));
  __ St3(v0.V4S(), v1.V4S(), v2.V4S(), MemOperand(x18, x22, PostIndex));
  __ St3(v0.V2D(), v1.V2D(), v2.V2D(), MemOperand(x18));

  __ Mov(x19, src_base);
  __ Ldr(q0, MemOperand(x19, 16, PostIndex));
  __ Ldr(q1, MemOperand(x19, 16, PostIndex));
  __ Ldr(q2, MemOperand(x19, 16, PostIndex));
  __ Ldr(q3, MemOperand(x19, 16, PostIndex));
  __ Ldr(q4, MemOperand(x19, 16, PostIndex));
  __ Ldr(q5, MemOperand(x19, 16, PostIndex));
  __ Ldr(q6, MemOperand(x19, 16, PostIndex));

  END();

  RUN();

  ASSERT_EQUAL_128(0x2213120302212011, 0x1001001101201000, q0);
  ASSERT_EQUAL_128(0x1809082726171607, 0x0625241514050423, q1);
  ASSERT_EQUAL_128(0x0e2d2c1d1c0d0c2b, 0x2a1b1a0b0a292819, q2);
  ASSERT_EQUAL_128(0x0504030201001003, 0x0201002f2e1f1e0f, q3);
  ASSERT_EQUAL_128(0x2524232221201716, 0x1514131211100706, q4);
  ASSERT_EQUAL_128(0x1d1c1b1a19180f0e, 0x0d0c0b0a09082726, q5);
  ASSERT_EQUAL_128(0x6f6e6d6c6b6a2f2e, 0x2d2c2b2a29281f1e, q6);

  TEARDOWN();
}


TEST(neon_st4_d) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint8_t src[4 * 16];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x17, src_base);
  __ Mov(x18, src_base);
  __ Ldr(q0, MemOperand(x17, 16, PostIndex));
  __ Ldr(q1, MemOperand(x17, 16, PostIndex));
  __ Ldr(q2, MemOperand(x17, 16, PostIndex));
  __ Ldr(q3, MemOperand(x17, 16, PostIndex));

  __ St4(v0.V8B(), v1.V8B(), v2.V8B(), v3.V8B(), MemOperand(x18));
  __ Add(x18, x18, 12);
  __ St4(v0.V4H(), v1.V4H(), v2.V4H(), v3.V4H(), MemOperand(x18));
  __ Add(x18, x18, 15);
  __ St4(v0.V2S(), v1.V2S(), v2.V2S(), v3.V2S(), MemOperand(x18));

  __ Mov(x19, src_base);
  __ Ldr(q0, MemOperand(x19, 16, PostIndex));
  __ Ldr(q1, MemOperand(x19, 16, PostIndex));
  __ Ldr(q2, MemOperand(x19, 16, PostIndex));
  __ Ldr(q3, MemOperand(x19, 16, PostIndex));

  END();

  RUN();

  ASSERT_EQUAL_128(0x1110010032221202, 0x3121110130201000, q0);
  ASSERT_EQUAL_128(0x1003020100322322, 0x1312030231302120, q1);
  ASSERT_EQUAL_128(0x1407060504333231, 0x3023222120131211, q2);
  ASSERT_EQUAL_128(0x3f3e3d3c3b373635, 0x3427262524171615, q3);

  TEARDOWN();
}


TEST(neon_st4_d_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint8_t src[5 * 16];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x22, 5);
  __ Mov(x17, src_base);
  __ Mov(x18, src_base);
  __ Ldr(q0, MemOperand(x17, 16, PostIndex));
  __ Ldr(q1, MemOperand(x17, 16, PostIndex));
  __ Ldr(q2, MemOperand(x17, 16, PostIndex));
  __ Ldr(q3, MemOperand(x17, 16, PostIndex));

  __ St4(v0.V8B(),
         v1.V8B(),
         v2.V8B(),
         v3.V8B(),
         MemOperand(x18, x22, PostIndex));
  __ St4(v0.V4H(),
         v1.V4H(),
         v2.V4H(),
         v3.V4H(),
         MemOperand(x18, 32, PostIndex));
  __ St4(v0.V2S(), v1.V2S(), v2.V2S(), v3.V2S(), MemOperand(x18));

  __ Mov(x19, src_base);
  __ Ldr(q0, MemOperand(x19, 16, PostIndex));
  __ Ldr(q1, MemOperand(x19, 16, PostIndex));
  __ Ldr(q2, MemOperand(x19, 16, PostIndex));
  __ Ldr(q3, MemOperand(x19, 16, PostIndex));
  __ Ldr(q4, MemOperand(x19, 16, PostIndex));

  END();

  RUN();

  ASSERT_EQUAL_128(0x1203023130212011, 0x1001000130201000, q0);
  ASSERT_EQUAL_128(0x1607063534252415, 0x1405043332232213, q1);
  ASSERT_EQUAL_128(0x2221201312111003, 0x0201003736272617, q2);
  ASSERT_EQUAL_128(0x2625241716151407, 0x0605043332313023, q3);
  ASSERT_EQUAL_128(0x4f4e4d4c4b4a4948, 0x4746453736353427, q4);

  TEARDOWN();
}


TEST(neon_st4_q) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint8_t src[7 * 16];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x17, src_base);
  __ Mov(x18, src_base);
  __ Ldr(q0, MemOperand(x17, 16, PostIndex));
  __ Ldr(q1, MemOperand(x17, 16, PostIndex));
  __ Ldr(q2, MemOperand(x17, 16, PostIndex));
  __ Ldr(q3, MemOperand(x17, 16, PostIndex));

  __ St4(v0.V16B(), v1.V16B(), v2.V16B(), v3.V16B(), MemOperand(x18));
  __ Add(x18, x18, 5);
  __ St4(v0.V8H(), v1.V8H(), v2.V8H(), v3.V8H(), MemOperand(x18));
  __ Add(x18, x18, 12);
  __ St4(v0.V4S(), v1.V4S(), v2.V4S(), v3.V4S(), MemOperand(x18));
  __ Add(x18, x18, 22);
  __ St4(v0.V2D(), v1.V2D(), v2.V2D(), v3.V2D(), MemOperand(x18));
  __ Add(x18, x18, 10);

  __ Mov(x19, src_base);
  __ Ldr(q0, MemOperand(x19, 16, PostIndex));
  __ Ldr(q1, MemOperand(x19, 16, PostIndex));
  __ Ldr(q2, MemOperand(x19, 16, PostIndex));
  __ Ldr(q3, MemOperand(x19, 16, PostIndex));
  __ Ldr(q4, MemOperand(x19, 16, PostIndex));
  __ Ldr(q5, MemOperand(x19, 16, PostIndex));
  __ Ldr(q6, MemOperand(x19, 16, PostIndex));

  END();

  RUN();

  ASSERT_EQUAL_128(0x1203023130212011, 0x1001000130201000, q0);
  ASSERT_EQUAL_128(0x3231302322212013, 0x1211100302010013, q1);
  ASSERT_EQUAL_128(0x1007060504030201, 0x0015140706050433, q2);
  ASSERT_EQUAL_128(0x3027262524232221, 0x2017161514131211, q3);
  ASSERT_EQUAL_128(0x180f0e0d0c0b0a09, 0x0837363534333231, q4);
  ASSERT_EQUAL_128(0x382f2e2d2c2b2a29, 0x281f1e1d1c1b1a19, q5);
  ASSERT_EQUAL_128(0x6f6e6d6c6b6a6968, 0x673f3e3d3c3b3a39, q6);

  TEARDOWN();
}


TEST(neon_st4_q_postindex) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint8_t src[9 * 16];
  for (unsigned i = 0; i < sizeof(src); i++) {
    src[i] = i;
  }
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x22, 5);
  __ Mov(x17, src_base);
  __ Mov(x18, src_base);
  __ Ldr(q0, MemOperand(x17, 16, PostIndex));
  __ Ldr(q1, MemOperand(x17, 16, PostIndex));
  __ Ldr(q2, MemOperand(x17, 16, PostIndex));
  __ Ldr(q3, MemOperand(x17, 16, PostIndex));

  __ St4(v0.V16B(),
         v1.V16B(),
         v2.V16B(),
         v3.V16B(),
         MemOperand(x18, x22, PostIndex));
  __ St4(v0.V8H(),
         v1.V8H(),
         v2.V8H(),
         v3.V8H(),
         MemOperand(x18, 64, PostIndex));
  __ St4(v0.V4S(),
         v1.V4S(),
         v2.V4S(),
         v3.V4S(),
         MemOperand(x18, x22, PostIndex));
  __ St4(v0.V2D(), v1.V2D(), v2.V2D(), v3.V2D(), MemOperand(x18));

  __ Mov(x19, src_base);
  __ Ldr(q0, MemOperand(x19, 16, PostIndex));
  __ Ldr(q1, MemOperand(x19, 16, PostIndex));
  __ Ldr(q2, MemOperand(x19, 16, PostIndex));
  __ Ldr(q3, MemOperand(x19, 16, PostIndex));
  __ Ldr(q4, MemOperand(x19, 16, PostIndex));
  __ Ldr(q5, MemOperand(x19, 16, PostIndex));
  __ Ldr(q6, MemOperand(x19, 16, PostIndex));
  __ Ldr(q7, MemOperand(x19, 16, PostIndex));
  __ Ldr(q8, MemOperand(x19, 16, PostIndex));

  END();

  RUN();

  ASSERT_EQUAL_128(0x1203023130212011, 0x1001000130201000, q0);
  ASSERT_EQUAL_128(0x1607063534252415, 0x1405043332232213, q1);
  ASSERT_EQUAL_128(0x1a0b0a3938292819, 0x1809083736272617, q2);
  ASSERT_EQUAL_128(0x1e0f0e3d3c2d2c1d, 0x1c0d0c3b3a2b2a1b, q3);
  ASSERT_EQUAL_128(0x0504030201001003, 0x0201003f3e2f2e1f, q4);
  ASSERT_EQUAL_128(0x2524232221201716, 0x1514131211100706, q5);
  ASSERT_EQUAL_128(0x0d0c0b0a09083736, 0x3534333231302726, q6);
  ASSERT_EQUAL_128(0x2d2c2b2a29281f1e, 0x1d1c1b1a19180f0e, q7);
  ASSERT_EQUAL_128(0x8f8e8d8c8b8a3f3e, 0x3d3c3b3a39382f2e, q8);

  TEARDOWN();
}


TEST(neon_destructive_minmaxp) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();
  __ Movi(v0.V2D(), 0, 0x2222222233333333);
  __ Movi(v1.V2D(), 0, 0x0000000011111111);

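  // Check that pairwise min and max give the same result when the destination
  // aliases one (or both) of the source registers.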
  __ Sminp(v16.V2S(), v0.V2S(), v1.V2S());
  __ Mov(v17, v0);
  __ Sminp(v17.V2S(), v17.V2S(), v1.V2S());
  __ Mov(v18, v1);
  __ Sminp(v18.V2S(), v0.V2S(), v18.V2S());
  __ Mov(v19, v0);
  __ Sminp(v19.V2S(), v19.V2S(), v19.V2S());

  __ Smaxp(v20.V2S(), v0.V2S(), v1.V2S());
  __ Mov(v21, v0);
  __ Smaxp(v21.V2S(), v21.V2S(), v1.V2S());
  __ Mov(v22, v1);
  __ Smaxp(v22.V2S(), v0.V2S(), v22.V2S());
  __ Mov(v23, v0);
  __ Smaxp(v23.V2S(), v23.V2S(), v23.V2S());

  __ Uminp(v24.V2S(), v0.V2S(), v1.V2S());
  __ Mov(v25, v0);
  __ Uminp(v25.V2S(), v25.V2S(), v1.V2S());
  __ Mov(v26, v1);
  __ Uminp(v26.V2S(), v0.V2S(), v26.V2S());
  __ Mov(v27, v0);
  __ Uminp(v27.V2S(), v27.V2S(), v27.V2S());

  __ Umaxp(v28.V2S(), v0.V2S(), v1.V2S());
  __ Mov(v29, v0);
  __ Umaxp(v29.V2S(), v29.V2S(), v1.V2S());
  __ Mov(v30, v1);
  __ Umaxp(v30.V2S(), v0.V2S(), v30.V2S());
  __ Mov(v31, v0);
  __ Umaxp(v31.V2S(), v31.V2S(), v31.V2S());
  END();

  RUN();

  ASSERT_EQUAL_128(0, 0x0000000022222222, q16);
  ASSERT_EQUAL_128(0, 0x0000000022222222, q17);
  ASSERT_EQUAL_128(0, 0x0000000022222222, q18);
  ASSERT_EQUAL_128(0, 0x2222222222222222, q19);

  ASSERT_EQUAL_128(0, 0x1111111133333333, q20);
  ASSERT_EQUAL_128(0, 0x1111111133333333, q21);
  ASSERT_EQUAL_128(0, 0x1111111133333333, q22);
  ASSERT_EQUAL_128(0, 0x3333333333333333, q23);

  ASSERT_EQUAL_128(0, 0x0000000022222222, q24);
  ASSERT_EQUAL_128(0, 0x0000000022222222, q25);
  ASSERT_EQUAL_128(0, 0x0000000022222222, q26);
  ASSERT_EQUAL_128(0, 0x2222222222222222, q27);

  ASSERT_EQUAL_128(0, 0x1111111133333333, q28);
  ASSERT_EQUAL_128(0, 0x1111111133333333, q29);
  ASSERT_EQUAL_128(0, 0x1111111133333333, q30);
  ASSERT_EQUAL_128(0, 0x3333333333333333, q31);

  TEARDOWN();
}


TEST(neon_destructive_tbl) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();
  __ Movi(v0.V2D(), 0x0041424334353627, 0x28291a1b1c0d0e0f);
  __ Movi(v1.V2D(), 0xafaeadacabaaa9a8, 0xa7a6a5a4a3a2a1a0);
  __ Movi(v2.V2D(), 0xbfbebdbcbbbab9b8, 0xb7b6b5b4b3b2b1b0);
  __ Movi(v3.V2D(), 0xcfcecdcccbcac9c8, 0xc7c6c5c4c3c2c1c0);
  __ Movi(v4.V2D(), 0xdfdedddcdbdad9d8, 0xd7d6d5d4d3d2d1d0);

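  // Tbl uses each byte of the index operand (here v0) to select a byte from
  // the table registers; out-of-range indices produce zero. v16 is preloaded
  // with 0x55 bytes so the zeroed lanes are visible in the result.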
  __ Movi(v16.V2D(), 0x5555555555555555, 0x5555555555555555);
  __ Tbl(v16.V16B(), v1.V16B(), v0.V16B());
  __ Mov(v17, v0);
  __ Tbl(v17.V16B(), v1.V16B(), v17.V16B());
  __ Mov(v18, v1);
  __ Tbl(v18.V16B(), v18.V16B(), v0.V16B());
  __ Mov(v19, v0);
  __ Tbl(v19.V16B(), v19.V16B(), v19.V16B());

  __ Movi(v20.V2D(), 0x5555555555555555, 0x5555555555555555);
  __ Tbl(v20.V16B(), v1.V16B(), v2.V16B(), v3.V16B(), v4.V16B(), v0.V16B());
  __ Mov(v21, v0);
  __ Tbl(v21.V16B(), v1.V16B(), v2.V16B(), v3.V16B(), v4.V16B(), v21.V16B());
  __ Mov(v22, v1);
  __ Mov(v23, v2);
  __ Mov(v24, v3);
  __ Mov(v25, v4);
  __ Tbl(v22.V16B(), v22.V16B(), v23.V16B(), v24.V16B(), v25.V16B(), v0.V16B());
  __ Mov(v26, v0);
  __ Mov(v27, v1);
  __ Mov(v28, v2);
  __ Mov(v29, v3);
  __ Tbl(v26.V16B(),
         v26.V16B(),
         v27.V16B(),
         v28.V16B(),
         v29.V16B(),
         v26.V16B());
  END();

  RUN();

  ASSERT_EQUAL_128(0xa000000000000000, 0x0000000000adaeaf, q16);
  ASSERT_EQUAL_128(0xa000000000000000, 0x0000000000adaeaf, q17);
  ASSERT_EQUAL_128(0xa000000000000000, 0x0000000000adaeaf, q18);
  ASSERT_EQUAL_128(0x0f00000000000000, 0x0000000000424100, q19);

  ASSERT_EQUAL_128(0xa0000000d4d5d6c7, 0xc8c9babbbcadaeaf, q20);
  ASSERT_EQUAL_128(0xa0000000d4d5d6c7, 0xc8c9babbbcadaeaf, q21);
  ASSERT_EQUAL_128(0xa0000000d4d5d6c7, 0xc8c9babbbcadaeaf, q22);
  ASSERT_EQUAL_128(0x0f000000c4c5c6b7, 0xb8b9aaabac424100, q26);

  TEARDOWN();
}


TEST(neon_destructive_tbx) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();
  __ Movi(v0.V2D(), 0x0041424334353627, 0x28291a1b1c0d0e0f);
  __ Movi(v1.V2D(), 0xafaeadacabaaa9a8, 0xa7a6a5a4a3a2a1a0);
  __ Movi(v2.V2D(), 0xbfbebdbcbbbab9b8, 0xb7b6b5b4b3b2b1b0);
  __ Movi(v3.V2D(), 0xcfcecdcccbcac9c8, 0xc7c6c5c4c3c2c1c0);
  __ Movi(v4.V2D(), 0xdfdedddcdbdad9d8, 0xd7d6d5d4d3d2d1d0);

  // Tbx differs from Tbl in that out-of-range indices leave the destination
  // bytes unchanged rather than zeroing them.
  __ Movi(v16.V2D(), 0x5555555555555555, 0x5555555555555555);
  __ Tbx(v16.V16B(), v1.V16B(), v0.V16B());
  __ Mov(v17, v0);
  __ Tbx(v17.V16B(), v1.V16B(), v17.V16B());
  __ Mov(v18, v1);
  __ Tbx(v18.V16B(), v18.V16B(), v0.V16B());
  __ Mov(v19, v0);
  __ Tbx(v19.V16B(), v19.V16B(), v19.V16B());

  __ Movi(v20.V2D(), 0x5555555555555555, 0x5555555555555555);
  __ Tbx(v20.V16B(), v1.V16B(), v2.V16B(), v3.V16B(), v4.V16B(), v0.V16B());
  __ Mov(v21, v0);
  __ Tbx(v21.V16B(), v1.V16B(), v2.V16B(), v3.V16B(), v4.V16B(), v21.V16B());
  __ Mov(v22, v1);
  __ Mov(v23, v2);
  __ Mov(v24, v3);
  __ Mov(v25, v4);
  __ Tbx(v22.V16B(), v22.V16B(), v23.V16B(), v24.V16B(), v25.V16B(), v0.V16B());
  __ Mov(v26, v0);
  __ Mov(v27, v1);
  __ Mov(v28, v2);
  __ Mov(v29, v3);
  __ Tbx(v26.V16B(),
         v26.V16B(),
         v27.V16B(),
         v28.V16B(),
         v29.V16B(),
         v26.V16B());
  END();

  RUN();

  ASSERT_EQUAL_128(0xa055555555555555, 0x5555555555adaeaf, q16);
  ASSERT_EQUAL_128(0xa041424334353627, 0x28291a1b1cadaeaf, q17);
  ASSERT_EQUAL_128(0xa0aeadacabaaa9a8, 0xa7a6a5a4a3adaeaf, q18);
  ASSERT_EQUAL_128(0x0f41424334353627, 0x28291a1b1c424100, q19);

  ASSERT_EQUAL_128(0xa0555555d4d5d6c7, 0xc8c9babbbcadaeaf, q20);
  ASSERT_EQUAL_128(0xa0414243d4d5d6c7, 0xc8c9babbbcadaeaf, q21);
  ASSERT_EQUAL_128(0xa0aeadacd4d5d6c7, 0xc8c9babbbcadaeaf, q22);
  ASSERT_EQUAL_128(0x0f414243c4c5c6b7, 0xb8b9aaabac424100, q26);

  TEARDOWN();
}


TEST(neon_destructive_fcvtl) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);

  START();
  __ Movi(v0.V2D(), 0x400000003f800000, 0xbf800000c0000000);
  __ Fcvtl(v16.V2D(), v0.V2S());
  __ Fcvtl2(v17.V2D(), v0.V4S());
  __ Mov(v18, v0);
  __ Mov(v19, v0);
  __ Fcvtl(v18.V2D(), v18.V2S());
  __ Fcvtl2(v19.V2D(), v19.V4S());

  __ Movi(v1.V2D(), 0x40003c003c004000, 0xc000bc00bc00c000);
  __ Fcvtl(v20.V4S(), v1.V4H());
  __ Fcvtl2(v21.V4S(), v1.V8H());
  __ Mov(v22, v1);
  __ Mov(v23, v1);
  __ Fcvtl(v22.V4S(), v22.V4H());
  __ Fcvtl2(v23.V4S(), v23.V8H());

  END();

  RUN();

  ASSERT_EQUAL_128(0xbff0000000000000, 0xc000000000000000, q16);
  ASSERT_EQUAL_128(0x4000000000000000, 0x3ff0000000000000, q17);
  ASSERT_EQUAL_128(0xbff0000000000000, 0xc000000000000000, q18);
  ASSERT_EQUAL_128(0x4000000000000000, 0x3ff0000000000000, q19);

  ASSERT_EQUAL_128(0xc0000000bf800000, 0xbf800000c0000000, q20);
  ASSERT_EQUAL_128(0x400000003f800000, 0x3f80000040000000, q21);
  ASSERT_EQUAL_128(0xc0000000bf800000, 0xbf800000c0000000, q22);
  ASSERT_EQUAL_128(0x400000003f800000, 0x3f80000040000000, q23);

  TEARDOWN();
}


TEST(ldp_stp_float) {
  SETUP_WITH_FEATURES(CPUFeatures::kFP);

  float src[2] = {1.0, 2.0};
  float dst[3] = {0.0, 0.0, 0.0};
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
  uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);

  START();
  __ Mov(x16, src_base);
  __ Mov(x17, dst_base);
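  // PostIndex advances the base register after the access; PreIndex applies
  // the offset before the access and writes the updated base back.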
7121 __ Ldp(s31, s0, MemOperand(x16, 2 * sizeof(src[0]), PostIndex));
7122 __ Stp(s0, s31, MemOperand(x17, sizeof(dst[1]), PreIndex));
7123 END();
7124
7125 RUN();
7126
7127 ASSERT_EQUAL_FP32(1.0, s31);
7128 ASSERT_EQUAL_FP32(2.0, s0);
7129 ASSERT_EQUAL_FP32(0.0, dst[0]);
7130 ASSERT_EQUAL_FP32(2.0, dst[1]);
7131 ASSERT_EQUAL_FP32(1.0, dst[2]);
7132 ASSERT_EQUAL_64(src_base + 2 * sizeof(src[0]), x16);
7133 ASSERT_EQUAL_64(dst_base + sizeof(dst[1]), x17);
7134
7135 TEARDOWN();
7136 }
7137
7138
TEST(ldp_stp_double)7139 TEST(ldp_stp_double) {
7140 SETUP_WITH_FEATURES(CPUFeatures::kFP);
7141
7142 double src[2] = {1.0, 2.0};
7143 double dst[3] = {0.0, 0.0, 0.0};
7144 uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
7145 uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);
7146
7147 START();
7148 __ Mov(x16, src_base);
7149 __ Mov(x17, dst_base);
7150 __ Ldp(d31, d0, MemOperand(x16, 2 * sizeof(src[0]), PostIndex));
7151 __ Stp(d0, d31, MemOperand(x17, sizeof(dst[1]), PreIndex));
7152 END();
7153
7154 RUN();
7155
7156 ASSERT_EQUAL_FP64(1.0, d31);
7157 ASSERT_EQUAL_FP64(2.0, d0);
7158 ASSERT_EQUAL_FP64(0.0, dst[0]);
7159 ASSERT_EQUAL_FP64(2.0, dst[1]);
7160 ASSERT_EQUAL_FP64(1.0, dst[2]);
7161 ASSERT_EQUAL_64(src_base + 2 * sizeof(src[0]), x16);
7162 ASSERT_EQUAL_64(dst_base + sizeof(dst[1]), x17);
7163
7164 TEARDOWN();
7165 }
7166
7167
TEST(ldp_stp_quad)7168 TEST(ldp_stp_quad) {
7169 SETUP_WITH_FEATURES(CPUFeatures::kNEON);
7170
7171 uint64_t src[4] = {0x0123456789abcdef,
7172 0xaaaaaaaa55555555,
7173 0xfedcba9876543210,
7174 0x55555555aaaaaaaa};
7175 uint64_t dst[6] = {0, 0, 0, 0, 0, 0};
7176 uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
7177 uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);
7178
7179 START();
7180 __ Mov(x16, src_base);
7181 __ Mov(x17, dst_base);
7182 __ Ldp(q31, q0, MemOperand(x16, 4 * sizeof(src[0]), PostIndex));
7183 __ Stp(q0, q31, MemOperand(x17, 2 * sizeof(dst[1]), PreIndex));
7184 END();
7185
7186 RUN();
7187
7188 ASSERT_EQUAL_128(0xaaaaaaaa55555555, 0x0123456789abcdef, q31);
7189 ASSERT_EQUAL_128(0x55555555aaaaaaaa, 0xfedcba9876543210, q0);
7190 ASSERT_EQUAL_64(0, dst[0]);
7191 ASSERT_EQUAL_64(0, dst[1]);
7192 ASSERT_EQUAL_64(0xfedcba9876543210, dst[2]);
7193 ASSERT_EQUAL_64(0x55555555aaaaaaaa, dst[3]);
7194 ASSERT_EQUAL_64(0x0123456789abcdef, dst[4]);
7195 ASSERT_EQUAL_64(0xaaaaaaaa55555555, dst[5]);
7196 ASSERT_EQUAL_64(src_base + 4 * sizeof(src[0]), x16);
7197 ASSERT_EQUAL_64(dst_base + 2 * sizeof(dst[1]), x17);
7198
7199 TEARDOWN();
7200 }
7201
7202
TEST(ldp_stp_offset)7203 TEST(ldp_stp_offset) {
7204 SETUP();
7205
7206 uint64_t src[3] = {0x0011223344556677,
7207 0x8899aabbccddeeff,
7208 0xffeeddccbbaa9988};
7209 uint64_t dst[7] = {0, 0, 0, 0, 0, 0, 0};
7210 uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
7211 uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);
7212
7213 START();
7214 __ Mov(x16, src_base);
7215 __ Mov(x17, dst_base);
7216 __ Mov(x18, src_base + 24);
7217 __ Mov(x19, dst_base + 56);
7218 __ Ldp(w0, w1, MemOperand(x16));
7219 __ Ldp(w2, w3, MemOperand(x16, 4));
7220 __ Ldp(x4, x5, MemOperand(x16, 8));
7221 __ Ldp(w6, w7, MemOperand(x18, -12));
7222 __ Ldp(x8, x9, MemOperand(x18, -16));
7223 __ Stp(w0, w1, MemOperand(x17));
7224 __ Stp(w2, w3, MemOperand(x17, 8));
7225 __ Stp(x4, x5, MemOperand(x17, 16));
7226 __ Stp(w6, w7, MemOperand(x19, -24));
7227 __ Stp(x8, x9, MemOperand(x19, -16));
7228 END();
7229
7230 RUN();
7231
7232 ASSERT_EQUAL_64(0x44556677, x0);
7233 ASSERT_EQUAL_64(0x00112233, x1);
7234 ASSERT_EQUAL_64(0x0011223344556677, dst[0]);
7235 ASSERT_EQUAL_64(0x00112233, x2);
7236 ASSERT_EQUAL_64(0xccddeeff, x3);
7237 ASSERT_EQUAL_64(0xccddeeff00112233, dst[1]);
7238 ASSERT_EQUAL_64(0x8899aabbccddeeff, x4);
7239 ASSERT_EQUAL_64(0x8899aabbccddeeff, dst[2]);
7240 ASSERT_EQUAL_64(0xffeeddccbbaa9988, x5);
7241 ASSERT_EQUAL_64(0xffeeddccbbaa9988, dst[3]);
7242 ASSERT_EQUAL_64(0x8899aabb, x6);
7243 ASSERT_EQUAL_64(0xbbaa9988, x7);
7244 ASSERT_EQUAL_64(0xbbaa99888899aabb, dst[4]);
7245 ASSERT_EQUAL_64(0x8899aabbccddeeff, x8);
7246 ASSERT_EQUAL_64(0x8899aabbccddeeff, dst[5]);
7247 ASSERT_EQUAL_64(0xffeeddccbbaa9988, x9);
7248 ASSERT_EQUAL_64(0xffeeddccbbaa9988, dst[6]);
7249 ASSERT_EQUAL_64(src_base, x16);
7250 ASSERT_EQUAL_64(dst_base, x17);
7251 ASSERT_EQUAL_64(src_base + 24, x18);
7252 ASSERT_EQUAL_64(dst_base + 56, x19);
7253
7254 TEARDOWN();
7255 }
7256
7257
TEST(ldp_stp_offset_wide)7258 TEST(ldp_stp_offset_wide) {
7259 SETUP();
7260
7261 uint64_t src[3] = {0x0011223344556677,
7262 0x8899aabbccddeeff,
7263 0xffeeddccbbaa9988};
7264 uint64_t dst[7] = {0, 0, 0, 0, 0, 0, 0};
7265 uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
7266 uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);
7267 // Move base too far from the array to force multiple instructions
7268 // to be emitted.
7269 const int64_t base_offset = 1024;
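  // An offset this large cannot be encoded in a single ldp/stp, so the
  // MacroAssembler is expected to synthesize the address in a scratch
  // register first (by default ip0 and ip1, aliased to x16 and x17, which is
  // presumably why this test avoids those registers). Illustratively, the
  // expansion of the first Ldp below might look something like:
  //   add ip0, x20, #0x400
  //   ldp w0, w1, [ip0]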

  START();
  __ Mov(x20, src_base - base_offset);
  __ Mov(x21, dst_base - base_offset);
  __ Mov(x18, src_base + base_offset + 24);
  __ Mov(x19, dst_base + base_offset + 56);
  __ Ldp(w0, w1, MemOperand(x20, base_offset));
  __ Ldp(w2, w3, MemOperand(x20, base_offset + 4));
  __ Ldp(x4, x5, MemOperand(x20, base_offset + 8));
  __ Ldp(w6, w7, MemOperand(x18, -12 - base_offset));
  __ Ldp(x8, x9, MemOperand(x18, -16 - base_offset));
  __ Stp(w0, w1, MemOperand(x21, base_offset));
  __ Stp(w2, w3, MemOperand(x21, base_offset + 8));
  __ Stp(x4, x5, MemOperand(x21, base_offset + 16));
  __ Stp(w6, w7, MemOperand(x19, -24 - base_offset));
  __ Stp(x8, x9, MemOperand(x19, -16 - base_offset));
  END();

  RUN();

  ASSERT_EQUAL_64(0x44556677, x0);
  ASSERT_EQUAL_64(0x00112233, x1);
  ASSERT_EQUAL_64(0x0011223344556677, dst[0]);
  ASSERT_EQUAL_64(0x00112233, x2);
  ASSERT_EQUAL_64(0xccddeeff, x3);
  ASSERT_EQUAL_64(0xccddeeff00112233, dst[1]);
  ASSERT_EQUAL_64(0x8899aabbccddeeff, x4);
  ASSERT_EQUAL_64(0x8899aabbccddeeff, dst[2]);
  ASSERT_EQUAL_64(0xffeeddccbbaa9988, x5);
  ASSERT_EQUAL_64(0xffeeddccbbaa9988, dst[3]);
  ASSERT_EQUAL_64(0x8899aabb, x6);
  ASSERT_EQUAL_64(0xbbaa9988, x7);
  ASSERT_EQUAL_64(0xbbaa99888899aabb, dst[4]);
  ASSERT_EQUAL_64(0x8899aabbccddeeff, x8);
  ASSERT_EQUAL_64(0x8899aabbccddeeff, dst[5]);
  ASSERT_EQUAL_64(0xffeeddccbbaa9988, x9);
  ASSERT_EQUAL_64(0xffeeddccbbaa9988, dst[6]);
  ASSERT_EQUAL_64(src_base - base_offset, x20);
  ASSERT_EQUAL_64(dst_base - base_offset, x21);
  ASSERT_EQUAL_64(src_base + base_offset + 24, x18);
  ASSERT_EQUAL_64(dst_base + base_offset + 56, x19);

  TEARDOWN();
}


TEST(ldnp_stnp_offset) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  uint64_t src[4] = {0x0011223344556677,
                     0x8899aabbccddeeff,
                     0xffeeddccbbaa9988,
                     0x7766554433221100};
  uint64_t dst[16] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
  uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);

  START();
  __ Mov(x16, src_base);
  __ Mov(x17, dst_base);
  __ Mov(x18, src_base + 24);
  __ Mov(x19, dst_base + 64);
  __ Mov(x20, src_base + 32);

  // Ensure address setup has happened before executing non-temporal ops.
  __ Dmb(InnerShareable, BarrierAll);
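  // (Non-temporal pair accesses are exempt from some of the usual ordering
  // requirements, so without the barrier above the results of this test
  // would not be reliable.)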

  __ Ldnp(w0, w1, MemOperand(x16));
  __ Ldnp(w2, w3, MemOperand(x16, 4));
  __ Ldnp(x4, x5, MemOperand(x16, 8));
  __ Ldnp(w6, w7, MemOperand(x18, -12));
  __ Ldnp(x8, x9, MemOperand(x18, -16));
  __ Ldnp(q16, q17, MemOperand(x16));
  __ Ldnp(q19, q18, MemOperand(x20, -32));
  __ Stnp(w0, w1, MemOperand(x17));
  __ Stnp(w2, w3, MemOperand(x17, 8));
  __ Stnp(x4, x5, MemOperand(x17, 16));
  __ Stnp(w6, w7, MemOperand(x19, -32));
  __ Stnp(x8, x9, MemOperand(x19, -24));
  __ Stnp(q17, q16, MemOperand(x19));
  __ Stnp(q18, q19, MemOperand(x19, 32));
  END();

  RUN();

  ASSERT_EQUAL_64(0x44556677, x0);
  ASSERT_EQUAL_64(0x00112233, x1);
  ASSERT_EQUAL_64(0x0011223344556677, dst[0]);
  ASSERT_EQUAL_64(0x00112233, x2);
  ASSERT_EQUAL_64(0xccddeeff, x3);
  ASSERT_EQUAL_64(0xccddeeff00112233, dst[1]);
  ASSERT_EQUAL_64(0x8899aabbccddeeff, x4);
  ASSERT_EQUAL_64(0x8899aabbccddeeff, dst[2]);
  ASSERT_EQUAL_64(0xffeeddccbbaa9988, x5);
  ASSERT_EQUAL_64(0xffeeddccbbaa9988, dst[3]);
  ASSERT_EQUAL_64(0x8899aabb, x6);
  ASSERT_EQUAL_64(0xbbaa9988, x7);
  ASSERT_EQUAL_64(0xbbaa99888899aabb, dst[4]);
  ASSERT_EQUAL_64(0x8899aabbccddeeff, x8);
  ASSERT_EQUAL_64(0x8899aabbccddeeff, dst[5]);
  ASSERT_EQUAL_64(0xffeeddccbbaa9988, x9);
  ASSERT_EQUAL_64(0xffeeddccbbaa9988, dst[6]);
  ASSERT_EQUAL_128(0x8899aabbccddeeff, 0x0011223344556677, q16);
  ASSERT_EQUAL_128(0x7766554433221100, 0xffeeddccbbaa9988, q17);
  ASSERT_EQUAL_128(0x7766554433221100, 0xffeeddccbbaa9988, q18);
  ASSERT_EQUAL_128(0x8899aabbccddeeff, 0x0011223344556677, q19);
  ASSERT_EQUAL_64(0xffeeddccbbaa9988, dst[8]);
  ASSERT_EQUAL_64(0x7766554433221100, dst[9]);
  ASSERT_EQUAL_64(0x0011223344556677, dst[10]);
  ASSERT_EQUAL_64(0x8899aabbccddeeff, dst[11]);
  ASSERT_EQUAL_64(0xffeeddccbbaa9988, dst[12]);
  ASSERT_EQUAL_64(0x7766554433221100, dst[13]);
  ASSERT_EQUAL_64(0x0011223344556677, dst[14]);
  ASSERT_EQUAL_64(0x8899aabbccddeeff, dst[15]);
  ASSERT_EQUAL_64(src_base, x16);
  ASSERT_EQUAL_64(dst_base, x17);
  ASSERT_EQUAL_64(src_base + 24, x18);
  ASSERT_EQUAL_64(dst_base + 64, x19);
  ASSERT_EQUAL_64(src_base + 32, x20);

  TEARDOWN();
}


TEST(ldnp_stnp_offset_float) {
  SETUP_WITH_FEATURES(CPUFeatures::kFP);

  float src[3] = {1.2, 2.3, 3.4};
  float dst[6] = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0};
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
  uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);

  START();
  __ Mov(x16, src_base);
  __ Mov(x17, dst_base);
  __ Mov(x18, src_base + 12);
  __ Mov(x19, dst_base + 24);

  // Ensure address setup has happened before executing non-temporal ops.
  __ Dmb(InnerShareable, BarrierAll);

  __ Ldnp(s0, s1, MemOperand(x16));
  __ Ldnp(s2, s3, MemOperand(x16, 4));
  __ Ldnp(s5, s4, MemOperand(x18, -8));
  __ Stnp(s1, s0, MemOperand(x17));
  __ Stnp(s3, s2, MemOperand(x17, 8));
  __ Stnp(s4, s5, MemOperand(x19, -8));
  END();

  RUN();

  ASSERT_EQUAL_FP32(1.2, s0);
  ASSERT_EQUAL_FP32(2.3, s1);
  ASSERT_EQUAL_FP32(2.3, dst[0]);
  ASSERT_EQUAL_FP32(1.2, dst[1]);
  ASSERT_EQUAL_FP32(2.3, s2);
  ASSERT_EQUAL_FP32(3.4, s3);
  ASSERT_EQUAL_FP32(3.4, dst[2]);
  ASSERT_EQUAL_FP32(2.3, dst[3]);
  ASSERT_EQUAL_FP32(3.4, s4);
  ASSERT_EQUAL_FP32(2.3, s5);
  ASSERT_EQUAL_FP32(3.4, dst[4]);
  ASSERT_EQUAL_FP32(2.3, dst[5]);
  ASSERT_EQUAL_64(src_base, x16);
  ASSERT_EQUAL_64(dst_base, x17);
  ASSERT_EQUAL_64(src_base + 12, x18);
  ASSERT_EQUAL_64(dst_base + 24, x19);

  TEARDOWN();
}


TEST(ldnp_stnp_offset_double) {
  SETUP_WITH_FEATURES(CPUFeatures::kFP);

  double src[3] = {1.2, 2.3, 3.4};
  double dst[6] = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0};
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
  uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);

  START();
  __ Mov(x16, src_base);
  __ Mov(x17, dst_base);
  __ Mov(x18, src_base + 24);
  __ Mov(x19, dst_base + 48);

  // Ensure address setup has happened before executing non-temporal ops.
  __ Dmb(InnerShareable, BarrierAll);

  __ Ldnp(d0, d1, MemOperand(x16));
  __ Ldnp(d2, d3, MemOperand(x16, 8));
  __ Ldnp(d5, d4, MemOperand(x18, -16));
  __ Stnp(d1, d0, MemOperand(x17));
  __ Stnp(d3, d2, MemOperand(x17, 16));
  __ Stnp(d4, d5, MemOperand(x19, -16));
  END();

  RUN();

  ASSERT_EQUAL_FP64(1.2, d0);
  ASSERT_EQUAL_FP64(2.3, d1);
  ASSERT_EQUAL_FP64(2.3, dst[0]);
  ASSERT_EQUAL_FP64(1.2, dst[1]);
  ASSERT_EQUAL_FP64(2.3, d2);
  ASSERT_EQUAL_FP64(3.4, d3);
  ASSERT_EQUAL_FP64(3.4, dst[2]);
  ASSERT_EQUAL_FP64(2.3, dst[3]);
  ASSERT_EQUAL_FP64(3.4, d4);
  ASSERT_EQUAL_FP64(2.3, d5);
  ASSERT_EQUAL_FP64(3.4, dst[4]);
  ASSERT_EQUAL_FP64(2.3, dst[5]);
  ASSERT_EQUAL_64(src_base, x16);
  ASSERT_EQUAL_64(dst_base, x17);
  ASSERT_EQUAL_64(src_base + 24, x18);
  ASSERT_EQUAL_64(dst_base + 48, x19);

  TEARDOWN();
}


TEST(ldp_stp_preindex) {
  SETUP();

  uint64_t src[3] = {0x0011223344556677,
                     0x8899aabbccddeeff,
                     0xffeeddccbbaa9988};
  uint64_t dst[5] = {0, 0, 0, 0, 0};
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
  uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);

  START();
  __ Mov(x16, src_base);
  __ Mov(x17, dst_base);
  __ Mov(x18, dst_base + 16);
  __ Ldp(w0, w1, MemOperand(x16, 4, PreIndex));
  __ Mov(x19, x16);
  __ Ldp(w2, w3, MemOperand(x16, -4, PreIndex));
  __ Stp(w2, w3, MemOperand(x17, 4, PreIndex));
  __ Mov(x20, x17);
  __ Stp(w0, w1, MemOperand(x17, -4, PreIndex));
  __ Ldp(x4, x5, MemOperand(x16, 8, PreIndex));
  __ Mov(x21, x16);
  __ Ldp(x6, x7, MemOperand(x16, -8, PreIndex));
  __ Stp(x7, x6, MemOperand(x18, 8, PreIndex));
  __ Mov(x22, x18);
  __ Stp(x5, x4, MemOperand(x18, -8, PreIndex));
  END();

  RUN();

  ASSERT_EQUAL_64(0x00112233, x0);
  ASSERT_EQUAL_64(0xccddeeff, x1);
  ASSERT_EQUAL_64(0x44556677, x2);
  ASSERT_EQUAL_64(0x00112233, x3);
  ASSERT_EQUAL_64(0xccddeeff00112233, dst[0]);
  ASSERT_EQUAL_64(0x0000000000112233, dst[1]);
  ASSERT_EQUAL_64(0x8899aabbccddeeff, x4);
  ASSERT_EQUAL_64(0xffeeddccbbaa9988, x5);
  ASSERT_EQUAL_64(0x0011223344556677, x6);
  ASSERT_EQUAL_64(0x8899aabbccddeeff, x7);
  ASSERT_EQUAL_64(0xffeeddccbbaa9988, dst[2]);
  ASSERT_EQUAL_64(0x8899aabbccddeeff, dst[3]);
  ASSERT_EQUAL_64(0x0011223344556677, dst[4]);
  ASSERT_EQUAL_64(src_base, x16);
  ASSERT_EQUAL_64(dst_base, x17);
  ASSERT_EQUAL_64(dst_base + 16, x18);
  ASSERT_EQUAL_64(src_base + 4, x19);
  ASSERT_EQUAL_64(dst_base + 4, x20);
  ASSERT_EQUAL_64(src_base + 8, x21);
  ASSERT_EQUAL_64(dst_base + 24, x22);

  TEARDOWN();
}


TEST(ldp_stp_preindex_wide) {
  SETUP();

  uint64_t src[3] = {0x0011223344556677,
                     0x8899aabbccddeeff,
                     0xffeeddccbbaa9988};
  uint64_t dst[5] = {0, 0, 0, 0, 0};
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
  uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);
  // Move base too far from the array to force multiple instructions
  // to be emitted.
  const int64_t base_offset = 1024;

  START();
  __ Mov(x24, src_base - base_offset);
  __ Mov(x25, dst_base + base_offset);
  __ Mov(x18, dst_base + base_offset + 16);
  __ Ldp(w0, w1, MemOperand(x24, base_offset + 4, PreIndex));
  __ Mov(x19, x24);
  __ Mov(x24, src_base - base_offset + 4);
  __ Ldp(w2, w3, MemOperand(x24, base_offset - 4, PreIndex));
  __ Stp(w2, w3, MemOperand(x25, 4 - base_offset, PreIndex));
  __ Mov(x20, x25);
  __ Mov(x25, dst_base + base_offset + 4);
  __ Mov(x24, src_base - base_offset);
  __ Stp(w0, w1, MemOperand(x25, -4 - base_offset, PreIndex));
  __ Ldp(x4, x5, MemOperand(x24, base_offset + 8, PreIndex));
  __ Mov(x21, x24);
  __ Mov(x24, src_base - base_offset + 8);
  __ Ldp(x6, x7, MemOperand(x24, base_offset - 8, PreIndex));
  __ Stp(x7, x6, MemOperand(x18, 8 - base_offset, PreIndex));
  __ Mov(x22, x18);
  __ Mov(x18, dst_base + base_offset + 16 + 8);
  __ Stp(x5, x4, MemOperand(x18, -8 - base_offset, PreIndex));
  END();

  RUN();

  ASSERT_EQUAL_64(0x00112233, x0);
  ASSERT_EQUAL_64(0xccddeeff, x1);
  ASSERT_EQUAL_64(0x44556677, x2);
  ASSERT_EQUAL_64(0x00112233, x3);
  ASSERT_EQUAL_64(0xccddeeff00112233, dst[0]);
  ASSERT_EQUAL_64(0x0000000000112233, dst[1]);
  ASSERT_EQUAL_64(0x8899aabbccddeeff, x4);
  ASSERT_EQUAL_64(0xffeeddccbbaa9988, x5);
  ASSERT_EQUAL_64(0x0011223344556677, x6);
  ASSERT_EQUAL_64(0x8899aabbccddeeff, x7);
  ASSERT_EQUAL_64(0xffeeddccbbaa9988, dst[2]);
  ASSERT_EQUAL_64(0x8899aabbccddeeff, dst[3]);
  ASSERT_EQUAL_64(0x0011223344556677, dst[4]);
  ASSERT_EQUAL_64(src_base, x24);
  ASSERT_EQUAL_64(dst_base, x25);
  ASSERT_EQUAL_64(dst_base + 16, x18);
  ASSERT_EQUAL_64(src_base + 4, x19);
  ASSERT_EQUAL_64(dst_base + 4, x20);
  ASSERT_EQUAL_64(src_base + 8, x21);
  ASSERT_EQUAL_64(dst_base + 24, x22);

  TEARDOWN();
}


TEST(ldp_stp_postindex) {
  SETUP();

  uint64_t src[4] = {0x0011223344556677,
                     0x8899aabbccddeeff,
                     0xffeeddccbbaa9988,
                     0x7766554433221100};
  uint64_t dst[5] = {0, 0, 0, 0, 0};
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
  uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);

  START();
  __ Mov(x16, src_base);
  __ Mov(x17, dst_base);
  __ Mov(x18, dst_base + 16);
  __ Ldp(w0, w1, MemOperand(x16, 4, PostIndex));
  __ Mov(x19, x16);
  __ Ldp(w2, w3, MemOperand(x16, -4, PostIndex));
  __ Stp(w2, w3, MemOperand(x17, 4, PostIndex));
  __ Mov(x20, x17);
  __ Stp(w0, w1, MemOperand(x17, -4, PostIndex));
  __ Ldp(x4, x5, MemOperand(x16, 8, PostIndex));
  __ Mov(x21, x16);
  __ Ldp(x6, x7, MemOperand(x16, -8, PostIndex));
  __ Stp(x7, x6, MemOperand(x18, 8, PostIndex));
  __ Mov(x22, x18);
  __ Stp(x5, x4, MemOperand(x18, -8, PostIndex));
  END();

  RUN();

  ASSERT_EQUAL_64(0x44556677, x0);
  ASSERT_EQUAL_64(0x00112233, x1);
  ASSERT_EQUAL_64(0x00112233, x2);
  ASSERT_EQUAL_64(0xccddeeff, x3);
  ASSERT_EQUAL_64(0x4455667700112233, dst[0]);
  ASSERT_EQUAL_64(0x0000000000112233, dst[1]);
  ASSERT_EQUAL_64(0x0011223344556677, x4);
  ASSERT_EQUAL_64(0x8899aabbccddeeff, x5);
  ASSERT_EQUAL_64(0x8899aabbccddeeff, x6);
  ASSERT_EQUAL_64(0xffeeddccbbaa9988, x7);
  ASSERT_EQUAL_64(0xffeeddccbbaa9988, dst[2]);
  ASSERT_EQUAL_64(0x8899aabbccddeeff, dst[3]);
  ASSERT_EQUAL_64(0x0011223344556677, dst[4]);
  ASSERT_EQUAL_64(src_base, x16);
  ASSERT_EQUAL_64(dst_base, x17);
  ASSERT_EQUAL_64(dst_base + 16, x18);
  ASSERT_EQUAL_64(src_base + 4, x19);
  ASSERT_EQUAL_64(dst_base + 4, x20);
  ASSERT_EQUAL_64(src_base + 8, x21);
  ASSERT_EQUAL_64(dst_base + 24, x22);

  TEARDOWN();
}


TEST(ldp_stp_postindex_wide) {
  SETUP();

  uint64_t src[4] = {0x0011223344556677,
                     0x8899aabbccddeeff,
                     0xffeeddccbbaa9988,
                     0x7766554433221100};
  uint64_t dst[5] = {0, 0, 0, 0, 0};
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
  uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);
  // Move base too far from the array to force multiple instructions
  // to be emitted.
  const int64_t base_offset = 1024;

  START();
  __ Mov(x24, src_base);
  __ Mov(x25, dst_base);
  __ Mov(x18, dst_base + 16);
  __ Ldp(w0, w1, MemOperand(x24, base_offset + 4, PostIndex));
  __ Mov(x19, x24);
  __ Sub(x24, x24, base_offset);
  __ Ldp(w2, w3, MemOperand(x24, base_offset - 4, PostIndex));
  __ Stp(w2, w3, MemOperand(x25, 4 - base_offset, PostIndex));
  __ Mov(x20, x25);
  __ Sub(x24, x24, base_offset);
  __ Add(x25, x25, base_offset);
  __ Stp(w0, w1, MemOperand(x25, -4 - base_offset, PostIndex));
  __ Ldp(x4, x5, MemOperand(x24, base_offset + 8, PostIndex));
  __ Mov(x21, x24);
  __ Sub(x24, x24, base_offset);
  __ Ldp(x6, x7, MemOperand(x24, base_offset - 8, PostIndex));
  __ Stp(x7, x6, MemOperand(x18, 8 - base_offset, PostIndex));
  __ Mov(x22, x18);
  __ Add(x18, x18, base_offset);
  __ Stp(x5, x4, MemOperand(x18, -8 - base_offset, PostIndex));
  END();

  RUN();

  ASSERT_EQUAL_64(0x44556677, x0);
  ASSERT_EQUAL_64(0x00112233, x1);
  ASSERT_EQUAL_64(0x00112233, x2);
  ASSERT_EQUAL_64(0xccddeeff, x3);
  ASSERT_EQUAL_64(0x4455667700112233, dst[0]);
  ASSERT_EQUAL_64(0x0000000000112233, dst[1]);
  ASSERT_EQUAL_64(0x0011223344556677, x4);
  ASSERT_EQUAL_64(0x8899aabbccddeeff, x5);
  ASSERT_EQUAL_64(0x8899aabbccddeeff, x6);
  ASSERT_EQUAL_64(0xffeeddccbbaa9988, x7);
  ASSERT_EQUAL_64(0xffeeddccbbaa9988, dst[2]);
  ASSERT_EQUAL_64(0x8899aabbccddeeff, dst[3]);
  ASSERT_EQUAL_64(0x0011223344556677, dst[4]);
  ASSERT_EQUAL_64(src_base + base_offset, x24);
  ASSERT_EQUAL_64(dst_base - base_offset, x25);
  ASSERT_EQUAL_64(dst_base - base_offset + 16, x18);
  ASSERT_EQUAL_64(src_base + base_offset + 4, x19);
  ASSERT_EQUAL_64(dst_base - base_offset + 4, x20);
  ASSERT_EQUAL_64(src_base + base_offset + 8, x21);
  ASSERT_EQUAL_64(dst_base - base_offset + 24, x22);

  TEARDOWN();
}


TEST(ldp_sign_extend) {
  SETUP();

  uint32_t src[2] = {0x80000000, 0x7fffffff};
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);

  START();
  __ Mov(x24, src_base);
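  // Ldpsw loads two 32-bit words and sign-extends each of them to 64 bits.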
  __ Ldpsw(x0, x1, MemOperand(x24));
  END();

  RUN();

  ASSERT_EQUAL_64(0xffffffff80000000, x0);
  ASSERT_EQUAL_64(0x000000007fffffff, x1);

  TEARDOWN();
}


TEST(ldur_stur) {
  SETUP();

  int64_t src[2] = {0x0123456789abcdef, 0x0123456789abcdef};
  int64_t dst[5] = {0, 0, 0, 0, 0};
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
  uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);

  START();
  __ Mov(x17, src_base);
  __ Mov(x18, dst_base);
  __ Mov(x19, src_base + 16);
  __ Mov(x20, dst_base + 32);
  __ Mov(x21, dst_base + 40);
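  // The offsets below cannot be encoded as scaled, unsigned immediates, so
  // (given the usual macro-assembler behaviour) each access should be
  // emitted in its unscaled-offset form: ldur, stur and friends.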
  __ Ldr(w0, MemOperand(x17, 1));
  __ Str(w0, MemOperand(x18, 2));
  __ Ldr(x1, MemOperand(x17, 3));
  __ Str(x1, MemOperand(x18, 9));
  __ Ldr(w2, MemOperand(x19, -9));
  __ Str(w2, MemOperand(x20, -5));
  __ Ldrb(w3, MemOperand(x19, -1));
  __ Strb(w3, MemOperand(x21, -1));
  END();

  RUN();

  ASSERT_EQUAL_64(0x6789abcd, x0);
  ASSERT_EQUAL_64(0x00006789abcd0000, dst[0]);
  ASSERT_EQUAL_64(0xabcdef0123456789, x1);
  ASSERT_EQUAL_64(0xcdef012345678900, dst[1]);
  ASSERT_EQUAL_64(0x000000ab, dst[2]);
  ASSERT_EQUAL_64(0xabcdef01, x2);
  ASSERT_EQUAL_64(0x00abcdef01000000, dst[3]);
  ASSERT_EQUAL_64(0x00000001, x3);
  ASSERT_EQUAL_64(0x0100000000000000, dst[4]);
  ASSERT_EQUAL_64(src_base, x17);
  ASSERT_EQUAL_64(dst_base, x18);
  ASSERT_EQUAL_64(src_base + 16, x19);
  ASSERT_EQUAL_64(dst_base + 32, x20);

  TEARDOWN();
}


TEST(ldur_stur_neon) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  int64_t src[3] = {0x0123456789abcdef, 0x0123456789abcdef, 0x0123456789abcdef};
  int64_t dst[5] = {0, 0, 0, 0, 0};
  uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
  uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);

  START();
  __ Mov(x17, src_base);
  __ Mov(x18, dst_base);
  __ Ldr(b0, MemOperand(x17));
  __ Str(b0, MemOperand(x18));
  __ Ldr(h1, MemOperand(x17, 1));
  __ Str(h1, MemOperand(x18, 1));
  __ Ldr(s2, MemOperand(x17, 2));
  __ Str(s2, MemOperand(x18, 3));
  __ Ldr(d3, MemOperand(x17, 3));
  __ Str(d3, MemOperand(x18, 7));
  __ Ldr(q4, MemOperand(x17, 4));
  __ Str(q4, MemOperand(x18, 15));
  END();

  RUN();

  ASSERT_EQUAL_128(0, 0xef, q0);
  ASSERT_EQUAL_128(0, 0xabcd, q1);
  ASSERT_EQUAL_128(0, 0x456789ab, q2);
  ASSERT_EQUAL_128(0, 0xabcdef0123456789, q3);
  ASSERT_EQUAL_128(0x89abcdef01234567, 0x89abcdef01234567, q4);
  ASSERT_EQUAL_64(0x89456789ababcdef, dst[0]);
  ASSERT_EQUAL_64(0x67abcdef01234567, dst[1]);
  ASSERT_EQUAL_64(0x6789abcdef012345, dst[2]);
  ASSERT_EQUAL_64(0x0089abcdef012345, dst[3]);

  TEARDOWN();
}


TEST(ldr_literal) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();
  __ Ldr(x2, 0x1234567890abcdef);
  __ Ldr(w3, 0xfedcba09);
  __ Ldrsw(x4, 0x7fffffff);
  __ Ldrsw(x5, 0x80000000);
  __ Ldr(q11, 0x1234000056780000, 0xabcd0000ef000000);
  __ Ldr(d13, 1.234);
  __ Ldr(s25, 2.5);
  END();

  RUN();

  ASSERT_EQUAL_64(0x1234567890abcdef, x2);
  ASSERT_EQUAL_64(0xfedcba09, x3);
  ASSERT_EQUAL_64(0x7fffffff, x4);
  ASSERT_EQUAL_64(0xffffffff80000000, x5);
  ASSERT_EQUAL_128(0x1234000056780000, 0xabcd0000ef000000, q11);
  ASSERT_EQUAL_FP64(1.234, d13);
  ASSERT_EQUAL_FP32(2.5, s25);

  TEARDOWN();
}


TEST(ldr_literal_range) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();
  // Make sure the pool is empty.
  masm.EmitLiteralPool(LiteralPool::kBranchRequired);
  ASSERT_LITERAL_POOL_SIZE(0);

  // Create some literal pool entries.
  __ Ldr(x0, 0x1234567890abcdef);
  __ Ldr(w1, 0xfedcba09);
  __ Ldrsw(x2, 0x7fffffff);
  __ Ldrsw(x3, 0x80000000);
  __ Ldr(q2, 0x1234000056780000, 0xabcd0000ef000000);
  __ Ldr(d0, 1.234);
  __ Ldr(s1, 2.5);
  ASSERT_LITERAL_POOL_SIZE(48);
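  // 48 bytes: 8 (x0) + 4 (w1) + 4 + 4 (the two ldrsw entries) + 16 (q2) +
  // 8 (d0) + 4 (s1).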

  // Emit more code than the maximum literal load range to ensure the pool
  // should be emitted.
  const ptrdiff_t end = masm.GetCursorOffset() + 2 * kMaxLoadLiteralRange;
  while (masm.GetCursorOffset() < end) {
    __ Nop();
  }

  // The pool should have been emitted.
  ASSERT_LITERAL_POOL_SIZE(0);

  // These loads should be after the pool (and will require a new one).
  __ Ldr(x4, 0x34567890abcdef12);
  __ Ldr(w5, 0xdcba09fe);
  __ Ldrsw(x6, 0x7fffffff);
  __ Ldrsw(x7, 0x80000000);
  __ Ldr(q6, 0x1234000056780000, 0xabcd0000ef000000);
  __ Ldr(d4, 123.4);
  __ Ldr(s5, 250.0);
  ASSERT_LITERAL_POOL_SIZE(48);
  END();

  RUN();

  // Check that the literals loaded correctly.
  ASSERT_EQUAL_64(0x1234567890abcdef, x0);
  ASSERT_EQUAL_64(0xfedcba09, x1);
  ASSERT_EQUAL_64(0x7fffffff, x2);
  ASSERT_EQUAL_64(0xffffffff80000000, x3);
  ASSERT_EQUAL_128(0x1234000056780000, 0xabcd0000ef000000, q2);
  ASSERT_EQUAL_FP64(1.234, d0);
  ASSERT_EQUAL_FP32(2.5, s1);
  ASSERT_EQUAL_64(0x34567890abcdef12, x4);
  ASSERT_EQUAL_64(0xdcba09fe, x5);
  ASSERT_EQUAL_64(0x7fffffff, x6);
  ASSERT_EQUAL_64(0xffffffff80000000, x7);
  ASSERT_EQUAL_128(0x1234000056780000, 0xabcd0000ef000000, q6);
  ASSERT_EQUAL_FP64(123.4, d4);
  ASSERT_EQUAL_FP32(250.0, s5);

  TEARDOWN();
}


TEST(ldr_literal_values_q) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  static const uint64_t kHalfValues[] = {0x8000000000000000,
                                         0x7fffffffffffffff,
                                         0x0000000000000000,
                                         0xffffffffffffffff,
                                         0x00ff00ff00ff00ff,
                                         0x1234567890abcdef};
  const int card = sizeof(kHalfValues) / sizeof(kHalfValues[0]);
  const Register& ref_low64 = x1;
  const Register& ref_high64 = x2;
  const Register& loaded_low64 = x3;
  const Register& loaded_high64 = x4;
  const VRegister& tgt = q0;

  START();
  __ Mov(x0, 0);

  for (int i = 0; i < card; i++) {
    __ Mov(ref_low64, kHalfValues[i]);
    for (int j = 0; j < card; j++) {
      __ Mov(ref_high64, kHalfValues[j]);
      __ Ldr(tgt, kHalfValues[j], kHalfValues[i]);
      __ Mov(loaded_low64, tgt.V2D(), 0);
      __ Mov(loaded_high64, tgt.V2D(), 1);
      __ Cmp(loaded_low64, ref_low64);
      __ Ccmp(loaded_high64, ref_high64, NoFlag, eq);
      __ Cset(x0, ne);
    }
  }
  END();

  RUN();

  // If one of the values differs, the trace can be used to identify which one.
  ASSERT_EQUAL_64(0, x0);

  TEARDOWN();
}


template <typename T>
void LoadIntValueHelper(T values[], int card) {
  SETUP();

  const bool is_32bit = (sizeof(T) == 4);
  Register tgt1 = is_32bit ? Register(w1) : Register(x1);
  Register tgt2 = is_32bit ? Register(w2) : Register(x2);

  START();
  __ Mov(x0, 0);

  // If one of the values differs then x0 will be set to one.
  for (int i = 0; i < card; ++i) {
    __ Mov(tgt1, values[i]);
    __ Ldr(tgt2, values[i]);
    __ Cmp(tgt1, tgt2);
    __ Cset(x0, ne);
  }
  END();

  RUN();

  // If one of the values differs, the trace can be used to identify which one.
  ASSERT_EQUAL_64(0, x0);

  TEARDOWN();
}


TEST(ldr_literal_values_x) {
  static const uint64_t kValues[] = {0x8000000000000000,
                                     0x7fffffffffffffff,
                                     0x0000000000000000,
                                     0xffffffffffffffff,
                                     0x00ff00ff00ff00ff,
                                     0x1234567890abcdef};

  LoadIntValueHelper(kValues, sizeof(kValues) / sizeof(kValues[0]));
}


TEST(ldr_literal_values_w) {
  static const uint32_t kValues[] = {0x80000000,
                                     0x7fffffff,
                                     0x00000000,
                                     0xffffffff,
                                     0x00ff00ff,
                                     0x12345678,
                                     0x90abcdef};

  LoadIntValueHelper(kValues, sizeof(kValues) / sizeof(kValues[0]));
}


template <typename T>
void LoadFPValueHelper(T values[], int card) {
  SETUP_WITH_FEATURES(CPUFeatures::kFP);

  const bool is_32bits = (sizeof(T) == 4);
  const FPRegister& fp_tgt = is_32bits ? s2 : d2;
  const Register& tgt1 = is_32bits ? Register(w1) : Register(x1);
  const Register& tgt2 = is_32bits ? Register(w2) : Register(x2);

  START();
  __ Mov(x0, 0);

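  // Compare raw bit patterns (via Fmov to an integer register) rather than
  // using an FP compare, so that -0.0 and 0.0, which compare equal as
  // floating-point values, are still distinguished.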
  // If one of the values differs then x0 will be set to one.
  for (int i = 0; i < card; ++i) {
    __ Mov(tgt1,
           is_32bits ? FloatToRawbits(values[i]) : DoubleToRawbits(values[i]));
    __ Ldr(fp_tgt, values[i]);
    __ Fmov(tgt2, fp_tgt);
    __ Cmp(tgt1, tgt2);
    __ Cset(x0, ne);
  }
  END();

  RUN();

  // If one of the values differs, the trace can be used to identify which one.
  ASSERT_EQUAL_64(0, x0);

  TEARDOWN();
}

TEST(ldr_literal_values_d) {
  static const double kValues[] = {-0.0, 0.0, -1.0, 1.0, -1e10, 1e10};

  LoadFPValueHelper(kValues, sizeof(kValues) / sizeof(kValues[0]));
}


TEST(ldr_literal_values_s) {
  static const float kValues[] = {-0.0, 0.0, -1.0, 1.0, -1e10, 1e10};

  LoadFPValueHelper(kValues, sizeof(kValues) / sizeof(kValues[0]));
}


TEST(ldr_literal_custom) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  Label end_of_pool_before;
  Label end_of_pool_after;

  const size_t kSizeOfPoolInBytes = 44;
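  // 44 bytes: 8 (before_x) + 4 (before_w) + 4 (before_sx) + 16 (before_q) +
  // 8 (before_d) + 4 (before_s).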

  Literal<uint64_t> before_x(0x1234567890abcdef);
  Literal<uint32_t> before_w(0xfedcba09);
  Literal<uint32_t> before_sx(0x80000000);
  Literal<uint64_t> before_q(0x1234000056780000, 0xabcd0000ef000000);
  Literal<double> before_d(1.234);
  Literal<float> before_s(2.5);

  Literal<uint64_t> after_x(0x1234567890abcdef);
  Literal<uint32_t> after_w(0xfedcba09);
  Literal<uint32_t> after_sx(0x80000000);
  Literal<uint64_t> after_q(0x1234000056780000, 0xabcd0000ef000000);
  Literal<double> after_d(1.234);
  Literal<float> after_s(2.5);

  START();

  // Manually generate a pool.
  __ B(&end_of_pool_before);
  {
    ExactAssemblyScope scope(&masm, kSizeOfPoolInBytes);
    __ place(&before_x);
    __ place(&before_w);
    __ place(&before_sx);
    __ place(&before_q);
    __ place(&before_d);
    __ place(&before_s);
  }
  __ Bind(&end_of_pool_before);

  {
    ExactAssemblyScope scope(&masm, 12 * kInstructionSize);
    __ ldr(x2, &before_x);
    __ ldr(w3, &before_w);
    __ ldrsw(x5, &before_sx);
    __ ldr(q11, &before_q);
    __ ldr(d13, &before_d);
    __ ldr(s25, &before_s);

    __ ldr(x6, &after_x);
    __ ldr(w7, &after_w);
    __ ldrsw(x8, &after_sx);
    __ ldr(q18, &after_q);
    __ ldr(d14, &after_d);
    __ ldr(s26, &after_s);
  }

  // Manually generate a pool.
  __ B(&end_of_pool_after);
  {
    ExactAssemblyScope scope(&masm, kSizeOfPoolInBytes);
    __ place(&after_x);
    __ place(&after_w);
    __ place(&after_sx);
    __ place(&after_q);
    __ place(&after_d);
    __ place(&after_s);
  }
  __ Bind(&end_of_pool_after);

  END();

  RUN();

  ASSERT_EQUAL_64(0x1234567890abcdef, x2);
  ASSERT_EQUAL_64(0xfedcba09, x3);
  ASSERT_EQUAL_64(0xffffffff80000000, x5);
  ASSERT_EQUAL_128(0x1234000056780000, 0xabcd0000ef000000, q11);
  ASSERT_EQUAL_FP64(1.234, d13);
  ASSERT_EQUAL_FP32(2.5, s25);

  ASSERT_EQUAL_64(0x1234567890abcdef, x6);
  ASSERT_EQUAL_64(0xfedcba09, x7);
  ASSERT_EQUAL_64(0xffffffff80000000, x8);
  ASSERT_EQUAL_128(0x1234000056780000, 0xabcd0000ef000000, q18);
  ASSERT_EQUAL_FP64(1.234, d14);
  ASSERT_EQUAL_FP32(2.5, s26);

  TEARDOWN();
}


TEST(ldr_literal_custom_shared) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  Label end_of_pool_before;
  Label end_of_pool_after;

  const size_t kSizeOfPoolInBytes = 40;

  Literal<uint64_t> before_x(0x1234567890abcdef);
  Literal<uint32_t> before_w(0xfedcba09);
  Literal<uint64_t> before_q(0x1234000056780000, 0xabcd0000ef000000);
  Literal<double> before_d(1.234);
  Literal<float> before_s(2.5);

  Literal<uint64_t> after_x(0x1234567890abcdef);
  Literal<uint32_t> after_w(0xfedcba09);
  Literal<uint64_t> after_q(0x1234000056780000, 0xabcd0000ef000000);
  Literal<double> after_d(1.234);
  Literal<float> after_s(2.5);

  START();

  // Manually generate a pool.
  __ B(&end_of_pool_before);
  {
    ExactAssemblyScope scope(&masm, kSizeOfPoolInBytes);
    __ place(&before_x);
    __ place(&before_w);
    __ place(&before_q);
    __ place(&before_d);
    __ place(&before_s);
  }
  __ Bind(&end_of_pool_before);

  // Load the entries several times to test that literals can be shared.
  for (int i = 0; i < 50; i++) {
    ExactAssemblyScope scope(&masm, 12 * kInstructionSize);
    __ ldr(x2, &before_x);
    __ ldr(w3, &before_w);
    __ ldrsw(x5, &before_w);  // Re-use before_w.
    __ ldr(q11, &before_q);
    __ ldr(d13, &before_d);
    __ ldr(s25, &before_s);

    __ ldr(x6, &after_x);
    __ ldr(w7, &after_w);
    __ ldrsw(x8, &after_w);  // Re-use after_w.
    __ ldr(q18, &after_q);
    __ ldr(d14, &after_d);
    __ ldr(s26, &after_s);
  }

  // Manually generate a pool.
  __ B(&end_of_pool_after);
  {
    ExactAssemblyScope scope(&masm, kSizeOfPoolInBytes);
    __ place(&after_x);
    __ place(&after_w);
    __ place(&after_q);
    __ place(&after_d);
    __ place(&after_s);
  }
  __ Bind(&end_of_pool_after);

  END();

  RUN();

  ASSERT_EQUAL_64(0x1234567890abcdef, x2);
  ASSERT_EQUAL_64(0xfedcba09, x3);
  ASSERT_EQUAL_64(0xfffffffffedcba09, x5);
  ASSERT_EQUAL_128(0x1234000056780000, 0xabcd0000ef000000, q11);
  ASSERT_EQUAL_FP64(1.234, d13);
  ASSERT_EQUAL_FP32(2.5, s25);

  ASSERT_EQUAL_64(0x1234567890abcdef, x6);
  ASSERT_EQUAL_64(0xfedcba09, x7);
  ASSERT_EQUAL_64(0xfffffffffedcba09, x8);
  ASSERT_EQUAL_128(0x1234000056780000, 0xabcd0000ef000000, q18);
  ASSERT_EQUAL_FP64(1.234, d14);
  ASSERT_EQUAL_FP32(2.5, s26);

  TEARDOWN();
}


TEST(prfm_offset) {
  SETUP();

  START();
  // The address used in prfm doesn't have to be valid.
  __ Mov(x0, 0x0123456789abcdef);

  for (int i = 0; i < (1 << ImmPrefetchOperation_width); i++) {
    // Unallocated prefetch operations are ignored, so test all of them.
    PrefetchOperation op = static_cast<PrefetchOperation>(i);

    __ Prfm(op, MemOperand(x0));
    __ Prfm(op, MemOperand(x0, 8));
    __ Prfm(op, MemOperand(x0, 32760));
    __ Prfm(op, MemOperand(x0, 32768));

    __ Prfm(op, MemOperand(x0, 1));
    __ Prfm(op, MemOperand(x0, 9));
    __ Prfm(op, MemOperand(x0, 255));
    __ Prfm(op, MemOperand(x0, 257));
    __ Prfm(op, MemOperand(x0, -1));
    __ Prfm(op, MemOperand(x0, -9));
    __ Prfm(op, MemOperand(x0, -255));
    __ Prfm(op, MemOperand(x0, -257));

    __ Prfm(op, MemOperand(x0, 0xfedcba9876543210));
  }

  END();
  RUN();
  TEARDOWN();
}


TEST(prfm_regoffset) {
  SETUP();

  START();
  // The address used in prfm doesn't have to be valid.
  __ Mov(x0, 0x0123456789abcdef);

  CPURegList inputs(CPURegister::kRegister, kXRegSize, 10, 18);
  __ Mov(x10, 0);
  __ Mov(x11, 1);
  __ Mov(x12, 8);
  __ Mov(x13, 255);
  __ Mov(x14, -0);
  __ Mov(x15, -1);
  __ Mov(x16, -8);
  __ Mov(x17, -255);
  __ Mov(x18, 0xfedcba9876543210);

  for (int i = 0; i < (1 << ImmPrefetchOperation_width); i++) {
    // Unallocated prefetch operations are ignored, so test all of them.
    PrefetchOperation op = static_cast<PrefetchOperation>(i);

    CPURegList loop = inputs;
    while (!loop.IsEmpty()) {
      Register input(loop.PopLowestIndex());
      __ Prfm(op, MemOperand(x0, input));
      __ Prfm(op, MemOperand(x0, input, UXTW));
      __ Prfm(op, MemOperand(x0, input, UXTW, 3));
      __ Prfm(op, MemOperand(x0, input, LSL));
      __ Prfm(op, MemOperand(x0, input, LSL, 3));
      __ Prfm(op, MemOperand(x0, input, SXTW));
      __ Prfm(op, MemOperand(x0, input, SXTW, 3));
      __ Prfm(op, MemOperand(x0, input, SXTX));
      __ Prfm(op, MemOperand(x0, input, SXTX, 3));
    }
  }

  END();
  RUN();
  TEARDOWN();
}


TEST(prfm_literal_imm19) {
  SETUP();
  START();

  for (int i = 0; i < (1 << ImmPrefetchOperation_width); i++) {
    // Unallocated prefetch operations are ignored, so test all of them.
    PrefetchOperation op = static_cast<PrefetchOperation>(i);

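    // The literal form of prfm encodes a signed 19-bit word offset, so the
    // encodable extremes are 0x3ffff and -0x40000, as used below.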
    ExactAssemblyScope scope(&masm, 7 * kInstructionSize);
    // The address used in prfm doesn't have to be valid.
    __ prfm(op, INT64_C(0));
    __ prfm(op, 1);
    __ prfm(op, -1);
    __ prfm(op, 1000);
    __ prfm(op, -1000);
    __ prfm(op, 0x3ffff);
    __ prfm(op, -0x40000);
  }

  END();
  RUN();
  TEARDOWN();
}


TEST(prfm_literal) {
  SETUP();

  Label end_of_pool_before;
  Label end_of_pool_after;
  Literal<uint64_t> before(0);
  Literal<uint64_t> after(0);

  START();

  // Manually generate a pool.
  __ B(&end_of_pool_before);
  {
    ExactAssemblyScope scope(&masm, before.GetSize());
    __ place(&before);
  }
  __ Bind(&end_of_pool_before);

  for (int i = 0; i < (1 << ImmPrefetchOperation_width); i++) {
    // Unallocated prefetch operations are ignored, so test all of them.
    PrefetchOperation op = static_cast<PrefetchOperation>(i);

    ExactAssemblyScope guard(&masm, 2 * kInstructionSize);
    __ prfm(op, &before);
    __ prfm(op, &after);
  }

  // Manually generate a pool.
  __ B(&end_of_pool_after);
  {
    ExactAssemblyScope scope(&masm, after.GetSize());
    __ place(&after);
  }
  __ Bind(&end_of_pool_after);

  END();
  RUN();
  TEARDOWN();
}


TEST(prfm_wide) {
  SETUP();

  START();
  // The address used in prfm doesn't have to be valid.
  __ Mov(x0, 0x0123456789abcdef);

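  // None of these offsets fits any prfm addressing mode, so presumably the
  // macro version has to compute the address in a temporary register first.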
  for (int i = 0; i < (1 << ImmPrefetchOperation_width); i++) {
    // Unallocated prefetch operations are ignored, so test all of them.
    PrefetchOperation op = static_cast<PrefetchOperation>(i);

    __ Prfm(op, MemOperand(x0, 0x40000));
    __ Prfm(op, MemOperand(x0, -0x40001));
    __ Prfm(op, MemOperand(x0, UINT64_C(0x5555555555555555)));
    __ Prfm(op, MemOperand(x0, UINT64_C(0xfedcba9876543210)));
  }

  END();
  RUN();
  TEARDOWN();
}


TEST(load_prfm_literal) {
  // Test literals shared between both prfm and ldr.
  SETUP_WITH_FEATURES(CPUFeatures::kFP);

  Label end_of_pool_before;
  Label end_of_pool_after;

  const size_t kSizeOfPoolInBytes = 28;

  Literal<uint64_t> before_x(0x1234567890abcdef);
  Literal<uint32_t> before_w(0xfedcba09);
  Literal<uint32_t> before_sx(0x80000000);
  Literal<double> before_d(1.234);
  Literal<float> before_s(2.5);
  Literal<uint64_t> after_x(0x1234567890abcdef);
  Literal<uint32_t> after_w(0xfedcba09);
  Literal<uint32_t> after_sx(0x80000000);
  Literal<double> after_d(1.234);
  Literal<float> after_s(2.5);

  START();

  // Manually generate a pool.
  __ B(&end_of_pool_before);
  {
    ExactAssemblyScope scope(&masm, kSizeOfPoolInBytes);
    __ place(&before_x);
    __ place(&before_w);
    __ place(&before_sx);
    __ place(&before_d);
    __ place(&before_s);
  }
  __ Bind(&end_of_pool_before);

  for (int i = 0; i < (1 << ImmPrefetchOperation_width); i++) {
    // Unallocated prefetch operations are ignored, so test all of them.
    PrefetchOperation op = static_cast<PrefetchOperation>(i);
    ExactAssemblyScope scope(&masm, 10 * kInstructionSize);

    __ prfm(op, &before_x);
    __ prfm(op, &before_w);
    __ prfm(op, &before_sx);
    __ prfm(op, &before_d);
    __ prfm(op, &before_s);

    __ prfm(op, &after_x);
    __ prfm(op, &after_w);
    __ prfm(op, &after_sx);
    __ prfm(op, &after_d);
    __ prfm(op, &after_s);
  }

  {
    ExactAssemblyScope scope(&masm, 10 * kInstructionSize);
    __ ldr(x2, &before_x);
    __ ldr(w3, &before_w);
    __ ldrsw(x5, &before_sx);
    __ ldr(d13, &before_d);
    __ ldr(s25, &before_s);

    __ ldr(x6, &after_x);
    __ ldr(w7, &after_w);
    __ ldrsw(x8, &after_sx);
    __ ldr(d14, &after_d);
    __ ldr(s26, &after_s);
  }

  // Manually generate a pool.
  __ B(&end_of_pool_after);
  {
    ExactAssemblyScope scope(&masm, kSizeOfPoolInBytes);
    __ place(&after_x);
    __ place(&after_w);
    __ place(&after_sx);
    __ place(&after_d);
    __ place(&after_s);
  }
  __ Bind(&end_of_pool_after);

  END();

  RUN();

  ASSERT_EQUAL_64(0x1234567890abcdef, x2);
  ASSERT_EQUAL_64(0xfedcba09, x3);
  ASSERT_EQUAL_64(0xffffffff80000000, x5);
  ASSERT_EQUAL_FP64(1.234, d13);
  ASSERT_EQUAL_FP32(2.5, s25);

  ASSERT_EQUAL_64(0x1234567890abcdef, x6);
  ASSERT_EQUAL_64(0xfedcba09, x7);
  ASSERT_EQUAL_64(0xffffffff80000000, x8);
  ASSERT_EQUAL_FP64(1.234, d14);
  ASSERT_EQUAL_FP32(2.5, s26);

  TEARDOWN();
}


TEST(add_sub_imm) {
  SETUP();

  START();
  __ Mov(x0, 0x0);
  __ Mov(x1, 0x1111);
  __ Mov(x2, 0xffffffffffffffff);
  __ Mov(x3, 0x8000000000000000);

  __ Add(x10, x0, Operand(0x123));
  __ Add(x11, x1, Operand(0x122000));
  __ Add(x12, x0, Operand(0xabc << 12));
  __ Add(x13, x2, Operand(1));

  __ Add(w14, w0, Operand(0x123));
  __ Add(w15, w1, Operand(0x122000));
  __ Add(w16, w0, Operand(0xabc << 12));
  __ Add(w17, w2, Operand(1));

  __ Sub(x20, x0, Operand(0x1));
  __ Sub(x21, x1, Operand(0x111));
  __ Sub(x22, x1, Operand(0x1 << 12));
  __ Sub(x23, x3, Operand(1));

  __ Sub(w24, w0, Operand(0x1));
  __ Sub(w25, w1, Operand(0x111));
  __ Sub(w26, w1, Operand(0x1 << 12));
  __ Sub(w27, w3, Operand(1));
  END();

  RUN();

  ASSERT_EQUAL_64(0x123, x10);
  ASSERT_EQUAL_64(0x123111, x11);
  ASSERT_EQUAL_64(0xabc000, x12);
  ASSERT_EQUAL_64(0x0, x13);

  ASSERT_EQUAL_32(0x123, w14);
  ASSERT_EQUAL_32(0x123111, w15);
  ASSERT_EQUAL_32(0xabc000, w16);
  ASSERT_EQUAL_32(0x0, w17);

  ASSERT_EQUAL_64(0xffffffffffffffff, x20);
  ASSERT_EQUAL_64(0x1000, x21);
  ASSERT_EQUAL_64(0x111, x22);
  ASSERT_EQUAL_64(0x7fffffffffffffff, x23);

  ASSERT_EQUAL_32(0xffffffff, w24);
  ASSERT_EQUAL_32(0x1000, w25);
  ASSERT_EQUAL_32(0x111, w26);
  ASSERT_EQUAL_32(0xffffffff, w27);

  TEARDOWN();
}


TEST(add_sub_wide_imm) {
  SETUP();

  START();
  __ Mov(x0, 0x0);
  __ Mov(x1, 0x1);

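  // These immediates are too wide for the 12-bit (optionally shifted)
  // add/sub encoding, so the macro-assembler is expected to materialise
  // them in a temporary register before performing the arithmetic.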
  __ Add(x10, x0, Operand(0x1234567890abcdef));
  __ Add(x11, x1, Operand(0xffffffff));

  __ Add(w12, w0, Operand(0x12345678));
  __ Add(w13, w1, Operand(0xffffffff));

  __ Add(w18, w0, Operand(kWMinInt));
  __ Sub(w19, w0, Operand(kWMinInt));

  __ Sub(x20, x0, Operand(0x1234567890abcdef));
  __ Sub(w21, w0, Operand(0x12345678));

  END();

  RUN();

  ASSERT_EQUAL_64(0x1234567890abcdef, x10);
  ASSERT_EQUAL_64(0x100000000, x11);

  ASSERT_EQUAL_32(0x12345678, w12);
  ASSERT_EQUAL_64(0x0, x13);

  ASSERT_EQUAL_32(kWMinInt, w18);
  ASSERT_EQUAL_32(kWMinInt, w19);

  ASSERT_EQUAL_64(-0x1234567890abcdef, x20);
  ASSERT_EQUAL_32(-0x12345678, w21);

  TEARDOWN();
}


TEST(add_sub_shifted) {
  SETUP();

  START();
  __ Mov(x0, 0);
  __ Mov(x1, 0x0123456789abcdef);
  __ Mov(x2, 0xfedcba9876543210);
  __ Mov(x3, 0xffffffffffffffff);

  __ Add(x10, x1, Operand(x2));
  __ Add(x11, x0, Operand(x1, LSL, 8));
  __ Add(x12, x0, Operand(x1, LSR, 8));
  __ Add(x13, x0, Operand(x1, ASR, 8));
  __ Add(x14, x0, Operand(x2, ASR, 8));
  __ Add(w15, w0, Operand(w1, ASR, 8));
  __ Add(w18, w3, Operand(w1, ROR, 8));
  __ Add(x19, x3, Operand(x1, ROR, 8));

  __ Sub(x20, x3, Operand(x2));
  __ Sub(x21, x3, Operand(x1, LSL, 8));
  __ Sub(x22, x3, Operand(x1, LSR, 8));
  __ Sub(x23, x3, Operand(x1, ASR, 8));
  __ Sub(x24, x3, Operand(x2, ASR, 8));
  __ Sub(w25, w3, Operand(w1, ASR, 8));
  __ Sub(w26, w3, Operand(w1, ROR, 8));
  __ Sub(x27, x3, Operand(x1, ROR, 8));
  END();

  RUN();

  ASSERT_EQUAL_64(0xffffffffffffffff, x10);
  ASSERT_EQUAL_64(0x23456789abcdef00, x11);
  ASSERT_EQUAL_64(0x000123456789abcd, x12);
  ASSERT_EQUAL_64(0x000123456789abcd, x13);
  ASSERT_EQUAL_64(0xfffedcba98765432, x14);
  ASSERT_EQUAL_64(0xff89abcd, x15);
  ASSERT_EQUAL_64(0xef89abcc, x18);
  ASSERT_EQUAL_64(0xef0123456789abcc, x19);

  ASSERT_EQUAL_64(0x0123456789abcdef, x20);
  ASSERT_EQUAL_64(0xdcba9876543210ff, x21);
  ASSERT_EQUAL_64(0xfffedcba98765432, x22);
  ASSERT_EQUAL_64(0xfffedcba98765432, x23);
  ASSERT_EQUAL_64(0x000123456789abcd, x24);
  ASSERT_EQUAL_64(0x00765432, x25);
  ASSERT_EQUAL_64(0x10765432, x26);
  ASSERT_EQUAL_64(0x10fedcba98765432, x27);

  TEARDOWN();
}


TEST(add_sub_extended) {
  SETUP();

  START();
  __ Mov(x0, 0);
  __ Mov(x1, 0x0123456789abcdef);
  __ Mov(x2, 0xfedcba9876543210);
  __ Mov(w3, 0x80);

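  // Operand(reg, <extend>, shift) extracts the low byte, halfword or word,
  // zero- or sign-extends it, then shifts the result left. For example,
  // Operand(x1, UXTB, 1) yields (x1 & 0xff) << 1.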
  __ Add(x10, x0, Operand(x1, UXTB, 0));
  __ Add(x11, x0, Operand(x1, UXTB, 1));
  __ Add(x12, x0, Operand(x1, UXTH, 2));
  __ Add(x13, x0, Operand(x1, UXTW, 4));

  __ Add(x14, x0, Operand(x1, SXTB, 0));
  __ Add(x15, x0, Operand(x1, SXTB, 1));
  __ Add(x16, x0, Operand(x1, SXTH, 2));
  __ Add(x17, x0, Operand(x1, SXTW, 3));
  __ Add(x18, x0, Operand(x2, SXTB, 0));
  __ Add(x19, x0, Operand(x2, SXTB, 1));
  __ Add(x20, x0, Operand(x2, SXTH, 2));
  __ Add(x21, x0, Operand(x2, SXTW, 3));

  __ Add(x22, x1, Operand(x2, SXTB, 1));
  __ Sub(x23, x1, Operand(x2, SXTB, 1));

  __ Add(w24, w1, Operand(w2, UXTB, 2));
  __ Add(w25, w0, Operand(w1, SXTB, 0));
  __ Add(w26, w0, Operand(w1, SXTB, 1));
  __ Add(w27, w2, Operand(w1, SXTW, 3));

  __ Add(w28, w0, Operand(w1, SXTW, 3));
  __ Add(x29, x0, Operand(w1, SXTW, 3));

  __ Sub(x30, x0, Operand(w3, SXTB, 1));
  END();

  RUN();

  ASSERT_EQUAL_64(0xef, x10);
  ASSERT_EQUAL_64(0x1de, x11);
  ASSERT_EQUAL_64(0x337bc, x12);
  ASSERT_EQUAL_64(0x89abcdef0, x13);

  ASSERT_EQUAL_64(0xffffffffffffffef, x14);
  ASSERT_EQUAL_64(0xffffffffffffffde, x15);
  ASSERT_EQUAL_64(0xffffffffffff37bc, x16);
  ASSERT_EQUAL_64(0xfffffffc4d5e6f78, x17);
  ASSERT_EQUAL_64(0x10, x18);
  ASSERT_EQUAL_64(0x20, x19);
  ASSERT_EQUAL_64(0xc840, x20);
  ASSERT_EQUAL_64(0x3b2a19080, x21);

  ASSERT_EQUAL_64(0x0123456789abce0f, x22);
  ASSERT_EQUAL_64(0x0123456789abcdcf, x23);

  ASSERT_EQUAL_32(0x89abce2f, w24);
  ASSERT_EQUAL_32(0xffffffef, w25);
  ASSERT_EQUAL_32(0xffffffde, w26);
  ASSERT_EQUAL_32(0xc3b2a188, w27);

  ASSERT_EQUAL_32(0x4d5e6f78, w28);
  ASSERT_EQUAL_64(0xfffffffc4d5e6f78, x29);

  ASSERT_EQUAL_64(256, x30);

  TEARDOWN();
}


TEST(add_sub_negative) {
  SETUP();

  START();
  __ Mov(x0, 0);
  __ Mov(x1, 4687);
  __ Mov(x2, 0x1122334455667788);
  __ Mov(w3, 0x11223344);
  __ Mov(w4, 400000);

  __ Add(x10, x0, -42);
  __ Add(x11, x1, -687);
  __ Add(x12, x2, -0x88);

  __ Sub(x13, x0, -600);
  __ Sub(x14, x1, -313);
  __ Sub(x15, x2, -0x555);

  __ Add(w19, w3, -0x344);
  __ Add(w20, w4, -2000);

  __ Sub(w21, w3, -0xbc);
  __ Sub(w22, w4, -2000);
  END();

  RUN();

  ASSERT_EQUAL_64(-42, x10);
  ASSERT_EQUAL_64(4000, x11);
  ASSERT_EQUAL_64(0x1122334455667700, x12);

  ASSERT_EQUAL_64(600, x13);
  ASSERT_EQUAL_64(5000, x14);
  ASSERT_EQUAL_64(0x1122334455667cdd, x15);

  ASSERT_EQUAL_32(0x11223000, w19);
  ASSERT_EQUAL_32(398000, w20);

  ASSERT_EQUAL_32(0x11223400, w21);
  ASSERT_EQUAL_32(402000, w22);

  TEARDOWN();
}


TEST(add_sub_zero) {
  SETUP();

  START();
  __ Mov(x0, 0);
  __ Mov(x1, 0);
  __ Mov(x2, 0);

  Label blob1;
  __ Bind(&blob1);
  __ Add(x0, x0, 0);
  __ Sub(x1, x1, 0);
  __ Sub(x2, x2, xzr);
  VIXL_CHECK(__ GetSizeOfCodeGeneratedSince(&blob1) == 0);

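  // The same operations on W registers cannot be elided: writing a W
  // register also zeroes the upper 32 bits of the corresponding X register,
  // so removing the instruction could change the architectural state.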
  Label blob2;
  __ Bind(&blob2);
  __ Add(w3, w3, 0);
  VIXL_CHECK(__ GetSizeOfCodeGeneratedSince(&blob2) != 0);

  Label blob3;
  __ Bind(&blob3);
  __ Sub(w3, w3, wzr);
  VIXL_CHECK(__ GetSizeOfCodeGeneratedSince(&blob3) != 0);

  END();

  RUN();

  ASSERT_EQUAL_64(0, x0);
  ASSERT_EQUAL_64(0, x1);
  ASSERT_EQUAL_64(0, x2);

  TEARDOWN();
}


TEST(claim_drop_zero) {
  SETUP();

  START();

  Label start;
  __ Bind(&start);
  __ Claim(Operand(0));
  __ Drop(Operand(0));
  __ Claim(Operand(xzr));
  __ Drop(Operand(xzr));
  VIXL_CHECK(__ GetSizeOfCodeGeneratedSince(&start) == 0);

  END();

  RUN();

  TEARDOWN();
}


TEST(neg) {
  SETUP();

  START();
  __ Mov(x0, 0xf123456789abcdef);

  // Immediate.
  __ Neg(x1, 0x123);
  __ Neg(w2, 0x123);

  // Shifted.
  __ Neg(x3, Operand(x0, LSL, 1));
  __ Neg(w4, Operand(w0, LSL, 2));
  __ Neg(x5, Operand(x0, LSR, 3));
  __ Neg(w6, Operand(w0, LSR, 4));
  __ Neg(x7, Operand(x0, ASR, 5));
  __ Neg(w8, Operand(w0, ASR, 6));

  // Extended.
  __ Neg(w9, Operand(w0, UXTB));
  __ Neg(x10, Operand(x0, SXTB, 1));
  __ Neg(w11, Operand(w0, UXTH, 2));
  __ Neg(x12, Operand(x0, SXTH, 3));
  __ Neg(w13, Operand(w0, UXTW, 4));
  __ Neg(x14, Operand(x0, SXTW, 4));
  END();

  RUN();

  ASSERT_EQUAL_64(0xfffffffffffffedd, x1);
  ASSERT_EQUAL_64(0xfffffedd, x2);
  ASSERT_EQUAL_64(0x1db97530eca86422, x3);
  ASSERT_EQUAL_64(0xd950c844, x4);
  ASSERT_EQUAL_64(0xe1db97530eca8643, x5);
  ASSERT_EQUAL_64(0xf7654322, x6);
  ASSERT_EQUAL_64(0x0076e5d4c3b2a191, x7);
  ASSERT_EQUAL_64(0x01d950c9, x8);
  ASSERT_EQUAL_64(0xffffff11, x9);
  ASSERT_EQUAL_64(0x0000000000000022, x10);
  ASSERT_EQUAL_64(0xfffcc844, x11);
  ASSERT_EQUAL_64(0x0000000000019088, x12);
  ASSERT_EQUAL_64(0x65432110, x13);
  ASSERT_EQUAL_64(0x0000000765432110, x14);

  TEARDOWN();
}


template <typename T, typename Op>
static void AdcsSbcsHelper(
    Op op, T left, T right, int carry, T expected, StatusFlags expected_flags) {
  int reg_size = sizeof(T) * 8;
  Register left_reg(0, reg_size);
  Register right_reg(1, reg_size);
  Register result_reg(2, reg_size);

  SETUP();
  START();

  __ Mov(left_reg, left);
  __ Mov(right_reg, right);
  __ Mov(x10, (carry ? CFlag : NoFlag));

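  // Write the flags directly so that the carry-in consumed by Adcs/Sbcs is
  // under the test's control.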
  __ Msr(NZCV, x10);
  (masm.*op)(result_reg, left_reg, right_reg);

  END();
  RUN();

  ASSERT_EQUAL_64(left, left_reg.X());
  ASSERT_EQUAL_64(right, right_reg.X());
  ASSERT_EQUAL_64(expected, result_reg.X());
  ASSERT_EQUAL_NZCV(expected_flags);

  TEARDOWN();
}


TEST(adcs_sbcs_x) {
  uint64_t inputs[] = {
      0x0000000000000000,
      0x0000000000000001,
      0x7ffffffffffffffe,
      0x7fffffffffffffff,
      0x8000000000000000,
      0x8000000000000001,
      0xfffffffffffffffe,
      0xffffffffffffffff,
  };
  static const size_t input_count = sizeof(inputs) / sizeof(inputs[0]);

  struct Expected {
    uint64_t carry0_result;
    StatusFlags carry0_flags;
    uint64_t carry1_result;
    StatusFlags carry1_flags;
  };

  static const Expected expected_adcs_x[input_count][input_count] =
      {{{0x0000000000000000, ZFlag, 0x0000000000000001, NoFlag},
        {0x0000000000000001, NoFlag, 0x0000000000000002, NoFlag},
        {0x7ffffffffffffffe, NoFlag, 0x7fffffffffffffff, NoFlag},
        {0x7fffffffffffffff, NoFlag, 0x8000000000000000, NVFlag},
        {0x8000000000000000, NFlag, 0x8000000000000001, NFlag},
        {0x8000000000000001, NFlag, 0x8000000000000002, NFlag},
        {0xfffffffffffffffe, NFlag, 0xffffffffffffffff, NFlag},
        {0xffffffffffffffff, NFlag, 0x0000000000000000, ZCFlag}},
       {{0x0000000000000001, NoFlag, 0x0000000000000002, NoFlag},
        {0x0000000000000002, NoFlag, 0x0000000000000003, NoFlag},
        {0x7fffffffffffffff, NoFlag, 0x8000000000000000, NVFlag},
        {0x8000000000000000, NVFlag, 0x8000000000000001, NVFlag},
        {0x8000000000000001, NFlag, 0x8000000000000002, NFlag},
        {0x8000000000000002, NFlag, 0x8000000000000003, NFlag},
        {0xffffffffffffffff, NFlag, 0x0000000000000000, ZCFlag},
        {0x0000000000000000, ZCFlag, 0x0000000000000001, CFlag}},
       {{0x7ffffffffffffffe, NoFlag, 0x7fffffffffffffff, NoFlag},
        {0x7fffffffffffffff, NoFlag, 0x8000000000000000, NVFlag},
        {0xfffffffffffffffc, NVFlag, 0xfffffffffffffffd, NVFlag},
        {0xfffffffffffffffd, NVFlag, 0xfffffffffffffffe, NVFlag},
        {0xfffffffffffffffe, NFlag, 0xffffffffffffffff, NFlag},
        {0xffffffffffffffff, NFlag, 0x0000000000000000, ZCFlag},
        {0x7ffffffffffffffc, CFlag, 0x7ffffffffffffffd, CFlag},
        {0x7ffffffffffffffd, CFlag, 0x7ffffffffffffffe, CFlag}},
       {{0x7fffffffffffffff, NoFlag, 0x8000000000000000, NVFlag},
        {0x8000000000000000, NVFlag, 0x8000000000000001, NVFlag},
        {0xfffffffffffffffd, NVFlag, 0xfffffffffffffffe, NVFlag},
        {0xfffffffffffffffe, NVFlag, 0xffffffffffffffff, NVFlag},
        {0xffffffffffffffff, NFlag, 0x0000000000000000, ZCFlag},
        {0x0000000000000000, ZCFlag, 0x0000000000000001, CFlag},
        {0x7ffffffffffffffd, CFlag, 0x7ffffffffffffffe, CFlag},
        {0x7ffffffffffffffe, CFlag, 0x7fffffffffffffff, CFlag}},
       {{0x8000000000000000, NFlag, 0x8000000000000001, NFlag},
        {0x8000000000000001, NFlag, 0x8000000000000002, NFlag},
        {0xfffffffffffffffe, NFlag, 0xffffffffffffffff, NFlag},
        {0xffffffffffffffff, NFlag, 0x0000000000000000, ZCFlag},
        {0x0000000000000000, ZCVFlag, 0x0000000000000001, CVFlag},
        {0x0000000000000001, CVFlag, 0x0000000000000002, CVFlag},
        {0x7ffffffffffffffe, CVFlag, 0x7fffffffffffffff, CVFlag},
        {0x7fffffffffffffff, CVFlag, 0x8000000000000000, NCFlag}},
       {{0x8000000000000001, NFlag, 0x8000000000000002, NFlag},
        {0x8000000000000002, NFlag, 0x8000000000000003, NFlag},
        {0xffffffffffffffff, NFlag, 0x0000000000000000, ZCFlag},
        {0x0000000000000000, ZCFlag, 0x0000000000000001, CFlag},
        {0x0000000000000001, CVFlag, 0x0000000000000002, CVFlag},
        {0x0000000000000002, CVFlag, 0x0000000000000003, CVFlag},
        {0x7fffffffffffffff, CVFlag, 0x8000000000000000, NCFlag},
        {0x8000000000000000, NCFlag, 0x8000000000000001, NCFlag}},
       {{0xfffffffffffffffe, NFlag, 0xffffffffffffffff, NFlag},
        {0xffffffffffffffff, NFlag, 0x0000000000000000, ZCFlag},
        {0x7ffffffffffffffc, CFlag, 0x7ffffffffffffffd, CFlag},
        {0x7ffffffffffffffd, CFlag, 0x7ffffffffffffffe, CFlag},
        {0x7ffffffffffffffe, CVFlag, 0x7fffffffffffffff, CVFlag},
        {0x7fffffffffffffff, CVFlag, 0x8000000000000000, NCFlag},
        {0xfffffffffffffffc, NCFlag, 0xfffffffffffffffd, NCFlag},
        {0xfffffffffffffffd, NCFlag, 0xfffffffffffffffe, NCFlag}},
       {{0xffffffffffffffff, NFlag, 0x0000000000000000, ZCFlag},
        {0x0000000000000000, ZCFlag, 0x0000000000000001, CFlag},
        {0x7ffffffffffffffd, CFlag, 0x7ffffffffffffffe, CFlag},
        {0x7ffffffffffffffe, CFlag, 0x7fffffffffffffff, CFlag},
        {0x7fffffffffffffff, CVFlag, 0x8000000000000000, NCFlag},
        {0x8000000000000000, NCFlag, 0x8000000000000001, NCFlag},
        {0xfffffffffffffffd, NCFlag, 0xfffffffffffffffe, NCFlag},
        {0xfffffffffffffffe, NCFlag, 0xffffffffffffffff, NCFlag}}};

  static const Expected expected_sbcs_x[input_count][input_count] =
      {{{0xffffffffffffffff, NFlag, 0x0000000000000000, ZCFlag},
        {0xfffffffffffffffe, NFlag, 0xffffffffffffffff, NFlag},
        {0x8000000000000001, NFlag, 0x8000000000000002, NFlag},
        {0x8000000000000000, NFlag, 0x8000000000000001, NFlag},
        {0x7fffffffffffffff, NoFlag, 0x8000000000000000, NVFlag},
        {0x7ffffffffffffffe, NoFlag, 0x7fffffffffffffff, NoFlag},
        {0x0000000000000001, NoFlag, 0x0000000000000002, NoFlag},
        {0x0000000000000000, ZFlag, 0x0000000000000001, NoFlag}},
       {{0x0000000000000000, ZCFlag, 0x0000000000000001, CFlag},
        {0xffffffffffffffff, NFlag, 0x0000000000000000, ZCFlag},
        {0x8000000000000002, NFlag, 0x8000000000000003, NFlag},
        {0x8000000000000001, NFlag, 0x8000000000000002, NFlag},
        {0x8000000000000000, NVFlag, 0x8000000000000001, NVFlag},
        {0x7fffffffffffffff, NoFlag, 0x8000000000000000, NVFlag},
        {0x0000000000000002, NoFlag, 0x0000000000000003, NoFlag},
        {0x0000000000000001, NoFlag, 0x0000000000000002, NoFlag}},
       {{0x7ffffffffffffffd, CFlag, 0x7ffffffffffffffe, CFlag},
        {0x7ffffffffffffffc, CFlag, 0x7ffffffffffffffd, CFlag},
        {0xffffffffffffffff, NFlag, 0x0000000000000000, ZCFlag},
        {0xfffffffffffffffe, NFlag, 0xffffffffffffffff, NFlag},
        {0xfffffffffffffffd, NVFlag, 0xfffffffffffffffe, NVFlag},
        {0xfffffffffffffffc, NVFlag, 0xfffffffffffffffd, NVFlag},
        {0x7fffffffffffffff, NoFlag, 0x8000000000000000, NVFlag},
        {0x7ffffffffffffffe, NoFlag, 0x7fffffffffffffff, NoFlag}},
       {{0x7ffffffffffffffe, CFlag, 0x7fffffffffffffff, CFlag},
        {0x7ffffffffffffffd, CFlag, 0x7ffffffffffffffe, CFlag},
        {0x0000000000000000, ZCFlag, 0x0000000000000001, CFlag},
        {0xffffffffffffffff, NFlag, 0x0000000000000000, ZCFlag},
        {0xfffffffffffffffe, NVFlag, 0xffffffffffffffff, NVFlag},
        {0xfffffffffffffffd, NVFlag, 0xfffffffffffffffe, NVFlag},
        {0x8000000000000000, NVFlag, 0x8000000000000001, NVFlag},
        {0x7fffffffffffffff, NoFlag, 0x8000000000000000, NVFlag}},
       {{0x7fffffffffffffff, CVFlag, 0x8000000000000000, NCFlag},
        {0x7ffffffffffffffe, CVFlag, 0x7fffffffffffffff, CVFlag},
        {0x0000000000000001, CVFlag, 0x0000000000000002, CVFlag},
        {0x0000000000000000, ZCVFlag, 0x0000000000000001, CVFlag},
        {0xffffffffffffffff, NFlag, 0x0000000000000000, ZCFlag},
        {0xfffffffffffffffe, NFlag, 0xffffffffffffffff, NFlag},
        {0x8000000000000001, NFlag, 0x8000000000000002, NFlag},
        {0x8000000000000000, NFlag, 0x8000000000000001, NFlag}},
       {{0x8000000000000000, NCFlag, 0x8000000000000001, NCFlag},
        {0x7fffffffffffffff, CVFlag, 0x8000000000000000, NCFlag},
        {0x0000000000000002, CVFlag, 0x0000000000000003, CVFlag},
        {0x0000000000000001, CVFlag, 0x0000000000000002, CVFlag},
        {0x0000000000000000, ZCFlag, 0x0000000000000001, CFlag},
        {0xffffffffffffffff, NFlag, 0x0000000000000000, ZCFlag},
        {0x8000000000000002, NFlag, 0x8000000000000003, NFlag},
        {0x8000000000000001, NFlag, 0x8000000000000002, NFlag}},
       {{0xfffffffffffffffd, NCFlag, 0xfffffffffffffffe, NCFlag},
        {0xfffffffffffffffc, NCFlag, 0xfffffffffffffffd, NCFlag},
        {0x7fffffffffffffff, CVFlag, 0x8000000000000000, NCFlag},
        {0x7ffffffffffffffe, CVFlag, 0x7fffffffffffffff, CVFlag},
        {0x7ffffffffffffffd, CFlag, 0x7ffffffffffffffe, CFlag},
        {0x7ffffffffffffffc, CFlag, 0x7ffffffffffffffd, CFlag},
        {0xffffffffffffffff, NFlag, 0x0000000000000000, ZCFlag},
        {0xfffffffffffffffe, NFlag, 0xffffffffffffffff, NFlag}},
       {{0xfffffffffffffffe, NCFlag, 0xffffffffffffffff, NCFlag},
        {0xfffffffffffffffd, NCFlag, 0xfffffffffffffffe, NCFlag},
9043 {0x8000000000000000, NCFlag, 0x8000000000000001, NCFlag},
9044 {0x7fffffffffffffff, CVFlag, 0x8000000000000000, NCFlag},
9045 {0x7ffffffffffffffe, CFlag, 0x7fffffffffffffff, CFlag},
9046 {0x7ffffffffffffffd, CFlag, 0x7ffffffffffffffe, CFlag},
9047 {0x0000000000000000, ZCFlag, 0x0000000000000001, CFlag},
9048 {0xffffffffffffffff, NFlag, 0x0000000000000000, ZCFlag}}};
9049
  for (size_t left = 0; left < input_count; left++) {
    for (size_t right = 0; right < input_count; right++) {
      const Expected& expected = expected_adcs_x[left][right];
      AdcsSbcsHelper(&MacroAssembler::Adcs,
                     inputs[left],
                     inputs[right],
                     0,
                     expected.carry0_result,
                     expected.carry0_flags);
      AdcsSbcsHelper(&MacroAssembler::Adcs,
                     inputs[left],
                     inputs[right],
                     1,
                     expected.carry1_result,
                     expected.carry1_flags);
    }
  }

  for (size_t left = 0; left < input_count; left++) {
    for (size_t right = 0; right < input_count; right++) {
      const Expected& expected = expected_sbcs_x[left][right];
      AdcsSbcsHelper(&MacroAssembler::Sbcs,
                     inputs[left],
                     inputs[right],
                     0,
                     expected.carry0_result,
                     expected.carry0_flags);
      AdcsSbcsHelper(&MacroAssembler::Sbcs,
                     inputs[left],
                     inputs[right],
                     1,
                     expected.carry1_result,
                     expected.carry1_flags);
    }
  }
}


TEST(adcs_sbcs_w) {
  uint32_t inputs[] = {
      0x00000000,
      0x00000001,
      0x7ffffffe,
      0x7fffffff,
      0x80000000,
      0x80000001,
      0xfffffffe,
      0xffffffff,
  };
  static const size_t input_count = sizeof(inputs) / sizeof(inputs[0]);

  struct Expected {
    uint32_t carry0_result;
    StatusFlags carry0_flags;
    uint32_t carry1_result;
    StatusFlags carry1_flags;
  };

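  // Adcs computes left + right + C and Sbcs computes left - right - !C
  // (that is, left + ~right + C), setting NZCV from the result. Each table
  // entry therefore holds the expected result and flags for both carry-in
  // values.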
  static const Expected expected_adcs_w[input_count][input_count] =
      {{{0x00000000, ZFlag, 0x00000001, NoFlag},
        {0x00000001, NoFlag, 0x00000002, NoFlag},
        {0x7ffffffe, NoFlag, 0x7fffffff, NoFlag},
        {0x7fffffff, NoFlag, 0x80000000, NVFlag},
        {0x80000000, NFlag, 0x80000001, NFlag},
        {0x80000001, NFlag, 0x80000002, NFlag},
        {0xfffffffe, NFlag, 0xffffffff, NFlag},
        {0xffffffff, NFlag, 0x00000000, ZCFlag}},
       {{0x00000001, NoFlag, 0x00000002, NoFlag},
        {0x00000002, NoFlag, 0x00000003, NoFlag},
        {0x7fffffff, NoFlag, 0x80000000, NVFlag},
        {0x80000000, NVFlag, 0x80000001, NVFlag},
        {0x80000001, NFlag, 0x80000002, NFlag},
        {0x80000002, NFlag, 0x80000003, NFlag},
        {0xffffffff, NFlag, 0x00000000, ZCFlag},
        {0x00000000, ZCFlag, 0x00000001, CFlag}},
       {{0x7ffffffe, NoFlag, 0x7fffffff, NoFlag},
        {0x7fffffff, NoFlag, 0x80000000, NVFlag},
        {0xfffffffc, NVFlag, 0xfffffffd, NVFlag},
        {0xfffffffd, NVFlag, 0xfffffffe, NVFlag},
        {0xfffffffe, NFlag, 0xffffffff, NFlag},
        {0xffffffff, NFlag, 0x00000000, ZCFlag},
        {0x7ffffffc, CFlag, 0x7ffffffd, CFlag},
        {0x7ffffffd, CFlag, 0x7ffffffe, CFlag}},
       {{0x7fffffff, NoFlag, 0x80000000, NVFlag},
        {0x80000000, NVFlag, 0x80000001, NVFlag},
        {0xfffffffd, NVFlag, 0xfffffffe, NVFlag},
        {0xfffffffe, NVFlag, 0xffffffff, NVFlag},
        {0xffffffff, NFlag, 0x00000000, ZCFlag},
        {0x00000000, ZCFlag, 0x00000001, CFlag},
        {0x7ffffffd, CFlag, 0x7ffffffe, CFlag},
        {0x7ffffffe, CFlag, 0x7fffffff, CFlag}},
       {{0x80000000, NFlag, 0x80000001, NFlag},
        {0x80000001, NFlag, 0x80000002, NFlag},
        {0xfffffffe, NFlag, 0xffffffff, NFlag},
        {0xffffffff, NFlag, 0x00000000, ZCFlag},
        {0x00000000, ZCVFlag, 0x00000001, CVFlag},
        {0x00000001, CVFlag, 0x00000002, CVFlag},
        {0x7ffffffe, CVFlag, 0x7fffffff, CVFlag},
        {0x7fffffff, CVFlag, 0x80000000, NCFlag}},
       {{0x80000001, NFlag, 0x80000002, NFlag},
        {0x80000002, NFlag, 0x80000003, NFlag},
        {0xffffffff, NFlag, 0x00000000, ZCFlag},
        {0x00000000, ZCFlag, 0x00000001, CFlag},
        {0x00000001, CVFlag, 0x00000002, CVFlag},
        {0x00000002, CVFlag, 0x00000003, CVFlag},
        {0x7fffffff, CVFlag, 0x80000000, NCFlag},
        {0x80000000, NCFlag, 0x80000001, NCFlag}},
       {{0xfffffffe, NFlag, 0xffffffff, NFlag},
        {0xffffffff, NFlag, 0x00000000, ZCFlag},
        {0x7ffffffc, CFlag, 0x7ffffffd, CFlag},
        {0x7ffffffd, CFlag, 0x7ffffffe, CFlag},
        {0x7ffffffe, CVFlag, 0x7fffffff, CVFlag},
        {0x7fffffff, CVFlag, 0x80000000, NCFlag},
        {0xfffffffc, NCFlag, 0xfffffffd, NCFlag},
        {0xfffffffd, NCFlag, 0xfffffffe, NCFlag}},
       {{0xffffffff, NFlag, 0x00000000, ZCFlag},
        {0x00000000, ZCFlag, 0x00000001, CFlag},
        {0x7ffffffd, CFlag, 0x7ffffffe, CFlag},
        {0x7ffffffe, CFlag, 0x7fffffff, CFlag},
        {0x7fffffff, CVFlag, 0x80000000, NCFlag},
        {0x80000000, NCFlag, 0x80000001, NCFlag},
        {0xfffffffd, NCFlag, 0xfffffffe, NCFlag},
        {0xfffffffe, NCFlag, 0xffffffff, NCFlag}}};

  static const Expected expected_sbcs_w[input_count][input_count] =
      {{{0xffffffff, NFlag, 0x00000000, ZCFlag},
        {0xfffffffe, NFlag, 0xffffffff, NFlag},
        {0x80000001, NFlag, 0x80000002, NFlag},
        {0x80000000, NFlag, 0x80000001, NFlag},
        {0x7fffffff, NoFlag, 0x80000000, NVFlag},
        {0x7ffffffe, NoFlag, 0x7fffffff, NoFlag},
        {0x00000001, NoFlag, 0x00000002, NoFlag},
        {0x00000000, ZFlag, 0x00000001, NoFlag}},
       {{0x00000000, ZCFlag, 0x00000001, CFlag},
        {0xffffffff, NFlag, 0x00000000, ZCFlag},
        {0x80000002, NFlag, 0x80000003, NFlag},
        {0x80000001, NFlag, 0x80000002, NFlag},
        {0x80000000, NVFlag, 0x80000001, NVFlag},
        {0x7fffffff, NoFlag, 0x80000000, NVFlag},
        {0x00000002, NoFlag, 0x00000003, NoFlag},
        {0x00000001, NoFlag, 0x00000002, NoFlag}},
       {{0x7ffffffd, CFlag, 0x7ffffffe, CFlag},
        {0x7ffffffc, CFlag, 0x7ffffffd, CFlag},
        {0xffffffff, NFlag, 0x00000000, ZCFlag},
        {0xfffffffe, NFlag, 0xffffffff, NFlag},
        {0xfffffffd, NVFlag, 0xfffffffe, NVFlag},
        {0xfffffffc, NVFlag, 0xfffffffd, NVFlag},
        {0x7fffffff, NoFlag, 0x80000000, NVFlag},
        {0x7ffffffe, NoFlag, 0x7fffffff, NoFlag}},
       {{0x7ffffffe, CFlag, 0x7fffffff, CFlag},
        {0x7ffffffd, CFlag, 0x7ffffffe, CFlag},
        {0x00000000, ZCFlag, 0x00000001, CFlag},
        {0xffffffff, NFlag, 0x00000000, ZCFlag},
        {0xfffffffe, NVFlag, 0xffffffff, NVFlag},
        {0xfffffffd, NVFlag, 0xfffffffe, NVFlag},
        {0x80000000, NVFlag, 0x80000001, NVFlag},
        {0x7fffffff, NoFlag, 0x80000000, NVFlag}},
       {{0x7fffffff, CVFlag, 0x80000000, NCFlag},
        {0x7ffffffe, CVFlag, 0x7fffffff, CVFlag},
        {0x00000001, CVFlag, 0x00000002, CVFlag},
        {0x00000000, ZCVFlag, 0x00000001, CVFlag},
        {0xffffffff, NFlag, 0x00000000, ZCFlag},
        {0xfffffffe, NFlag, 0xffffffff, NFlag},
        {0x80000001, NFlag, 0x80000002, NFlag},
        {0x80000000, NFlag, 0x80000001, NFlag}},
       {{0x80000000, NCFlag, 0x80000001, NCFlag},
        {0x7fffffff, CVFlag, 0x80000000, NCFlag},
        {0x00000002, CVFlag, 0x00000003, CVFlag},
        {0x00000001, CVFlag, 0x00000002, CVFlag},
        {0x00000000, ZCFlag, 0x00000001, CFlag},
        {0xffffffff, NFlag, 0x00000000, ZCFlag},
        {0x80000002, NFlag, 0x80000003, NFlag},
        {0x80000001, NFlag, 0x80000002, NFlag}},
       {{0xfffffffd, NCFlag, 0xfffffffe, NCFlag},
        {0xfffffffc, NCFlag, 0xfffffffd, NCFlag},
        {0x7fffffff, CVFlag, 0x80000000, NCFlag},
        {0x7ffffffe, CVFlag, 0x7fffffff, CVFlag},
        {0x7ffffffd, CFlag, 0x7ffffffe, CFlag},
        {0x7ffffffc, CFlag, 0x7ffffffd, CFlag},
        {0xffffffff, NFlag, 0x00000000, ZCFlag},
        {0xfffffffe, NFlag, 0xffffffff, NFlag}},
       {{0xfffffffe, NCFlag, 0xffffffff, NCFlag},
        {0xfffffffd, NCFlag, 0xfffffffe, NCFlag},
        {0x80000000, NCFlag, 0x80000001, NCFlag},
        {0x7fffffff, CVFlag, 0x80000000, NCFlag},
        {0x7ffffffe, CFlag, 0x7fffffff, CFlag},
        {0x7ffffffd, CFlag, 0x7ffffffe, CFlag},
        {0x00000000, ZCFlag, 0x00000001, CFlag},
        {0xffffffff, NFlag, 0x00000000, ZCFlag}}};

  for (size_t left = 0; left < input_count; left++) {
    for (size_t right = 0; right < input_count; right++) {
      const Expected& expected = expected_adcs_w[left][right];
      AdcsSbcsHelper(&MacroAssembler::Adcs,
                     inputs[left],
                     inputs[right],
                     0,
                     expected.carry0_result,
                     expected.carry0_flags);
      AdcsSbcsHelper(&MacroAssembler::Adcs,
                     inputs[left],
                     inputs[right],
                     1,
                     expected.carry1_result,
                     expected.carry1_flags);
    }
  }

  for (size_t left = 0; left < input_count; left++) {
    for (size_t right = 0; right < input_count; right++) {
      const Expected& expected = expected_sbcs_w[left][right];
      AdcsSbcsHelper(&MacroAssembler::Sbcs,
                     inputs[left],
                     inputs[right],
                     0,
                     expected.carry0_result,
                     expected.carry0_flags);
      AdcsSbcsHelper(&MacroAssembler::Sbcs,
                     inputs[left],
                     inputs[right],
                     1,
                     expected.carry1_result,
                     expected.carry1_flags);
    }
  }
}


TEST(adc_sbc_shift) {
  SETUP();

  START();
  __ Mov(x0, 0);
  __ Mov(x1, 1);
  __ Mov(x2, 0x0123456789abcdef);
  __ Mov(x3, 0xfedcba9876543210);
  __ Mov(x4, 0xffffffffffffffff);

  // Clear the C flag.
  __ Adds(x0, x0, Operand(0));

  __ Adc(x5, x2, Operand(x3));
  __ Adc(x6, x0, Operand(x1, LSL, 60));
  __ Sbc(x7, x4, Operand(x3, LSR, 4));
  __ Adc(x8, x2, Operand(x3, ASR, 4));
  __ Adc(x9, x2, Operand(x3, ROR, 8));

  __ Adc(w10, w2, Operand(w3));
  __ Adc(w11, w0, Operand(w1, LSL, 30));
  __ Sbc(w12, w4, Operand(w3, LSR, 4));
  __ Adc(w13, w2, Operand(w3, ASR, 4));
  __ Adc(w14, w2, Operand(w3, ROR, 8));

  // Set the C flag.
  __ Cmp(w0, Operand(w0));

  __ Adc(x18, x2, Operand(x3));
  __ Adc(x19, x0, Operand(x1, LSL, 60));
  __ Sbc(x20, x4, Operand(x3, LSR, 4));
  __ Adc(x21, x2, Operand(x3, ASR, 4));
  __ Adc(x22, x2, Operand(x3, ROR, 8));

  __ Adc(w23, w2, Operand(w3));
  __ Adc(w24, w0, Operand(w1, LSL, 30));
  __ Sbc(w25, w4, Operand(w3, LSR, 4));
  __ Adc(w26, w2, Operand(w3, ASR, 4));
  __ Adc(w27, w2, Operand(w3, ROR, 8));
  END();

  RUN();

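  // With the carry flag clear, Sbc computes rn - rm - 1 (rn + ~rm + C): for
  // example, x7 = 0xffffffffffffffff - (0xfedcba9876543210 >> 4) - 1
  //             = 0xf0123456789abcdd.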
  ASSERT_EQUAL_64(0xffffffffffffffff, x5);
  ASSERT_EQUAL_64(INT64_C(1) << 60, x6);
  ASSERT_EQUAL_64(0xf0123456789abcdd, x7);
  ASSERT_EQUAL_64(0x0111111111111110, x8);
  ASSERT_EQUAL_64(0x1222222222222221, x9);

  ASSERT_EQUAL_32(0xffffffff, w10);
  ASSERT_EQUAL_32(INT32_C(1) << 30, w11);
  ASSERT_EQUAL_32(0xf89abcdd, w12);
  ASSERT_EQUAL_32(0x91111110, w13);
  ASSERT_EQUAL_32(0x9a222221, w14);

  ASSERT_EQUAL_64(0xffffffffffffffff + 1, x18);
  ASSERT_EQUAL_64((INT64_C(1) << 60) + 1, x19);
  ASSERT_EQUAL_64(0xf0123456789abcdd + 1, x20);
  ASSERT_EQUAL_64(0x0111111111111110 + 1, x21);
  ASSERT_EQUAL_64(0x1222222222222221 + 1, x22);

  ASSERT_EQUAL_32(0xffffffff + 1, w23);
  ASSERT_EQUAL_32((INT32_C(1) << 30) + 1, w24);
  ASSERT_EQUAL_32(0xf89abcdd + 1, w25);
  ASSERT_EQUAL_32(0x91111110 + 1, w26);
  ASSERT_EQUAL_32(0x9a222221 + 1, w27);

  TEARDOWN();
}


TEST(adc_sbc_extend) {
  SETUP();

  START();
  // Clear the C flag.
  __ Adds(x0, x0, Operand(0));

  __ Mov(x0, 0);
  __ Mov(x1, 1);
  __ Mov(x2, 0x0123456789abcdef);

  __ Adc(x10, x1, Operand(w2, UXTB, 1));
  __ Adc(x11, x1, Operand(x2, SXTH, 2));
  __ Sbc(x12, x1, Operand(w2, UXTW, 4));
  __ Adc(x13, x1, Operand(x2, UXTX, 4));

  __ Adc(w14, w1, Operand(w2, UXTB, 1));
  __ Adc(w15, w1, Operand(w2, SXTH, 2));
  __ Adc(w9, w1, Operand(w2, UXTW, 4));

  // Set the C flag.
  __ Cmp(w0, Operand(w0));

  __ Adc(x20, x1, Operand(w2, UXTB, 1));
  __ Adc(x21, x1, Operand(x2, SXTH, 2));
  __ Sbc(x22, x1, Operand(w2, UXTW, 4));
  __ Adc(x23, x1, Operand(x2, UXTX, 4));

  __ Adc(w24, w1, Operand(w2, UXTB, 1));
  __ Adc(w25, w1, Operand(w2, SXTH, 2));
  __ Adc(w26, w1, Operand(w2, UXTW, 4));
  END();

  RUN();

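  // The extend is applied before the left shift: Operand(w2, UXTB, 1) is
  // (x2 & 0xff) << 1 = 0x1de, so with the carry flag clear
  // x10 = 1 + 0x1de = 0x1df.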
  ASSERT_EQUAL_64(0x1df, x10);
  ASSERT_EQUAL_64(0xffffffffffff37bd, x11);
  ASSERT_EQUAL_64(0xfffffff765432110, x12);
  ASSERT_EQUAL_64(0x123456789abcdef1, x13);

  ASSERT_EQUAL_32(0x1df, w14);
  ASSERT_EQUAL_32(0xffff37bd, w15);
  ASSERT_EQUAL_32(0x9abcdef1, w9);

  ASSERT_EQUAL_64(0x1df + 1, x20);
  ASSERT_EQUAL_64(0xffffffffffff37bd + 1, x21);
  ASSERT_EQUAL_64(0xfffffff765432110 + 1, x22);
  ASSERT_EQUAL_64(0x123456789abcdef1 + 1, x23);

  ASSERT_EQUAL_32(0x1df + 1, w24);
  ASSERT_EQUAL_32(0xffff37bd + 1, w25);
  ASSERT_EQUAL_32(0x9abcdef1 + 1, w26);

  // Check that adc correctly sets the condition flags.
  START();
  __ Mov(x0, 0xff);
  __ Mov(x1, 0xffffffffffffffff);
  // Clear the C flag.
  __ Adds(x0, x0, Operand(0));
  __ Adcs(x10, x0, Operand(x1, SXTX, 1));
  END();

  RUN();

  ASSERT_EQUAL_NZCV(CFlag);

  START();
  __ Mov(x0, 0x7fffffffffffffff);
  __ Mov(x1, 1);
  // Clear the C flag.
  __ Adds(x0, x0, Operand(0));
  __ Adcs(x10, x0, Operand(x1, UXTB, 2));
  END();

  RUN();

  ASSERT_EQUAL_NZCV(NVFlag);

  START();
  __ Mov(x0, 0x7fffffffffffffff);
  // Clear the C flag.
  __ Adds(x0, x0, Operand(0));
  __ Adcs(x10, x0, Operand(1));
  END();

  RUN();

  ASSERT_EQUAL_NZCV(NVFlag);

  TEARDOWN();
}


TEST(adc_sbc_wide_imm) {
  SETUP();

  START();
  __ Mov(x0, 0);

  // Clear the C flag.
  __ Adds(x0, x0, Operand(0));

  __ Adc(x7, x0, Operand(0x1234567890abcdef));
  __ Adc(w8, w0, Operand(0xffffffff));
  __ Sbc(x9, x0, Operand(0x1234567890abcdef));
  __ Sbc(w10, w0, Operand(0xffffffff));
  __ Ngc(x11, Operand(0xffffffff00000000));
  __ Ngc(w12, Operand(0xffff0000));

  // Set the C flag.
  __ Cmp(w0, Operand(w0));

  __ Adc(x18, x0, Operand(0x1234567890abcdef));
  __ Adc(w19, w0, Operand(0xffffffff));
  __ Sbc(x20, x0, Operand(0x1234567890abcdef));
  __ Sbc(w21, w0, Operand(0xffffffff));
  __ Ngc(x22, Operand(0xffffffff00000000));
  __ Ngc(w23, Operand(0xffff0000));
  END();

  RUN();

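  // Ngc(rd, rm) computes -rm - !C (equivalently ~rm + C), so with the carry
  // flag clear, Ngc(x11, 0xffffffff00000000) yields 0x00000000ffffffff.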
  ASSERT_EQUAL_64(0x1234567890abcdef, x7);
  ASSERT_EQUAL_64(0xffffffff, x8);
  ASSERT_EQUAL_64(0xedcba9876f543210, x9);
  ASSERT_EQUAL_64(0, x10);
  ASSERT_EQUAL_64(0xffffffff, x11);
  ASSERT_EQUAL_64(0xffff, x12);

  ASSERT_EQUAL_64(0x1234567890abcdef + 1, x18);
  ASSERT_EQUAL_64(0, x19);
  ASSERT_EQUAL_64(0xedcba9876f543211, x20);
  ASSERT_EQUAL_64(1, x21);
  ASSERT_EQUAL_64(0x0000000100000000, x22);
  ASSERT_EQUAL_64(0x0000000000010000, x23);

  TEARDOWN();
}

TEST(flags) {
  SETUP();

  START();
  __ Mov(x0, 0);
  __ Mov(x1, 0x1111111111111111);
  __ Neg(x10, Operand(x0));
  __ Neg(x11, Operand(x1));
  __ Neg(w12, Operand(w1));
  // Clear the C flag.
  __ Adds(x0, x0, Operand(0));
  __ Ngc(x13, Operand(x0));
  // Set the C flag.
  __ Cmp(x0, Operand(x0));
  __ Ngc(w14, Operand(w0));
  END();

  RUN();

  ASSERT_EQUAL_64(0, x10);
  ASSERT_EQUAL_64(-0x1111111111111111, x11);
  ASSERT_EQUAL_32(-0x11111111, w12);
  ASSERT_EQUAL_64(-1, x13);
  ASSERT_EQUAL_32(0, w14);

  START();
  __ Mov(x0, 0);
  __ Cmp(x0, Operand(x0));
  END();

  RUN();

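  // Cmp is Subs with the result discarded: comparing a register with itself
  // produces zero with no borrow, so Z and C are set.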
  ASSERT_EQUAL_NZCV(ZCFlag);

  START();
  __ Mov(w0, 0);
  __ Cmp(w0, Operand(w0));
  END();

  RUN();

  ASSERT_EQUAL_NZCV(ZCFlag);

  START();
  __ Mov(x0, 0);
  __ Mov(x1, 0x1111111111111111);
  __ Cmp(x0, Operand(x1));
  END();

  RUN();

  ASSERT_EQUAL_NZCV(NFlag);

  START();
  __ Mov(w0, 0);
  __ Mov(w1, 0x11111111);
  __ Cmp(w0, Operand(w1));
  END();

  RUN();

  ASSERT_EQUAL_NZCV(NFlag);

  START();
  __ Mov(x1, 0x1111111111111111);
  __ Cmp(x1, Operand(0));
  END();

  RUN();

  ASSERT_EQUAL_NZCV(CFlag);

  START();
  __ Mov(w1, 0x11111111);
  __ Cmp(w1, Operand(0));
  END();

  RUN();

  ASSERT_EQUAL_NZCV(CFlag);

  START();
  __ Mov(x0, 1);
  __ Mov(x1, 0x7fffffffffffffff);
  __ Cmn(x1, Operand(x0));
  END();

  RUN();

  ASSERT_EQUAL_NZCV(NVFlag);

  START();
  __ Mov(w0, 1);
  __ Mov(w1, 0x7fffffff);
  __ Cmn(w1, Operand(w0));
  END();

  RUN();

  ASSERT_EQUAL_NZCV(NVFlag);

  START();
  __ Mov(x0, 1);
  __ Mov(x1, 0xffffffffffffffff);
  __ Cmn(x1, Operand(x0));
  END();

  RUN();

  ASSERT_EQUAL_NZCV(ZCFlag);

  START();
  __ Mov(w0, 1);
  __ Mov(w1, 0xffffffff);
  __ Cmn(w1, Operand(w0));
  END();

  RUN();

  ASSERT_EQUAL_NZCV(ZCFlag);

  START();
  __ Mov(w0, 0);
  __ Mov(w1, 1);
  // Clear the C flag.
  __ Adds(w0, w0, Operand(0));
  __ Ngcs(w0, Operand(w1));
  END();

  RUN();

  ASSERT_EQUAL_NZCV(NFlag);

  START();
  __ Mov(w0, 0);
  __ Mov(w1, 0);
  // Set the C flag.
  __ Cmp(w0, Operand(w0));
  __ Ngcs(w0, Operand(w1));
  END();

  RUN();

  ASSERT_EQUAL_NZCV(ZCFlag);

  TEARDOWN();
}


TEST(cmp_shift) {
  SETUP();

  START();
  __ Mov(x18, 0xf0000000);
  __ Mov(x19, 0xf000000010000000);
  __ Mov(x20, 0xf0000000f0000000);
  __ Mov(x21, 0x7800000078000000);
  __ Mov(x22, 0x3c0000003c000000);
  __ Mov(x23, 0x8000000780000000);
  __ Mov(x24, 0x0000000f00000000);
  __ Mov(x25, 0x00000003c0000000);
  __ Mov(x26, 0x8000000780000000);
  __ Mov(x27, 0xc0000003);

  __ Cmp(w20, Operand(w21, LSL, 1));
  __ Mrs(x0, NZCV);

  __ Cmp(x20, Operand(x22, LSL, 2));
  __ Mrs(x1, NZCV);

  __ Cmp(w19, Operand(w23, LSR, 3));
  __ Mrs(x2, NZCV);

  __ Cmp(x18, Operand(x24, LSR, 4));
  __ Mrs(x3, NZCV);

  __ Cmp(w20, Operand(w25, ASR, 2));
  __ Mrs(x4, NZCV);

  __ Cmp(x20, Operand(x26, ASR, 3));
  __ Mrs(x5, NZCV);

  __ Cmp(w27, Operand(w22, ROR, 28));
  __ Mrs(x6, NZCV);

  __ Cmp(x20, Operand(x21, ROR, 31));
  __ Mrs(x7, NZCV);
  END();

  RUN();

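  // Each operand pair is constructed so that the shifted right-hand side
  // equals the left-hand side (for example, 0x78000000 << 1 == 0xf0000000),
  // so every comparison sets Z and C.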
  ASSERT_EQUAL_32(ZCFlag, w0);
  ASSERT_EQUAL_32(ZCFlag, w1);
  ASSERT_EQUAL_32(ZCFlag, w2);
  ASSERT_EQUAL_32(ZCFlag, w3);
  ASSERT_EQUAL_32(ZCFlag, w4);
  ASSERT_EQUAL_32(ZCFlag, w5);
  ASSERT_EQUAL_32(ZCFlag, w6);
  ASSERT_EQUAL_32(ZCFlag, w7);

  TEARDOWN();
}


TEST(cmp_extend) {
  SETUP();

  START();
  __ Mov(w20, 0x2);
  __ Mov(w21, 0x1);
  __ Mov(x22, 0xffffffffffffffff);
  __ Mov(x23, 0xff);
  __ Mov(x24, 0xfffffffffffffffe);
  __ Mov(x25, 0xffff);
  __ Mov(x26, 0xffffffff);

  __ Cmp(w20, Operand(w21, LSL, 1));
  __ Mrs(x0, NZCV);

  __ Cmp(x22, Operand(x23, SXTB, 0));
  __ Mrs(x1, NZCV);

  __ Cmp(x24, Operand(x23, SXTB, 1));
  __ Mrs(x2, NZCV);

  __ Cmp(x24, Operand(x23, UXTB, 1));
  __ Mrs(x3, NZCV);

  __ Cmp(w22, Operand(w25, UXTH));
  __ Mrs(x4, NZCV);

  __ Cmp(x22, Operand(x25, SXTH));
  __ Mrs(x5, NZCV);

  __ Cmp(x22, Operand(x26, UXTW));
  __ Mrs(x6, NZCV);

  __ Cmp(x24, Operand(x26, SXTW, 1));
  __ Mrs(x7, NZCV);
  END();

  RUN();

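  // Where the extended operand differs from the left-hand side, as with x24
  // compared to (0xff, UXTB, 1) == 0x1fe, the subtraction is negative with
  // no borrow, so N and C are set.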
  ASSERT_EQUAL_32(ZCFlag, w0);
  ASSERT_EQUAL_32(ZCFlag, w1);
  ASSERT_EQUAL_32(ZCFlag, w2);
  ASSERT_EQUAL_32(NCFlag, w3);
  ASSERT_EQUAL_32(NCFlag, w4);
  ASSERT_EQUAL_32(ZCFlag, w5);
  ASSERT_EQUAL_32(NCFlag, w6);
  ASSERT_EQUAL_32(ZCFlag, w7);

  TEARDOWN();
}


TEST(ccmp) {
  SETUP();

  START();
  __ Mov(w16, 0);
  __ Mov(w17, 1);
  __ Cmp(w16, w16);
  __ Ccmp(w16, w17, NCFlag, eq);
  __ Mrs(x0, NZCV);

  __ Cmp(w16, w16);
  __ Ccmp(w16, w17, NCFlag, ne);
  __ Mrs(x1, NZCV);

  __ Cmp(x16, x16);
  __ Ccmn(x16, 2, NZCVFlag, eq);
  __ Mrs(x2, NZCV);

  __ Cmp(x16, x16);
  __ Ccmn(x16, 2, NZCVFlag, ne);
  __ Mrs(x3, NZCV);

  // The MacroAssembler does not allow al as a condition.
  {
    ExactAssemblyScope scope(&masm, kInstructionSize);
    __ ccmp(x16, x16, NZCVFlag, al);
  }
  __ Mrs(x4, NZCV);

  // The MacroAssembler does not allow nv as a condition.
  {
    ExactAssemblyScope scope(&masm, kInstructionSize);
    __ ccmp(x16, x16, NZCVFlag, nv);
  }
  __ Mrs(x5, NZCV);

  END();

  RUN();

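  // Ccmp performs the comparison if the condition holds and sets the flags
  // from it; otherwise it sets the flags to the supplied NZCV immediate. The
  // first Ccmp takes the eq path and compares 0 with 1, setting N; the
  // second takes the immediate, NCFlag.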
  ASSERT_EQUAL_32(NFlag, w0);
  ASSERT_EQUAL_32(NCFlag, w1);
  ASSERT_EQUAL_32(NoFlag, w2);
  ASSERT_EQUAL_32(NZCVFlag, w3);
  ASSERT_EQUAL_32(ZCFlag, w4);
  ASSERT_EQUAL_32(ZCFlag, w5);

  TEARDOWN();
}


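// Ccmp and Ccmn only encode a 5-bit immediate, so the MacroAssembler is
// expected to materialise these wide values in a scratch register first.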
TEST(ccmp_wide_imm) {
  SETUP();

  START();
  __ Mov(w20, 0);

  __ Cmp(w20, Operand(w20));
  __ Ccmp(w20, Operand(0x12345678), NZCVFlag, eq);
  __ Mrs(x0, NZCV);

  __ Cmp(w20, Operand(w20));
  __ Ccmp(x20, Operand(0xffffffffffffffff), NZCVFlag, eq);
  __ Mrs(x1, NZCV);
  END();

  RUN();

  ASSERT_EQUAL_32(NFlag, w0);
  ASSERT_EQUAL_32(NoFlag, w1);

  TEARDOWN();
}


TEST(ccmp_shift_extend) {
  SETUP();

  START();
  __ Mov(w20, 0x2);
  __ Mov(w21, 0x1);
  __ Mov(x22, 0xffffffffffffffff);
  __ Mov(x23, 0xff);
  __ Mov(x24, 0xfffffffffffffffe);

  __ Cmp(w20, Operand(w20));
  __ Ccmp(w20, Operand(w21, LSL, 1), NZCVFlag, eq);
  __ Mrs(x0, NZCV);

  __ Cmp(w20, Operand(w20));
  __ Ccmp(x22, Operand(x23, SXTB, 0), NZCVFlag, eq);
  __ Mrs(x1, NZCV);

  __ Cmp(w20, Operand(w20));
  __ Ccmp(x24, Operand(x23, SXTB, 1), NZCVFlag, eq);
  __ Mrs(x2, NZCV);

  __ Cmp(w20, Operand(w20));
  __ Ccmp(x24, Operand(x23, UXTB, 1), NZCVFlag, eq);
  __ Mrs(x3, NZCV);

  __ Cmp(w20, Operand(w20));
  __ Ccmp(x24, Operand(x23, UXTB, 1), NZCVFlag, ne);
  __ Mrs(x4, NZCV);
  END();

  RUN();

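  // For the first four cases the condition (eq) holds, so the flags come
  // from the comparison itself; in the last case ne fails and the flags are
  // set directly to the NZCV immediate.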
  ASSERT_EQUAL_32(ZCFlag, w0);
  ASSERT_EQUAL_32(ZCFlag, w1);
  ASSERT_EQUAL_32(ZCFlag, w2);
  ASSERT_EQUAL_32(NCFlag, w3);
  ASSERT_EQUAL_32(NZCVFlag, w4);

  TEARDOWN();
}


TEST(csel_reg) {
  SETUP();

  START();
  __ Mov(x16, 0);
  __ Mov(x24, 0x0000000f0000000f);
  __ Mov(x25, 0x0000001f0000001f);

  __ Cmp(w16, Operand(0));
  __ Csel(w0, w24, w25, eq);
  __ Csel(w1, w24, w25, ne);
  __ Csinc(w2, w24, w25, mi);
  __ Csinc(w3, w24, w25, pl);

  // The MacroAssembler does not allow al or nv as a condition.
  {
    ExactAssemblyScope scope(&masm, 2 * kInstructionSize);
    __ csel(w13, w24, w25, al);
    __ csel(x14, x24, x25, nv);
  }

  __ Cmp(x16, Operand(1));
  __ Csinv(x4, x24, x25, gt);
  __ Csinv(x5, x24, x25, le);
  __ Csneg(x6, x24, x25, hs);
  __ Csneg(x7, x24, x25, lo);

  __ Cset(w8, ne);
  __ Csetm(w9, ne);
  __ Cinc(x10, x25, ne);
  __ Cinv(x11, x24, ne);
  __ Cneg(x12, x24, ne);

  // The MacroAssembler does not allow al or nv as a condition.
  {
    ExactAssemblyScope scope(&masm, 2 * kInstructionSize);
    __ csel(w15, w24, w25, al);
    __ csel(x17, x24, x25, nv);
  }

  END();

  RUN();

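  // Cset, Csetm, Cinc, Cinv and Cneg are aliases of Csinc, Csinv and Csneg
  // with the condition inverted. After Cmp(x16, 1) compares 0 with 1, ne
  // holds, so for example Cinc(x10, x25, ne) yields x25 + 1.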
  ASSERT_EQUAL_64(0x0000000f, x0);
  ASSERT_EQUAL_64(0x0000001f, x1);
  ASSERT_EQUAL_64(0x00000020, x2);
  ASSERT_EQUAL_64(0x0000000f, x3);
  ASSERT_EQUAL_64(0xffffffe0ffffffe0, x4);
  ASSERT_EQUAL_64(0x0000000f0000000f, x5);
  ASSERT_EQUAL_64(0xffffffe0ffffffe1, x6);
  ASSERT_EQUAL_64(0x0000000f0000000f, x7);
  ASSERT_EQUAL_64(0x00000001, x8);
  ASSERT_EQUAL_64(0xffffffff, x9);
  ASSERT_EQUAL_64(0x0000001f00000020, x10);
  ASSERT_EQUAL_64(0xfffffff0fffffff0, x11);
  ASSERT_EQUAL_64(0xfffffff0fffffff1, x12);
  ASSERT_EQUAL_64(0x0000000f, x13);
  ASSERT_EQUAL_64(0x0000000f0000000f, x14);
  ASSERT_EQUAL_64(0x0000000f, x15);
  ASSERT_EQUAL_64(0x0000000f0000000f, x17);

  TEARDOWN();
}


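// Csel has no immediate form, so the MacroAssembler must synthesise each
// immediate operand, typically by moving it into a scratch register.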
TEST(csel_imm) {
  SETUP();

  int values[] = {-123, -2, -1, 0, 1, 2, 123};
  int n_values = sizeof(values) / sizeof(values[0]);

  for (int i = 0; i < n_values; i++) {
    for (int j = 0; j < n_values; j++) {
      int left = values[i];
      int right = values[j];

      START();
      __ Mov(x10, 0);
      __ Cmp(x10, 0);
      __ Csel(w0, left, right, eq);
      __ Csel(w1, left, right, ne);
      __ Csel(x2, left, right, eq);
      __ Csel(x3, left, right, ne);

      END();

      RUN();

      ASSERT_EQUAL_32(left, w0);
      ASSERT_EQUAL_32(right, w1);
      ASSERT_EQUAL_64(left, x2);
      ASSERT_EQUAL_64(right, x3);
    }
  }

  TEARDOWN();
}


TEST(csel_mixed) {
  SETUP();

  START();
  __ Mov(x18, 0);
  __ Mov(x19, 0x80000000);
  __ Mov(x20, 0x8000000000000000);

  __ Cmp(x18, Operand(0));
  __ Csel(w0, w19, -2, ne);
  __ Csel(w1, w19, -1, ne);
  __ Csel(w2, w19, 0, ne);
  __ Csel(w3, w19, 1, ne);
  __ Csel(w4, w19, 2, ne);
  __ Csel(w5, w19, Operand(w19, ASR, 31), ne);
  __ Csel(w6, w19, Operand(w19, ROR, 1), ne);
  __ Csel(w7, w19, 3, eq);

  __ Csel(x8, x20, -2, ne);
  __ Csel(x9, x20, -1, ne);
  __ Csel(x10, x20, 0, ne);
  __ Csel(x11, x20, 1, ne);
  __ Csel(x12, x20, 2, ne);
  __ Csel(x13, x20, Operand(x20, ASR, 63), ne);
  __ Csel(x14, x20, Operand(x20, ROR, 1), ne);
  __ Csel(x15, x20, 3, eq);

  END();

  RUN();

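  // x18 is zero, so ne is false and each Csel selects its second operand;
  // w5, for example, receives w19 ASR 31 = 0xffffffff.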
  ASSERT_EQUAL_32(-2, w0);
  ASSERT_EQUAL_32(-1, w1);
  ASSERT_EQUAL_32(0, w2);
  ASSERT_EQUAL_32(1, w3);
  ASSERT_EQUAL_32(2, w4);
  ASSERT_EQUAL_32(-1, w5);
  ASSERT_EQUAL_32(0x40000000, w6);
  ASSERT_EQUAL_32(0x80000000, w7);

  ASSERT_EQUAL_64(-2, x8);
  ASSERT_EQUAL_64(-1, x9);
  ASSERT_EQUAL_64(0, x10);
  ASSERT_EQUAL_64(1, x11);
  ASSERT_EQUAL_64(2, x12);
  ASSERT_EQUAL_64(-1, x13);
  ASSERT_EQUAL_64(0x4000000000000000, x14);
  ASSERT_EQUAL_64(0x8000000000000000, x15);

  TEARDOWN();
}


TEST(lslv) {
  SETUP();

  uint64_t value = 0x0123456789abcdef;
  int shift[] = {1, 3, 5, 9, 17, 33};

  START();
  __ Mov(x0, value);
  __ Mov(w1, shift[0]);
  __ Mov(w2, shift[1]);
  __ Mov(w3, shift[2]);
  __ Mov(w4, shift[3]);
  __ Mov(w5, shift[4]);
  __ Mov(w6, shift[5]);

  // The MacroAssembler does not allow zr as an argument.
  {
    ExactAssemblyScope scope(&masm, kInstructionSize);
    __ lslv(x0, x0, xzr);
  }

  __ Lsl(x16, x0, x1);
  __ Lsl(x17, x0, x2);
  __ Lsl(x18, x0, x3);
  __ Lsl(x19, x0, x4);
  __ Lsl(x20, x0, x5);
  __ Lsl(x21, x0, x6);

  __ Lsl(w22, w0, w1);
  __ Lsl(w23, w0, w2);
  __ Lsl(w24, w0, w3);
  __ Lsl(w25, w0, w4);
  __ Lsl(w26, w0, w5);
  __ Lsl(w27, w0, w6);
  END();

  RUN();

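  // Variable shifts use only the low bits of the shift register: the amount
  // is taken modulo the register size, which is why the expected values mask
  // the shift with 63 (X registers) or 31 (W registers).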
  ASSERT_EQUAL_64(value, x0);
  ASSERT_EQUAL_64(value << (shift[0] & 63), x16);
  ASSERT_EQUAL_64(value << (shift[1] & 63), x17);
  ASSERT_EQUAL_64(value << (shift[2] & 63), x18);
  ASSERT_EQUAL_64(value << (shift[3] & 63), x19);
  ASSERT_EQUAL_64(value << (shift[4] & 63), x20);
  ASSERT_EQUAL_64(value << (shift[5] & 63), x21);
  ASSERT_EQUAL_32(value << (shift[0] & 31), w22);
  ASSERT_EQUAL_32(value << (shift[1] & 31), w23);
  ASSERT_EQUAL_32(value << (shift[2] & 31), w24);
  ASSERT_EQUAL_32(value << (shift[3] & 31), w25);
  ASSERT_EQUAL_32(value << (shift[4] & 31), w26);
  ASSERT_EQUAL_32(value << (shift[5] & 31), w27);

  TEARDOWN();
}


TEST(lsrv) {
  SETUP();

  uint64_t value = 0x0123456789abcdef;
  int shift[] = {1, 3, 5, 9, 17, 33};

  START();
  __ Mov(x0, value);
  __ Mov(w1, shift[0]);
  __ Mov(w2, shift[1]);
  __ Mov(w3, shift[2]);
  __ Mov(w4, shift[3]);
  __ Mov(w5, shift[4]);
  __ Mov(w6, shift[5]);

  // The MacroAssembler does not allow zr as an argument.
  {
    ExactAssemblyScope scope(&masm, kInstructionSize);
    __ lsrv(x0, x0, xzr);
  }

  __ Lsr(x16, x0, x1);
  __ Lsr(x17, x0, x2);
  __ Lsr(x18, x0, x3);
  __ Lsr(x19, x0, x4);
  __ Lsr(x20, x0, x5);
  __ Lsr(x21, x0, x6);

  __ Lsr(w22, w0, w1);
  __ Lsr(w23, w0, w2);
  __ Lsr(w24, w0, w3);
  __ Lsr(w25, w0, w4);
  __ Lsr(w26, w0, w5);
  __ Lsr(w27, w0, w6);
  END();

  RUN();

  ASSERT_EQUAL_64(value, x0);
  ASSERT_EQUAL_64(value >> (shift[0] & 63), x16);
  ASSERT_EQUAL_64(value >> (shift[1] & 63), x17);
  ASSERT_EQUAL_64(value >> (shift[2] & 63), x18);
  ASSERT_EQUAL_64(value >> (shift[3] & 63), x19);
  ASSERT_EQUAL_64(value >> (shift[4] & 63), x20);
  ASSERT_EQUAL_64(value >> (shift[5] & 63), x21);

  value &= 0xffffffff;
  ASSERT_EQUAL_32(value >> (shift[0] & 31), w22);
  ASSERT_EQUAL_32(value >> (shift[1] & 31), w23);
  ASSERT_EQUAL_32(value >> (shift[2] & 31), w24);
  ASSERT_EQUAL_32(value >> (shift[3] & 31), w25);
  ASSERT_EQUAL_32(value >> (shift[4] & 31), w26);
  ASSERT_EQUAL_32(value >> (shift[5] & 31), w27);

  TEARDOWN();
}


TEST(asrv) {
  SETUP();

  int64_t value = 0xfedcba98fedcba98;
  int shift[] = {1, 3, 5, 9, 17, 33};

  START();
  __ Mov(x0, value);
  __ Mov(w1, shift[0]);
  __ Mov(w2, shift[1]);
  __ Mov(w3, shift[2]);
  __ Mov(w4, shift[3]);
  __ Mov(w5, shift[4]);
  __ Mov(w6, shift[5]);

  // The MacroAssembler does not allow zr as an argument.
  {
    ExactAssemblyScope scope(&masm, kInstructionSize);
    __ asrv(x0, x0, xzr);
  }

  __ Asr(x16, x0, x1);
  __ Asr(x17, x0, x2);
  __ Asr(x18, x0, x3);
  __ Asr(x19, x0, x4);
  __ Asr(x20, x0, x5);
  __ Asr(x21, x0, x6);

  __ Asr(w22, w0, w1);
  __ Asr(w23, w0, w2);
  __ Asr(w24, w0, w3);
  __ Asr(w25, w0, w4);
  __ Asr(w26, w0, w5);
  __ Asr(w27, w0, w6);
  END();

  RUN();

  ASSERT_EQUAL_64(value, x0);
  ASSERT_EQUAL_64(value >> (shift[0] & 63), x16);
  ASSERT_EQUAL_64(value >> (shift[1] & 63), x17);
  ASSERT_EQUAL_64(value >> (shift[2] & 63), x18);
  ASSERT_EQUAL_64(value >> (shift[3] & 63), x19);
  ASSERT_EQUAL_64(value >> (shift[4] & 63), x20);
  ASSERT_EQUAL_64(value >> (shift[5] & 63), x21);

  int32_t value32 = static_cast<int32_t>(value & 0xffffffff);
  ASSERT_EQUAL_32(value32 >> (shift[0] & 31), w22);
  ASSERT_EQUAL_32(value32 >> (shift[1] & 31), w23);
  ASSERT_EQUAL_32(value32 >> (shift[2] & 31), w24);
  ASSERT_EQUAL_32(value32 >> (shift[3] & 31), w25);
  ASSERT_EQUAL_32(value32 >> (shift[4] & 31), w26);
  ASSERT_EQUAL_32(value32 >> (shift[5] & 31), w27);

  TEARDOWN();
}


TEST(rorv) {
  SETUP();

  uint64_t value = 0x0123456789abcdef;
  int shift[] = {4, 8, 12, 16, 24, 36};

  START();
  __ Mov(x0, value);
  __ Mov(w1, shift[0]);
  __ Mov(w2, shift[1]);
  __ Mov(w3, shift[2]);
  __ Mov(w4, shift[3]);
  __ Mov(w5, shift[4]);
  __ Mov(w6, shift[5]);

  // The MacroAssembler does not allow zr as an argument.
  {
    ExactAssemblyScope scope(&masm, kInstructionSize);
    __ rorv(x0, x0, xzr);
  }

  __ Ror(x16, x0, x1);
  __ Ror(x17, x0, x2);
  __ Ror(x18, x0, x3);
  __ Ror(x19, x0, x4);
  __ Ror(x20, x0, x5);
  __ Ror(x21, x0, x6);

  __ Ror(w22, w0, w1);
  __ Ror(w23, w0, w2);
  __ Ror(w24, w0, w3);
  __ Ror(w25, w0, w4);
  __ Ror(w26, w0, w5);
  __ Ror(w27, w0, w6);
  END();

  RUN();

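  // Ror rotates right, so rotating 0x0123456789abcdef by 4 moves the bottom
  // nibble to the top: 0xf0123456789abcde.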
  ASSERT_EQUAL_64(value, x0);
  ASSERT_EQUAL_64(0xf0123456789abcde, x16);
  ASSERT_EQUAL_64(0xef0123456789abcd, x17);
  ASSERT_EQUAL_64(0xdef0123456789abc, x18);
  ASSERT_EQUAL_64(0xcdef0123456789ab, x19);
  ASSERT_EQUAL_64(0xabcdef0123456789, x20);
  ASSERT_EQUAL_64(0x789abcdef0123456, x21);
  ASSERT_EQUAL_32(0xf89abcde, w22);
  ASSERT_EQUAL_32(0xef89abcd, w23);
  ASSERT_EQUAL_32(0xdef89abc, w24);
  ASSERT_EQUAL_32(0xcdef89ab, w25);
  ASSERT_EQUAL_32(0xabcdef89, w26);
  ASSERT_EQUAL_32(0xf89abcde, w27);

  TEARDOWN();
}


TEST(bfm) {
  SETUP();

  START();
  __ Mov(x1, 0x0123456789abcdef);

  __ Mov(x10, 0x8888888888888888);
  __ Mov(x11, 0x8888888888888888);
  __ Mov(x12, 0x8888888888888888);
  __ Mov(x13, 0x8888888888888888);
  __ Mov(x14, 0xffffffffffffffff);
  __ Mov(w20, 0x88888888);
  __ Mov(w21, 0x88888888);

  __ Bfm(x10, x1, 16, 31);
  __ Bfm(x11, x1, 32, 15);

  __ Bfm(w20, w1, 16, 23);
  __ Bfm(w21, w1, 24, 15);

  // Aliases.
  __ Bfi(x12, x1, 16, 8);
  __ Bfxil(x13, x1, 16, 8);
  __ Bfc(x14, 16, 8);
  END();

  RUN();


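  // With r <= s, Bfm(x10, x1, 16, 31) copies bits [31:16] of x1 (0x89ab)
  // into bits [15:0] of x10; with r > s, Bfm(x11, x1, 32, 15) inserts bits
  // [15:0] of x1 at bit position 64 - 32 = 32. Other bits are unchanged.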
  ASSERT_EQUAL_64(0x88888888888889ab, x10);
  ASSERT_EQUAL_64(0x8888cdef88888888, x11);

  ASSERT_EQUAL_32(0x888888ab, w20);
  ASSERT_EQUAL_32(0x88cdef88, w21);

  ASSERT_EQUAL_64(0x8888888888ef8888, x12);
  ASSERT_EQUAL_64(0x88888888888888ab, x13);
  ASSERT_EQUAL_64(0xffffffffff00ffff, x14);

  TEARDOWN();
}


TEST(sbfm) {
  SETUP();

  START();
  __ Mov(x1, 0x0123456789abcdef);
  __ Mov(x2, 0xfedcba9876543210);

  __ Sbfm(x10, x1, 16, 31);
  __ Sbfm(x11, x1, 32, 15);
  __ Sbfm(x12, x1, 32, 47);
  __ Sbfm(x13, x1, 48, 35);

  __ Sbfm(w14, w1, 16, 23);
  __ Sbfm(w15, w1, 24, 15);
  __ Sbfm(w16, w2, 16, 23);
  __ Sbfm(w17, w2, 24, 15);

  // Aliases.
  __ Asr(x18, x1, 32);
  __ Asr(x19, x2, 32);
  __ Sbfiz(x20, x1, 8, 16);
  __ Sbfiz(x21, x2, 8, 16);
  __ Sbfx(x22, x1, 8, 16);
  __ Sbfx(x23, x2, 8, 16);
  __ Sxtb(x24, w1);
  __ Sxtb(x25, x2);
  __ Sxth(x26, w1);
  __ Sxth(x27, x2);
  __ Sxtw(x28, w1);
  __ Sxtw(x29, x2);
  END();

  RUN();


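  // Sbfx(x22, x1, 8, 16) extracts bits [23:8] of x1 (0xabcd) and
  // sign-extends them, giving 0xffffffffffffabcd.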
  ASSERT_EQUAL_64(0xffffffffffff89ab, x10);
  ASSERT_EQUAL_64(0xffffcdef00000000, x11);
  ASSERT_EQUAL_64(0x0000000000004567, x12);
  ASSERT_EQUAL_64(0x000789abcdef0000, x13);

  ASSERT_EQUAL_32(0xffffffab, w14);
  ASSERT_EQUAL_32(0xffcdef00, w15);
  ASSERT_EQUAL_32(0x00000054, w16);
  ASSERT_EQUAL_32(0x00321000, w17);

  ASSERT_EQUAL_64(0x0000000001234567, x18);
  ASSERT_EQUAL_64(0xfffffffffedcba98, x19);
  ASSERT_EQUAL_64(0xffffffffffcdef00, x20);
  ASSERT_EQUAL_64(0x0000000000321000, x21);
  ASSERT_EQUAL_64(0xffffffffffffabcd, x22);
  ASSERT_EQUAL_64(0x0000000000005432, x23);
  ASSERT_EQUAL_64(0xffffffffffffffef, x24);
  ASSERT_EQUAL_64(0x0000000000000010, x25);
  ASSERT_EQUAL_64(0xffffffffffffcdef, x26);
  ASSERT_EQUAL_64(0x0000000000003210, x27);
  ASSERT_EQUAL_64(0xffffffff89abcdef, x28);
  ASSERT_EQUAL_64(0x0000000076543210, x29);

  TEARDOWN();
}


TEST(ubfm) {
  SETUP();

  START();
  __ Mov(x1, 0x0123456789abcdef);
  __ Mov(x2, 0xfedcba9876543210);

  __ Mov(x10, 0x8888888888888888);
  __ Mov(x11, 0x8888888888888888);

  __ Ubfm(x10, x1, 16, 31);
  __ Ubfm(x11, x1, 32, 15);
  __ Ubfm(x12, x1, 32, 47);
  __ Ubfm(x13, x1, 48, 35);

  __ Ubfm(w25, w1, 16, 23);
  __ Ubfm(w26, w1, 24, 15);
  __ Ubfm(w27, w2, 16, 23);
  __ Ubfm(w28, w2, 24, 15);

  // Aliases.
  __ Lsl(x15, x1, 63);
  __ Lsl(x16, x1, 0);
  __ Lsr(x17, x1, 32);
  __ Ubfiz(x18, x1, 8, 16);
  __ Ubfx(x19, x1, 8, 16);
  __ Uxtb(x20, x1);
  __ Uxth(x21, x1);
  __ Uxtw(x22, x1);
  END();

  RUN();

  ASSERT_EQUAL_64(0x00000000000089ab, x10);
  ASSERT_EQUAL_64(0x0000cdef00000000, x11);
  ASSERT_EQUAL_64(0x0000000000004567, x12);
  ASSERT_EQUAL_64(0x000789abcdef0000, x13);

  ASSERT_EQUAL_32(0x000000ab, w25);
  ASSERT_EQUAL_32(0x00cdef00, w26);
  ASSERT_EQUAL_32(0x00000054, w27);
  ASSERT_EQUAL_32(0x00321000, w28);

  ASSERT_EQUAL_64(0x8000000000000000, x15);
  ASSERT_EQUAL_64(0x0123456789abcdef, x16);
  ASSERT_EQUAL_64(0x0000000001234567, x17);
  ASSERT_EQUAL_64(0x0000000000cdef00, x18);
  ASSERT_EQUAL_64(0x000000000000abcd, x19);
  ASSERT_EQUAL_64(0x00000000000000ef, x20);
  ASSERT_EQUAL_64(0x000000000000cdef, x21);
  ASSERT_EQUAL_64(0x0000000089abcdef, x22);

  TEARDOWN();
}


TEST(extr) {
  SETUP();

  START();
  __ Mov(x1, 0x0123456789abcdef);
  __ Mov(x2, 0xfedcba9876543210);

  __ Extr(w10, w1, w2, 0);
  __ Extr(w11, w1, w2, 1);
  __ Extr(x12, x2, x1, 2);

  __ Ror(w13, w1, 0);
  __ Ror(w14, w2, 17);
  __ Ror(w15, w1, 31);
  __ Ror(x18, x2, 0);
  __ Ror(x19, x2, 1);
  __ Ror(x20, x1, 63);
  END();

  RUN();

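  // Extr(rd, rn, rm, lsb) extracts a register-sized field starting at bit
  // lsb of the concatenation rn:rm; for example w11 = (w1:w2) >> 1 =
  // 0xbb2a1908. Ror with an immediate rotation is Extr with both sources
  // equal.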
  ASSERT_EQUAL_64(0x76543210, x10);
  ASSERT_EQUAL_64(0xbb2a1908, x11);
  ASSERT_EQUAL_64(0x0048d159e26af37b, x12);
  ASSERT_EQUAL_64(0x89abcdef, x13);
  ASSERT_EQUAL_64(0x19083b2a, x14);
  ASSERT_EQUAL_64(0x13579bdf, x15);
  ASSERT_EQUAL_64(0xfedcba9876543210, x18);
  ASSERT_EQUAL_64(0x7f6e5d4c3b2a1908, x19);
  ASSERT_EQUAL_64(0x02468acf13579bde, x20);

  TEARDOWN();
}


TEST(fmov_imm) {
  SETUP_WITH_FEATURES(CPUFeatures::kFP, CPUFeatures::kFPHalf);

  START();
  __ Fmov(s1, 255.0);
  __ Fmov(d2, 12.34567);
  __ Fmov(s3, 0.0);
  __ Fmov(d4, 0.0);
  __ Fmov(s5, kFP32PositiveInfinity);
  __ Fmov(d6, kFP64NegativeInfinity);
  __ Fmov(h7, RawbitsToFloat16(0x6400U));
  __ Fmov(h8, kFP16PositiveInfinity);
  __ Fmov(s11, 1.0);
  __ Fmov(h12, RawbitsToFloat16(0x7BFF));
  __ Fmov(h13, RawbitsToFloat16(0x57F2));
  __ Fmov(d22, -13.0);
  __ Fmov(h23, RawbitsToFloat16(0xC500U));
  __ Fmov(h24, Float16(-5.0));
  __ Fmov(h25, Float16(2049.0));
  __ Fmov(h21, RawbitsToFloat16(0x6404U));
  __ Fmov(h26, RawbitsToFloat16(0x0U));
  __ Fmov(h27, RawbitsToFloat16(0x7e00U));
  END();
#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
  RUN();

  ASSERT_EQUAL_FP32(255.0, s1);
  ASSERT_EQUAL_FP64(12.34567, d2);
  ASSERT_EQUAL_FP32(0.0, s3);
  ASSERT_EQUAL_FP64(0.0, d4);
  ASSERT_EQUAL_FP32(kFP32PositiveInfinity, s5);
  ASSERT_EQUAL_FP64(kFP64NegativeInfinity, d6);
  ASSERT_EQUAL_FP16(RawbitsToFloat16(0x6400U), h7);
  ASSERT_EQUAL_FP16(kFP16PositiveInfinity, h8);
  ASSERT_EQUAL_FP32(1.0, s11);
  ASSERT_EQUAL_FP16(RawbitsToFloat16(0x7BFF), h12);
  ASSERT_EQUAL_FP16(RawbitsToFloat16(0x57F2U), h13);
  ASSERT_EQUAL_FP16(RawbitsToFloat16(0x6404), h21);
  ASSERT_EQUAL_FP64(-13.0, d22);
  ASSERT_EQUAL_FP16(Float16(-5.0), h23);
  ASSERT_EQUAL_FP16(RawbitsToFloat16(0xC500), h24);
  // 2049 is not representable in FP16; it rounds to 2048 (0x6800).
  ASSERT_EQUAL_FP16(RawbitsToFloat16(0x6800), h25);
  ASSERT_EQUAL_FP16(kFP16PositiveZero, h26);
  // NaN check.
  ASSERT_EQUAL_FP16(RawbitsToFloat16(0x7e00), h27);
#endif

  TEARDOWN();
}


TEST(fmov_vec_imm) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();

  __ Fmov(v0.V2S(), 20.0);
  __ Fmov(v1.V4S(), 1024.0);

  __ Fmov(v2.V4H(), RawbitsToFloat16(0xC500U));
  __ Fmov(v3.V8H(), RawbitsToFloat16(0x4A80U));

  END();
#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
  RUN();

  ASSERT_EQUAL_64(0x41A0000041A00000, d0);
  ASSERT_EQUAL_128(0x4480000044800000, 0x4480000044800000, q1);
  ASSERT_EQUAL_64(0xC500C500C500C500, d2);
  ASSERT_EQUAL_128(0x4A804A804A804A80, 0x4A804A804A804A80, q3);
#endif

  TEARDOWN();
}


TEST(fmov_reg) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kFPHalf);

  START();

  __ Fmov(h3, RawbitsToFloat16(0xCA80U));
  __ Fmov(h7, h3);
  __ Fmov(h8, -5.0);
  __ Fmov(w3, h8);
  __ Fmov(h9, w3);
  __ Fmov(h8, Float16(1024.0));
  __ Fmov(x4, h8);
  __ Fmov(h10, x4);
  __ Fmov(s20, 1.0);
  __ Fmov(w10, s20);
  __ Fmov(s30, w10);
  __ Fmov(s5, s20);
  __ Fmov(d1, -13.0);
  __ Fmov(x1, d1);
  __ Fmov(d2, x1);
  __ Fmov(d4, d1);
  __ Fmov(d6, RawbitsToDouble(0x0123456789abcdef));
  __ Fmov(s6, s6);
  __ Fmov(d0, 0.0);
  __ Fmov(v0.D(), 1, x1);
  __ Fmov(x2, v0.D(), 1);

  END();
#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
  RUN();

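  // Fmov between general-purpose and FP/SIMD registers moves raw bits, so w3
  // holds the FP16 encoding of -5.0 (0xc500).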
  ASSERT_EQUAL_FP16(RawbitsToFloat16(0xCA80U), h7);
  ASSERT_EQUAL_FP16(RawbitsToFloat16(0xC500U), h9);
  ASSERT_EQUAL_32(0x0000C500, w3);
  ASSERT_EQUAL_64(0x0000000000006400, x4);
  ASSERT_EQUAL_FP16(RawbitsToFloat16(0x6400), h10);
  ASSERT_EQUAL_32(FloatToRawbits(1.0), w10);
  ASSERT_EQUAL_FP32(1.0, s30);
  ASSERT_EQUAL_FP32(1.0, s5);
  ASSERT_EQUAL_64(DoubleToRawbits(-13.0), x1);
  ASSERT_EQUAL_FP64(-13.0, d2);
  ASSERT_EQUAL_FP64(-13.0, d4);
  ASSERT_EQUAL_FP32(RawbitsToFloat(0x89abcdef), s6);
  ASSERT_EQUAL_128(DoubleToRawbits(-13.0), 0x0000000000000000, q0);
  ASSERT_EQUAL_64(DoubleToRawbits(-13.0), x2);
#endif

  TEARDOWN();
}


TEST(fadd) {
  SETUP_WITH_FEATURES(CPUFeatures::kFP);

  START();
  __ Fmov(s14, -0.0f);
  __ Fmov(s15, kFP32PositiveInfinity);
  __ Fmov(s16, kFP32NegativeInfinity);
  __ Fmov(s17, 3.25f);
  __ Fmov(s18, 1.0f);
  __ Fmov(s19, 0.0f);

  __ Fmov(d26, -0.0);
  __ Fmov(d27, kFP64PositiveInfinity);
  __ Fmov(d28, kFP64NegativeInfinity);
  __ Fmov(d29, 0.0);
  __ Fmov(d30, -2.0);
  __ Fmov(d31, 2.25);

  __ Fadd(s0, s17, s18);
  __ Fadd(s1, s18, s19);
  __ Fadd(s2, s14, s18);
  __ Fadd(s3, s15, s18);
  __ Fadd(s4, s16, s18);
  __ Fadd(s5, s15, s16);
  __ Fadd(s6, s16, s15);

  __ Fadd(d7, d30, d31);
  __ Fadd(d8, d29, d31);
  __ Fadd(d9, d26, d31);
  __ Fadd(d10, d27, d31);
  __ Fadd(d11, d28, d31);
  __ Fadd(d12, d27, d28);
  __ Fadd(d13, d28, d27);
  END();

  RUN();

  ASSERT_EQUAL_FP32(4.25, s0);
  ASSERT_EQUAL_FP32(1.0, s1);
  ASSERT_EQUAL_FP32(1.0, s2);
  ASSERT_EQUAL_FP32(kFP32PositiveInfinity, s3);
  ASSERT_EQUAL_FP32(kFP32NegativeInfinity, s4);
  ASSERT_EQUAL_FP32(kFP32DefaultNaN, s5);
  ASSERT_EQUAL_FP32(kFP32DefaultNaN, s6);
  ASSERT_EQUAL_FP64(0.25, d7);
  ASSERT_EQUAL_FP64(2.25, d8);
  ASSERT_EQUAL_FP64(2.25, d9);
  ASSERT_EQUAL_FP64(kFP64PositiveInfinity, d10);
  ASSERT_EQUAL_FP64(kFP64NegativeInfinity, d11);
  ASSERT_EQUAL_FP64(kFP64DefaultNaN, d12);
  ASSERT_EQUAL_FP64(kFP64DefaultNaN, d13);

  TEARDOWN();
}


TEST(fadd_h) {
  SETUP_WITH_FEATURES(CPUFeatures::kFP, CPUFeatures::kFPHalf);

  START();
  __ Fmov(h14, -0.0f);
  __ Fmov(h15, kFP16PositiveInfinity);
  __ Fmov(h16, kFP16NegativeInfinity);
  __ Fmov(h17, 3.25f);
  __ Fmov(h18, 1.0);
  __ Fmov(h19, 0.0f);
  __ Fmov(h20, 5.0f);

  __ Fadd(h0, h17, h18);
  __ Fadd(h1, h18, h19);
  __ Fadd(h2, h14, h18);
  __ Fadd(h3, h15, h18);
  __ Fadd(h4, h16, h18);
  __ Fadd(h5, h15, h16);
  __ Fadd(h6, h16, h15);
  __ Fadd(h7, h20, h20);
  END();

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
  RUN();

  ASSERT_EQUAL_FP16(Float16(4.25), h0);
  ASSERT_EQUAL_FP16(Float16(1.0), h1);
  ASSERT_EQUAL_FP16(Float16(1.0), h2);
  ASSERT_EQUAL_FP16(kFP16PositiveInfinity, h3);
  ASSERT_EQUAL_FP16(kFP16NegativeInfinity, h4);
  ASSERT_EQUAL_FP16(kFP16DefaultNaN, h5);
  ASSERT_EQUAL_FP16(kFP16DefaultNaN, h6);
  ASSERT_EQUAL_FP16(Float16(10.0), h7);
#endif

  TEARDOWN();
}


TEST(fadd_h_neon) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();
  __ Fmov(v0.V4H(), 24.0);
  __ Fmov(v1.V4H(), 1024.0);
  __ Fmov(v2.V8H(), 5.5);
  __ Fmov(v3.V8H(), 2048.0);
  __ Fmov(v4.V8H(), kFP16PositiveInfinity);
  __ Fmov(v5.V8H(), kFP16NegativeInfinity);
  __ Fmov(v6.V4H(), RawbitsToFloat16(0x7c2f));
  __ Fmov(v7.V8H(), RawbitsToFloat16(0xfe0f));

  __ Fadd(v8.V4H(), v1.V4H(), v0.V4H());
  __ Fadd(v9.V8H(), v3.V8H(), v2.V8H());
  __ Fadd(v10.V4H(), v4.V4H(), v3.V4H());

  __ Fadd(v11.V4H(), v6.V4H(), v1.V4H());
  __ Fadd(v12.V4H(), v7.V4H(), v7.V4H());

  END();

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
  RUN();

  ASSERT_EQUAL_128(0x0000000000000000, 0x6418641864186418, q8);
  // 2053.5 is not representable in FP16; it rounds to 2054 (0x6803).
  ASSERT_EQUAL_128(0x6803680368036803, 0x6803680368036803, q9);

  // Note: NaN cases are tested here because vector operations aren't covered
  // by process_nans_half and we don't have traces from half-precision-enabled
  // hardware.
  // Infinity plus a finite value is infinity.
  ASSERT_EQUAL_128(0x0000000000000000, 0x7c007c007c007c00, q10);
  // A signalling NaN input is quietened.
  ASSERT_EQUAL_128(0x0000000000000000, 0x7e2f7e2f7e2f7e2f, q11);
  // A quiet NaN input propagates unchanged.
  ASSERT_EQUAL_128(0x0000000000000000, 0xfe0ffe0ffe0ffe0f, q12);
#endif

  TEARDOWN();
}


TEST(fsub) {
  SETUP_WITH_FEATURES(CPUFeatures::kFP);

  START();
  __ Fmov(s14, -0.0f);
  __ Fmov(s15, kFP32PositiveInfinity);
  __ Fmov(s16, kFP32NegativeInfinity);
  __ Fmov(s17, 3.25f);
  __ Fmov(s18, 1.0f);
  __ Fmov(s19, 0.0f);

  __ Fmov(d26, -0.0);
  __ Fmov(d27, kFP64PositiveInfinity);
  __ Fmov(d28, kFP64NegativeInfinity);
  __ Fmov(d29, 0.0);
  __ Fmov(d30, -2.0);
  __ Fmov(d31, 2.25);

  __ Fsub(s0, s17, s18);
  __ Fsub(s1, s18, s19);
  __ Fsub(s2, s14, s18);
  __ Fsub(s3, s18, s15);
  __ Fsub(s4, s18, s16);
  __ Fsub(s5, s15, s15);
  __ Fsub(s6, s16, s16);

  __ Fsub(d7, d30, d31);
  __ Fsub(d8, d29, d31);
  __ Fsub(d9, d26, d31);
  __ Fsub(d10, d31, d27);
  __ Fsub(d11, d31, d28);
  __ Fsub(d12, d27, d27);
  __ Fsub(d13, d28, d28);
  END();

  RUN();

  ASSERT_EQUAL_FP32(2.25, s0);
  ASSERT_EQUAL_FP32(1.0, s1);
  ASSERT_EQUAL_FP32(-1.0, s2);
  ASSERT_EQUAL_FP32(kFP32NegativeInfinity, s3);
  ASSERT_EQUAL_FP32(kFP32PositiveInfinity, s4);
  ASSERT_EQUAL_FP32(kFP32DefaultNaN, s5);
  ASSERT_EQUAL_FP32(kFP32DefaultNaN, s6);
  ASSERT_EQUAL_FP64(-4.25, d7);
  ASSERT_EQUAL_FP64(-2.25, d8);
  ASSERT_EQUAL_FP64(-2.25, d9);
  ASSERT_EQUAL_FP64(kFP64NegativeInfinity, d10);
  ASSERT_EQUAL_FP64(kFP64PositiveInfinity, d11);
  ASSERT_EQUAL_FP64(kFP64DefaultNaN, d12);
  ASSERT_EQUAL_FP64(kFP64DefaultNaN, d13);

  TEARDOWN();
}


TEST(fsub_h) {
  SETUP_WITH_FEATURES(CPUFeatures::kFP, CPUFeatures::kFPHalf);

  START();
  __ Fmov(h14, -0.0f);
  __ Fmov(h15, kFP16PositiveInfinity);
  __ Fmov(h16, kFP16NegativeInfinity);
  __ Fmov(h17, 3.25f);
  __ Fmov(h18, 1.0f);
  __ Fmov(h19, 0.0f);

  __ Fsub(h0, h17, h18);
  __ Fsub(h1, h18, h19);
  __ Fsub(h2, h14, h18);
  __ Fsub(h3, h18, h15);
  __ Fsub(h4, h18, h16);
  __ Fsub(h5, h15, h15);
  __ Fsub(h6, h16, h16);
  END();

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
  RUN();

  ASSERT_EQUAL_FP16(Float16(2.25), h0);
  ASSERT_EQUAL_FP16(Float16(1.0), h1);
  ASSERT_EQUAL_FP16(Float16(-1.0), h2);
  ASSERT_EQUAL_FP16(kFP16NegativeInfinity, h3);
  ASSERT_EQUAL_FP16(kFP16PositiveInfinity, h4);
  ASSERT_EQUAL_FP16(kFP16DefaultNaN, h5);
  ASSERT_EQUAL_FP16(kFP16DefaultNaN, h6);
#endif

  TEARDOWN();
}


TEST(fsub_h_neon) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();
  __ Fmov(v0.V4H(), 24.0);
  __ Fmov(v1.V4H(), 1024.0);
  __ Fmov(v2.V8H(), 5.5);
  __ Fmov(v3.V8H(), 2048.0);
  __ Fmov(v4.V4H(), kFP16PositiveInfinity);
  __ Fmov(v5.V4H(), kFP16NegativeInfinity);
  __ Fmov(v6.V4H(), RawbitsToFloat16(0x7c22));
  __ Fmov(v7.V8H(), RawbitsToFloat16(0xfe02));

  __ Fsub(v0.V4H(), v1.V4H(), v0.V4H());
  __ Fsub(v8.V8H(), v3.V8H(), v2.V8H());
  __ Fsub(v9.V4H(), v4.V4H(), v3.V4H());
  __ Fsub(v10.V4H(), v0.V4H(), v1.V4H());

  __ Fsub(v11.V4H(), v6.V4H(), v2.V4H());
  __ Fsub(v12.V4H(), v7.V4H(), v7.V4H());
  END();

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
  RUN();

  ASSERT_EQUAL_128(0x0000000000000000, 0x63d063d063d063d0, q0);
  // 2042.5 is not representable in FP16; it rounds to 2042 (0x67fa).
  ASSERT_EQUAL_128(0x67fa67fa67fa67fa, 0x67fa67fa67fa67fa, q8);

  // Note: NaN cases are tested here because vector operations aren't covered
  // by process_nans_half and we don't have traces from half-precision-enabled
  // hardware.
  // Infinity minus a finite value is infinity.
  ASSERT_EQUAL_128(0x0000000000000000, 0x7c007c007c007c00, q9);
  ASSERT_EQUAL_128(0x0000000000000000, 0xce00ce00ce00ce00, q10);
  // A signalling NaN input is quietened.
  ASSERT_EQUAL_128(0x0000000000000000, 0x7e227e227e227e22, q11);
  // A quiet NaN input propagates unchanged.
  ASSERT_EQUAL_128(0x0000000000000000, 0xfe02fe02fe02fe02, q12);
#endif

  TEARDOWN();
}


TEST(fmul) {
  SETUP_WITH_FEATURES(CPUFeatures::kFP);

  START();
  __ Fmov(s14, -0.0f);
  __ Fmov(s15, kFP32PositiveInfinity);
  __ Fmov(s16, kFP32NegativeInfinity);
  __ Fmov(s17, 3.25f);
  __ Fmov(s18, 2.0f);
  __ Fmov(s19, 0.0f);
  __ Fmov(s20, -2.0f);

  __ Fmov(d26, -0.0);
  __ Fmov(d27, kFP64PositiveInfinity);
  __ Fmov(d28, kFP64NegativeInfinity);
  __ Fmov(d29, 0.0);
  __ Fmov(d30, -2.0);
  __ Fmov(d31, 2.25);

  __ Fmul(s0, s17, s18);
  __ Fmul(s1, s18, s19);
  __ Fmul(s2, s14, s14);
  __ Fmul(s3, s15, s20);
  __ Fmul(s4, s16, s20);
  __ Fmul(s5, s15, s19);
  __ Fmul(s6, s19, s16);

  __ Fmul(d7, d30, d31);
  __ Fmul(d8, d29, d31);
  __ Fmul(d9, d26, d26);
  __ Fmul(d10, d27, d30);
  __ Fmul(d11, d28, d30);
  __ Fmul(d12, d27, d29);
  __ Fmul(d13, d29, d28);
  END();

  RUN();

  ASSERT_EQUAL_FP32(6.5, s0);
  ASSERT_EQUAL_FP32(0.0, s1);
  ASSERT_EQUAL_FP32(0.0, s2);
  ASSERT_EQUAL_FP32(kFP32NegativeInfinity, s3);
  ASSERT_EQUAL_FP32(kFP32PositiveInfinity, s4);
  ASSERT_EQUAL_FP32(kFP32DefaultNaN, s5);
  ASSERT_EQUAL_FP32(kFP32DefaultNaN, s6);
  ASSERT_EQUAL_FP64(-4.5, d7);
  ASSERT_EQUAL_FP64(0.0, d8);
  ASSERT_EQUAL_FP64(0.0, d9);
  ASSERT_EQUAL_FP64(kFP64NegativeInfinity, d10);
  ASSERT_EQUAL_FP64(kFP64PositiveInfinity, d11);
  ASSERT_EQUAL_FP64(kFP64DefaultNaN, d12);
  ASSERT_EQUAL_FP64(kFP64DefaultNaN, d13);

  TEARDOWN();
}


TEST(fmul_h) {
  SETUP_WITH_FEATURES(CPUFeatures::kFP, CPUFeatures::kFPHalf);

  START();
  __ Fmov(h14, -0.0f);
  __ Fmov(h15, kFP16PositiveInfinity);
  __ Fmov(h16, kFP16NegativeInfinity);
  __ Fmov(h17, 3.25f);
  __ Fmov(h18, 2.0f);
  __ Fmov(h19, 0.0f);
  __ Fmov(h20, -2.0f);

  __ Fmul(h0, h17, h18);
  __ Fmul(h1, h18, h19);
  __ Fmul(h2, h14, h14);
  __ Fmul(h3, h15, h20);
  __ Fmul(h4, h16, h20);
  __ Fmul(h5, h15, h19);
  __ Fmul(h6, h19, h16);
  END();

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
  RUN();

  ASSERT_EQUAL_FP16(Float16(6.5), h0);
  ASSERT_EQUAL_FP16(Float16(0.0), h1);
  ASSERT_EQUAL_FP16(Float16(0.0), h2);
  ASSERT_EQUAL_FP16(kFP16NegativeInfinity, h3);
  ASSERT_EQUAL_FP16(kFP16PositiveInfinity, h4);
  ASSERT_EQUAL_FP16(kFP16DefaultNaN, h5);
  ASSERT_EQUAL_FP16(kFP16DefaultNaN, h6);
#endif  // VIXL_INCLUDE_SIMULATOR_AARCH64

  TEARDOWN();
}


TEST(fmul_h_neon) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();
  __ Fmov(v0.V4H(), 24.0);
  __ Fmov(v1.V4H(), -2.0);
  __ Fmov(v2.V8H(), 5.5);
  __ Fmov(v3.V8H(), 0.5);
  __ Fmov(v4.V4H(), kFP16PositiveInfinity);
  __ Fmov(v5.V4H(), kFP16NegativeInfinity);

  __ Fmul(v6.V4H(), v1.V4H(), v0.V4H());
  __ Fmul(v7.V8H(), v3.V8H(), v2.V8H());
  __ Fmul(v8.V4H(), v4.V4H(), v3.V4H());
  __ Fmul(v9.V4H(), v0.V4H(), v1.V4H());
  __ Fmul(v10.V4H(), v5.V4H(), v0.V4H());
  END();

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
  RUN();

  ASSERT_EQUAL_128(0x0000000000000000, 0xd200d200d200d200, q6);
  ASSERT_EQUAL_128(0x4180418041804180, 0x4180418041804180, q7);
  ASSERT_EQUAL_128(0x0000000000000000, 0x7c007c007c007c00, q8);
  ASSERT_EQUAL_128(0x0000000000000000, 0xd200d200d200d200, q9);
  ASSERT_EQUAL_128(0x0000000000000000, 0xfc00fc00fc00fc00, q10);
#endif  // VIXL_INCLUDE_SIMULATOR_AARCH64

  TEARDOWN();
}


TEST(fnmul_h) {
  SETUP_WITH_FEATURES(CPUFeatures::kFP, CPUFeatures::kFPHalf);

  START();
  __ Fmov(h14, -0.0f);
  __ Fmov(h15, kFP16PositiveInfinity);
  __ Fmov(h16, kFP16NegativeInfinity);
  __ Fmov(h17, 3.25f);
  __ Fmov(h18, 2.0f);
  __ Fmov(h19, 0.0f);
  __ Fmov(h20, -2.0f);

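  // Fnmul negates the result of the multiplication, so the default NaN
  // produced by multiplying an infinity by zero comes out with its sign bit
  // flipped (0xfe00 rather than 0x7e00); hence the raw-bits expectations
  // below instead of kFP16DefaultNaN.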
  __ Fnmul(h0, h17, h18);
  __ Fnmul(h1, h18, h19);
  __ Fnmul(h2, h14, h14);
  __ Fnmul(h3, h15, h20);
  __ Fnmul(h4, h16, h20);
  __ Fnmul(h5, h15, h19);
  __ Fnmul(h6, h19, h16);
  END();

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
  RUN();

  ASSERT_EQUAL_FP16(Float16(-6.5), h0);
  ASSERT_EQUAL_FP16(Float16(-0.0), h1);
  ASSERT_EQUAL_FP16(Float16(-0.0), h2);
  ASSERT_EQUAL_FP16(kFP16PositiveInfinity, h3);
  ASSERT_EQUAL_FP16(kFP16NegativeInfinity, h4);
  ASSERT_EQUAL_FP16(RawbitsToFloat16(0xfe00), h5);
  ASSERT_EQUAL_FP16(RawbitsToFloat16(0xfe00), h6);
#endif  // VIXL_INCLUDE_SIMULATOR_AARCH64

  TEARDOWN();
}


static void FmaddFmsubHelper(double n,
                             double m,
                             double a,
                             double fmadd,
                             double fmsub,
                             double fnmadd,
                             double fnmsub) {
  SETUP_WITH_FEATURES(CPUFeatures::kFP);

  START();

  __ Fmov(d0, n);
  __ Fmov(d1, m);
  __ Fmov(d2, a);
  __ Fmadd(d28, d0, d1, d2);
  __ Fmsub(d29, d0, d1, d2);
  __ Fnmadd(d30, d0, d1, d2);
  __ Fnmsub(d31, d0, d1, d2);

  END();
  RUN();

  ASSERT_EQUAL_FP64(fmadd, d28);
  ASSERT_EQUAL_FP64(fmsub, d29);
  ASSERT_EQUAL_FP64(fnmadd, d30);
  ASSERT_EQUAL_FP64(fnmsub, d31);

  TEARDOWN();
}


TEST(fmadd_fmsub_double) {
  // It's hard to check the result of fused operations because the only way to
  // calculate the result is using fma, which is what the simulator uses anyway.
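  //
  // For reference, the expected values can be reproduced on the host with a
  // correctly-rounded fused multiply-add (e.g. std::fma):
  //   fmadd  = fma( n, m,  a)
  //   fmsub  = fma(-n, m,  a)
  //   fnmadd = fma(-n, m, -a)
  //   fnmsub = fma( n, m, -a)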

  // Basic operation.
  FmaddFmsubHelper(1.0, 2.0, 3.0, 5.0, 1.0, -5.0, -1.0);
  FmaddFmsubHelper(-1.0, 2.0, 3.0, 1.0, 5.0, -1.0, -5.0);

  // Check the sign of exact zeroes.
  //               n     m     a     fmadd fmsub fnmadd fnmsub
  FmaddFmsubHelper(-0.0, +0.0, -0.0, -0.0, +0.0, +0.0, +0.0);
  FmaddFmsubHelper(+0.0, +0.0, -0.0, +0.0, -0.0, +0.0, +0.0);
  FmaddFmsubHelper(+0.0, +0.0, +0.0, +0.0, +0.0, -0.0, +0.0);
  FmaddFmsubHelper(-0.0, +0.0, +0.0, +0.0, +0.0, +0.0, -0.0);
  FmaddFmsubHelper(+0.0, -0.0, -0.0, -0.0, +0.0, +0.0, +0.0);
  FmaddFmsubHelper(-0.0, -0.0, -0.0, +0.0, -0.0, +0.0, +0.0);
  FmaddFmsubHelper(-0.0, -0.0, +0.0, +0.0, +0.0, -0.0, +0.0);
  FmaddFmsubHelper(+0.0, -0.0, +0.0, +0.0, +0.0, +0.0, -0.0);

  // Check NaN generation.
  FmaddFmsubHelper(kFP64PositiveInfinity,
                   0.0,
                   42.0,
                   kFP64DefaultNaN,
                   kFP64DefaultNaN,
                   kFP64DefaultNaN,
                   kFP64DefaultNaN);
  FmaddFmsubHelper(0.0,
                   kFP64PositiveInfinity,
                   42.0,
                   kFP64DefaultNaN,
                   kFP64DefaultNaN,
                   kFP64DefaultNaN,
                   kFP64DefaultNaN);
  FmaddFmsubHelper(kFP64PositiveInfinity,
                   1.0,
                   kFP64PositiveInfinity,
                   kFP64PositiveInfinity,  // inf + ( inf * 1) = inf
                   kFP64DefaultNaN,        // inf + (-inf * 1) = NaN
                   kFP64NegativeInfinity,  // -inf + (-inf * 1) = -inf
                   kFP64DefaultNaN);       // -inf + ( inf * 1) = NaN
  FmaddFmsubHelper(kFP64NegativeInfinity,
                   1.0,
                   kFP64PositiveInfinity,
                   kFP64DefaultNaN,         // inf + (-inf * 1) = NaN
                   kFP64PositiveInfinity,   // inf + ( inf * 1) = inf
                   kFP64DefaultNaN,         // -inf + ( inf * 1) = NaN
                   kFP64NegativeInfinity);  // -inf + (-inf * 1) = -inf
}


static void FmaddFmsubHelper(float n,
                             float m,
                             float a,
                             float fmadd,
                             float fmsub,
                             float fnmadd,
                             float fnmsub) {
  SETUP_WITH_FEATURES(CPUFeatures::kFP);

  START();

  __ Fmov(s0, n);
  __ Fmov(s1, m);
  __ Fmov(s2, a);
  __ Fmadd(s28, s0, s1, s2);
  __ Fmsub(s29, s0, s1, s2);
  __ Fnmadd(s30, s0, s1, s2);
  __ Fnmsub(s31, s0, s1, s2);

  END();
  RUN();

  ASSERT_EQUAL_FP32(fmadd, s28);
  ASSERT_EQUAL_FP32(fmsub, s29);
  ASSERT_EQUAL_FP32(fnmadd, s30);
  ASSERT_EQUAL_FP32(fnmsub, s31);

  TEARDOWN();
}


TEST(fmadd_fmsub_float) {
  // It's hard to check the result of fused operations because the only way to
  // calculate the result is using fma, which is what the simulator uses anyway.

  // Basic operation.
  FmaddFmsubHelper(1.0f, 2.0f, 3.0f, 5.0f, 1.0f, -5.0f, -1.0f);
  FmaddFmsubHelper(-1.0f, 2.0f, 3.0f, 1.0f, 5.0f, -1.0f, -5.0f);

  // Check the sign of exact zeroes.
  //               n      m      a      fmadd  fmsub  fnmadd fnmsub
  FmaddFmsubHelper(-0.0f, +0.0f, -0.0f, -0.0f, +0.0f, +0.0f, +0.0f);
  FmaddFmsubHelper(+0.0f, +0.0f, -0.0f, +0.0f, -0.0f, +0.0f, +0.0f);
  FmaddFmsubHelper(+0.0f, +0.0f, +0.0f, +0.0f, +0.0f, -0.0f, +0.0f);
  FmaddFmsubHelper(-0.0f, +0.0f, +0.0f, +0.0f, +0.0f, +0.0f, -0.0f);
  FmaddFmsubHelper(+0.0f, -0.0f, -0.0f, -0.0f, +0.0f, +0.0f, +0.0f);
  FmaddFmsubHelper(-0.0f, -0.0f, -0.0f, +0.0f, -0.0f, +0.0f, +0.0f);
  FmaddFmsubHelper(-0.0f, -0.0f, +0.0f, +0.0f, +0.0f, -0.0f, +0.0f);
  FmaddFmsubHelper(+0.0f, -0.0f, +0.0f, +0.0f, +0.0f, +0.0f, -0.0f);

  // Check NaN generation.
  FmaddFmsubHelper(kFP32PositiveInfinity,
                   0.0f,
                   42.0f,
                   kFP32DefaultNaN,
                   kFP32DefaultNaN,
                   kFP32DefaultNaN,
                   kFP32DefaultNaN);
  FmaddFmsubHelper(0.0f,
                   kFP32PositiveInfinity,
                   42.0f,
                   kFP32DefaultNaN,
                   kFP32DefaultNaN,
                   kFP32DefaultNaN,
                   kFP32DefaultNaN);
  FmaddFmsubHelper(kFP32PositiveInfinity,
                   1.0f,
                   kFP32PositiveInfinity,
                   kFP32PositiveInfinity,  // inf + ( inf * 1) = inf
                   kFP32DefaultNaN,        // inf + (-inf * 1) = NaN
                   kFP32NegativeInfinity,  // -inf + (-inf * 1) = -inf
                   kFP32DefaultNaN);       // -inf + ( inf * 1) = NaN
  FmaddFmsubHelper(kFP32NegativeInfinity,
                   1.0f,
                   kFP32PositiveInfinity,
                   kFP32DefaultNaN,         // inf + (-inf * 1) = NaN
                   kFP32PositiveInfinity,   // inf + ( inf * 1) = inf
                   kFP32DefaultNaN,         // -inf + ( inf * 1) = NaN
                   kFP32NegativeInfinity);  // -inf + (-inf * 1) = -inf
}


TEST(fmadd_fmsub_double_nans) {
  // Make sure that NaN propagation works correctly.
  double s1 = RawbitsToDouble(0x7ff5555511111111);
  double s2 = RawbitsToDouble(0x7ff5555522222222);
  double sa = RawbitsToDouble(0x7ff55555aaaaaaaa);
  double q1 = RawbitsToDouble(0x7ffaaaaa11111111);
  double q2 = RawbitsToDouble(0x7ffaaaaa22222222);
  double qa = RawbitsToDouble(0x7ffaaaaaaaaaaaaa);
  VIXL_ASSERT(IsSignallingNaN(s1));
  VIXL_ASSERT(IsSignallingNaN(s2));
  VIXL_ASSERT(IsSignallingNaN(sa));
  VIXL_ASSERT(IsQuietNaN(q1));
  VIXL_ASSERT(IsQuietNaN(q2));
  VIXL_ASSERT(IsQuietNaN(qa));

  // The input NaNs after passing through ProcessNaN.
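  // (ProcessNaN sets the most significant fraction bit, 0x0008000000000000
  // for doubles, so each signalling NaN becomes a quiet NaN with its payload
  // preserved.)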
  double s1_proc = RawbitsToDouble(0x7ffd555511111111);
  double s2_proc = RawbitsToDouble(0x7ffd555522222222);
  double sa_proc = RawbitsToDouble(0x7ffd5555aaaaaaaa);
  double q1_proc = q1;
  double q2_proc = q2;
  double qa_proc = qa;
  VIXL_ASSERT(IsQuietNaN(s1_proc));
  VIXL_ASSERT(IsQuietNaN(s2_proc));
  VIXL_ASSERT(IsQuietNaN(sa_proc));
  VIXL_ASSERT(IsQuietNaN(q1_proc));
  VIXL_ASSERT(IsQuietNaN(q2_proc));
  VIXL_ASSERT(IsQuietNaN(qa_proc));

  // Negated NaNs, as they would be produced by ARMv8 hardware.
  double s1_proc_neg = RawbitsToDouble(0xfffd555511111111);
  double sa_proc_neg = RawbitsToDouble(0xfffd5555aaaaaaaa);
  double q1_proc_neg = RawbitsToDouble(0xfffaaaaa11111111);
  double qa_proc_neg = RawbitsToDouble(0xfffaaaaaaaaaaaaa);
  VIXL_ASSERT(IsQuietNaN(s1_proc_neg));
  VIXL_ASSERT(IsQuietNaN(sa_proc_neg));
  VIXL_ASSERT(IsQuietNaN(q1_proc_neg));
  VIXL_ASSERT(IsQuietNaN(qa_proc_neg));

  // Quiet NaNs are propagated.
  FmaddFmsubHelper(q1, 0, 0, q1_proc, q1_proc_neg, q1_proc_neg, q1_proc);
  FmaddFmsubHelper(0, q2, 0, q2_proc, q2_proc, q2_proc, q2_proc);
  FmaddFmsubHelper(0, 0, qa, qa_proc, qa_proc, qa_proc_neg, qa_proc_neg);
  FmaddFmsubHelper(q1, q2, 0, q1_proc, q1_proc_neg, q1_proc_neg, q1_proc);
  FmaddFmsubHelper(0, q2, qa, qa_proc, qa_proc, qa_proc_neg, qa_proc_neg);
  FmaddFmsubHelper(q1, 0, qa, qa_proc, qa_proc, qa_proc_neg, qa_proc_neg);
  FmaddFmsubHelper(q1, q2, qa, qa_proc, qa_proc, qa_proc_neg, qa_proc_neg);

  // Signalling NaNs are propagated, and made quiet.
  FmaddFmsubHelper(s1, 0, 0, s1_proc, s1_proc_neg, s1_proc_neg, s1_proc);
  FmaddFmsubHelper(0, s2, 0, s2_proc, s2_proc, s2_proc, s2_proc);
  FmaddFmsubHelper(0, 0, sa, sa_proc, sa_proc, sa_proc_neg, sa_proc_neg);
  FmaddFmsubHelper(s1, s2, 0, s1_proc, s1_proc_neg, s1_proc_neg, s1_proc);
  FmaddFmsubHelper(0, s2, sa, sa_proc, sa_proc, sa_proc_neg, sa_proc_neg);
  FmaddFmsubHelper(s1, 0, sa, sa_proc, sa_proc, sa_proc_neg, sa_proc_neg);
  FmaddFmsubHelper(s1, s2, sa, sa_proc, sa_proc, sa_proc_neg, sa_proc_neg);

  // Signalling NaNs take precedence over quiet NaNs.
  FmaddFmsubHelper(s1, q2, qa, s1_proc, s1_proc_neg, s1_proc_neg, s1_proc);
  FmaddFmsubHelper(q1, s2, qa, s2_proc, s2_proc, s2_proc, s2_proc);
  FmaddFmsubHelper(q1, q2, sa, sa_proc, sa_proc, sa_proc_neg, sa_proc_neg);
  FmaddFmsubHelper(s1, s2, qa, s1_proc, s1_proc_neg, s1_proc_neg, s1_proc);
  FmaddFmsubHelper(q1, s2, sa, sa_proc, sa_proc, sa_proc_neg, sa_proc_neg);
  FmaddFmsubHelper(s1, q2, sa, sa_proc, sa_proc, sa_proc_neg, sa_proc_neg);
  FmaddFmsubHelper(s1, s2, sa, sa_proc, sa_proc, sa_proc_neg, sa_proc_neg);

  // A NaN generated by the intermediate op1 * op2 overrides a quiet NaN in a.
  FmaddFmsubHelper(0,
                   kFP64PositiveInfinity,
                   qa,
                   kFP64DefaultNaN,
                   kFP64DefaultNaN,
                   kFP64DefaultNaN,
                   kFP64DefaultNaN);
  FmaddFmsubHelper(kFP64PositiveInfinity,
                   0,
                   qa,
                   kFP64DefaultNaN,
                   kFP64DefaultNaN,
                   kFP64DefaultNaN,
                   kFP64DefaultNaN);
  FmaddFmsubHelper(0,
                   kFP64NegativeInfinity,
                   qa,
                   kFP64DefaultNaN,
                   kFP64DefaultNaN,
                   kFP64DefaultNaN,
                   kFP64DefaultNaN);
  FmaddFmsubHelper(kFP64NegativeInfinity,
                   0,
                   qa,
                   kFP64DefaultNaN,
                   kFP64DefaultNaN,
                   kFP64DefaultNaN,
                   kFP64DefaultNaN);
}


TEST(fmadd_fmsub_float_nans) {
  // Make sure that NaN propagation works correctly.
  float s1 = RawbitsToFloat(0x7f951111);
  float s2 = RawbitsToFloat(0x7f952222);
  float sa = RawbitsToFloat(0x7f95aaaa);
  float q1 = RawbitsToFloat(0x7fea1111);
  float q2 = RawbitsToFloat(0x7fea2222);
  float qa = RawbitsToFloat(0x7feaaaaa);
  VIXL_ASSERT(IsSignallingNaN(s1));
  VIXL_ASSERT(IsSignallingNaN(s2));
  VIXL_ASSERT(IsSignallingNaN(sa));
  VIXL_ASSERT(IsQuietNaN(q1));
  VIXL_ASSERT(IsQuietNaN(q2));
  VIXL_ASSERT(IsQuietNaN(qa));

  // The input NaNs after passing through ProcessNaN.
  float s1_proc = RawbitsToFloat(0x7fd51111);
  float s2_proc = RawbitsToFloat(0x7fd52222);
  float sa_proc = RawbitsToFloat(0x7fd5aaaa);
  float q1_proc = q1;
  float q2_proc = q2;
  float qa_proc = qa;
  VIXL_ASSERT(IsQuietNaN(s1_proc));
  VIXL_ASSERT(IsQuietNaN(s2_proc));
  VIXL_ASSERT(IsQuietNaN(sa_proc));
  VIXL_ASSERT(IsQuietNaN(q1_proc));
  VIXL_ASSERT(IsQuietNaN(q2_proc));
  VIXL_ASSERT(IsQuietNaN(qa_proc));

  // Negated NaNs, as they would be produced by ARMv8 hardware.
  float s1_proc_neg = RawbitsToFloat(0xffd51111);
  float sa_proc_neg = RawbitsToFloat(0xffd5aaaa);
  float q1_proc_neg = RawbitsToFloat(0xffea1111);
  float qa_proc_neg = RawbitsToFloat(0xffeaaaaa);
  VIXL_ASSERT(IsQuietNaN(s1_proc_neg));
  VIXL_ASSERT(IsQuietNaN(sa_proc_neg));
  VIXL_ASSERT(IsQuietNaN(q1_proc_neg));
  VIXL_ASSERT(IsQuietNaN(qa_proc_neg));

  // Quiet NaNs are propagated.
  FmaddFmsubHelper(q1, 0, 0, q1_proc, q1_proc_neg, q1_proc_neg, q1_proc);
  FmaddFmsubHelper(0, q2, 0, q2_proc, q2_proc, q2_proc, q2_proc);
  FmaddFmsubHelper(0, 0, qa, qa_proc, qa_proc, qa_proc_neg, qa_proc_neg);
  FmaddFmsubHelper(q1, q2, 0, q1_proc, q1_proc_neg, q1_proc_neg, q1_proc);
  FmaddFmsubHelper(0, q2, qa, qa_proc, qa_proc, qa_proc_neg, qa_proc_neg);
  FmaddFmsubHelper(q1, 0, qa, qa_proc, qa_proc, qa_proc_neg, qa_proc_neg);
  FmaddFmsubHelper(q1, q2, qa, qa_proc, qa_proc, qa_proc_neg, qa_proc_neg);

  // Signalling NaNs are propagated, and made quiet.
  FmaddFmsubHelper(s1, 0, 0, s1_proc, s1_proc_neg, s1_proc_neg, s1_proc);
  FmaddFmsubHelper(0, s2, 0, s2_proc, s2_proc, s2_proc, s2_proc);
  FmaddFmsubHelper(0, 0, sa, sa_proc, sa_proc, sa_proc_neg, sa_proc_neg);
  FmaddFmsubHelper(s1, s2, 0, s1_proc, s1_proc_neg, s1_proc_neg, s1_proc);
  FmaddFmsubHelper(0, s2, sa, sa_proc, sa_proc, sa_proc_neg, sa_proc_neg);
  FmaddFmsubHelper(s1, 0, sa, sa_proc, sa_proc, sa_proc_neg, sa_proc_neg);
  FmaddFmsubHelper(s1, s2, sa, sa_proc, sa_proc, sa_proc_neg, sa_proc_neg);

  // Signalling NaNs take precedence over quiet NaNs.
  FmaddFmsubHelper(s1, q2, qa, s1_proc, s1_proc_neg, s1_proc_neg, s1_proc);
  FmaddFmsubHelper(q1, s2, qa, s2_proc, s2_proc, s2_proc, s2_proc);
  FmaddFmsubHelper(q1, q2, sa, sa_proc, sa_proc, sa_proc_neg, sa_proc_neg);
  FmaddFmsubHelper(s1, s2, qa, s1_proc, s1_proc_neg, s1_proc_neg, s1_proc);
  FmaddFmsubHelper(q1, s2, sa, sa_proc, sa_proc, sa_proc_neg, sa_proc_neg);
  FmaddFmsubHelper(s1, q2, sa, sa_proc, sa_proc, sa_proc_neg, sa_proc_neg);
  FmaddFmsubHelper(s1, s2, sa, sa_proc, sa_proc, sa_proc_neg, sa_proc_neg);

  // A NaN generated by the intermediate op1 * op2 overrides a quiet NaN in a.
  FmaddFmsubHelper(0,
                   kFP32PositiveInfinity,
                   qa,
                   kFP32DefaultNaN,
                   kFP32DefaultNaN,
                   kFP32DefaultNaN,
                   kFP32DefaultNaN);
  FmaddFmsubHelper(kFP32PositiveInfinity,
                   0,
                   qa,
                   kFP32DefaultNaN,
                   kFP32DefaultNaN,
                   kFP32DefaultNaN,
                   kFP32DefaultNaN);
  FmaddFmsubHelper(0,
                   kFP32NegativeInfinity,
                   qa,
                   kFP32DefaultNaN,
                   kFP32DefaultNaN,
                   kFP32DefaultNaN,
                   kFP32DefaultNaN);
  FmaddFmsubHelper(kFP32NegativeInfinity,
                   0,
                   qa,
                   kFP32DefaultNaN,
                   kFP32DefaultNaN,
                   kFP32DefaultNaN,
                   kFP32DefaultNaN);
}


TEST(fdiv) {
  SETUP_WITH_FEATURES(CPUFeatures::kFP);

  START();
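  // Division special cases: per IEEE 754, inf / inf and 0 / 0 are invalid
  // operations and produce the default NaN.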
  __ Fmov(s14, -0.0f);
  __ Fmov(s15, kFP32PositiveInfinity);
  __ Fmov(s16, kFP32NegativeInfinity);
  __ Fmov(s17, 3.25f);
  __ Fmov(s18, 2.0f);
  __ Fmov(s19, 2.0f);
  __ Fmov(s20, -2.0f);

  __ Fmov(d26, -0.0);
  __ Fmov(d27, kFP64PositiveInfinity);
  __ Fmov(d28, kFP64NegativeInfinity);
  __ Fmov(d29, 0.0);
  __ Fmov(d30, -2.0);
  __ Fmov(d31, 2.25);

  __ Fdiv(s0, s17, s18);
  __ Fdiv(s1, s18, s19);
  __ Fdiv(s2, s14, s18);
  __ Fdiv(s3, s18, s15);
  __ Fdiv(s4, s18, s16);
  __ Fdiv(s5, s15, s16);
  __ Fdiv(s6, s14, s14);

  __ Fdiv(d7, d31, d30);
  __ Fdiv(d8, d29, d31);
  __ Fdiv(d9, d26, d31);
  __ Fdiv(d10, d31, d27);
  __ Fdiv(d11, d31, d28);
  __ Fdiv(d12, d28, d27);
  __ Fdiv(d13, d29, d29);
  END();

  RUN();

  ASSERT_EQUAL_FP32(1.625f, s0);
  ASSERT_EQUAL_FP32(1.0f, s1);
  ASSERT_EQUAL_FP32(-0.0f, s2);
  ASSERT_EQUAL_FP32(0.0f, s3);
  ASSERT_EQUAL_FP32(-0.0f, s4);
  ASSERT_EQUAL_FP32(kFP32DefaultNaN, s5);
  ASSERT_EQUAL_FP32(kFP32DefaultNaN, s6);
  ASSERT_EQUAL_FP64(-1.125, d7);
  ASSERT_EQUAL_FP64(0.0, d8);
  ASSERT_EQUAL_FP64(-0.0, d9);
  ASSERT_EQUAL_FP64(0.0, d10);
  ASSERT_EQUAL_FP64(-0.0, d11);
  ASSERT_EQUAL_FP64(kFP64DefaultNaN, d12);
  ASSERT_EQUAL_FP64(kFP64DefaultNaN, d13);

  TEARDOWN();
}


TEST(fdiv_h) {
  SETUP_WITH_FEATURES(CPUFeatures::kFP, CPUFeatures::kFPHalf);

  START();
  __ Fmov(h14, -0.0f);
  __ Fmov(h15, kFP16PositiveInfinity);
  __ Fmov(h16, kFP16NegativeInfinity);
  __ Fmov(h17, 3.25f);
  __ Fmov(h18, 2.0f);
  __ Fmov(h19, 2.0f);
  __ Fmov(h20, -2.0f);

  __ Fdiv(h0, h17, h18);
  __ Fdiv(h1, h18, h19);
  __ Fdiv(h2, h14, h18);
  __ Fdiv(h3, h18, h15);
  __ Fdiv(h4, h18, h16);
  __ Fdiv(h5, h15, h16);
  __ Fdiv(h6, h14, h14);
  END();

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
  RUN();

  ASSERT_EQUAL_FP16(Float16(1.625f), h0);
  ASSERT_EQUAL_FP16(Float16(1.0f), h1);
  ASSERT_EQUAL_FP16(Float16(-0.0f), h2);
  ASSERT_EQUAL_FP16(Float16(0.0f), h3);
  ASSERT_EQUAL_FP16(Float16(-0.0f), h4);
  ASSERT_EQUAL_FP16(kFP16DefaultNaN, h5);
  ASSERT_EQUAL_FP16(kFP16DefaultNaN, h6);
#endif  // VIXL_INCLUDE_SIMULATOR_AARCH64

  TEARDOWN();
}


TEST(fdiv_h_neon) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();
  __ Fmov(v0.V4H(), 24.0);
  __ Fmov(v1.V4H(), -2.0);
  __ Fmov(v2.V8H(), 5.5);
  __ Fmov(v3.V8H(), 0.5);
  __ Fmov(v4.V4H(), kFP16PositiveInfinity);
  __ Fmov(v5.V4H(), kFP16NegativeInfinity);

  __ Fdiv(v6.V4H(), v0.V4H(), v1.V4H());
  __ Fdiv(v7.V8H(), v2.V8H(), v3.V8H());
  __ Fdiv(v8.V4H(), v4.V4H(), v3.V4H());
  __ Fdiv(v9.V4H(), v1.V4H(), v0.V4H());
  __ Fdiv(v10.V4H(), v5.V4H(), v0.V4H());
  END();

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
  RUN();

  ASSERT_EQUAL_128(0x0000000000000000, 0xca00ca00ca00ca00, q6);
  ASSERT_EQUAL_128(0x4980498049804980, 0x4980498049804980, q7);
  ASSERT_EQUAL_128(0x0000000000000000, 0x7c007c007c007c00, q8);
  // -0.083333... is unrepresentable in FP16:
  ASSERT_EQUAL_128(0x0000000000000000, 0xad55ad55ad55ad55, q9);
  ASSERT_EQUAL_128(0x0000000000000000, 0xfc00fc00fc00fc00, q10);
#endif  // VIXL_INCLUDE_SIMULATOR_AARCH64

  TEARDOWN();
}


static Float16 MinMaxHelper(Float16 n,
                            Float16 m,
                            bool min,
                            Float16 quiet_nan_substitute = Float16(0.0)) {
  const uint64_t kFP16QuietNaNMask = 0x0200;
  uint16_t raw_n = Float16ToRawbits(n);
  uint16_t raw_m = Float16ToRawbits(m);

  if (IsSignallingNaN(n)) {
    // n is signalling NaN.
    return RawbitsToFloat16(raw_n | kFP16QuietNaNMask);
  } else if (IsSignallingNaN(m)) {
    // m is signalling NaN.
    return RawbitsToFloat16(raw_m | kFP16QuietNaNMask);
  } else if (IsZero(quiet_nan_substitute)) {
    if (IsNaN(n)) {
      // n is quiet NaN.
      return n;
    } else if (IsNaN(m)) {
      // m is quiet NaN.
      return m;
    }
  } else {
    // Substitute n or m if one is quiet, but not both.
    if (IsNaN(n) && !IsNaN(m)) {
      // n is quiet NaN: replace with substitute.
      n = quiet_nan_substitute;
    } else if (!IsNaN(n) && IsNaN(m)) {
      // m is quiet NaN: replace with substitute.
      m = quiet_nan_substitute;
    }
  }

  uint16_t sign_mask = 0x8000;
  if (IsZero(n) && IsZero(m) && ((raw_n & sign_mask) != (raw_m & sign_mask))) {
    return min ? Float16(-0.0) : Float16(0.0);
  }

  if (FPToDouble(n, kIgnoreDefaultNaN) < FPToDouble(m, kIgnoreDefaultNaN)) {
    return min ? n : m;
  }
  return min ? m : n;
}


static float MinMaxHelper(float n,
                          float m,
                          bool min,
                          float quiet_nan_substitute = 0.0) {
  const uint64_t kFP32QuietNaNMask = 0x00400000;
  uint32_t raw_n = FloatToRawbits(n);
  uint32_t raw_m = FloatToRawbits(m);

  if (IsNaN(n) && ((raw_n & kFP32QuietNaNMask) == 0)) {
    // n is signalling NaN.
    return RawbitsToFloat(raw_n | kFP32QuietNaNMask);
  } else if (IsNaN(m) && ((raw_m & kFP32QuietNaNMask) == 0)) {
    // m is signalling NaN.
    return RawbitsToFloat(raw_m | kFP32QuietNaNMask);
  } else if (quiet_nan_substitute == 0.0) {
    if (IsNaN(n)) {
      // n is quiet NaN.
      return n;
    } else if (IsNaN(m)) {
      // m is quiet NaN.
      return m;
    }
  } else {
    // Substitute n or m if one is quiet, but not both.
    if (IsNaN(n) && !IsNaN(m)) {
      // n is quiet NaN: replace with substitute.
      n = quiet_nan_substitute;
    } else if (!IsNaN(n) && IsNaN(m)) {
      // m is quiet NaN: replace with substitute.
      m = quiet_nan_substitute;
    }
  }

  if ((n == 0.0) && (m == 0.0) && (copysign(1.0, n) != copysign(1.0, m))) {
    return min ? -0.0 : 0.0;
  }

  return min ? fminf(n, m) : fmaxf(n, m);
}


static double MinMaxHelper(double n,
                           double m,
                           bool min,
                           double quiet_nan_substitute = 0.0) {
  const uint64_t kFP64QuietNaNMask = 0x0008000000000000;
  uint64_t raw_n = DoubleToRawbits(n);
  uint64_t raw_m = DoubleToRawbits(m);

  if (IsNaN(n) && ((raw_n & kFP64QuietNaNMask) == 0)) {
    // n is signalling NaN.
    return RawbitsToDouble(raw_n | kFP64QuietNaNMask);
  } else if (IsNaN(m) && ((raw_m & kFP64QuietNaNMask) == 0)) {
    // m is signalling NaN.
    return RawbitsToDouble(raw_m | kFP64QuietNaNMask);
  } else if (quiet_nan_substitute == 0.0) {
    if (IsNaN(n)) {
      // n is quiet NaN.
      return n;
    } else if (IsNaN(m)) {
      // m is quiet NaN.
      return m;
    }
  } else {
    // Substitute n or m if one is quiet, but not both.
    if (IsNaN(n) && !IsNaN(m)) {
      // n is quiet NaN: replace with substitute.
      n = quiet_nan_substitute;
    } else if (!IsNaN(n) && IsNaN(m)) {
      // m is quiet NaN: replace with substitute.
      m = quiet_nan_substitute;
    }
  }

  if ((n == 0.0) && (m == 0.0) && (copysign(1.0, n) != copysign(1.0, m))) {
    return min ? -0.0 : 0.0;
  }

  return min ? fmin(n, m) : fmax(n, m);
}
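// Passing quiet_nan_substitute = +/-infinity models the FMinNM/FMaxNM
// semantics: a single quiet NaN operand is treated as missing data and
// replaced with the substitute, so the numeric operand wins (see the
// MinMaxHelper calls in the iteration loops below).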


static void FminFmaxDoubleHelper(
    double n, double m, double min, double max, double minnm, double maxnm) {
  SETUP_WITH_FEATURES(CPUFeatures::kFP);

  START();
  __ Fmov(d0, n);
  __ Fmov(d1, m);
  __ Fmin(d28, d0, d1);
  __ Fmax(d29, d0, d1);
  __ Fminnm(d30, d0, d1);
  __ Fmaxnm(d31, d0, d1);
  END();

  RUN();

  ASSERT_EQUAL_FP64(min, d28);
  ASSERT_EQUAL_FP64(max, d29);
  ASSERT_EQUAL_FP64(minnm, d30);
  ASSERT_EQUAL_FP64(maxnm, d31);

  TEARDOWN();
}


TEST(fmax_fmin_d) {
  // Use non-standard NaNs to check that the payload bits are preserved.
  double snan = RawbitsToDouble(0x7ff5555512345678);
  double qnan = RawbitsToDouble(0x7ffaaaaa87654321);

  double snan_processed = RawbitsToDouble(0x7ffd555512345678);
  double qnan_processed = qnan;

  VIXL_ASSERT(IsSignallingNaN(snan));
  VIXL_ASSERT(IsQuietNaN(qnan));
  VIXL_ASSERT(IsQuietNaN(snan_processed));
  VIXL_ASSERT(IsQuietNaN(qnan_processed));

  // Bootstrap tests.
  FminFmaxDoubleHelper(0, 0, 0, 0, 0, 0);
  FminFmaxDoubleHelper(0, 1, 0, 1, 0, 1);
  FminFmaxDoubleHelper(kFP64PositiveInfinity,
                       kFP64NegativeInfinity,
                       kFP64NegativeInfinity,
                       kFP64PositiveInfinity,
                       kFP64NegativeInfinity,
                       kFP64PositiveInfinity);
  FminFmaxDoubleHelper(snan,
                       0,
                       snan_processed,
                       snan_processed,
                       snan_processed,
                       snan_processed);
  FminFmaxDoubleHelper(0,
                       snan,
                       snan_processed,
                       snan_processed,
                       snan_processed,
                       snan_processed);
  FminFmaxDoubleHelper(qnan, 0, qnan_processed, qnan_processed, 0, 0);
  FminFmaxDoubleHelper(0, qnan, qnan_processed, qnan_processed, 0, 0);
  FminFmaxDoubleHelper(qnan,
                       snan,
                       snan_processed,
                       snan_processed,
                       snan_processed,
                       snan_processed);
  FminFmaxDoubleHelper(snan,
                       qnan,
                       snan_processed,
                       snan_processed,
                       snan_processed,
                       snan_processed);

  // Iterate over all combinations of inputs.
  double inputs[] = {DBL_MAX,
                     DBL_MIN,
                     1.0,
                     0.0,
                     -DBL_MAX,
                     -DBL_MIN,
                     -1.0,
                     -0.0,
                     kFP64PositiveInfinity,
                     kFP64NegativeInfinity,
                     kFP64QuietNaN,
                     kFP64SignallingNaN};

  const int count = sizeof(inputs) / sizeof(inputs[0]);

  for (int in = 0; in < count; in++) {
    double n = inputs[in];
    for (int im = 0; im < count; im++) {
      double m = inputs[im];
      FminFmaxDoubleHelper(n,
                           m,
                           MinMaxHelper(n, m, true),
                           MinMaxHelper(n, m, false),
                           MinMaxHelper(n, m, true, kFP64PositiveInfinity),
                           MinMaxHelper(n, m, false, kFP64NegativeInfinity));
    }
  }
}


static void FminFmaxFloatHelper(
    float n, float m, float min, float max, float minnm, float maxnm) {
  SETUP_WITH_FEATURES(CPUFeatures::kFP);

  START();
  __ Fmov(s0, n);
  __ Fmov(s1, m);
  __ Fmin(s28, s0, s1);
  __ Fmax(s29, s0, s1);
  __ Fminnm(s30, s0, s1);
  __ Fmaxnm(s31, s0, s1);
  END();

  RUN();

  ASSERT_EQUAL_FP32(min, s28);
  ASSERT_EQUAL_FP32(max, s29);
  ASSERT_EQUAL_FP32(minnm, s30);
  ASSERT_EQUAL_FP32(maxnm, s31);

  TEARDOWN();
}


TEST(fmax_fmin_s) {
  // Use non-standard NaNs to check that the payload bits are preserved.
  float snan = RawbitsToFloat(0x7f951234);
  float qnan = RawbitsToFloat(0x7fea8765);

  float snan_processed = RawbitsToFloat(0x7fd51234);
  float qnan_processed = qnan;

  VIXL_ASSERT(IsSignallingNaN(snan));
  VIXL_ASSERT(IsQuietNaN(qnan));
  VIXL_ASSERT(IsQuietNaN(snan_processed));
  VIXL_ASSERT(IsQuietNaN(qnan_processed));

  // Bootstrap tests.
  FminFmaxFloatHelper(0, 0, 0, 0, 0, 0);
  FminFmaxFloatHelper(0, 1, 0, 1, 0, 1);
  FminFmaxFloatHelper(kFP32PositiveInfinity,
                      kFP32NegativeInfinity,
                      kFP32NegativeInfinity,
                      kFP32PositiveInfinity,
                      kFP32NegativeInfinity,
                      kFP32PositiveInfinity);
  FminFmaxFloatHelper(snan,
                      0,
                      snan_processed,
                      snan_processed,
                      snan_processed,
                      snan_processed);
  FminFmaxFloatHelper(0,
                      snan,
                      snan_processed,
                      snan_processed,
                      snan_processed,
                      snan_processed);
  FminFmaxFloatHelper(qnan, 0, qnan_processed, qnan_processed, 0, 0);
  FminFmaxFloatHelper(0, qnan, qnan_processed, qnan_processed, 0, 0);
  FminFmaxFloatHelper(qnan,
                      snan,
                      snan_processed,
                      snan_processed,
                      snan_processed,
                      snan_processed);
  FminFmaxFloatHelper(snan,
                      qnan,
                      snan_processed,
                      snan_processed,
                      snan_processed,
                      snan_processed);

  // Iterate over all combinations of inputs.
  float inputs[] = {FLT_MAX,
                    FLT_MIN,
                    1.0,
                    0.0,
                    -FLT_MAX,
                    -FLT_MIN,
                    -1.0,
                    -0.0,
                    kFP32PositiveInfinity,
                    kFP32NegativeInfinity,
                    kFP32QuietNaN,
                    kFP32SignallingNaN};

  const int count = sizeof(inputs) / sizeof(inputs[0]);

  for (int in = 0; in < count; in++) {
    float n = inputs[in];
    for (int im = 0; im < count; im++) {
      float m = inputs[im];
      FminFmaxFloatHelper(n,
                          m,
                          MinMaxHelper(n, m, true),
                          MinMaxHelper(n, m, false),
                          MinMaxHelper(n, m, true, kFP32PositiveInfinity),
                          MinMaxHelper(n, m, false, kFP32NegativeInfinity));
    }
  }
}


static uint64_t Float16ToV4H(Float16 f) {
  uint64_t bits = static_cast<uint64_t>(Float16ToRawbits(f));
  return (bits << 48) | (bits << 32) | (bits << 16) | bits;
}
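// For example, Float16ToV4H(Float16(1.0)) is 0x3c003c003c003c00: the FP16
// encoding of 1.0 (0x3c00) replicated across four 16-bit lanes.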


static void FminFmaxFloat16Helper(Float16 n,
                                  Float16 m,
                                  Float16 min,
                                  Float16 max,
                                  Float16 minnm,
                                  Float16 maxnm) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf,
                      CPUFeatures::kFPHalf);

  START();
  __ Fmov(h0, n);
  __ Fmov(h1, m);
  __ Fmov(v0.V8H(), n);
  __ Fmov(v1.V8H(), m);
  __ Fmin(h28, h0, h1);
  __ Fmin(v2.V4H(), v0.V4H(), v1.V4H());
  __ Fmin(v3.V8H(), v0.V8H(), v1.V8H());
  __ Fmax(h29, h0, h1);
  __ Fmax(v4.V4H(), v0.V4H(), v1.V4H());
  __ Fmax(v5.V8H(), v0.V8H(), v1.V8H());
  __ Fminnm(h30, h0, h1);
  __ Fminnm(v6.V4H(), v0.V4H(), v1.V4H());
  __ Fminnm(v7.V8H(), v0.V8H(), v1.V8H());
  __ Fmaxnm(h31, h0, h1);
  __ Fmaxnm(v8.V4H(), v0.V4H(), v1.V4H());
  __ Fmaxnm(v9.V8H(), v0.V8H(), v1.V8H());
  END();

  uint64_t min_vec = Float16ToV4H(min);
  uint64_t max_vec = Float16ToV4H(max);
  uint64_t minnm_vec = Float16ToV4H(minnm);
  uint64_t maxnm_vec = Float16ToV4H(maxnm);

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
  RUN();

  ASSERT_EQUAL_FP16(min, h28);
  ASSERT_EQUAL_FP16(max, h29);
  ASSERT_EQUAL_FP16(minnm, h30);
  ASSERT_EQUAL_FP16(maxnm, h31);

  ASSERT_EQUAL_128(0, min_vec, v2);
  ASSERT_EQUAL_128(min_vec, min_vec, v3);
  ASSERT_EQUAL_128(0, max_vec, v4);
  ASSERT_EQUAL_128(max_vec, max_vec, v5);
  ASSERT_EQUAL_128(0, minnm_vec, v6);
  ASSERT_EQUAL_128(minnm_vec, minnm_vec, v7);
  ASSERT_EQUAL_128(0, maxnm_vec, v8);
  ASSERT_EQUAL_128(maxnm_vec, maxnm_vec, v9);
#else
  USE(min_vec, max_vec, minnm_vec, maxnm_vec);
#endif  // VIXL_INCLUDE_SIMULATOR_AARCH64

  TEARDOWN();
}


TEST(fmax_fmin_h) {
  // Use non-standard NaNs to check that the payload bits are preserved.
  Float16 snan = RawbitsToFloat16(0x7c12);
  Float16 qnan = RawbitsToFloat16(0x7e34);

  Float16 snan_processed = RawbitsToFloat16(0x7e12);
  Float16 qnan_processed = qnan;

  VIXL_ASSERT(IsSignallingNaN(snan));
  VIXL_ASSERT(IsQuietNaN(qnan));
  VIXL_ASSERT(IsQuietNaN(snan_processed));
  VIXL_ASSERT(IsQuietNaN(qnan_processed));

  // Bootstrap tests.
  FminFmaxFloat16Helper(Float16(0),
                        Float16(0),
                        Float16(0),
                        Float16(0),
                        Float16(0),
                        Float16(0));
  FminFmaxFloat16Helper(Float16(0),
                        Float16(1),
                        Float16(0),
                        Float16(1),
                        Float16(0),
                        Float16(1));
  FminFmaxFloat16Helper(kFP16PositiveInfinity,
                        kFP16NegativeInfinity,
                        kFP16NegativeInfinity,
                        kFP16PositiveInfinity,
                        kFP16NegativeInfinity,
                        kFP16PositiveInfinity);
  FminFmaxFloat16Helper(snan,
                        Float16(0),
                        snan_processed,
                        snan_processed,
                        snan_processed,
                        snan_processed);
  FminFmaxFloat16Helper(Float16(0),
                        snan,
                        snan_processed,
                        snan_processed,
                        snan_processed,
                        snan_processed);
  FminFmaxFloat16Helper(qnan,
                        Float16(0),
                        qnan_processed,
                        qnan_processed,
                        Float16(0),
                        Float16(0));
  FminFmaxFloat16Helper(Float16(0),
                        qnan,
                        qnan_processed,
                        qnan_processed,
                        Float16(0),
                        Float16(0));
  FminFmaxFloat16Helper(qnan,
                        snan,
                        snan_processed,
                        snan_processed,
                        snan_processed,
                        snan_processed);
  FminFmaxFloat16Helper(snan,
                        qnan,
                        snan_processed,
                        snan_processed,
                        snan_processed,
                        snan_processed);

  // Iterate over all combinations of inputs.
  Float16 inputs[] = {RawbitsToFloat16(0x7bff),
                      RawbitsToFloat16(0x0400),
                      Float16(1.0),
                      Float16(0.0),
                      RawbitsToFloat16(0xfbff),
                      RawbitsToFloat16(0x8400),
                      Float16(-1.0),
                      Float16(-0.0),
                      kFP16PositiveInfinity,
                      kFP16NegativeInfinity,
                      kFP16QuietNaN,
                      kFP16SignallingNaN};

  const int count = sizeof(inputs) / sizeof(inputs[0]);

  for (int in = 0; in < count; in++) {
    Float16 n = inputs[in];
    for (int im = 0; im < count; im++) {
      Float16 m = inputs[im];
      FminFmaxFloat16Helper(n,
                            m,
                            MinMaxHelper(n, m, true),
                            MinMaxHelper(n, m, false),
                            MinMaxHelper(n, m, true, kFP16PositiveInfinity),
                            MinMaxHelper(n, m, false, kFP16NegativeInfinity));
    }
  }
}


TEST(fccmp) {
  SETUP_WITH_FEATURES(CPUFeatures::kFP);

  START();
  __ Fmov(s16, 0.0);
  __ Fmov(s17, 0.5);
  __ Fmov(d18, -0.5);
  __ Fmov(d19, -1.0);
  __ Mov(x20, 0);
  __ Mov(x21, 0x7ff0000000000001);  // Double precision NaN.
  __ Fmov(d21, x21);
  __ Mov(w22, 0x7f800001);  // Single precision NaN.
  __ Fmov(s22, w22);

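  // Fccmp performs the FP comparison only if its condition holds; otherwise
  // it writes the supplied NZCV immediate directly. The preceding Cmp sets up
  // the flags that the condition is evaluated against.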
  __ Cmp(x20, 0);
  __ Fccmp(s16, s16, NoFlag, eq);
  __ Mrs(x0, NZCV);

  __ Cmp(x20, 0);
  __ Fccmp(s16, s16, VFlag, ne);
  __ Mrs(x1, NZCV);

  __ Cmp(x20, 0);
  __ Fccmp(s16, s17, CFlag, ge);
  __ Mrs(x2, NZCV);

  __ Cmp(x20, 0);
  __ Fccmp(s16, s17, CVFlag, lt);
  __ Mrs(x3, NZCV);

  __ Cmp(x20, 0);
  __ Fccmp(d18, d18, ZFlag, le);
  __ Mrs(x4, NZCV);

  __ Cmp(x20, 0);
  __ Fccmp(d18, d18, ZVFlag, gt);
  __ Mrs(x5, NZCV);

  __ Cmp(x20, 0);
  __ Fccmp(d18, d19, ZCVFlag, ls);
  __ Mrs(x6, NZCV);

  __ Cmp(x20, 0);
  __ Fccmp(d18, d19, NFlag, hi);
  __ Mrs(x7, NZCV);

  // The MacroAssembler does not allow al or nv as a condition.
  {
    ExactAssemblyScope scope(&masm, kInstructionSize);
    __ fccmp(s16, s16, NFlag, al);
  }
  __ Mrs(x8, NZCV);

  {
    ExactAssemblyScope scope(&masm, kInstructionSize);
    __ fccmp(d18, d18, NFlag, nv);
  }
  __ Mrs(x9, NZCV);

  __ Cmp(x20, 0);
  __ Fccmpe(s16, s16, NoFlag, eq);
  __ Mrs(x10, NZCV);

  __ Cmp(x20, 0);
  __ Fccmpe(d18, d19, ZCVFlag, ls);
  __ Mrs(x11, NZCV);

  __ Cmp(x20, 0);
  __ Fccmpe(d21, d21, NoFlag, eq);
  __ Mrs(x12, NZCV);

  __ Cmp(x20, 0);
  __ Fccmpe(s22, s22, NoFlag, eq);
  __ Mrs(x13, NZCV);
  END();

  RUN();

  ASSERT_EQUAL_32(ZCFlag, w0);
  ASSERT_EQUAL_32(VFlag, w1);
  ASSERT_EQUAL_32(NFlag, w2);
  ASSERT_EQUAL_32(CVFlag, w3);
  ASSERT_EQUAL_32(ZCFlag, w4);
  ASSERT_EQUAL_32(ZVFlag, w5);
  ASSERT_EQUAL_32(CFlag, w6);
  ASSERT_EQUAL_32(NFlag, w7);
  ASSERT_EQUAL_32(ZCFlag, w8);
  ASSERT_EQUAL_32(ZCFlag, w9);
  ASSERT_EQUAL_32(ZCFlag, w10);
  ASSERT_EQUAL_32(CFlag, w11);
  ASSERT_EQUAL_32(CVFlag, w12);
  ASSERT_EQUAL_32(CVFlag, w13);

  TEARDOWN();
}


TEST(fccmp_h) {
  SETUP_WITH_FEATURES(CPUFeatures::kFP, CPUFeatures::kFPHalf);

  START();
  __ Fmov(h16, Float16(0.0));
  __ Fmov(h17, Float16(0.5));
  __ Mov(x20, 0);
  __ Fmov(h21, kFP16DefaultNaN);

  __ Cmp(x20, 0);
  __ Fccmp(h16, h16, NoFlag, eq);
  __ Mrs(x0, NZCV);

  __ Cmp(x20, 0);
  __ Fccmp(h16, h16, VFlag, ne);
  __ Mrs(x1, NZCV);

  __ Cmp(x20, 0);
  __ Fccmp(h16, h17, CFlag, ge);
  __ Mrs(x2, NZCV);

  __ Cmp(x20, 0);
  __ Fccmp(h16, h17, CVFlag, lt);
  __ Mrs(x3, NZCV);

  // The MacroAssembler does not allow al or nv as a condition.
  {
    ExactAssemblyScope scope(&masm, kInstructionSize);
    __ fccmp(h16, h16, NFlag, al);
  }
  __ Mrs(x4, NZCV);
  {
    ExactAssemblyScope scope(&masm, kInstructionSize);
    __ fccmp(h16, h16, NFlag, nv);
  }
  __ Mrs(x5, NZCV);

  __ Cmp(x20, 0);
  __ Fccmpe(h16, h16, NoFlag, eq);
  __ Mrs(x6, NZCV);

  __ Cmp(x20, 0);
  __ Fccmpe(h16, h21, NoFlag, eq);
  __ Mrs(x7, NZCV);

  __ Cmp(x20, 0);
  __ Fccmpe(h21, h16, NoFlag, eq);
  __ Mrs(x8, NZCV);

  __ Cmp(x20, 0);
  __ Fccmpe(h21, h21, NoFlag, eq);
  __ Mrs(x9, NZCV);
  END();

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
  RUN();
  ASSERT_EQUAL_32(ZCFlag, w0);
  ASSERT_EQUAL_32(VFlag, w1);
  ASSERT_EQUAL_32(NFlag, w2);
  ASSERT_EQUAL_32(CVFlag, w3);
  ASSERT_EQUAL_32(ZCFlag, w4);
  ASSERT_EQUAL_32(ZCFlag, w5);
  ASSERT_EQUAL_32(ZCFlag, w6);
  ASSERT_EQUAL_32(CVFlag, w7);
  ASSERT_EQUAL_32(CVFlag, w8);
  ASSERT_EQUAL_32(CVFlag, w9);
#endif

  TEARDOWN();
}


TEST(fcmp) {
  SETUP_WITH_FEATURES(CPUFeatures::kFP);

  START();

  // Some of these tests require a floating-point scratch register assigned to
  // the macro assembler, but most do not.
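  // (Fcmp against a non-zero immediate has to materialise the literal in a
  // scratch FP register, which is why d0 is made available around those
  // comparisons below. Note also that unordered comparisons, i.e. those
  // involving a NaN, set the C and V flags.)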
  {
    UseScratchRegisterScope temps(&masm);
    temps.ExcludeAll();
    temps.Include(ip0, ip1);

    __ Fmov(s8, 0.0);
    __ Fmov(s9, 0.5);
    __ Mov(w18, 0x7f800001);  // Single precision NaN.
    __ Fmov(s18, w18);

    __ Fcmp(s8, s8);
    __ Mrs(x0, NZCV);
    __ Fcmp(s8, s9);
    __ Mrs(x1, NZCV);
    __ Fcmp(s9, s8);
    __ Mrs(x2, NZCV);
    __ Fcmp(s8, s18);
    __ Mrs(x3, NZCV);
    __ Fcmp(s18, s18);
    __ Mrs(x4, NZCV);
    __ Fcmp(s8, 0.0);
    __ Mrs(x5, NZCV);
    temps.Include(d0);
    __ Fcmp(s8, 255.0);
    temps.Exclude(d0);
    __ Mrs(x6, NZCV);

    __ Fmov(d19, 0.0);
    __ Fmov(d20, 0.5);
    __ Mov(x21, 0x7ff0000000000001);  // Double precision NaN.
    __ Fmov(d21, x21);

    __ Fcmp(d19, d19);
    __ Mrs(x10, NZCV);
    __ Fcmp(d19, d20);
    __ Mrs(x11, NZCV);
    __ Fcmp(d20, d19);
    __ Mrs(x12, NZCV);
    __ Fcmp(d19, d21);
    __ Mrs(x13, NZCV);
    __ Fcmp(d21, d21);
    __ Mrs(x14, NZCV);
    __ Fcmp(d19, 0.0);
    __ Mrs(x15, NZCV);
    temps.Include(d0);
    __ Fcmp(d19, 12.3456);
    temps.Exclude(d0);
    __ Mrs(x16, NZCV);

    __ Fcmpe(s8, s8);
    __ Mrs(x22, NZCV);
    __ Fcmpe(s8, 0.0);
    __ Mrs(x23, NZCV);
    __ Fcmpe(d19, d19);
    __ Mrs(x24, NZCV);
    __ Fcmpe(d19, 0.0);
    __ Mrs(x25, NZCV);
    __ Fcmpe(s18, s18);
    __ Mrs(x26, NZCV);
    __ Fcmpe(d21, d21);
    __ Mrs(x27, NZCV);
  }

  END();

  RUN();

  ASSERT_EQUAL_32(ZCFlag, w0);
  ASSERT_EQUAL_32(NFlag, w1);
  ASSERT_EQUAL_32(CFlag, w2);
  ASSERT_EQUAL_32(CVFlag, w3);
  ASSERT_EQUAL_32(CVFlag, w4);
  ASSERT_EQUAL_32(ZCFlag, w5);
  ASSERT_EQUAL_32(NFlag, w6);
  ASSERT_EQUAL_32(ZCFlag, w10);
  ASSERT_EQUAL_32(NFlag, w11);
  ASSERT_EQUAL_32(CFlag, w12);
  ASSERT_EQUAL_32(CVFlag, w13);
  ASSERT_EQUAL_32(CVFlag, w14);
  ASSERT_EQUAL_32(ZCFlag, w15);
  ASSERT_EQUAL_32(NFlag, w16);
  ASSERT_EQUAL_32(ZCFlag, w22);
  ASSERT_EQUAL_32(ZCFlag, w23);
  ASSERT_EQUAL_32(ZCFlag, w24);
  ASSERT_EQUAL_32(ZCFlag, w25);
  ASSERT_EQUAL_32(CVFlag, w26);
  ASSERT_EQUAL_32(CVFlag, w27);

  TEARDOWN();
}


TEST(fcmp_h) {
  SETUP_WITH_FEATURES(CPUFeatures::kFP, CPUFeatures::kFPHalf);

  START();

  // Some of these tests require a floating-point scratch register assigned to
  // the macro assembler, but most do not.
  {
    UseScratchRegisterScope temps(&masm);
    temps.ExcludeAll();
    temps.Include(ip0, ip1);

    __ Fmov(h8, Float16(0.0));
    __ Fmov(h9, Float16(0.5));
    __ Fmov(h18, kFP16DefaultNaN);

    __ Fcmp(h8, h8);
    __ Mrs(x0, NZCV);
    __ Fcmp(h8, h9);
    __ Mrs(x1, NZCV);
    __ Fcmp(h9, h8);
    __ Mrs(x2, NZCV);
    __ Fcmp(h8, h18);
    __ Mrs(x3, NZCV);
    __ Fcmp(h18, h18);
    __ Mrs(x4, NZCV);
    __ Fcmp(h8, 0.0);
    __ Mrs(x5, NZCV);
    temps.Include(d0);
    __ Fcmp(h8, 255.0);
    temps.Exclude(d0);
    __ Mrs(x6, NZCV);

    __ Fcmpe(h8, h8);
    __ Mrs(x22, NZCV);
    __ Fcmpe(h8, 0.0);
    __ Mrs(x23, NZCV);
    __ Fcmpe(h8, h18);
    __ Mrs(x24, NZCV);
    __ Fcmpe(h18, h8);
    __ Mrs(x25, NZCV);
    __ Fcmpe(h18, h18);
    __ Mrs(x26, NZCV);
  }

  END();

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
  RUN();
  ASSERT_EQUAL_32(ZCFlag, w0);
  ASSERT_EQUAL_32(NFlag, w1);
  ASSERT_EQUAL_32(CFlag, w2);
  ASSERT_EQUAL_32(CVFlag, w3);
  ASSERT_EQUAL_32(CVFlag, w4);
  ASSERT_EQUAL_32(ZCFlag, w5);
  ASSERT_EQUAL_32(NFlag, w6);
  ASSERT_EQUAL_32(ZCFlag, w22);
  ASSERT_EQUAL_32(ZCFlag, w23);
  ASSERT_EQUAL_32(CVFlag, w24);
  ASSERT_EQUAL_32(CVFlag, w25);
  ASSERT_EQUAL_32(CVFlag, w26);
#endif

  TEARDOWN();
}


TEST(fcsel) {
  SETUP_WITH_FEATURES(CPUFeatures::kFP);

  START();
  __ Mov(x16, 0);
  __ Fmov(s16, 1.0);
  __ Fmov(s17, 2.0);
  __ Fmov(d18, 3.0);
  __ Fmov(d19, 4.0);

  __ Cmp(x16, 0);
  __ Fcsel(s0, s16, s17, eq);
  __ Fcsel(s1, s16, s17, ne);
  __ Fcsel(d2, d18, d19, eq);
  __ Fcsel(d3, d18, d19, ne);
  // The MacroAssembler does not allow al or nv as a condition.
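  // (In A64 the nv condition, despite its name, behaves as "always", just
  // like al, so both raw fcsel instructions below select the first source
  // register.)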
  {
    ExactAssemblyScope scope(&masm, 2 * kInstructionSize);
    __ fcsel(s4, s16, s17, al);
    __ fcsel(d5, d18, d19, nv);
  }
  END();

  RUN();

  ASSERT_EQUAL_FP32(1.0, s0);
  ASSERT_EQUAL_FP32(2.0, s1);
  ASSERT_EQUAL_FP64(3.0, d2);
  ASSERT_EQUAL_FP64(4.0, d3);
  ASSERT_EQUAL_FP32(1.0, s4);
  ASSERT_EQUAL_FP64(3.0, d5);

  TEARDOWN();
}


TEST(fcsel_h) {
  SETUP_WITH_FEATURES(CPUFeatures::kFP, CPUFeatures::kFPHalf);

  START();
  __ Mov(x16, 0);
  __ Fmov(h16, Float16(1.0));
  __ Fmov(h17, Float16(2.0));

  __ Cmp(x16, 0);
  __ Fcsel(h0, h16, h17, eq);
  __ Fcsel(h1, h16, h17, ne);
  // The MacroAssembler does not allow al or nv as a condition.
  {
    ExactAssemblyScope scope(&masm, 2 * kInstructionSize);
    __ fcsel(h4, h16, h17, al);
    __ fcsel(h5, h16, h17, nv);
  }
  END();

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
  RUN();
  ASSERT_EQUAL_FP16(Float16(1.0), h0);
  ASSERT_EQUAL_FP16(Float16(2.0), h1);
  ASSERT_EQUAL_FP16(Float16(1.0), h4);
  ASSERT_EQUAL_FP16(Float16(1.0), h5);
#endif

  TEARDOWN();
}


TEST(fneg) {
  SETUP_WITH_FEATURES(CPUFeatures::kFP);

  START();
  __ Fmov(s16, 1.0);
  __ Fmov(s17, 0.0);
  __ Fmov(s18, kFP32PositiveInfinity);
  __ Fmov(d19, 1.0);
  __ Fmov(d20, 0.0);
  __ Fmov(d21, kFP64PositiveInfinity);

  __ Fneg(s0, s16);
  __ Fneg(s1, s0);
  __ Fneg(s2, s17);
  __ Fneg(s3, s2);
  __ Fneg(s4, s18);
  __ Fneg(s5, s4);
  __ Fneg(d6, d19);
  __ Fneg(d7, d6);
  __ Fneg(d8, d20);
  __ Fneg(d9, d8);
  __ Fneg(d10, d21);
  __ Fneg(d11, d10);
  END();

  RUN();

  ASSERT_EQUAL_FP32(-1.0, s0);
  ASSERT_EQUAL_FP32(1.0, s1);
  ASSERT_EQUAL_FP32(-0.0, s2);
  ASSERT_EQUAL_FP32(0.0, s3);
  ASSERT_EQUAL_FP32(kFP32NegativeInfinity, s4);
  ASSERT_EQUAL_FP32(kFP32PositiveInfinity, s5);
  ASSERT_EQUAL_FP64(-1.0, d6);
  ASSERT_EQUAL_FP64(1.0, d7);
  ASSERT_EQUAL_FP64(-0.0, d8);
  ASSERT_EQUAL_FP64(0.0, d9);
  ASSERT_EQUAL_FP64(kFP64NegativeInfinity, d10);
  ASSERT_EQUAL_FP64(kFP64PositiveInfinity, d11);

  TEARDOWN();
}


TEST(fabs) {
  SETUP_WITH_FEATURES(CPUFeatures::kFP);

  START();
  __ Fmov(s16, -1.0);
  __ Fmov(s17, -0.0);
  __ Fmov(s18, kFP32NegativeInfinity);
  __ Fmov(d19, -1.0);
  __ Fmov(d20, -0.0);
  __ Fmov(d21, kFP64NegativeInfinity);

  __ Fabs(s0, s16);
  __ Fabs(s1, s0);
  __ Fabs(s2, s17);
  __ Fabs(s3, s18);
  __ Fabs(d4, d19);
  __ Fabs(d5, d4);
  __ Fabs(d6, d20);
  __ Fabs(d7, d21);
  END();

  RUN();

  ASSERT_EQUAL_FP32(1.0, s0);
  ASSERT_EQUAL_FP32(1.0, s1);
  ASSERT_EQUAL_FP32(0.0, s2);
  ASSERT_EQUAL_FP32(kFP32PositiveInfinity, s3);
  ASSERT_EQUAL_FP64(1.0, d4);
  ASSERT_EQUAL_FP64(1.0, d5);
  ASSERT_EQUAL_FP64(0.0, d6);
  ASSERT_EQUAL_FP64(kFP64PositiveInfinity, d7);

  TEARDOWN();
}


TEST(fsqrt) {
  SETUP_WITH_FEATURES(CPUFeatures::kFP);

  START();
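  // Per IEEE 754, sqrt(-0.0) is -0.0, while the square root of any other
  // negative number is an invalid operation and produces the default NaN.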
  __ Fmov(s16, 0.0);
  __ Fmov(s17, 1.0);
  __ Fmov(s18, 0.25);
  __ Fmov(s19, 65536.0);
  __ Fmov(s20, -0.0);
  __ Fmov(s21, kFP32PositiveInfinity);
  __ Fmov(s22, -1.0);
  __ Fmov(d23, 0.0);
  __ Fmov(d24, 1.0);
  __ Fmov(d25, 0.25);
  __ Fmov(d26, 4294967296.0);
  __ Fmov(d27, -0.0);
  __ Fmov(d28, kFP64PositiveInfinity);
  __ Fmov(d29, -1.0);

  __ Fsqrt(s0, s16);
  __ Fsqrt(s1, s17);
  __ Fsqrt(s2, s18);
  __ Fsqrt(s3, s19);
  __ Fsqrt(s4, s20);
  __ Fsqrt(s5, s21);
  __ Fsqrt(s6, s22);
  __ Fsqrt(d7, d23);
  __ Fsqrt(d8, d24);
  __ Fsqrt(d9, d25);
  __ Fsqrt(d10, d26);
  __ Fsqrt(d11, d27);
  __ Fsqrt(d12, d28);
  __ Fsqrt(d13, d29);
  END();

  RUN();

  ASSERT_EQUAL_FP32(0.0, s0);
  ASSERT_EQUAL_FP32(1.0, s1);
  ASSERT_EQUAL_FP32(0.5, s2);
  ASSERT_EQUAL_FP32(256.0, s3);
  ASSERT_EQUAL_FP32(-0.0, s4);
  ASSERT_EQUAL_FP32(kFP32PositiveInfinity, s5);
  ASSERT_EQUAL_FP32(kFP32DefaultNaN, s6);
  ASSERT_EQUAL_FP64(0.0, d7);
  ASSERT_EQUAL_FP64(1.0, d8);
  ASSERT_EQUAL_FP64(0.5, d9);
  ASSERT_EQUAL_FP64(65536.0, d10);
  ASSERT_EQUAL_FP64(-0.0, d11);
  ASSERT_EQUAL_FP64(kFP64PositiveInfinity, d12);
  ASSERT_EQUAL_FP64(kFP64DefaultNaN, d13);

  TEARDOWN();
}


TEST(frinta) {
  SETUP_WITH_FEATURES(CPUFeatures::kFP);

  START();
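  // Frinta rounds to the nearest integral value, with ties rounding away
  // from zero: 2.5 -> 3.0 and -2.5 -> -3.0.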
  __ Fmov(s16, 1.0);
  __ Fmov(s17, 1.1);
  __ Fmov(s18, 1.5);
  __ Fmov(s19, 1.9);
  __ Fmov(s20, 2.5);
  __ Fmov(s21, -1.5);
  __ Fmov(s22, -2.5);
  __ Fmov(s23, kFP32PositiveInfinity);
  __ Fmov(s24, kFP32NegativeInfinity);
  __ Fmov(s25, 0.0);
  __ Fmov(s26, -0.0);
  __ Fmov(s27, -0.2);

  __ Frinta(s0, s16);
  __ Frinta(s1, s17);
  __ Frinta(s2, s18);
  __ Frinta(s3, s19);
  __ Frinta(s4, s20);
  __ Frinta(s5, s21);
  __ Frinta(s6, s22);
  __ Frinta(s7, s23);
  __ Frinta(s8, s24);
  __ Frinta(s9, s25);
  __ Frinta(s10, s26);
  __ Frinta(s11, s27);

  __ Fmov(d16, 1.0);
  __ Fmov(d17, 1.1);
  __ Fmov(d18, 1.5);
  __ Fmov(d19, 1.9);
  __ Fmov(d20, 2.5);
  __ Fmov(d21, -1.5);
  __ Fmov(d22, -2.5);
  __ Fmov(d23, kFP32PositiveInfinity);
  __ Fmov(d24, kFP32NegativeInfinity);
  __ Fmov(d25, 0.0);
  __ Fmov(d26, -0.0);
  __ Fmov(d27, -0.2);

  __ Frinta(d12, d16);
  __ Frinta(d13, d17);
  __ Frinta(d14, d18);
  __ Frinta(d15, d19);
  __ Frinta(d16, d20);
  __ Frinta(d17, d21);
  __ Frinta(d18, d22);
  __ Frinta(d19, d23);
  __ Frinta(d20, d24);
  __ Frinta(d21, d25);
  __ Frinta(d22, d26);
  __ Frinta(d23, d27);
  END();

  RUN();

  ASSERT_EQUAL_FP32(1.0, s0);
  ASSERT_EQUAL_FP32(1.0, s1);
  ASSERT_EQUAL_FP32(2.0, s2);
  ASSERT_EQUAL_FP32(2.0, s3);
  ASSERT_EQUAL_FP32(3.0, s4);
  ASSERT_EQUAL_FP32(-2.0, s5);
  ASSERT_EQUAL_FP32(-3.0, s6);
  ASSERT_EQUAL_FP32(kFP32PositiveInfinity, s7);
  ASSERT_EQUAL_FP32(kFP32NegativeInfinity, s8);
  ASSERT_EQUAL_FP32(0.0, s9);
  ASSERT_EQUAL_FP32(-0.0, s10);
  ASSERT_EQUAL_FP32(-0.0, s11);
  ASSERT_EQUAL_FP64(1.0, d12);
  ASSERT_EQUAL_FP64(1.0, d13);
  ASSERT_EQUAL_FP64(2.0, d14);
  ASSERT_EQUAL_FP64(2.0, d15);
  ASSERT_EQUAL_FP64(3.0, d16);
  ASSERT_EQUAL_FP64(-2.0, d17);
  ASSERT_EQUAL_FP64(-3.0, d18);
  ASSERT_EQUAL_FP64(kFP64PositiveInfinity, d19);
  ASSERT_EQUAL_FP64(kFP64NegativeInfinity, d20);
  ASSERT_EQUAL_FP64(0.0, d21);
  ASSERT_EQUAL_FP64(-0.0, d22);
  ASSERT_EQUAL_FP64(-0.0, d23);

  TEARDOWN();
}


TEST(frinti) {
  // VIXL only supports the round-to-nearest FPCR mode, so this test has the
  // same results as frintn.
  SETUP_WITH_FEATURES(CPUFeatures::kFP);

  START();
  __ Fmov(s16, 1.0);
  __ Fmov(s17, 1.1);
  __ Fmov(s18, 1.5);
  __ Fmov(s19, 1.9);
  __ Fmov(s20, 2.5);
  __ Fmov(s21, -1.5);
  __ Fmov(s22, -2.5);
  __ Fmov(s23, kFP32PositiveInfinity);
  __ Fmov(s24, kFP32NegativeInfinity);
  __ Fmov(s25, 0.0);
  __ Fmov(s26, -0.0);
  __ Fmov(s27, -0.2);

  __ Frinti(s0, s16);
  __ Frinti(s1, s17);
  __ Frinti(s2, s18);
  __ Frinti(s3, s19);
  __ Frinti(s4, s20);
  __ Frinti(s5, s21);
  __ Frinti(s6, s22);
  __ Frinti(s7, s23);
  __ Frinti(s8, s24);
  __ Frinti(s9, s25);
  __ Frinti(s10, s26);
  __ Frinti(s11, s27);

  __ Fmov(d16, 1.0);
  __ Fmov(d17, 1.1);
  __ Fmov(d18, 1.5);
  __ Fmov(d19, 1.9);
  __ Fmov(d20, 2.5);
  __ Fmov(d21, -1.5);
  __ Fmov(d22, -2.5);
  __ Fmov(d23, kFP32PositiveInfinity);
  __ Fmov(d24, kFP32NegativeInfinity);
  __ Fmov(d25, 0.0);
  __ Fmov(d26, -0.0);
  __ Fmov(d27, -0.2);

  __ Frinti(d12, d16);
  __ Frinti(d13, d17);
  __ Frinti(d14, d18);
  __ Frinti(d15, d19);
  __ Frinti(d16, d20);
  __ Frinti(d17, d21);
  __ Frinti(d18, d22);
  __ Frinti(d19, d23);
  __ Frinti(d20, d24);
  __ Frinti(d21, d25);
  __ Frinti(d22, d26);
  __ Frinti(d23, d27);
  END();

  RUN();

  ASSERT_EQUAL_FP32(1.0, s0);
  ASSERT_EQUAL_FP32(1.0, s1);
  ASSERT_EQUAL_FP32(2.0, s2);
  ASSERT_EQUAL_FP32(2.0, s3);
  ASSERT_EQUAL_FP32(2.0, s4);
  ASSERT_EQUAL_FP32(-2.0, s5);
  ASSERT_EQUAL_FP32(-2.0, s6);
  ASSERT_EQUAL_FP32(kFP32PositiveInfinity, s7);
  ASSERT_EQUAL_FP32(kFP32NegativeInfinity, s8);
  ASSERT_EQUAL_FP32(0.0, s9);
  ASSERT_EQUAL_FP32(-0.0, s10);
  ASSERT_EQUAL_FP32(-0.0, s11);
  ASSERT_EQUAL_FP64(1.0, d12);
  ASSERT_EQUAL_FP64(1.0, d13);
  ASSERT_EQUAL_FP64(2.0, d14);
  ASSERT_EQUAL_FP64(2.0, d15);
  ASSERT_EQUAL_FP64(2.0, d16);
  ASSERT_EQUAL_FP64(-2.0, d17);
  ASSERT_EQUAL_FP64(-2.0, d18);
  ASSERT_EQUAL_FP64(kFP64PositiveInfinity, d19);
  ASSERT_EQUAL_FP64(kFP64NegativeInfinity, d20);
  ASSERT_EQUAL_FP64(0.0, d21);
  ASSERT_EQUAL_FP64(-0.0, d22);
  ASSERT_EQUAL_FP64(-0.0, d23);

  TEARDOWN();
}


TEST(frintm) {
  SETUP_WITH_FEATURES(CPUFeatures::kFP);

  START();
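  // Frintm rounds towards minus infinity (floor): 1.9 -> 1.0 and
  // -0.2 -> -1.0.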
  __ Fmov(s16, 1.0);
  __ Fmov(s17, 1.1);
  __ Fmov(s18, 1.5);
  __ Fmov(s19, 1.9);
  __ Fmov(s20, 2.5);
  __ Fmov(s21, -1.5);
  __ Fmov(s22, -2.5);
  __ Fmov(s23, kFP32PositiveInfinity);
  __ Fmov(s24, kFP32NegativeInfinity);
  __ Fmov(s25, 0.0);
  __ Fmov(s26, -0.0);
  __ Fmov(s27, -0.2);

  __ Frintm(s0, s16);
  __ Frintm(s1, s17);
  __ Frintm(s2, s18);
  __ Frintm(s3, s19);
  __ Frintm(s4, s20);
  __ Frintm(s5, s21);
  __ Frintm(s6, s22);
  __ Frintm(s7, s23);
  __ Frintm(s8, s24);
  __ Frintm(s9, s25);
  __ Frintm(s10, s26);
  __ Frintm(s11, s27);

  __ Fmov(d16, 1.0);
  __ Fmov(d17, 1.1);
  __ Fmov(d18, 1.5);
  __ Fmov(d19, 1.9);
  __ Fmov(d20, 2.5);
  __ Fmov(d21, -1.5);
  __ Fmov(d22, -2.5);
  __ Fmov(d23, kFP32PositiveInfinity);
  __ Fmov(d24, kFP32NegativeInfinity);
  __ Fmov(d25, 0.0);
  __ Fmov(d26, -0.0);
  __ Fmov(d27, -0.2);

  __ Frintm(d12, d16);
  __ Frintm(d13, d17);
  __ Frintm(d14, d18);
  __ Frintm(d15, d19);
  __ Frintm(d16, d20);
  __ Frintm(d17, d21);
  __ Frintm(d18, d22);
  __ Frintm(d19, d23);
  __ Frintm(d20, d24);
  __ Frintm(d21, d25);
  __ Frintm(d22, d26);
  __ Frintm(d23, d27);
  END();

  RUN();

  ASSERT_EQUAL_FP32(1.0, s0);
  ASSERT_EQUAL_FP32(1.0, s1);
  ASSERT_EQUAL_FP32(1.0, s2);
  ASSERT_EQUAL_FP32(1.0, s3);
  ASSERT_EQUAL_FP32(2.0, s4);
  ASSERT_EQUAL_FP32(-2.0, s5);
  ASSERT_EQUAL_FP32(-3.0, s6);
  ASSERT_EQUAL_FP32(kFP32PositiveInfinity, s7);
  ASSERT_EQUAL_FP32(kFP32NegativeInfinity, s8);
  ASSERT_EQUAL_FP32(0.0, s9);
  ASSERT_EQUAL_FP32(-0.0, s10);
  ASSERT_EQUAL_FP32(-1.0, s11);
  ASSERT_EQUAL_FP64(1.0, d12);
  ASSERT_EQUAL_FP64(1.0, d13);
  ASSERT_EQUAL_FP64(1.0, d14);
  ASSERT_EQUAL_FP64(1.0, d15);
  ASSERT_EQUAL_FP64(2.0, d16);
  ASSERT_EQUAL_FP64(-2.0, d17);
  ASSERT_EQUAL_FP64(-3.0, d18);
  ASSERT_EQUAL_FP64(kFP64PositiveInfinity, d19);
  ASSERT_EQUAL_FP64(kFP64NegativeInfinity, d20);
  ASSERT_EQUAL_FP64(0.0, d21);
  ASSERT_EQUAL_FP64(-0.0, d22);
  ASSERT_EQUAL_FP64(-1.0, d23);

  TEARDOWN();
}


TEST(frintn) {
  SETUP_WITH_FEATURES(CPUFeatures::kFP);

  START();
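  // Frintn rounds to the nearest integral value, with ties rounding to even:
  // 1.5 -> 2.0 but 2.5 -> 2.0, and -2.5 -> -2.0.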
  __ Fmov(s16, 1.0);
  __ Fmov(s17, 1.1);
  __ Fmov(s18, 1.5);
  __ Fmov(s19, 1.9);
  __ Fmov(s20, 2.5);
  __ Fmov(s21, -1.5);
  __ Fmov(s22, -2.5);
  __ Fmov(s23, kFP32PositiveInfinity);
  __ Fmov(s24, kFP32NegativeInfinity);
  __ Fmov(s25, 0.0);
  __ Fmov(s26, -0.0);
  __ Fmov(s27, -0.2);

  __ Frintn(s0, s16);
  __ Frintn(s1, s17);
  __ Frintn(s2, s18);
  __ Frintn(s3, s19);
  __ Frintn(s4, s20);
  __ Frintn(s5, s21);
  __ Frintn(s6, s22);
  __ Frintn(s7, s23);
  __ Frintn(s8, s24);
  __ Frintn(s9, s25);
  __ Frintn(s10, s26);
  __ Frintn(s11, s27);

  __ Fmov(d16, 1.0);
  __ Fmov(d17, 1.1);
  __ Fmov(d18, 1.5);
  __ Fmov(d19, 1.9);
  __ Fmov(d20, 2.5);
  __ Fmov(d21, -1.5);
  __ Fmov(d22, -2.5);
  __ Fmov(d23, kFP32PositiveInfinity);
  __ Fmov(d24, kFP32NegativeInfinity);
  __ Fmov(d25, 0.0);
  __ Fmov(d26, -0.0);
  __ Fmov(d27, -0.2);

  __ Frintn(d12, d16);
  __ Frintn(d13, d17);
  __ Frintn(d14, d18);
  __ Frintn(d15, d19);
  __ Frintn(d16, d20);
  __ Frintn(d17, d21);
  __ Frintn(d18, d22);
  __ Frintn(d19, d23);
  __ Frintn(d20, d24);
  __ Frintn(d21, d25);
  __ Frintn(d22, d26);
  __ Frintn(d23, d27);
  END();

  RUN();

  ASSERT_EQUAL_FP32(1.0, s0);
  ASSERT_EQUAL_FP32(1.0, s1);
  ASSERT_EQUAL_FP32(2.0, s2);
  ASSERT_EQUAL_FP32(2.0, s3);
  ASSERT_EQUAL_FP32(2.0, s4);
  ASSERT_EQUAL_FP32(-2.0, s5);
  ASSERT_EQUAL_FP32(-2.0, s6);
  ASSERT_EQUAL_FP32(kFP32PositiveInfinity, s7);
  ASSERT_EQUAL_FP32(kFP32NegativeInfinity, s8);
  ASSERT_EQUAL_FP32(0.0, s9);
  ASSERT_EQUAL_FP32(-0.0, s10);
  ASSERT_EQUAL_FP32(-0.0, s11);
  ASSERT_EQUAL_FP64(1.0, d12);
  ASSERT_EQUAL_FP64(1.0, d13);
  ASSERT_EQUAL_FP64(2.0, d14);
  ASSERT_EQUAL_FP64(2.0, d15);
  ASSERT_EQUAL_FP64(2.0, d16);
  ASSERT_EQUAL_FP64(-2.0, d17);
  ASSERT_EQUAL_FP64(-2.0, d18);
  ASSERT_EQUAL_FP64(kFP64PositiveInfinity, d19);
  ASSERT_EQUAL_FP64(kFP64NegativeInfinity, d20);
  ASSERT_EQUAL_FP64(0.0, d21);
  ASSERT_EQUAL_FP64(-0.0, d22);
  ASSERT_EQUAL_FP64(-0.0, d23);

  TEARDOWN();
}
12891
12892
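// FRINTP rounds towards plus infinity (ceiling).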
TEST(frintp) {
  SETUP_WITH_FEATURES(CPUFeatures::kFP);

  START();
  __ Fmov(s16, 1.0);
  __ Fmov(s17, 1.1);
  __ Fmov(s18, 1.5);
  __ Fmov(s19, 1.9);
  __ Fmov(s20, 2.5);
  __ Fmov(s21, -1.5);
  __ Fmov(s22, -2.5);
  __ Fmov(s23, kFP32PositiveInfinity);
  __ Fmov(s24, kFP32NegativeInfinity);
  __ Fmov(s25, 0.0);
  __ Fmov(s26, -0.0);
  __ Fmov(s27, -0.2);

  __ Frintp(s0, s16);
  __ Frintp(s1, s17);
  __ Frintp(s2, s18);
  __ Frintp(s3, s19);
  __ Frintp(s4, s20);
  __ Frintp(s5, s21);
  __ Frintp(s6, s22);
  __ Frintp(s7, s23);
  __ Frintp(s8, s24);
  __ Frintp(s9, s25);
  __ Frintp(s10, s26);
  __ Frintp(s11, s27);

  __ Fmov(d16, 1.0);
  __ Fmov(d17, 1.1);
  __ Fmov(d18, 1.5);
  __ Fmov(d19, 1.9);
  __ Fmov(d20, 2.5);
  __ Fmov(d21, -1.5);
  __ Fmov(d22, -2.5);
  __ Fmov(d23, kFP32PositiveInfinity);
  __ Fmov(d24, kFP32NegativeInfinity);
  __ Fmov(d25, 0.0);
  __ Fmov(d26, -0.0);
  __ Fmov(d27, -0.2);

  __ Frintp(d12, d16);
  __ Frintp(d13, d17);
  __ Frintp(d14, d18);
  __ Frintp(d15, d19);
  __ Frintp(d16, d20);
  __ Frintp(d17, d21);
  __ Frintp(d18, d22);
  __ Frintp(d19, d23);
  __ Frintp(d20, d24);
  __ Frintp(d21, d25);
  __ Frintp(d22, d26);
  __ Frintp(d23, d27);
  END();

  RUN();

  ASSERT_EQUAL_FP32(1.0, s0);
  ASSERT_EQUAL_FP32(2.0, s1);
  ASSERT_EQUAL_FP32(2.0, s2);
  ASSERT_EQUAL_FP32(2.0, s3);
  ASSERT_EQUAL_FP32(3.0, s4);
  ASSERT_EQUAL_FP32(-1.0, s5);
  ASSERT_EQUAL_FP32(-2.0, s6);
  ASSERT_EQUAL_FP32(kFP32PositiveInfinity, s7);
  ASSERT_EQUAL_FP32(kFP32NegativeInfinity, s8);
  ASSERT_EQUAL_FP32(0.0, s9);
  ASSERT_EQUAL_FP32(-0.0, s10);
  ASSERT_EQUAL_FP32(-0.0, s11);
  ASSERT_EQUAL_FP64(1.0, d12);
  ASSERT_EQUAL_FP64(2.0, d13);
  ASSERT_EQUAL_FP64(2.0, d14);
  ASSERT_EQUAL_FP64(2.0, d15);
  ASSERT_EQUAL_FP64(3.0, d16);
  ASSERT_EQUAL_FP64(-1.0, d17);
  ASSERT_EQUAL_FP64(-2.0, d18);
  ASSERT_EQUAL_FP64(kFP64PositiveInfinity, d19);
  ASSERT_EQUAL_FP64(kFP64NegativeInfinity, d20);
  ASSERT_EQUAL_FP64(0.0, d21);
  ASSERT_EQUAL_FP64(-0.0, d22);
  ASSERT_EQUAL_FP64(-0.0, d23);

  TEARDOWN();
}


TEST(frintx) {
  // VIXL only supports the round-to-nearest FPCR mode, and it doesn't support
  // FP exceptions, so this test has the same results as frintn (and frinti).
  SETUP_WITH_FEATURES(CPUFeatures::kFP);

  START();
  __ Fmov(s16, 1.0);
  __ Fmov(s17, 1.1);
  __ Fmov(s18, 1.5);
  __ Fmov(s19, 1.9);
  __ Fmov(s20, 2.5);
  __ Fmov(s21, -1.5);
  __ Fmov(s22, -2.5);
  __ Fmov(s23, kFP32PositiveInfinity);
  __ Fmov(s24, kFP32NegativeInfinity);
  __ Fmov(s25, 0.0);
  __ Fmov(s26, -0.0);
  __ Fmov(s27, -0.2);

  __ Frintx(s0, s16);
  __ Frintx(s1, s17);
  __ Frintx(s2, s18);
  __ Frintx(s3, s19);
  __ Frintx(s4, s20);
  __ Frintx(s5, s21);
  __ Frintx(s6, s22);
  __ Frintx(s7, s23);
  __ Frintx(s8, s24);
  __ Frintx(s9, s25);
  __ Frintx(s10, s26);
  __ Frintx(s11, s27);

  __ Fmov(d16, 1.0);
  __ Fmov(d17, 1.1);
  __ Fmov(d18, 1.5);
  __ Fmov(d19, 1.9);
  __ Fmov(d20, 2.5);
  __ Fmov(d21, -1.5);
  __ Fmov(d22, -2.5);
  __ Fmov(d23, kFP32PositiveInfinity);
  __ Fmov(d24, kFP32NegativeInfinity);
  __ Fmov(d25, 0.0);
  __ Fmov(d26, -0.0);
  __ Fmov(d27, -0.2);

  __ Frintx(d12, d16);
  __ Frintx(d13, d17);
  __ Frintx(d14, d18);
  __ Frintx(d15, d19);
  __ Frintx(d16, d20);
  __ Frintx(d17, d21);
  __ Frintx(d18, d22);
  __ Frintx(d19, d23);
  __ Frintx(d20, d24);
  __ Frintx(d21, d25);
  __ Frintx(d22, d26);
  __ Frintx(d23, d27);
  END();

  RUN();

  ASSERT_EQUAL_FP32(1.0, s0);
  ASSERT_EQUAL_FP32(1.0, s1);
  ASSERT_EQUAL_FP32(2.0, s2);
  ASSERT_EQUAL_FP32(2.0, s3);
  ASSERT_EQUAL_FP32(2.0, s4);
  ASSERT_EQUAL_FP32(-2.0, s5);
  ASSERT_EQUAL_FP32(-2.0, s6);
  ASSERT_EQUAL_FP32(kFP32PositiveInfinity, s7);
  ASSERT_EQUAL_FP32(kFP32NegativeInfinity, s8);
  ASSERT_EQUAL_FP32(0.0, s9);
  ASSERT_EQUAL_FP32(-0.0, s10);
  ASSERT_EQUAL_FP32(-0.0, s11);
  ASSERT_EQUAL_FP64(1.0, d12);
  ASSERT_EQUAL_FP64(1.0, d13);
  ASSERT_EQUAL_FP64(2.0, d14);
  ASSERT_EQUAL_FP64(2.0, d15);
  ASSERT_EQUAL_FP64(2.0, d16);
  ASSERT_EQUAL_FP64(-2.0, d17);
  ASSERT_EQUAL_FP64(-2.0, d18);
  ASSERT_EQUAL_FP64(kFP64PositiveInfinity, d19);
  ASSERT_EQUAL_FP64(kFP64NegativeInfinity, d20);
  ASSERT_EQUAL_FP64(0.0, d21);
  ASSERT_EQUAL_FP64(-0.0, d22);
  ASSERT_EQUAL_FP64(-0.0, d23);

  TEARDOWN();
}


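// FRINTZ rounds towards zero (truncation).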
TEST(frintz) {
  SETUP_WITH_FEATURES(CPUFeatures::kFP);

  START();
  __ Fmov(s16, 1.0);
  __ Fmov(s17, 1.1);
  __ Fmov(s18, 1.5);
  __ Fmov(s19, 1.9);
  __ Fmov(s20, 2.5);
  __ Fmov(s21, -1.5);
  __ Fmov(s22, -2.5);
  __ Fmov(s23, kFP32PositiveInfinity);
  __ Fmov(s24, kFP32NegativeInfinity);
  __ Fmov(s25, 0.0);
  __ Fmov(s26, -0.0);

  __ Frintz(s0, s16);
  __ Frintz(s1, s17);
  __ Frintz(s2, s18);
  __ Frintz(s3, s19);
  __ Frintz(s4, s20);
  __ Frintz(s5, s21);
  __ Frintz(s6, s22);
  __ Frintz(s7, s23);
  __ Frintz(s8, s24);
  __ Frintz(s9, s25);
  __ Frintz(s10, s26);

  __ Fmov(d16, 1.0);
  __ Fmov(d17, 1.1);
  __ Fmov(d18, 1.5);
  __ Fmov(d19, 1.9);
  __ Fmov(d20, 2.5);
  __ Fmov(d21, -1.5);
  __ Fmov(d22, -2.5);
  __ Fmov(d23, kFP32PositiveInfinity);
  __ Fmov(d24, kFP32NegativeInfinity);
  __ Fmov(d25, 0.0);
  __ Fmov(d26, -0.0);

  __ Frintz(d11, d16);
  __ Frintz(d12, d17);
  __ Frintz(d13, d18);
  __ Frintz(d14, d19);
  __ Frintz(d15, d20);
  __ Frintz(d16, d21);
  __ Frintz(d17, d22);
  __ Frintz(d18, d23);
  __ Frintz(d19, d24);
  __ Frintz(d20, d25);
  __ Frintz(d21, d26);
  END();

  RUN();

  ASSERT_EQUAL_FP32(1.0, s0);
  ASSERT_EQUAL_FP32(1.0, s1);
  ASSERT_EQUAL_FP32(1.0, s2);
  ASSERT_EQUAL_FP32(1.0, s3);
  ASSERT_EQUAL_FP32(2.0, s4);
  ASSERT_EQUAL_FP32(-1.0, s5);
  ASSERT_EQUAL_FP32(-2.0, s6);
  ASSERT_EQUAL_FP32(kFP32PositiveInfinity, s7);
  ASSERT_EQUAL_FP32(kFP32NegativeInfinity, s8);
  ASSERT_EQUAL_FP32(0.0, s9);
  ASSERT_EQUAL_FP32(-0.0, s10);
  ASSERT_EQUAL_FP64(1.0, d11);
  ASSERT_EQUAL_FP64(1.0, d12);
  ASSERT_EQUAL_FP64(1.0, d13);
  ASSERT_EQUAL_FP64(1.0, d14);
  ASSERT_EQUAL_FP64(2.0, d15);
  ASSERT_EQUAL_FP64(-1.0, d16);
  ASSERT_EQUAL_FP64(-2.0, d17);
  ASSERT_EQUAL_FP64(kFP64PositiveInfinity, d18);
  ASSERT_EQUAL_FP64(kFP64NegativeInfinity, d19);
  ASSERT_EQUAL_FP64(0.0, d20);
  ASSERT_EQUAL_FP64(-0.0, d21);

  TEARDOWN();
}


TEST(fcvt_ds) {
  SETUP_WITH_FEATURES(CPUFeatures::kFP);

  START();
  __ Fmov(s16, 1.0);
  __ Fmov(s17, 1.1);
  __ Fmov(s18, 1.5);
  __ Fmov(s19, 1.9);
  __ Fmov(s20, 2.5);
  __ Fmov(s21, -1.5);
  __ Fmov(s22, -2.5);
  __ Fmov(s23, kFP32PositiveInfinity);
  __ Fmov(s24, kFP32NegativeInfinity);
  __ Fmov(s25, 0.0);
  __ Fmov(s26, -0.0);
  __ Fmov(s27, FLT_MAX);
  __ Fmov(s28, FLT_MIN);
  __ Fmov(s29, RawbitsToFloat(0x7fc12345));  // Quiet NaN.
  __ Fmov(s30, RawbitsToFloat(0x7f812345));  // Signalling NaN.

  __ Fcvt(d0, s16);
  __ Fcvt(d1, s17);
  __ Fcvt(d2, s18);
  __ Fcvt(d3, s19);
  __ Fcvt(d4, s20);
  __ Fcvt(d5, s21);
  __ Fcvt(d6, s22);
  __ Fcvt(d7, s23);
  __ Fcvt(d8, s24);
  __ Fcvt(d9, s25);
  __ Fcvt(d10, s26);
  __ Fcvt(d11, s27);
  __ Fcvt(d12, s28);
  __ Fcvt(d13, s29);
  __ Fcvt(d14, s30);
  END();

  RUN();

  ASSERT_EQUAL_FP64(1.0f, d0);
  ASSERT_EQUAL_FP64(1.1f, d1);
  ASSERT_EQUAL_FP64(1.5f, d2);
  ASSERT_EQUAL_FP64(1.9f, d3);
  ASSERT_EQUAL_FP64(2.5f, d4);
  ASSERT_EQUAL_FP64(-1.5f, d5);
  ASSERT_EQUAL_FP64(-2.5f, d6);
  ASSERT_EQUAL_FP64(kFP64PositiveInfinity, d7);
  ASSERT_EQUAL_FP64(kFP64NegativeInfinity, d8);
  ASSERT_EQUAL_FP64(0.0f, d9);
  ASSERT_EQUAL_FP64(-0.0f, d10);
  ASSERT_EQUAL_FP64(FLT_MAX, d11);
  ASSERT_EQUAL_FP64(FLT_MIN, d12);

  // Check that the NaN payload is preserved according to AArch64 conversion
  // rules:
  //  - The sign bit is preserved.
  //  - The top bit of the mantissa is forced to 1 (making it a quiet NaN).
  //  - The remaining mantissa bits are copied until they run out.
  //  - The low-order bits that haven't already been assigned are set to 0.
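  // For example, the quiet NaN 0x7fc12345 (fraction 0x412345) widens to
  // 0x7ff82468a0000000: the 23-bit float fraction is shifted left by 29 bits
  // into the 52-bit double fraction (0x412345 << 29 == 0x82468a0000000). The
  // signalling NaN 0x7f812345 gives the same result once its quiet bit has
  // been forced to 1.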
  ASSERT_EQUAL_FP64(RawbitsToDouble(0x7ff82468a0000000), d13);
  ASSERT_EQUAL_FP64(RawbitsToDouble(0x7ff82468a0000000), d14);

  TEARDOWN();
}


TEST(fcvt_sd) {
  // Test simple conversions here. Complex behaviour (such as rounding
  // specifics) is tested in the simulator tests.

  SETUP_WITH_FEATURES(CPUFeatures::kFP);

  START();
  __ Fmov(d16, 1.0);
  __ Fmov(d17, 1.1);
  __ Fmov(d18, 1.5);
  __ Fmov(d19, 1.9);
  __ Fmov(d20, 2.5);
  __ Fmov(d21, -1.5);
  __ Fmov(d22, -2.5);
  __ Fmov(d23, kFP32PositiveInfinity);
  __ Fmov(d24, kFP32NegativeInfinity);
  __ Fmov(d25, 0.0);
  __ Fmov(d26, -0.0);
  __ Fmov(d27, FLT_MAX);
  __ Fmov(d28, FLT_MIN);
  __ Fmov(d29, RawbitsToDouble(0x7ff82468a0000000));  // Quiet NaN.
  __ Fmov(d30, RawbitsToDouble(0x7ff02468a0000000));  // Signalling NaN.

  __ Fcvt(s0, d16);
  __ Fcvt(s1, d17);
  __ Fcvt(s2, d18);
  __ Fcvt(s3, d19);
  __ Fcvt(s4, d20);
  __ Fcvt(s5, d21);
  __ Fcvt(s6, d22);
  __ Fcvt(s7, d23);
  __ Fcvt(s8, d24);
  __ Fcvt(s9, d25);
  __ Fcvt(s10, d26);
  __ Fcvt(s11, d27);
  __ Fcvt(s12, d28);
  __ Fcvt(s13, d29);
  __ Fcvt(s14, d30);
  END();

  RUN();

  ASSERT_EQUAL_FP32(1.0f, s0);
  ASSERT_EQUAL_FP32(1.1f, s1);
  ASSERT_EQUAL_FP32(1.5f, s2);
  ASSERT_EQUAL_FP32(1.9f, s3);
  ASSERT_EQUAL_FP32(2.5f, s4);
  ASSERT_EQUAL_FP32(-1.5f, s5);
  ASSERT_EQUAL_FP32(-2.5f, s6);
  ASSERT_EQUAL_FP32(kFP32PositiveInfinity, s7);
  ASSERT_EQUAL_FP32(kFP32NegativeInfinity, s8);
  ASSERT_EQUAL_FP32(0.0f, s9);
  ASSERT_EQUAL_FP32(-0.0f, s10);
  ASSERT_EQUAL_FP32(FLT_MAX, s11);
  ASSERT_EQUAL_FP32(FLT_MIN, s12);

  // Check that the NaN payload is preserved according to AArch64 conversion
  // rules:
  //  - The sign bit is preserved.
  //  - The top bit of the mantissa is forced to 1 (making it a quiet NaN).
  //  - The remaining mantissa bits are copied until they run out.
  //  - The low-order bits that haven't already been assigned are set to 0.
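  // For example, the input 0x7ff82468a0000000 (fraction 0x82468a0000000)
  // narrows to 0x7fc12345: the top 23 bits of the double fraction become the
  // float fraction (0x82468a0000000 >> 29 == 0x412345).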
  ASSERT_EQUAL_FP32(RawbitsToFloat(0x7fc12345), s13);
  ASSERT_EQUAL_FP32(RawbitsToFloat(0x7fc12345), s14);

  TEARDOWN();
}


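// FCVT from half precision to single or double precision is exact, so every
// non-NaN, non-infinite half-precision value must survive a round trip
// through the wider format unchanged.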
TEST(fcvt_half) {
  SETUP_WITH_FEATURES(CPUFeatures::kFP);

  START();
  Label done;
  {
    // Check all exact conversions from half to float and back.
    Label ok, fail;
    __ Mov(w0, 0);
    for (int i = 0; i < 0xffff; i += 3) {
      if ((i & 0x7c00) == 0x7c00) continue;
      __ Mov(w1, i);
      __ Fmov(s1, w1);
      __ Fcvt(s2, h1);
      __ Fcvt(h2, s2);
      __ Fmov(w2, s2);
      __ Cmp(w1, w2);
      __ B(&fail, ne);
    }
    __ B(&ok);
    __ Bind(&fail);
    __ Mov(w0, 1);
    __ B(&done);
    __ Bind(&ok);
  }
  {
    // Check all exact conversions from half to double and back.
    Label ok, fail;
    for (int i = 0; i < 0xffff; i += 3) {
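      // As above, skip infinities and NaNs.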
      if ((i & 0x7c00) == 0x7c00) continue;
      __ Mov(w1, i);
      __ Fmov(s1, w1);
      __ Fcvt(d2, h1);
      __ Fcvt(h2, d2);
      __ Fmov(w2, s2);
      __ Cmp(w1, w2);
      __ B(&fail, ne);
    }
    __ B(&ok);
    __ Bind(&fail);
    __ Mov(w0, 2);
    __ Bind(&ok);
  }
  __ Bind(&done);

  // Check some other interesting values.
  __ Fmov(s0, kFP32PositiveInfinity);
  __ Fmov(s1, kFP32NegativeInfinity);
  __ Fmov(s2, 65504);       // Max half precision.
  __ Fmov(s3, 6.10352e-5);  // Min positive normal.
  __ Fmov(s4, 6.09756e-5);  // Max subnormal.
  __ Fmov(s5, 5.96046e-8);  // Min positive subnormal.
  __ Fmov(s6, 5e-9);        // Not representable -> zero.
  __ Fmov(s7, -0.0);
  __ Fcvt(h0, s0);
  __ Fcvt(h1, s1);
  __ Fcvt(h2, s2);
  __ Fcvt(h3, s3);
  __ Fcvt(h4, s4);
  __ Fcvt(h5, s5);
  __ Fcvt(h6, s6);
  __ Fcvt(h7, s7);

  __ Fmov(d20, kFP64PositiveInfinity);
  __ Fmov(d21, kFP64NegativeInfinity);
  __ Fmov(d22, 65504);       // Max half precision.
  __ Fmov(d23, 6.10352e-5);  // Min positive normal.
  __ Fmov(d24, 6.09756e-5);  // Max subnormal.
  __ Fmov(d25, 5.96046e-8);  // Min positive subnormal.
  __ Fmov(d26, 5e-9);        // Not representable -> zero.
  __ Fmov(d27, -0.0);
  __ Fcvt(h20, d20);
  __ Fcvt(h21, d21);
  __ Fcvt(h22, d22);
  __ Fcvt(h23, d23);
  __ Fcvt(h24, d24);
  __ Fcvt(h25, d25);
  __ Fcvt(h26, d26);
  __ Fcvt(h27, d27);
  END();

  RUN();

  ASSERT_EQUAL_32(0, w0);  // 1 => float failed, 2 => double failed.
  ASSERT_EQUAL_128(0, Float16ToRawbits(kFP16PositiveInfinity), q0);
  ASSERT_EQUAL_128(0, Float16ToRawbits(kFP16NegativeInfinity), q1);
  ASSERT_EQUAL_128(0, 0x7bff, q2);
  ASSERT_EQUAL_128(0, 0x0400, q3);
  ASSERT_EQUAL_128(0, 0x03ff, q4);
  ASSERT_EQUAL_128(0, 0x0001, q5);
  ASSERT_EQUAL_128(0, 0, q6);
  ASSERT_EQUAL_128(0, 0x8000, q7);
  ASSERT_EQUAL_128(0, Float16ToRawbits(kFP16PositiveInfinity), q20);
  ASSERT_EQUAL_128(0, Float16ToRawbits(kFP16NegativeInfinity), q21);
  ASSERT_EQUAL_128(0, 0x7bff, q22);
  ASSERT_EQUAL_128(0, 0x0400, q23);
  ASSERT_EQUAL_128(0, 0x03ff, q24);
  ASSERT_EQUAL_128(0, 0x0001, q25);
  ASSERT_EQUAL_128(0, 0, q26);
  ASSERT_EQUAL_128(0, 0x8000, q27);
  TEARDOWN();
}


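// FCVTAS converts to a signed integer, rounding to nearest with ties away
// from zero; out-of-range inputs (including infinities) saturate.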
TEST(fcvtas) {
  SETUP_WITH_FEATURES(CPUFeatures::kFP);

  START();
  __ Fmov(s0, 1.0);
  __ Fmov(s1, 1.1);
  __ Fmov(s2, 2.5);
  __ Fmov(s3, -2.5);
  __ Fmov(s4, kFP32PositiveInfinity);
  __ Fmov(s5, kFP32NegativeInfinity);
  __ Fmov(s6, 0x7fffff80);  // Largest float < INT32_MAX.
  __ Fneg(s7, s6);          // Smallest float > INT32_MIN.
  __ Fmov(d8, 1.0);
  __ Fmov(d9, 1.1);
  __ Fmov(d10, 2.5);
  __ Fmov(d11, -2.5);
  __ Fmov(d12, kFP64PositiveInfinity);
  __ Fmov(d13, kFP64NegativeInfinity);
  __ Fmov(d14, kWMaxInt - 1);
  __ Fmov(d15, kWMinInt + 1);
  __ Fmov(s17, 1.1);
  __ Fmov(s18, 2.5);
  __ Fmov(s19, -2.5);
  __ Fmov(s20, kFP32PositiveInfinity);
  __ Fmov(s21, kFP32NegativeInfinity);
  __ Fmov(s22, 0x7fffff8000000000);  // Largest float < INT64_MAX.
  __ Fneg(s23, s22);                 // Smallest float > INT64_MIN.
  __ Fmov(d24, 1.1);
  __ Fmov(d25, 2.5);
  __ Fmov(d26, -2.5);
  __ Fmov(d27, kFP64PositiveInfinity);
  __ Fmov(d28, kFP64NegativeInfinity);
  __ Fmov(d29, 0x7ffffffffffffc00);  // Largest double < INT64_MAX.
  __ Fneg(d30, d29);                 // Smallest double > INT64_MIN.

  __ Fcvtas(w0, s0);
  __ Fcvtas(w1, s1);
  __ Fcvtas(w2, s2);
  __ Fcvtas(w3, s3);
  __ Fcvtas(w4, s4);
  __ Fcvtas(w5, s5);
  __ Fcvtas(w6, s6);
  __ Fcvtas(w7, s7);
  __ Fcvtas(w8, d8);
  __ Fcvtas(w9, d9);
  __ Fcvtas(w10, d10);
  __ Fcvtas(w11, d11);
  __ Fcvtas(w12, d12);
  __ Fcvtas(w13, d13);
  __ Fcvtas(w14, d14);
  __ Fcvtas(w15, d15);
  __ Fcvtas(x17, s17);
  __ Fcvtas(x18, s18);
  __ Fcvtas(x19, s19);
  __ Fcvtas(x20, s20);
  __ Fcvtas(x21, s21);
  __ Fcvtas(x22, s22);
  __ Fcvtas(x23, s23);
  __ Fcvtas(x24, d24);
  __ Fcvtas(x25, d25);
  __ Fcvtas(x26, d26);
  __ Fcvtas(x27, d27);
  __ Fcvtas(x28, d28);
  __ Fcvtas(x29, d29);
  __ Fcvtas(x30, d30);
  END();

  RUN();

  ASSERT_EQUAL_64(1, x0);
  ASSERT_EQUAL_64(1, x1);
  ASSERT_EQUAL_64(3, x2);
  ASSERT_EQUAL_64(0xfffffffd, x3);
  ASSERT_EQUAL_64(0x7fffffff, x4);
  ASSERT_EQUAL_64(0x80000000, x5);
  ASSERT_EQUAL_64(0x7fffff80, x6);
  ASSERT_EQUAL_64(0x80000080, x7);
  ASSERT_EQUAL_64(1, x8);
  ASSERT_EQUAL_64(1, x9);
  ASSERT_EQUAL_64(3, x10);
  ASSERT_EQUAL_64(0xfffffffd, x11);
  ASSERT_EQUAL_64(0x7fffffff, x12);
  ASSERT_EQUAL_64(0x80000000, x13);
  ASSERT_EQUAL_64(0x7ffffffe, x14);
  ASSERT_EQUAL_64(0x80000001, x15);
  ASSERT_EQUAL_64(1, x17);
  ASSERT_EQUAL_64(3, x18);
  ASSERT_EQUAL_64(0xfffffffffffffffd, x19);
  ASSERT_EQUAL_64(0x7fffffffffffffff, x20);
  ASSERT_EQUAL_64(0x8000000000000000, x21);
  ASSERT_EQUAL_64(0x7fffff8000000000, x22);
  ASSERT_EQUAL_64(0x8000008000000000, x23);
  ASSERT_EQUAL_64(1, x24);
  ASSERT_EQUAL_64(3, x25);
  ASSERT_EQUAL_64(0xfffffffffffffffd, x26);
  ASSERT_EQUAL_64(0x7fffffffffffffff, x27);
  ASSERT_EQUAL_64(0x8000000000000000, x28);
  ASSERT_EQUAL_64(0x7ffffffffffffc00, x29);
  ASSERT_EQUAL_64(0x8000000000000400, x30);

  TEARDOWN();
}


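// FCVTAU converts to an unsigned integer, rounding to nearest with ties away
// from zero; negative inputs saturate to zero.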
TEST(fcvtau) {
  SETUP_WITH_FEATURES(CPUFeatures::kFP);

  START();
  __ Fmov(s0, 1.0);
  __ Fmov(s1, 1.1);
  __ Fmov(s2, 2.5);
  __ Fmov(s3, -2.5);
  __ Fmov(s4, kFP32PositiveInfinity);
  __ Fmov(s5, kFP32NegativeInfinity);
  __ Fmov(s6, 0xffffff00);  // Largest float < UINT32_MAX.
  __ Fmov(d8, 1.0);
  __ Fmov(d9, 1.1);
  __ Fmov(d10, 2.5);
  __ Fmov(d11, -2.5);
  __ Fmov(d12, kFP64PositiveInfinity);
  __ Fmov(d13, kFP64NegativeInfinity);
  __ Fmov(d14, 0xfffffffe);
  __ Fmov(s16, 1.0);
  __ Fmov(s17, 1.1);
  __ Fmov(s18, 2.5);
  __ Fmov(s19, -2.5);
  __ Fmov(s20, kFP32PositiveInfinity);
  __ Fmov(s21, kFP32NegativeInfinity);
  __ Fmov(s22, 0xffffff0000000000);  // Largest float < UINT64_MAX.
  __ Fmov(d24, 1.1);
  __ Fmov(d25, 2.5);
  __ Fmov(d26, -2.5);
  __ Fmov(d27, kFP64PositiveInfinity);
  __ Fmov(d28, kFP64NegativeInfinity);
  __ Fmov(d29, 0xfffffffffffff800);  // Largest double < UINT64_MAX.
  __ Fmov(s30, 0x100000000);

  __ Fcvtau(w0, s0);
  __ Fcvtau(w1, s1);
  __ Fcvtau(w2, s2);
  __ Fcvtau(w3, s3);
  __ Fcvtau(w4, s4);
  __ Fcvtau(w5, s5);
  __ Fcvtau(w6, s6);
  __ Fcvtau(w8, d8);
  __ Fcvtau(w9, d9);
  __ Fcvtau(w10, d10);
  __ Fcvtau(w11, d11);
  __ Fcvtau(w12, d12);
  __ Fcvtau(w13, d13);
  __ Fcvtau(w14, d14);
  __ Fcvtau(w15, d15);
  __ Fcvtau(x16, s16);
  __ Fcvtau(x17, s17);
  __ Fcvtau(x18, s18);
  __ Fcvtau(x19, s19);
  __ Fcvtau(x20, s20);
  __ Fcvtau(x21, s21);
  __ Fcvtau(x22, s22);
  __ Fcvtau(x24, d24);
  __ Fcvtau(x25, d25);
  __ Fcvtau(x26, d26);
  __ Fcvtau(x27, d27);
  __ Fcvtau(x28, d28);
  __ Fcvtau(x29, d29);
  __ Fcvtau(w30, s30);
  END();

  RUN();

  ASSERT_EQUAL_64(1, x0);
  ASSERT_EQUAL_64(1, x1);
  ASSERT_EQUAL_64(3, x2);
  ASSERT_EQUAL_64(0, x3);
  ASSERT_EQUAL_64(0xffffffff, x4);
  ASSERT_EQUAL_64(0, x5);
  ASSERT_EQUAL_64(0xffffff00, x6);
  ASSERT_EQUAL_64(1, x8);
  ASSERT_EQUAL_64(1, x9);
  ASSERT_EQUAL_64(3, x10);
  ASSERT_EQUAL_64(0, x11);
  ASSERT_EQUAL_64(0xffffffff, x12);
  ASSERT_EQUAL_64(0, x13);
  ASSERT_EQUAL_64(0xfffffffe, x14);
  ASSERT_EQUAL_64(1, x16);
  ASSERT_EQUAL_64(1, x17);
  ASSERT_EQUAL_64(3, x18);
  ASSERT_EQUAL_64(0, x19);
  ASSERT_EQUAL_64(0xffffffffffffffff, x20);
  ASSERT_EQUAL_64(0, x21);
  ASSERT_EQUAL_64(0xffffff0000000000, x22);
  ASSERT_EQUAL_64(1, x24);
  ASSERT_EQUAL_64(3, x25);
  ASSERT_EQUAL_64(0, x26);
  ASSERT_EQUAL_64(0xffffffffffffffff, x27);
  ASSERT_EQUAL_64(0, x28);
  ASSERT_EQUAL_64(0xfffffffffffff800, x29);
  ASSERT_EQUAL_64(0xffffffff, x30);

  TEARDOWN();
}


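// FCVTMS converts to a signed integer, rounding towards minus infinity.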
TEST(fcvtms) {
  SETUP_WITH_FEATURES(CPUFeatures::kFP);

  START();
  __ Fmov(s0, 1.0);
  __ Fmov(s1, 1.1);
  __ Fmov(s2, 1.5);
  __ Fmov(s3, -1.5);
  __ Fmov(s4, kFP32PositiveInfinity);
  __ Fmov(s5, kFP32NegativeInfinity);
  __ Fmov(s6, 0x7fffff80);  // Largest float < INT32_MAX.
  __ Fneg(s7, s6);          // Smallest float > INT32_MIN.
  __ Fmov(d8, 1.0);
  __ Fmov(d9, 1.1);
  __ Fmov(d10, 1.5);
  __ Fmov(d11, -1.5);
  __ Fmov(d12, kFP64PositiveInfinity);
  __ Fmov(d13, kFP64NegativeInfinity);
  __ Fmov(d14, kWMaxInt - 1);
  __ Fmov(d15, kWMinInt + 1);
  __ Fmov(s17, 1.1);
  __ Fmov(s18, 1.5);
  __ Fmov(s19, -1.5);
  __ Fmov(s20, kFP32PositiveInfinity);
  __ Fmov(s21, kFP32NegativeInfinity);
  __ Fmov(s22, 0x7fffff8000000000);  // Largest float < INT64_MAX.
  __ Fneg(s23, s22);                 // Smallest float > INT64_MIN.
  __ Fmov(d24, 1.1);
  __ Fmov(d25, 1.5);
  __ Fmov(d26, -1.5);
  __ Fmov(d27, kFP64PositiveInfinity);
  __ Fmov(d28, kFP64NegativeInfinity);
  __ Fmov(d29, 0x7ffffffffffffc00);  // Largest double < INT64_MAX.
  __ Fneg(d30, d29);                 // Smallest double > INT64_MIN.

  __ Fcvtms(w0, s0);
  __ Fcvtms(w1, s1);
  __ Fcvtms(w2, s2);
  __ Fcvtms(w3, s3);
  __ Fcvtms(w4, s4);
  __ Fcvtms(w5, s5);
  __ Fcvtms(w6, s6);
  __ Fcvtms(w7, s7);
  __ Fcvtms(w8, d8);
  __ Fcvtms(w9, d9);
  __ Fcvtms(w10, d10);
  __ Fcvtms(w11, d11);
  __ Fcvtms(w12, d12);
  __ Fcvtms(w13, d13);
  __ Fcvtms(w14, d14);
  __ Fcvtms(w15, d15);
  __ Fcvtms(x17, s17);
  __ Fcvtms(x18, s18);
  __ Fcvtms(x19, s19);
  __ Fcvtms(x20, s20);
  __ Fcvtms(x21, s21);
  __ Fcvtms(x22, s22);
  __ Fcvtms(x23, s23);
  __ Fcvtms(x24, d24);
  __ Fcvtms(x25, d25);
  __ Fcvtms(x26, d26);
  __ Fcvtms(x27, d27);
  __ Fcvtms(x28, d28);
  __ Fcvtms(x29, d29);
  __ Fcvtms(x30, d30);
  END();

  RUN();

  ASSERT_EQUAL_64(1, x0);
  ASSERT_EQUAL_64(1, x1);
  ASSERT_EQUAL_64(1, x2);
  ASSERT_EQUAL_64(0xfffffffe, x3);
  ASSERT_EQUAL_64(0x7fffffff, x4);
  ASSERT_EQUAL_64(0x80000000, x5);
  ASSERT_EQUAL_64(0x7fffff80, x6);
  ASSERT_EQUAL_64(0x80000080, x7);
  ASSERT_EQUAL_64(1, x8);
  ASSERT_EQUAL_64(1, x9);
  ASSERT_EQUAL_64(1, x10);
  ASSERT_EQUAL_64(0xfffffffe, x11);
  ASSERT_EQUAL_64(0x7fffffff, x12);
  ASSERT_EQUAL_64(0x80000000, x13);
  ASSERT_EQUAL_64(0x7ffffffe, x14);
  ASSERT_EQUAL_64(0x80000001, x15);
  ASSERT_EQUAL_64(1, x17);
  ASSERT_EQUAL_64(1, x18);
  ASSERT_EQUAL_64(0xfffffffffffffffe, x19);
  ASSERT_EQUAL_64(0x7fffffffffffffff, x20);
  ASSERT_EQUAL_64(0x8000000000000000, x21);
  ASSERT_EQUAL_64(0x7fffff8000000000, x22);
  ASSERT_EQUAL_64(0x8000008000000000, x23);
  ASSERT_EQUAL_64(1, x24);
  ASSERT_EQUAL_64(1, x25);
  ASSERT_EQUAL_64(0xfffffffffffffffe, x26);
  ASSERT_EQUAL_64(0x7fffffffffffffff, x27);
  ASSERT_EQUAL_64(0x8000000000000000, x28);
  ASSERT_EQUAL_64(0x7ffffffffffffc00, x29);
  ASSERT_EQUAL_64(0x8000000000000400, x30);

  TEARDOWN();
}


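// FCVTMU converts to an unsigned integer, rounding towards minus infinity;
// negative inputs saturate to zero.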
TEST(fcvtmu) {
  SETUP_WITH_FEATURES(CPUFeatures::kFP);

  START();
  __ Fmov(s0, 1.0);
  __ Fmov(s1, 1.1);
  __ Fmov(s2, 1.5);
  __ Fmov(s3, -1.5);
  __ Fmov(s4, kFP32PositiveInfinity);
  __ Fmov(s5, kFP32NegativeInfinity);
  __ Fmov(s6, 0x7fffff80);  // Largest float < INT32_MAX.
  __ Fneg(s7, s6);          // Smallest float > INT32_MIN.
  __ Fmov(d8, 1.0);
  __ Fmov(d9, 1.1);
  __ Fmov(d10, 1.5);
  __ Fmov(d11, -1.5);
  __ Fmov(d12, kFP64PositiveInfinity);
  __ Fmov(d13, kFP64NegativeInfinity);
  __ Fmov(d14, kWMaxInt - 1);
  __ Fmov(d15, kWMinInt + 1);
  __ Fmov(s17, 1.1);
  __ Fmov(s18, 1.5);
  __ Fmov(s19, -1.5);
  __ Fmov(s20, kFP32PositiveInfinity);
  __ Fmov(s21, kFP32NegativeInfinity);
  __ Fmov(s22, 0x7fffff8000000000);  // Largest float < INT64_MAX.
  __ Fneg(s23, s22);                 // Smallest float > INT64_MIN.
  __ Fmov(d24, 1.1);
  __ Fmov(d25, 1.5);
  __ Fmov(d26, -1.5);
  __ Fmov(d27, kFP64PositiveInfinity);
  __ Fmov(d28, kFP64NegativeInfinity);
  __ Fmov(d29, 0x7ffffffffffffc00);  // Largest double < INT64_MAX.
  __ Fneg(d30, d29);                 // Smallest double > INT64_MIN.

  __ Fcvtmu(w0, s0);
  __ Fcvtmu(w1, s1);
  __ Fcvtmu(w2, s2);
  __ Fcvtmu(w3, s3);
  __ Fcvtmu(w4, s4);
  __ Fcvtmu(w5, s5);
  __ Fcvtmu(w6, s6);
  __ Fcvtmu(w7, s7);
  __ Fcvtmu(w8, d8);
  __ Fcvtmu(w9, d9);
  __ Fcvtmu(w10, d10);
  __ Fcvtmu(w11, d11);
  __ Fcvtmu(w12, d12);
  __ Fcvtmu(w13, d13);
  __ Fcvtmu(w14, d14);
  __ Fcvtmu(x17, s17);
  __ Fcvtmu(x18, s18);
  __ Fcvtmu(x19, s19);
  __ Fcvtmu(x20, s20);
  __ Fcvtmu(x21, s21);
  __ Fcvtmu(x22, s22);
  __ Fcvtmu(x23, s23);
  __ Fcvtmu(x24, d24);
  __ Fcvtmu(x25, d25);
  __ Fcvtmu(x26, d26);
  __ Fcvtmu(x27, d27);
  __ Fcvtmu(x28, d28);
  __ Fcvtmu(x29, d29);
  __ Fcvtmu(x30, d30);
  END();

  RUN();

  ASSERT_EQUAL_64(1, x0);
  ASSERT_EQUAL_64(1, x1);
  ASSERT_EQUAL_64(1, x2);
  ASSERT_EQUAL_64(0, x3);
  ASSERT_EQUAL_64(0xffffffff, x4);
  ASSERT_EQUAL_64(0, x5);
  ASSERT_EQUAL_64(0x7fffff80, x6);
  ASSERT_EQUAL_64(0, x7);
  ASSERT_EQUAL_64(1, x8);
  ASSERT_EQUAL_64(1, x9);
  ASSERT_EQUAL_64(1, x10);
  ASSERT_EQUAL_64(0, x11);
  ASSERT_EQUAL_64(0xffffffff, x12);
  ASSERT_EQUAL_64(0, x13);
  ASSERT_EQUAL_64(0x7ffffffe, x14);
  ASSERT_EQUAL_64(1, x17);
  ASSERT_EQUAL_64(1, x18);
  ASSERT_EQUAL_64(0, x19);
  ASSERT_EQUAL_64(0xffffffffffffffff, x20);
  ASSERT_EQUAL_64(0, x21);
  ASSERT_EQUAL_64(0x7fffff8000000000, x22);
  ASSERT_EQUAL_64(0, x23);
  ASSERT_EQUAL_64(1, x24);
  ASSERT_EQUAL_64(1, x25);
  ASSERT_EQUAL_64(0, x26);
  ASSERT_EQUAL_64(0xffffffffffffffff, x27);
  ASSERT_EQUAL_64(0, x28);
  ASSERT_EQUAL_64(0x7ffffffffffffc00, x29);
  ASSERT_EQUAL_64(0, x30);

  TEARDOWN();
}


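// FCVTNS converts to a signed integer, rounding to nearest with ties to even.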
TEST(fcvtns) {
  SETUP_WITH_FEATURES(CPUFeatures::kFP);

  START();
  __ Fmov(s0, 1.0);
  __ Fmov(s1, 1.1);
  __ Fmov(s2, 1.5);
  __ Fmov(s3, -1.5);
  __ Fmov(s4, kFP32PositiveInfinity);
  __ Fmov(s5, kFP32NegativeInfinity);
  __ Fmov(s6, 0x7fffff80);  // Largest float < INT32_MAX.
  __ Fneg(s7, s6);          // Smallest float > INT32_MIN.
  __ Fmov(d8, 1.0);
  __ Fmov(d9, 1.1);
  __ Fmov(d10, 1.5);
  __ Fmov(d11, -1.5);
  __ Fmov(d12, kFP64PositiveInfinity);
  __ Fmov(d13, kFP64NegativeInfinity);
  __ Fmov(d14, kWMaxInt - 1);
  __ Fmov(d15, kWMinInt + 1);
  __ Fmov(s17, 1.1);
  __ Fmov(s18, 1.5);
  __ Fmov(s19, -1.5);
  __ Fmov(s20, kFP32PositiveInfinity);
  __ Fmov(s21, kFP32NegativeInfinity);
  __ Fmov(s22, 0x7fffff8000000000);  // Largest float < INT64_MAX.
  __ Fneg(s23, s22);                 // Smallest float > INT64_MIN.
  __ Fmov(d24, 1.1);
  __ Fmov(d25, 1.5);
  __ Fmov(d26, -1.5);
  __ Fmov(d27, kFP64PositiveInfinity);
  __ Fmov(d28, kFP64NegativeInfinity);
  __ Fmov(d29, 0x7ffffffffffffc00);  // Largest double < INT64_MAX.
  __ Fneg(d30, d29);                 // Smallest double > INT64_MIN.

  __ Fcvtns(w0, s0);
  __ Fcvtns(w1, s1);
  __ Fcvtns(w2, s2);
  __ Fcvtns(w3, s3);
  __ Fcvtns(w4, s4);
  __ Fcvtns(w5, s5);
  __ Fcvtns(w6, s6);
  __ Fcvtns(w7, s7);
  __ Fcvtns(w8, d8);
  __ Fcvtns(w9, d9);
  __ Fcvtns(w10, d10);
  __ Fcvtns(w11, d11);
  __ Fcvtns(w12, d12);
  __ Fcvtns(w13, d13);
  __ Fcvtns(w14, d14);
  __ Fcvtns(w15, d15);
  __ Fcvtns(x17, s17);
  __ Fcvtns(x18, s18);
  __ Fcvtns(x19, s19);
  __ Fcvtns(x20, s20);
  __ Fcvtns(x21, s21);
  __ Fcvtns(x22, s22);
  __ Fcvtns(x23, s23);
  __ Fcvtns(x24, d24);
  __ Fcvtns(x25, d25);
  __ Fcvtns(x26, d26);
  __ Fcvtns(x27, d27);
  __ Fcvtns(x28, d28);
  __ Fcvtns(x29, d29);
  __ Fcvtns(x30, d30);
  END();

  RUN();

  ASSERT_EQUAL_64(1, x0);
  ASSERT_EQUAL_64(1, x1);
  ASSERT_EQUAL_64(2, x2);
  ASSERT_EQUAL_64(0xfffffffe, x3);
  ASSERT_EQUAL_64(0x7fffffff, x4);
  ASSERT_EQUAL_64(0x80000000, x5);
  ASSERT_EQUAL_64(0x7fffff80, x6);
  ASSERT_EQUAL_64(0x80000080, x7);
  ASSERT_EQUAL_64(1, x8);
  ASSERT_EQUAL_64(1, x9);
  ASSERT_EQUAL_64(2, x10);
  ASSERT_EQUAL_64(0xfffffffe, x11);
  ASSERT_EQUAL_64(0x7fffffff, x12);
  ASSERT_EQUAL_64(0x80000000, x13);
  ASSERT_EQUAL_64(0x7ffffffe, x14);
  ASSERT_EQUAL_64(0x80000001, x15);
  ASSERT_EQUAL_64(1, x17);
  ASSERT_EQUAL_64(2, x18);
  ASSERT_EQUAL_64(0xfffffffffffffffe, x19);
  ASSERT_EQUAL_64(0x7fffffffffffffff, x20);
  ASSERT_EQUAL_64(0x8000000000000000, x21);
  ASSERT_EQUAL_64(0x7fffff8000000000, x22);
  ASSERT_EQUAL_64(0x8000008000000000, x23);
  ASSERT_EQUAL_64(1, x24);
  ASSERT_EQUAL_64(2, x25);
  ASSERT_EQUAL_64(0xfffffffffffffffe, x26);
  ASSERT_EQUAL_64(0x7fffffffffffffff, x27);
  ASSERT_EQUAL_64(0x8000000000000000, x28);
  ASSERT_EQUAL_64(0x7ffffffffffffc00, x29);
  ASSERT_EQUAL_64(0x8000000000000400, x30);

  TEARDOWN();
}


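// FCVTNU converts to an unsigned integer, rounding to nearest with ties to
// even.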
TEST(fcvtnu) {
  SETUP_WITH_FEATURES(CPUFeatures::kFP);

  START();
  __ Fmov(s0, 1.0);
  __ Fmov(s1, 1.1);
  __ Fmov(s2, 1.5);
  __ Fmov(s3, -1.5);
  __ Fmov(s4, kFP32PositiveInfinity);
  __ Fmov(s5, kFP32NegativeInfinity);
  __ Fmov(s6, 0xffffff00);  // Largest float < UINT32_MAX.
  __ Fmov(d8, 1.0);
  __ Fmov(d9, 1.1);
  __ Fmov(d10, 1.5);
  __ Fmov(d11, -1.5);
  __ Fmov(d12, kFP64PositiveInfinity);
  __ Fmov(d13, kFP64NegativeInfinity);
  __ Fmov(d14, 0xfffffffe);
  __ Fmov(s16, 1.0);
  __ Fmov(s17, 1.1);
  __ Fmov(s18, 1.5);
  __ Fmov(s19, -1.5);
  __ Fmov(s20, kFP32PositiveInfinity);
  __ Fmov(s21, kFP32NegativeInfinity);
  __ Fmov(s22, 0xffffff0000000000);  // Largest float < UINT64_MAX.
  __ Fmov(d24, 1.1);
  __ Fmov(d25, 1.5);
  __ Fmov(d26, -1.5);
  __ Fmov(d27, kFP64PositiveInfinity);
  __ Fmov(d28, kFP64NegativeInfinity);
  __ Fmov(d29, 0xfffffffffffff800);  // Largest double < UINT64_MAX.
  __ Fmov(s30, 0x100000000);

  __ Fcvtnu(w0, s0);
  __ Fcvtnu(w1, s1);
  __ Fcvtnu(w2, s2);
  __ Fcvtnu(w3, s3);
  __ Fcvtnu(w4, s4);
  __ Fcvtnu(w5, s5);
  __ Fcvtnu(w6, s6);
  __ Fcvtnu(w8, d8);
  __ Fcvtnu(w9, d9);
  __ Fcvtnu(w10, d10);
  __ Fcvtnu(w11, d11);
  __ Fcvtnu(w12, d12);
  __ Fcvtnu(w13, d13);
  __ Fcvtnu(w14, d14);
  __ Fcvtnu(w15, d15);
  __ Fcvtnu(x16, s16);
  __ Fcvtnu(x17, s17);
  __ Fcvtnu(x18, s18);
  __ Fcvtnu(x19, s19);
  __ Fcvtnu(x20, s20);
  __ Fcvtnu(x21, s21);
  __ Fcvtnu(x22, s22);
  __ Fcvtnu(x24, d24);
  __ Fcvtnu(x25, d25);
  __ Fcvtnu(x26, d26);
  __ Fcvtnu(x27, d27);
  __ Fcvtnu(x28, d28);
  __ Fcvtnu(x29, d29);
  __ Fcvtnu(w30, s30);
  END();

  RUN();

  ASSERT_EQUAL_64(1, x0);
  ASSERT_EQUAL_64(1, x1);
  ASSERT_EQUAL_64(2, x2);
  ASSERT_EQUAL_64(0, x3);
  ASSERT_EQUAL_64(0xffffffff, x4);
  ASSERT_EQUAL_64(0, x5);
  ASSERT_EQUAL_64(0xffffff00, x6);
  ASSERT_EQUAL_64(1, x8);
  ASSERT_EQUAL_64(1, x9);
  ASSERT_EQUAL_64(2, x10);
  ASSERT_EQUAL_64(0, x11);
  ASSERT_EQUAL_64(0xffffffff, x12);
  ASSERT_EQUAL_64(0, x13);
  ASSERT_EQUAL_64(0xfffffffe, x14);
  ASSERT_EQUAL_64(1, x16);
  ASSERT_EQUAL_64(1, x17);
  ASSERT_EQUAL_64(2, x18);
  ASSERT_EQUAL_64(0, x19);
  ASSERT_EQUAL_64(0xffffffffffffffff, x20);
  ASSERT_EQUAL_64(0, x21);
  ASSERT_EQUAL_64(0xffffff0000000000, x22);
  ASSERT_EQUAL_64(1, x24);
  ASSERT_EQUAL_64(2, x25);
  ASSERT_EQUAL_64(0, x26);
  ASSERT_EQUAL_64(0xffffffffffffffff, x27);
  ASSERT_EQUAL_64(0, x28);
  ASSERT_EQUAL_64(0xfffffffffffff800, x29);
  ASSERT_EQUAL_64(0xffffffff, x30);

  TEARDOWN();
}


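// FCVTZS converts to a signed integer, rounding towards zero.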
TEST(fcvtzs) {
  SETUP_WITH_FEATURES(CPUFeatures::kFP);

  START();
  __ Fmov(s0, 1.0);
  __ Fmov(s1, 1.1);
  __ Fmov(s2, 1.5);
  __ Fmov(s3, -1.5);
  __ Fmov(s4, kFP32PositiveInfinity);
  __ Fmov(s5, kFP32NegativeInfinity);
  __ Fmov(s6, 0x7fffff80);  // Largest float < INT32_MAX.
  __ Fneg(s7, s6);          // Smallest float > INT32_MIN.
  __ Fmov(d8, 1.0);
  __ Fmov(d9, 1.1);
  __ Fmov(d10, 1.5);
  __ Fmov(d11, -1.5);
  __ Fmov(d12, kFP64PositiveInfinity);
  __ Fmov(d13, kFP64NegativeInfinity);
  __ Fmov(d14, kWMaxInt - 1);
  __ Fmov(d15, kWMinInt + 1);
  __ Fmov(s17, 1.1);
  __ Fmov(s18, 1.5);
  __ Fmov(s19, -1.5);
  __ Fmov(s20, kFP32PositiveInfinity);
  __ Fmov(s21, kFP32NegativeInfinity);
  __ Fmov(s22, 0x7fffff8000000000);  // Largest float < INT64_MAX.
  __ Fneg(s23, s22);                 // Smallest float > INT64_MIN.
  __ Fmov(d24, 1.1);
  __ Fmov(d25, 1.5);
  __ Fmov(d26, -1.5);
  __ Fmov(d27, kFP64PositiveInfinity);
  __ Fmov(d28, kFP64NegativeInfinity);
  __ Fmov(d29, 0x7ffffffffffffc00);  // Largest double < INT64_MAX.
  __ Fneg(d30, d29);                 // Smallest double > INT64_MIN.

  __ Fcvtzs(w0, s0);
  __ Fcvtzs(w1, s1);
  __ Fcvtzs(w2, s2);
  __ Fcvtzs(w3, s3);
  __ Fcvtzs(w4, s4);
  __ Fcvtzs(w5, s5);
  __ Fcvtzs(w6, s6);
  __ Fcvtzs(w7, s7);
  __ Fcvtzs(w8, d8);
  __ Fcvtzs(w9, d9);
  __ Fcvtzs(w10, d10);
  __ Fcvtzs(w11, d11);
  __ Fcvtzs(w12, d12);
  __ Fcvtzs(w13, d13);
  __ Fcvtzs(w14, d14);
  __ Fcvtzs(w15, d15);
  __ Fcvtzs(x17, s17);
  __ Fcvtzs(x18, s18);
  __ Fcvtzs(x19, s19);
  __ Fcvtzs(x20, s20);
  __ Fcvtzs(x21, s21);
  __ Fcvtzs(x22, s22);
  __ Fcvtzs(x23, s23);
  __ Fcvtzs(x24, d24);
  __ Fcvtzs(x25, d25);
  __ Fcvtzs(x26, d26);
  __ Fcvtzs(x27, d27);
  __ Fcvtzs(x28, d28);
  __ Fcvtzs(x29, d29);
  __ Fcvtzs(x30, d30);
  END();

  RUN();

  ASSERT_EQUAL_64(1, x0);
  ASSERT_EQUAL_64(1, x1);
  ASSERT_EQUAL_64(1, x2);
  ASSERT_EQUAL_64(0xffffffff, x3);
  ASSERT_EQUAL_64(0x7fffffff, x4);
  ASSERT_EQUAL_64(0x80000000, x5);
  ASSERT_EQUAL_64(0x7fffff80, x6);
  ASSERT_EQUAL_64(0x80000080, x7);
  ASSERT_EQUAL_64(1, x8);
  ASSERT_EQUAL_64(1, x9);
  ASSERT_EQUAL_64(1, x10);
  ASSERT_EQUAL_64(0xffffffff, x11);
  ASSERT_EQUAL_64(0x7fffffff, x12);
  ASSERT_EQUAL_64(0x80000000, x13);
  ASSERT_EQUAL_64(0x7ffffffe, x14);
  ASSERT_EQUAL_64(0x80000001, x15);
  ASSERT_EQUAL_64(1, x17);
  ASSERT_EQUAL_64(1, x18);
  ASSERT_EQUAL_64(0xffffffffffffffff, x19);
  ASSERT_EQUAL_64(0x7fffffffffffffff, x20);
  ASSERT_EQUAL_64(0x8000000000000000, x21);
  ASSERT_EQUAL_64(0x7fffff8000000000, x22);
  ASSERT_EQUAL_64(0x8000008000000000, x23);
  ASSERT_EQUAL_64(1, x24);
  ASSERT_EQUAL_64(1, x25);
  ASSERT_EQUAL_64(0xffffffffffffffff, x26);
  ASSERT_EQUAL_64(0x7fffffffffffffff, x27);
  ASSERT_EQUAL_64(0x8000000000000000, x28);
  ASSERT_EQUAL_64(0x7ffffffffffffc00, x29);
  ASSERT_EQUAL_64(0x8000000000000400, x30);

  TEARDOWN();
}

void FjcvtzsHelper(uint64_t value, uint64_t expected, uint32_t expected_z) {
  SETUP_WITH_FEATURES(CPUFeatures::kFP, CPUFeatures::kJSCVT);
  START();
  __ Fmov(d0, RawbitsToDouble(value));
  __ Fjcvtzs(w0, d0);
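  // FJCVTZS writes 0100 (Z set) to NZCV if the conversion is exact, and 0000
  // otherwise, so read the flags back for checking.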
  __ Mrs(x1, NZCV);
  END();

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
  RUN();
  ASSERT_EQUAL_64(expected, x0);
  ASSERT_EQUAL_32(expected_z, w1);
#else
  USE(expected);
  USE(expected_z);
#endif

  TEARDOWN();
}

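// FJCVTZS implements the JavaScript ToInt32 conversion: it rounds towards
// zero and returns the low 32 bits of the resulting integer, produces 0 for
// NaNs and infinities, and sets Z if (and only if) the conversion is exact.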
TEST(fjcvtzs) {
  /* Simple values. */
  FjcvtzsHelper(0x0000000000000000, 0, ZFlag);   // 0.0
  FjcvtzsHelper(0x0010000000000000, 0, NoFlag);  // The smallest normal value.
  FjcvtzsHelper(0x3fdfffffffffffff, 0, NoFlag);  // The value just below 0.5.
  FjcvtzsHelper(0x3fe0000000000000, 0, NoFlag);  // 0.5
  FjcvtzsHelper(0x3fe0000000000001, 0, NoFlag);  // The value just above 0.5.
  FjcvtzsHelper(0x3fefffffffffffff, 0, NoFlag);  // The value just below 1.0.
  FjcvtzsHelper(0x3ff0000000000000, 1, ZFlag);   // 1.0
  FjcvtzsHelper(0x3ff0000000000001, 1, NoFlag);  // The value just above 1.0.
  FjcvtzsHelper(0x3ff8000000000000, 1, NoFlag);  // 1.5
  FjcvtzsHelper(0x4024000000000000, 10, ZFlag);  // 10
  FjcvtzsHelper(0x7fefffffffffffff, 0, NoFlag);  // The largest finite value.

  /* Infinity. */
  FjcvtzsHelper(0x7ff0000000000000, 0, NoFlag);

  /* NaNs. */
  /* - Quiet NaNs */
  FjcvtzsHelper(0x7ff923456789abcd, 0, NoFlag);
  FjcvtzsHelper(0x7ff8000000000000, 0, NoFlag);
  /* - Signalling NaNs */
  FjcvtzsHelper(0x7ff123456789abcd, 0, NoFlag);
  FjcvtzsHelper(0x7ff0000000000001, 0, NoFlag);

  /* Subnormals. */
  /* - A recognisable bit pattern. */
  FjcvtzsHelper(0x000123456789abcd, 0, NoFlag);
  /* - The largest subnormal value. */
  FjcvtzsHelper(0x000fffffffffffff, 0, NoFlag);
  /* - The smallest subnormal value. */
  FjcvtzsHelper(0x0000000000000001, 0, NoFlag);

  /* The same values again, but negated. */
  FjcvtzsHelper(0x8000000000000000, 0, NoFlag);
  FjcvtzsHelper(0x8010000000000000, 0, NoFlag);
  FjcvtzsHelper(0xbfdfffffffffffff, 0, NoFlag);
  FjcvtzsHelper(0xbfe0000000000000, 0, NoFlag);
  FjcvtzsHelper(0xbfe0000000000001, 0, NoFlag);
  FjcvtzsHelper(0xbfefffffffffffff, 0, NoFlag);
  FjcvtzsHelper(0xbff0000000000000, 0xffffffff, ZFlag);
  FjcvtzsHelper(0xbff0000000000001, 0xffffffff, NoFlag);
  FjcvtzsHelper(0xbff8000000000000, 0xffffffff, NoFlag);
  FjcvtzsHelper(0xc024000000000000, 0xfffffff6, ZFlag);
  FjcvtzsHelper(0xffefffffffffffff, 0, NoFlag);
  FjcvtzsHelper(0xfff0000000000000, 0, NoFlag);
  FjcvtzsHelper(0xfff923456789abcd, 0, NoFlag);
  FjcvtzsHelper(0xfff8000000000000, 0, NoFlag);
  FjcvtzsHelper(0xfff123456789abcd, 0, NoFlag);
  FjcvtzsHelper(0xfff0000000000001, 0, NoFlag);
  FjcvtzsHelper(0x800123456789abcd, 0, NoFlag);
  FjcvtzsHelper(0x800fffffffffffff, 0, NoFlag);
  FjcvtzsHelper(0x8000000000000001, 0, NoFlag);

  // Test floating-point numbers with every possible exponent. Most of the
  // expected results are zero, but for a range of exponents the result is a
  // shifted part of this mantissa.
  uint64_t mantissa = 0x0001234567890abc;

  // For unbiased exponents between 0 and 52, only some of the top bits of the
  // mantissa lie above the binary point, so the mantissa is shifted right
  // until just those bits remain. Above 52, the whole mantissa is shifted
  // left past the binary point, until at 52 + 64 every bit has been shifted
  // out of the range of 64-bit integers.
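  // For example, an unbiased exponent of 0 leaves only the implicit leading
  // bit above the binary point, so the expected result is 1.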
  int first_exp_boundary = 52;
  int second_exp_boundary = first_exp_boundary + 64;
  for (int exponent = 0; exponent < 2048; exponent++) {
    int e = exponent - 1023;

    uint64_t expected = 0;
    if (e < 0) {
      expected = 0;
    } else if (e <= first_exp_boundary) {
      expected = (UINT64_C(1) << e) | (mantissa >> (52 - e));
      expected &= 0xffffffff;
    } else if (e < second_exp_boundary) {
      expected = (mantissa << (e - 52)) & 0xffffffff;
    } else {
      expected = 0;
    }

    uint64_t value = (static_cast<uint64_t>(exponent) << 52) | mantissa;
    FjcvtzsHelper(value, expected, NoFlag);
    FjcvtzsHelper(value | kDSignMask, (-expected) & 0xffffffff, NoFlag);
  }
}

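// FCVTZU converts to an unsigned integer, rounding towards zero; negative
// inputs saturate to zero.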
TEST(fcvtzu) {
  SETUP_WITH_FEATURES(CPUFeatures::kFP);

  START();
  __ Fmov(s0, 1.0);
  __ Fmov(s1, 1.1);
  __ Fmov(s2, 1.5);
  __ Fmov(s3, -1.5);
  __ Fmov(s4, kFP32PositiveInfinity);
  __ Fmov(s5, kFP32NegativeInfinity);
  __ Fmov(s6, 0x7fffff80);  // Largest float < INT32_MAX.
  __ Fneg(s7, s6);          // Smallest float > INT32_MIN.
  __ Fmov(d8, 1.0);
  __ Fmov(d9, 1.1);
  __ Fmov(d10, 1.5);
  __ Fmov(d11, -1.5);
  __ Fmov(d12, kFP64PositiveInfinity);
  __ Fmov(d13, kFP64NegativeInfinity);
  __ Fmov(d14, kWMaxInt - 1);
  __ Fmov(d15, kWMinInt + 1);
  __ Fmov(s17, 1.1);
  __ Fmov(s18, 1.5);
  __ Fmov(s19, -1.5);
  __ Fmov(s20, kFP32PositiveInfinity);
  __ Fmov(s21, kFP32NegativeInfinity);
  __ Fmov(s22, 0x7fffff8000000000);  // Largest float < INT64_MAX.
  __ Fneg(s23, s22);                 // Smallest float > INT64_MIN.
  __ Fmov(d24, 1.1);
  __ Fmov(d25, 1.5);
  __ Fmov(d26, -1.5);
  __ Fmov(d27, kFP64PositiveInfinity);
  __ Fmov(d28, kFP64NegativeInfinity);
  __ Fmov(d29, 0x7ffffffffffffc00);  // Largest double < INT64_MAX.
  __ Fneg(d30, d29);                 // Smallest double > INT64_MIN.

  __ Fcvtzu(w0, s0);
  __ Fcvtzu(w1, s1);
  __ Fcvtzu(w2, s2);
  __ Fcvtzu(w3, s3);
  __ Fcvtzu(w4, s4);
  __ Fcvtzu(w5, s5);
  __ Fcvtzu(w6, s6);
  __ Fcvtzu(w7, s7);
  __ Fcvtzu(w8, d8);
  __ Fcvtzu(w9, d9);
  __ Fcvtzu(w10, d10);
  __ Fcvtzu(w11, d11);
  __ Fcvtzu(w12, d12);
  __ Fcvtzu(w13, d13);
  __ Fcvtzu(w14, d14);
  __ Fcvtzu(x17, s17);
  __ Fcvtzu(x18, s18);
  __ Fcvtzu(x19, s19);
  __ Fcvtzu(x20, s20);
  __ Fcvtzu(x21, s21);
  __ Fcvtzu(x22, s22);
  __ Fcvtzu(x23, s23);
  __ Fcvtzu(x24, d24);
  __ Fcvtzu(x25, d25);
  __ Fcvtzu(x26, d26);
  __ Fcvtzu(x27, d27);
  __ Fcvtzu(x28, d28);
  __ Fcvtzu(x29, d29);
  __ Fcvtzu(x30, d30);
  END();

  RUN();

  ASSERT_EQUAL_64(1, x0);
  ASSERT_EQUAL_64(1, x1);
  ASSERT_EQUAL_64(1, x2);
  ASSERT_EQUAL_64(0, x3);
  ASSERT_EQUAL_64(0xffffffff, x4);
  ASSERT_EQUAL_64(0, x5);
  ASSERT_EQUAL_64(0x7fffff80, x6);
  ASSERT_EQUAL_64(0, x7);
  ASSERT_EQUAL_64(1, x8);
  ASSERT_EQUAL_64(1, x9);
  ASSERT_EQUAL_64(1, x10);
  ASSERT_EQUAL_64(0, x11);
  ASSERT_EQUAL_64(0xffffffff, x12);
  ASSERT_EQUAL_64(0, x13);
  ASSERT_EQUAL_64(0x7ffffffe, x14);
  ASSERT_EQUAL_64(1, x17);
  ASSERT_EQUAL_64(1, x18);
  ASSERT_EQUAL_64(0, x19);
  ASSERT_EQUAL_64(0xffffffffffffffff, x20);
  ASSERT_EQUAL_64(0, x21);
  ASSERT_EQUAL_64(0x7fffff8000000000, x22);
  ASSERT_EQUAL_64(0, x23);
  ASSERT_EQUAL_64(1, x24);
  ASSERT_EQUAL_64(1, x25);
  ASSERT_EQUAL_64(0, x26);
  ASSERT_EQUAL_64(0xffffffffffffffff, x27);
  ASSERT_EQUAL_64(0, x28);
  ASSERT_EQUAL_64(0x7ffffffffffffc00, x29);
  ASSERT_EQUAL_64(0, x30);

  TEARDOWN();
}


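// FCVTL and FCVTL2 lengthen each element (half to single, or single to
// double); FCVTL reads the low half of the source vector and FCVTL2 the high
// half.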
TEST(neon_fcvtl) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);

  START();

  __ Movi(v0.V2D(), 0x000080007efffeff, 0x3100b1007c00fc00);
  __ Movi(v1.V2D(), 0x03ff83ff00038003, 0x000180017c01fc01);
  __ Movi(v2.V2D(), 0x3e200000be200000, 0x7f800000ff800000);
  __ Movi(v3.V2D(), 0x0000000080000000, 0x7f8fffffff8fffff);
  __ Movi(v4.V2D(), 0x7fcfffffffcfffff, 0x0000000180000001);
  __ Fcvtl(v16.V4S(), v0.V4H());
  __ Fcvtl2(v17.V4S(), v0.V8H());
  __ Fcvtl(v18.V4S(), v1.V4H());
  __ Fcvtl2(v19.V4S(), v1.V8H());

  __ Fcvtl(v20.V2D(), v2.V2S());
  __ Fcvtl2(v21.V2D(), v2.V4S());
  __ Fcvtl(v22.V2D(), v3.V2S());
  __ Fcvtl2(v23.V2D(), v3.V4S());
  __ Fcvtl(v24.V2D(), v4.V2S());
  __ Fcvtl2(v25.V2D(), v4.V4S());

  END();

  RUN();
  ASSERT_EQUAL_128(0x3e200000be200000, 0x7f800000ff800000, q16);
  ASSERT_EQUAL_128(0x0000000080000000, 0x7fdfe000ffdfe000, q17);
  ASSERT_EQUAL_128(0x33800000b3800000, 0x7fc02000ffc02000, q18);
  ASSERT_EQUAL_128(0x387fc000b87fc000, 0x34400000b4400000, q19);
  ASSERT_EQUAL_128(0x7ff0000000000000, 0xfff0000000000000, q20);
  ASSERT_EQUAL_128(0x3fc4000000000000, 0xbfc4000000000000, q21);
  ASSERT_EQUAL_128(0x7ff9ffffe0000000, 0xfff9ffffe0000000, q22);
  ASSERT_EQUAL_128(0x0000000000000000, 0x8000000000000000, q23);
  ASSERT_EQUAL_128(0x36a0000000000000, 0xb6a0000000000000, q24);
  ASSERT_EQUAL_128(0x7ff9ffffe0000000, 0xfff9ffffe0000000, q25);
  TEARDOWN();
}


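// FCVTN and FCVTN2 narrow each element (single to half, or double to single);
// FCVTN writes the low half of the destination vector and FCVTN2 the high
// half.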
TEST(neon_fcvtn) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);

  START();

  __ Movi(v0.V2D(), 0x3e200000be200000, 0x7f800000ff800000);
  __ Movi(v1.V2D(), 0x0000000080000000, 0x7f8fffffff8fffff);
  __ Movi(v2.V2D(), 0x7fcfffffffcfffff, 0x0000000180000001);
  __ Movi(v3.V2D(), 0x3fc4000000000000, 0xbfc4000000000000);
  __ Movi(v4.V2D(), 0x7ff0000000000000, 0xfff0000000000000);
  __ Movi(v5.V2D(), 0x0000000000000000, 0x8000000000000000);
  __ Movi(v6.V2D(), 0x7ff0ffffffffffff, 0xfff0ffffffffffff);
  __ Movi(v7.V2D(), 0x7ff8ffffffffffff, 0xfff8ffffffffffff);
  __ Movi(v8.V2D(), 0x0000000000000001, 0x8000000000000001);

  __ Fcvtn(v16.V4H(), v0.V4S());
  __ Fcvtn2(v16.V8H(), v1.V4S());
  __ Fcvtn(v17.V4H(), v2.V4S());
  __ Fcvtn(v18.V2S(), v3.V2D());
  __ Fcvtn2(v18.V4S(), v4.V2D());
  __ Fcvtn(v19.V2S(), v5.V2D());
  __ Fcvtn2(v19.V4S(), v6.V2D());
  __ Fcvtn(v20.V2S(), v7.V2D());
  __ Fcvtn2(v20.V4S(), v8.V2D());
  END();

  RUN();
  ASSERT_EQUAL_128(0x000080007e7ffe7f, 0x3100b1007c00fc00, q16);
  ASSERT_EQUAL_64(0x7e7ffe7f00008000, d17);
  ASSERT_EQUAL_128(0x7f800000ff800000, 0x3e200000be200000, q18);
  ASSERT_EQUAL_128(0x7fc7ffffffc7ffff, 0x0000000080000000, q19);
  ASSERT_EQUAL_128(0x0000000080000000, 0x7fc7ffffffc7ffff, q20);
  TEARDOWN();
}


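// FCVTXN narrows double to single using round-to-odd, which avoids double
// rounding if the result is later rounded again to a narrower format.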
TEST(neon_fcvtxn) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);

  START();
  __ Movi(v0.V2D(), 0x3e200000be200000, 0x7f800000ff800000);
  __ Movi(v1.V2D(), 0x0000000080000000, 0x7f8fffffff8fffff);
  __ Movi(v2.V2D(), 0x7fcfffffffcfffff, 0x0000000180000001);
  __ Movi(v3.V2D(), 0x3fc4000000000000, 0xbfc4000000000000);
  __ Movi(v4.V2D(), 0x7ff0000000000000, 0xfff0000000000000);
  __ Movi(v5.V2D(), 0x0000000000000000, 0x8000000000000000);
  __ Movi(v6.V2D(), 0x7ff0ffffffffffff, 0xfff0ffffffffffff);
  __ Movi(v7.V2D(), 0x7ff8ffffffffffff, 0xfff8ffffffffffff);
  __ Movi(v8.V2D(), 0x0000000000000001, 0x8000000000000001);
  __ Movi(v9.V2D(), 0x41ed000000000000, 0x41efffffffefffff);
  __ Fcvtxn(v16.V2S(), v0.V2D());
  __ Fcvtxn2(v16.V4S(), v1.V2D());
  __ Fcvtxn(v17.V2S(), v2.V2D());
  __ Fcvtxn2(v17.V4S(), v3.V2D());
  __ Fcvtxn(v18.V2S(), v4.V2D());
  __ Fcvtxn2(v18.V4S(), v5.V2D());
  __ Fcvtxn(v19.V2S(), v6.V2D());
  __ Fcvtxn2(v19.V4S(), v7.V2D());
  __ Fcvtxn(v20.V2S(), v8.V2D());
  __ Fcvtxn2(v20.V4S(), v9.V2D());
  __ Fcvtxn(s21, d0);
  END();

  RUN();
  ASSERT_EQUAL_128(0x000000017f7fffff, 0x310000057f7fffff, q16);
  ASSERT_EQUAL_128(0x3e200000be200000, 0x7f7fffff00000001, q17);
  ASSERT_EQUAL_128(0x0000000080000000, 0x7f800000ff800000, q18);
  ASSERT_EQUAL_128(0x7fc7ffffffc7ffff, 0x7fc7ffffffc7ffff, q19);
  ASSERT_EQUAL_128(0x4f6800004f7fffff, 0x0000000180000001, q20);
  ASSERT_EQUAL_128(0, 0x7f7fffff, q21);
  TEARDOWN();
}


// Test that scvtf and ucvtf can convert the 64-bit input into the expected
// value. All possible values of 'fbits' are tested. The expected value is
// modified accordingly in each case.
//
// The expected value is specified as the bit encoding of the expected double
// produced by scvtf (expected_scvtf_bits) as well as ucvtf
// (expected_ucvtf_bits).
//
// Where the input value is representable by int32_t or uint32_t, conversions
// from W registers will also be tested.
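//
// With 'fbits' set to n, the input is treated as a fixed-point value with n
// fraction bits, so each expected result is simply the integer result scaled
// by 2^-n; the checking loops below rely on this.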
TestUScvtfHelper(uint64_t in,uint64_t expected_scvtf_bits,uint64_t expected_ucvtf_bits)14440 static void TestUScvtfHelper(uint64_t in,
14441 uint64_t expected_scvtf_bits,
14442 uint64_t expected_ucvtf_bits) {
14443 uint64_t u64 = in;
14444 uint32_t u32 = u64 & 0xffffffff;
14445 int64_t s64 = static_cast<int64_t>(in);
14446 int32_t s32 = s64 & 0x7fffffff;
14447
14448 bool cvtf_s32 = (s64 == s32);
14449 bool cvtf_u32 = (u64 == u32);
14450
14451 double results_scvtf_x[65];
14452 double results_ucvtf_x[65];
14453 double results_scvtf_w[33];
14454 double results_ucvtf_w[33];
14455
14456 SETUP_WITH_FEATURES(CPUFeatures::kFP);
14457
14458 START();
14459
14460 __ Mov(x0, reinterpret_cast<uintptr_t>(results_scvtf_x));
14461 __ Mov(x1, reinterpret_cast<uintptr_t>(results_ucvtf_x));
14462 __ Mov(x2, reinterpret_cast<uintptr_t>(results_scvtf_w));
14463 __ Mov(x3, reinterpret_cast<uintptr_t>(results_ucvtf_w));
14464
14465 __ Mov(x10, s64);
14466
14467 // Corrupt the top word, in case it is accidentally used during W-register
14468 // conversions.
14469 __ Mov(x11, 0x5555555555555555);
14470 __ Bfi(x11, x10, 0, kWRegSize);
14471
14472 // Test integer conversions.
14473 __ Scvtf(d0, x10);
14474 __ Ucvtf(d1, x10);
14475 __ Scvtf(d2, w11);
14476 __ Ucvtf(d3, w11);
14477 __ Str(d0, MemOperand(x0));
14478 __ Str(d1, MemOperand(x1));
14479 __ Str(d2, MemOperand(x2));
14480 __ Str(d3, MemOperand(x3));
14481
14482 // Test all possible values of fbits.
14483 for (int fbits = 1; fbits <= 32; fbits++) {
14484 __ Scvtf(d0, x10, fbits);
14485 __ Ucvtf(d1, x10, fbits);
14486 __ Scvtf(d2, w11, fbits);
14487 __ Ucvtf(d3, w11, fbits);
14488 __ Str(d0, MemOperand(x0, fbits * kDRegSizeInBytes));
14489 __ Str(d1, MemOperand(x1, fbits * kDRegSizeInBytes));
14490 __ Str(d2, MemOperand(x2, fbits * kDRegSizeInBytes));
14491 __ Str(d3, MemOperand(x3, fbits * kDRegSizeInBytes));
14492 }
14493
14494 // Conversions from W registers can only handle fbits values <= 32, so just
14495 // test conversions from X registers for 32 < fbits <= 64.
14496 for (int fbits = 33; fbits <= 64; fbits++) {
14497 __ Scvtf(d0, x10, fbits);
14498 __ Ucvtf(d1, x10, fbits);
14499 __ Str(d0, MemOperand(x0, fbits * kDRegSizeInBytes));
14500 __ Str(d1, MemOperand(x1, fbits * kDRegSizeInBytes));
14501 }
14502
14503 END();
14504 RUN();
14505
14506 // Check the results.
14507 double expected_scvtf_base = RawbitsToDouble(expected_scvtf_bits);
14508 double expected_ucvtf_base = RawbitsToDouble(expected_ucvtf_bits);
14509
14510 for (int fbits = 0; fbits <= 32; fbits++) {
14511 double expected_scvtf = expected_scvtf_base / std::pow(2, fbits);
14512 double expected_ucvtf = expected_ucvtf_base / std::pow(2, fbits);
14513 ASSERT_EQUAL_FP64(expected_scvtf, results_scvtf_x[fbits]);
14514 ASSERT_EQUAL_FP64(expected_ucvtf, results_ucvtf_x[fbits]);
14515 if (cvtf_s32) ASSERT_EQUAL_FP64(expected_scvtf, results_scvtf_w[fbits]);
14516 if (cvtf_u32) ASSERT_EQUAL_FP64(expected_ucvtf, results_ucvtf_w[fbits]);
14517 }
14518 for (int fbits = 33; fbits <= 64; fbits++) {
14519 double expected_scvtf = expected_scvtf_base / std::pow(2, fbits);
14520 double expected_ucvtf = expected_ucvtf_base / std::pow(2, fbits);
14521 ASSERT_EQUAL_FP64(expected_scvtf, results_scvtf_x[fbits]);
14522 ASSERT_EQUAL_FP64(expected_ucvtf, results_ucvtf_x[fbits]);
14523 }
14524
14525 TEARDOWN();
14526 }
14527
14528
TEST(scvtf_ucvtf_double)14529 TEST(scvtf_ucvtf_double) {
14530 // Simple conversions of positive numbers which require no rounding; the
14531 // results should not depened on the rounding mode, and ucvtf and scvtf should
14532 // produce the same result.
14533 TestUScvtfHelper(0x0000000000000000, 0x0000000000000000, 0x0000000000000000);
14534 TestUScvtfHelper(0x0000000000000001, 0x3ff0000000000000, 0x3ff0000000000000);
14535 TestUScvtfHelper(0x0000000040000000, 0x41d0000000000000, 0x41d0000000000000);
14536 TestUScvtfHelper(0x0000000100000000, 0x41f0000000000000, 0x41f0000000000000);
14537 TestUScvtfHelper(0x4000000000000000, 0x43d0000000000000, 0x43d0000000000000);
14538 // Test mantissa extremities.
14539 TestUScvtfHelper(0x4000000000000400, 0x43d0000000000001, 0x43d0000000000001);
14540 // The largest int32_t that fits in a double.
14541 TestUScvtfHelper(0x000000007fffffff, 0x41dfffffffc00000, 0x41dfffffffc00000);
14542 // Values that would be negative if treated as an int32_t.
14543 TestUScvtfHelper(0x00000000ffffffff, 0x41efffffffe00000, 0x41efffffffe00000);
14544 TestUScvtfHelper(0x0000000080000000, 0x41e0000000000000, 0x41e0000000000000);
14545 TestUScvtfHelper(0x0000000080000001, 0x41e0000000200000, 0x41e0000000200000);
14546 // The largest int64_t that fits in a double.
14547 TestUScvtfHelper(0x7ffffffffffffc00, 0x43dfffffffffffff, 0x43dfffffffffffff);
14548 // Check for bit pattern reproduction.
14549 TestUScvtfHelper(0x0123456789abcde0, 0x43723456789abcde, 0x43723456789abcde);
14550 TestUScvtfHelper(0x0000000012345678, 0x41b2345678000000, 0x41b2345678000000);
14551
14552 // Simple conversions of negative int64_t values. These require no rounding,
14553 // and the results should not depend on the rounding mode.
14554 TestUScvtfHelper(0xffffffffc0000000, 0xc1d0000000000000, 0x43effffffff80000);
14555 TestUScvtfHelper(0xffffffff00000000, 0xc1f0000000000000, 0x43efffffffe00000);
14556 TestUScvtfHelper(0xc000000000000000, 0xc3d0000000000000, 0x43e8000000000000);
14557
14558 // Conversions which require rounding.
14559 TestUScvtfHelper(0x1000000000000000, 0x43b0000000000000, 0x43b0000000000000);
14560 TestUScvtfHelper(0x1000000000000001, 0x43b0000000000000, 0x43b0000000000000);
14561 TestUScvtfHelper(0x1000000000000080, 0x43b0000000000000, 0x43b0000000000000);
14562 TestUScvtfHelper(0x1000000000000081, 0x43b0000000000001, 0x43b0000000000001);
14563 TestUScvtfHelper(0x1000000000000100, 0x43b0000000000001, 0x43b0000000000001);
14564 TestUScvtfHelper(0x1000000000000101, 0x43b0000000000001, 0x43b0000000000001);
14565 TestUScvtfHelper(0x1000000000000180, 0x43b0000000000002, 0x43b0000000000002);
14566 TestUScvtfHelper(0x1000000000000181, 0x43b0000000000002, 0x43b0000000000002);
14567 TestUScvtfHelper(0x1000000000000200, 0x43b0000000000002, 0x43b0000000000002);
14568 TestUScvtfHelper(0x1000000000000201, 0x43b0000000000002, 0x43b0000000000002);
14569 TestUScvtfHelper(0x1000000000000280, 0x43b0000000000002, 0x43b0000000000002);
14570 TestUScvtfHelper(0x1000000000000281, 0x43b0000000000003, 0x43b0000000000003);
14571 TestUScvtfHelper(0x1000000000000300, 0x43b0000000000003, 0x43b0000000000003);
14572 // Check rounding of negative int64_t values (and large uint64_t values).
14573 TestUScvtfHelper(0x8000000000000000, 0xc3e0000000000000, 0x43e0000000000000);
14574 TestUScvtfHelper(0x8000000000000001, 0xc3e0000000000000, 0x43e0000000000000);
14575 TestUScvtfHelper(0x8000000000000200, 0xc3e0000000000000, 0x43e0000000000000);
14576 TestUScvtfHelper(0x8000000000000201, 0xc3dfffffffffffff, 0x43e0000000000000);
14577 TestUScvtfHelper(0x8000000000000400, 0xc3dfffffffffffff, 0x43e0000000000000);
14578 TestUScvtfHelper(0x8000000000000401, 0xc3dfffffffffffff, 0x43e0000000000001);
14579 TestUScvtfHelper(0x8000000000000600, 0xc3dffffffffffffe, 0x43e0000000000001);
14580 TestUScvtfHelper(0x8000000000000601, 0xc3dffffffffffffe, 0x43e0000000000001);
14581 TestUScvtfHelper(0x8000000000000800, 0xc3dffffffffffffe, 0x43e0000000000001);
14582 TestUScvtfHelper(0x8000000000000801, 0xc3dffffffffffffe, 0x43e0000000000001);
14583 TestUScvtfHelper(0x8000000000000a00, 0xc3dffffffffffffe, 0x43e0000000000001);
14584 TestUScvtfHelper(0x8000000000000a01, 0xc3dffffffffffffd, 0x43e0000000000001);
14585 TestUScvtfHelper(0x8000000000000c00, 0xc3dffffffffffffd, 0x43e0000000000002);
14586 // Round up to produce a result that's too big for the input to represent.
14587 TestUScvtfHelper(0x7ffffffffffffe00, 0x43e0000000000000, 0x43e0000000000000);
14588 TestUScvtfHelper(0x7fffffffffffffff, 0x43e0000000000000, 0x43e0000000000000);
14589 TestUScvtfHelper(0xfffffffffffffc00, 0xc090000000000000, 0x43f0000000000000);
14590 TestUScvtfHelper(0xffffffffffffffff, 0xbff0000000000000, 0x43f0000000000000);
14591 }
14592
14593
14594 // The same as TestUScvtfHelper, but converting to floats.
14595 static void TestUScvtf32Helper(uint64_t in,
14596 uint32_t expected_scvtf_bits,
14597 uint32_t expected_ucvtf_bits) {
14598 uint64_t u64 = in;
14599 uint32_t u32 = u64 & 0xffffffff;
14600 int64_t s64 = static_cast<int64_t>(in);
14601 int32_t s32 = s64 & 0x7fffffff;
14602
14603 bool cvtf_s32 = (s64 == s32);
14604 bool cvtf_u32 = (u64 == u32);
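// W-register conversions are only checked when the input is representable
// in 32 bits. For example, with in = 0x0000000080000001, cvtf_u32 holds
// (u64 == u32) but cvtf_s32 does not, so only the ucvtf result is compared
// against its W-register counterpart.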
14605
14606 float results_scvtf_x[65];
14607 float results_ucvtf_x[65];
14608 float results_scvtf_w[33];
14609 float results_ucvtf_w[33];
14610
14611 SETUP_WITH_FEATURES(CPUFeatures::kFP);
14612
14613 START();
14614
14615 __ Mov(x0, reinterpret_cast<uintptr_t>(results_scvtf_x));
14616 __ Mov(x1, reinterpret_cast<uintptr_t>(results_ucvtf_x));
14617 __ Mov(x2, reinterpret_cast<uintptr_t>(results_scvtf_w));
14618 __ Mov(x3, reinterpret_cast<uintptr_t>(results_ucvtf_w));
14619
14620 __ Mov(x10, s64);
14621
14622 // Corrupt the top word, in case it is accidentally used during W-register
14623 // conversions.
14624 __ Mov(x11, 0x5555555555555555);
14625 __ Bfi(x11, x10, 0, kWRegSize);
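// For example, if x10 holds 0x0000000012345678, x11 becomes
// 0x5555555512345678; a correct W-register conversion must ignore the
// 0x55555555 top word.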
14626
14627 // Test integer conversions.
14628 __ Scvtf(s0, x10);
14629 __ Ucvtf(s1, x10);
14630 __ Scvtf(s2, w11);
14631 __ Ucvtf(s3, w11);
14632 __ Str(s0, MemOperand(x0));
14633 __ Str(s1, MemOperand(x1));
14634 __ Str(s2, MemOperand(x2));
14635 __ Str(s3, MemOperand(x3));
14636
14637 // Test all possible values of fbits.
14638 for (int fbits = 1; fbits <= 32; fbits++) {
14639 __ Scvtf(s0, x10, fbits);
14640 __ Ucvtf(s1, x10, fbits);
14641 __ Scvtf(s2, w11, fbits);
14642 __ Ucvtf(s3, w11, fbits);
14643 __ Str(s0, MemOperand(x0, fbits * kSRegSizeInBytes));
14644 __ Str(s1, MemOperand(x1, fbits * kSRegSizeInBytes));
14645 __ Str(s2, MemOperand(x2, fbits * kSRegSizeInBytes));
14646 __ Str(s3, MemOperand(x3, fbits * kSRegSizeInBytes));
14647 }
14648
14649 // Conversions from W registers can only handle fbits values <= 32, so just
14650 // test conversions from X registers for 32 < fbits <= 64.
14651 for (int fbits = 33; fbits <= 64; fbits++) {
14652 __ Scvtf(s0, x10, fbits);
14653 __ Ucvtf(s1, x10, fbits);
14654 __ Str(s0, MemOperand(x0, fbits * kSRegSizeInBytes));
14655 __ Str(s1, MemOperand(x1, fbits * kSRegSizeInBytes));
14656 }
14657
14658 END();
14659 RUN();
14660
14661 // Check the results.
14662 float expected_scvtf_base = RawbitsToFloat(expected_scvtf_bits);
14663 float expected_ucvtf_base = RawbitsToFloat(expected_ucvtf_bits);
14664
14665 for (int fbits = 0; fbits <= 32; fbits++) {
14666 float expected_scvtf = expected_scvtf_base / std::pow(2.0f, fbits);
14667 float expected_ucvtf = expected_ucvtf_base / std::pow(2.0f, fbits);
14668 ASSERT_EQUAL_FP32(expected_scvtf, results_scvtf_x[fbits]);
14669 ASSERT_EQUAL_FP32(expected_ucvtf, results_ucvtf_x[fbits]);
14670 if (cvtf_s32) ASSERT_EQUAL_FP32(expected_scvtf, results_scvtf_w[fbits]);
14671 if (cvtf_u32) ASSERT_EQUAL_FP32(expected_ucvtf, results_ucvtf_w[fbits]);
14672 }
14673 for (int fbits = 33; fbits <= 64; fbits++) {
14674 float expected_scvtf = expected_scvtf_base / std::pow(2.0f, fbits);
14675 float expected_ucvtf = expected_ucvtf_base / std::pow(2.0f, fbits);
14676 ASSERT_EQUAL_FP32(expected_scvtf, results_scvtf_x[fbits]);
14677 ASSERT_EQUAL_FP32(expected_ucvtf, results_ucvtf_x[fbits]);
14678 }
14679
14680 TEARDOWN();
14681 }
14682
14683
14684 TEST(scvtf_ucvtf_float) {
14685 // Simple conversions of positive numbers which require no rounding; the
14686 // results should not depend on the rounding mode, and ucvtf and scvtf should
14687 // produce the same result.
14688 TestUScvtf32Helper(0x0000000000000000, 0x00000000, 0x00000000);
14689 TestUScvtf32Helper(0x0000000000000001, 0x3f800000, 0x3f800000);
14690 TestUScvtf32Helper(0x0000000040000000, 0x4e800000, 0x4e800000);
14691 TestUScvtf32Helper(0x0000000100000000, 0x4f800000, 0x4f800000);
14692 TestUScvtf32Helper(0x4000000000000000, 0x5e800000, 0x5e800000);
14693 // Test mantissa extremities.
14694 TestUScvtf32Helper(0x0000000000800001, 0x4b000001, 0x4b000001);
14695 TestUScvtf32Helper(0x4000008000000000, 0x5e800001, 0x5e800001);
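// (0x0000000000800001 is 2^23 + 1 and 0x4000008000000000 is
// 2^62 * (1 + 2^-23); both exercise the lowest bit of the 24-bit float
// significand.)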
14696 // The largest int32_t that fits in a float.
14697 TestUScvtf32Helper(0x000000007fffff80, 0x4effffff, 0x4effffff);
14698 // Values that would be negative if treated as an int32_t.
14699 TestUScvtf32Helper(0x00000000ffffff00, 0x4f7fffff, 0x4f7fffff);
14700 TestUScvtf32Helper(0x0000000080000000, 0x4f000000, 0x4f000000);
14701 TestUScvtf32Helper(0x0000000080000100, 0x4f000001, 0x4f000001);
14702 // The largest int64_t that fits in a float.
14703 TestUScvtf32Helper(0x7fffff8000000000, 0x5effffff, 0x5effffff);
14704 // Check for bit pattern reproduction.
14705 TestUScvtf32Helper(0x0000000000876543, 0x4b076543, 0x4b076543);
14706
14707 // Simple conversions of negative int64_t values. These require no rounding,
14708 // and the results should not depend on the rounding mode.
14709 TestUScvtf32Helper(0xfffffc0000000000, 0xd4800000, 0x5f7ffffc);
14710 TestUScvtf32Helper(0xc000000000000000, 0xde800000, 0x5f400000);
14711
14712 // Conversions which require rounding.
14713 TestUScvtf32Helper(0x0000800000000000, 0x57000000, 0x57000000);
14714 TestUScvtf32Helper(0x0000800000000001, 0x57000000, 0x57000000);
14715 TestUScvtf32Helper(0x0000800000800000, 0x57000000, 0x57000000);
14716 TestUScvtf32Helper(0x0000800000800001, 0x57000001, 0x57000001);
14717 TestUScvtf32Helper(0x0000800001000000, 0x57000001, 0x57000001);
14718 TestUScvtf32Helper(0x0000800001000001, 0x57000001, 0x57000001);
14719 TestUScvtf32Helper(0x0000800001800000, 0x57000002, 0x57000002);
14720 TestUScvtf32Helper(0x0000800001800001, 0x57000002, 0x57000002);
14721 TestUScvtf32Helper(0x0000800002000000, 0x57000002, 0x57000002);
14722 TestUScvtf32Helper(0x0000800002000001, 0x57000002, 0x57000002);
14723 TestUScvtf32Helper(0x0000800002800000, 0x57000002, 0x57000002);
14724 TestUScvtf32Helper(0x0000800002800001, 0x57000003, 0x57000003);
14725 TestUScvtf32Helper(0x0000800003000000, 0x57000003, 0x57000003);
14726 // Check rounding of negative int64_t values (and large uint64_t values).
14727 TestUScvtf32Helper(0x8000000000000000, 0xdf000000, 0x5f000000);
14728 TestUScvtf32Helper(0x8000000000000001, 0xdf000000, 0x5f000000);
14729 TestUScvtf32Helper(0x8000004000000000, 0xdf000000, 0x5f000000);
14730 TestUScvtf32Helper(0x8000004000000001, 0xdeffffff, 0x5f000000);
14731 TestUScvtf32Helper(0x8000008000000000, 0xdeffffff, 0x5f000000);
14732 TestUScvtf32Helper(0x8000008000000001, 0xdeffffff, 0x5f000001);
14733 TestUScvtf32Helper(0x800000c000000000, 0xdefffffe, 0x5f000001);
14734 TestUScvtf32Helper(0x800000c000000001, 0xdefffffe, 0x5f000001);
14735 TestUScvtf32Helper(0x8000010000000000, 0xdefffffe, 0x5f000001);
14736 TestUScvtf32Helper(0x8000010000000001, 0xdefffffe, 0x5f000001);
14737 TestUScvtf32Helper(0x8000014000000000, 0xdefffffe, 0x5f000001);
14738 TestUScvtf32Helper(0x8000014000000001, 0xdefffffd, 0x5f000001);
14739 TestUScvtf32Helper(0x8000018000000000, 0xdefffffd, 0x5f000002);
14740 // Round up to produce a result that's too big for the input to represent.
14741 TestUScvtf32Helper(0x000000007fffffc0, 0x4f000000, 0x4f000000);
14742 TestUScvtf32Helper(0x000000007fffffff, 0x4f000000, 0x4f000000);
14743 TestUScvtf32Helper(0x00000000ffffff80, 0x4f800000, 0x4f800000);
14744 TestUScvtf32Helper(0x00000000ffffffff, 0x4f800000, 0x4f800000);
14745 TestUScvtf32Helper(0x7fffffc000000000, 0x5f000000, 0x5f000000);
14746 TestUScvtf32Helper(0x7fffffffffffffff, 0x5f000000, 0x5f000000);
14747 TestUScvtf32Helper(0xffffff8000000000, 0xd3000000, 0x5f800000);
14748 TestUScvtf32Helper(0xffffffffffffffff, 0xbf800000, 0x5f800000);
14749 }
14750
14751
14752 TEST(system_mrs) {
14753 SETUP();
14754
14755 START();
14756 __ Mov(w0, 0);
14757 __ Mov(w1, 1);
14758 __ Mov(w2, 0x80000000);
14759
14760 // Set the Z and C flags.
14761 __ Cmp(w0, w0);
14762 __ Mrs(x3, NZCV);
14763
14764 // Set the N flag.
14765 __ Cmp(w0, w1);
14766 __ Mrs(x4, NZCV);
14767
14768 // Set the Z, C and V flags.
14769 __ Adds(w0, w2, w2);
14770 __ Mrs(x5, NZCV);
14771
14772 // Read the default FPCR.
14773 __ Mrs(x6, FPCR);
14774 END();
14775
14776 RUN();
14777
14778 // NZCV
14779 ASSERT_EQUAL_32(ZCFlag, w3);
14780 ASSERT_EQUAL_32(NFlag, w4);
14781 ASSERT_EQUAL_32(ZCVFlag, w5);
14782
14783 // FPCR
14784 // The default FPCR on Linux-based platforms is 0.
14785 ASSERT_EQUAL_32(0, w6);
14786
14787 TEARDOWN();
14788 }
14789
14790
14791 TEST(system_msr) {
14792 // All FPCR fields that must be implemented: AHP, DN, FZ, RMode
14793 const uint64_t fpcr_core = 0x07c00000;
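// (AHP is bit 26, DN bit 25, FZ bit 24 and RMode occupies bits [23:22], so
// the mask covers bits [26:22].)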
14794
14795 // All FPCR fields (including fields which may be read-as-zero):
14796 // Stride, Len
14797 // IDE, IXE, UFE, OFE, DZE, IOE
14798 const uint64_t fpcr_all = fpcr_core | 0x00379f00;
14799
14800 SETUP();
14801
14802 START();
14803 __ Mov(w0, 0);
14804 __ Mov(w1, 0x7fffffff);
14805
14806 __ Mov(x7, 0);
14807
14808 __ Mov(x10, NVFlag);
14809 __ Cmp(w0, w0); // Set Z and C.
14810 __ Msr(NZCV, x10); // Set N and V.
14811 // The Msr should have overwritten every flag set by the Cmp.
14812 __ Cinc(x7, x7, mi); // N
14813 __ Cinc(x7, x7, ne); // !Z
14814 __ Cinc(x7, x7, lo); // !C
14815 __ Cinc(x7, x7, vs); // V
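// (Each Cinc increments x7 only if its condition holds, so x7 counts the
// conditions that agree with the flags written by the Msr.)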
14816
14817 __ Mov(x10, ZCFlag);
14818 __ Cmn(w1, w1); // Set N and V.
14819 __ Msr(NZCV, x10); // Set Z and C.
14820 // The Msr should have overwritten every flag set by the Cmn.
14821 __ Cinc(x7, x7, pl); // !N
14822 __ Cinc(x7, x7, eq); // Z
14823 __ Cinc(x7, x7, hs); // C
14824 __ Cinc(x7, x7, vc); // !V
14825
14826 // All core FPCR fields must be writable.
14827 __ Mov(x8, fpcr_core);
14828 __ Msr(FPCR, x8);
14829 __ Mrs(x8, FPCR);
14830
14831 // All FPCR fields, including optional ones. This part of the test doesn't
14832 // achieve much other than ensuring that supported fields can be cleared by
14833 // the next test.
14834 __ Mov(x9, fpcr_all);
14835 __ Msr(FPCR, x9);
14836 __ Mrs(x9, FPCR);
14837 __ And(x9, x9, fpcr_core);
14838
14839 // The undefined bits must ignore writes.
14840 // It's conceivable that a future version of the architecture could use these
14841 // fields (making this test fail), but in the meantime this is a useful test
14842 // for the simulator.
14843 __ Mov(x10, ~fpcr_all);
14844 __ Msr(FPCR, x10);
14845 __ Mrs(x10, FPCR);
14846
14847 END();
14848
14849 RUN();
14850
14851 // We should have incremented x7 (from 0) exactly 8 times.
14852 ASSERT_EQUAL_64(8, x7);
14853
14854 ASSERT_EQUAL_64(fpcr_core, x8);
14855 ASSERT_EQUAL_64(fpcr_core, x9);
14856 ASSERT_EQUAL_64(0, x10);
14857
14858 TEARDOWN();
14859 }
14860
14861
14862 TEST(system_pauth_a) {
14863 SETUP_WITH_FEATURES(CPUFeatures::kPAuth);
14864 START();
14865
14866 // Exclude x16 and x17 from the scratch register list so we can use
14867 // Pac/Autia1716 safely.
14868 UseScratchRegisterScope temps(&masm);
14869 temps.Exclude(x16, x17);
14870 temps.Include(x10, x11);
14871
14872 // Backup stack pointer.
14873 __ Mov(x20, sp);
14874
14875 // Modifiers
14876 __ Mov(x16, 0x477d469dec0b8760);
14877 __ Mov(sp, 0x477d469dec0b8760);
14878
14879 // Generate PACs using the 3 system instructions.
14880 __ Mov(x17, 0x0000000012345678);
14881 __ Pacia1716();
14882 __ Mov(x0, x17);
14883
14884 __ Mov(lr, 0x0000000012345678);
14885 __ Paciaz();
14886 __ Mov(x1, lr);
14887
14888 __ Mov(lr, 0x0000000012345678);
14889 __ Paciasp();
14890 __ Mov(x2, lr);
14891
14892 // Authenticate the pointers above.
14893 __ Mov(x17, x0);
14894 __ Autia1716();
14895 __ Mov(x3, x17);
14896
14897 __ Mov(lr, x1);
14898 __ Autiaz();
14899 __ Mov(x4, lr);
14900
14901 __ Mov(lr, x2);
14902 __ Autiasp();
14903 __ Mov(x5, lr);
14904
14905 // Attempt to authenticate incorrect pointers.
14906 __ Mov(x17, x1);
14907 __ Autia1716();
14908 __ Mov(x6, x17);
14909
14910 __ Mov(lr, x0);
14911 __ Autiaz();
14912 __ Mov(x7, lr);
14913
14914 __ Mov(lr, x1);
14915 __ Autiasp();
14916 __ Mov(x8, lr);
14917
14918 // Strip the PAC code from the pointer in x0.
14919 __ Mov(lr, x0);
14920 __ Xpaclri();
14921 __ Mov(x9, lr);
14922
14923 // Restore stack pointer.
14924 __ Mov(sp, x20);
14925
14926 // Mask out just the PAC code bits.
14927 // TODO: use Simulator::CalculatePACMask in a nice way.
14928 __ And(x0, x0, 0x007f000000000000);
14929 __ And(x1, x1, 0x007f000000000000);
14930 __ And(x2, x2, 0x007f000000000000);
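// Assuming the usual 48-bit virtual address configuration, the PAC sits in
// bits [54:48] of the pointer, which is what this mask selects.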
14931
14932 END();
14933
14934 #ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
14935 RUN();
14936
14937 // Check PAC codes have been generated and aren't equal.
14938 // NOTE: with a different ComputePAC implementation, there may be a collision.
14939 ASSERT_NOT_EQUAL_64(0, x0);
14940 ASSERT_NOT_EQUAL_64(0, x1);
14941 ASSERT_NOT_EQUAL_64(0, x2);
14942 ASSERT_NOT_EQUAL_64(x0, x1);
14943 ASSERT_EQUAL_64(x0, x2);
14944
14945 // Pointers correctly authenticated.
14946 ASSERT_EQUAL_64(0x0000000012345678, x3);
14947 ASSERT_EQUAL_64(0x0000000012345678, x4);
14948 ASSERT_EQUAL_64(0x0000000012345678, x5);
14949
14950 // Pointers corrupted after failing to authenticate.
14951 ASSERT_EQUAL_64(0x0020000012345678, x6);
14952 ASSERT_EQUAL_64(0x0020000012345678, x7);
14953 ASSERT_EQUAL_64(0x0020000012345678, x8);
14954
14955 // Pointer with code stripped.
14956 ASSERT_EQUAL_64(0x0000000012345678, x9);
14957 #endif // VIXL_INCLUDE_SIMULATOR_AARCH64
14958
14959 TEARDOWN();
14960 }
14961
14962
14963 TEST(system_pauth_b) {
14964 SETUP_WITH_FEATURES(CPUFeatures::kPAuth);
14965 START();
14966
14967 // Exclude x16 and x17 from the scratch register list so we can use
14968 // Pac/Autia1716 safely.
14969 UseScratchRegisterScope temps(&masm);
14970 temps.Exclude(x16, x17);
14971 temps.Include(x10, x11);
14972
14973 // Backup stack pointer.
14974 __ Mov(x20, sp);
14975
14976 // Modifiers
14977 __ Mov(x16, 0x477d469dec0b8760);
14978 __ Mov(sp, 0x477d469dec0b8760);
14979
14980 // Generate PACs using the 3 system instructions.
14981 __ Mov(x17, 0x0000000012345678);
14982 __ Pacib1716();
14983 __ Mov(x0, x17);
14984
14985 __ Mov(lr, 0x0000000012345678);
14986 __ Pacibz();
14987 __ Mov(x1, lr);
14988
14989 __ Mov(lr, 0x0000000012345678);
14990 __ Pacibsp();
14991 __ Mov(x2, lr);
14992
14993 // Authenticate the pointers above.
14994 __ Mov(x17, x0);
14995 __ Autib1716();
14996 __ Mov(x3, x17);
14997
14998 __ Mov(lr, x1);
14999 __ Autibz();
15000 __ Mov(x4, lr);
15001
15002 __ Mov(lr, x2);
15003 __ Autibsp();
15004 __ Mov(x5, lr);
15005
15006 // Attempt to authenticate incorrect pointers.
15007 __ Mov(x17, x1);
15008 __ Autib1716();
15009 __ Mov(x6, x17);
15010
15011 __ Mov(lr, x0);
15012 __ Autibz();
15013 __ Mov(x7, lr);
15014
15015 __ Mov(lr, x1);
15016 __ Autibsp();
15017 __ Mov(x8, lr);
15018
15019 // Strip the PAC code from the pointer in x0.
15020 __ Mov(lr, x0);
15021 __ Xpaclri();
15022 __ Mov(x9, lr);
15023
15024 // Restore stack pointer.
15025 __ Mov(sp, x20);
15026
15027 // Mask out just the PAC code bits.
15028 // TODO: use Simulator::CalculatePACMask in a nice way.
15029 __ And(x0, x0, 0x007f000000000000);
15030 __ And(x1, x1, 0x007f000000000000);
15031 __ And(x2, x2, 0x007f000000000000);
15032
15033 END();
15034
15035 #ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
15036 RUN();
15037
15038 // Check PAC codes have been generated and aren't equal.
15039 // NOTE: with a different ComputePAC implementation, there may be a collision.
15040 ASSERT_NOT_EQUAL_64(0, x0);
15041 ASSERT_NOT_EQUAL_64(0, x1);
15042 ASSERT_NOT_EQUAL_64(0, x2);
15043 ASSERT_NOT_EQUAL_64(x0, x1);
15044 ASSERT_EQUAL_64(x0, x2);
15045
15046 // Pointers correctly authenticated.
15047 ASSERT_EQUAL_64(0x0000000012345678, x3);
15048 ASSERT_EQUAL_64(0x0000000012345678, x4);
15049 ASSERT_EQUAL_64(0x0000000012345678, x5);
15050
15051 // Pointers corrupted after failing to authenticate.
15052 ASSERT_EQUAL_64(0x0040000012345678, x6);
15053 ASSERT_EQUAL_64(0x0040000012345678, x7);
15054 ASSERT_EQUAL_64(0x0040000012345678, x8);
15055
15056 // Pointer with code stripped.
15057 ASSERT_EQUAL_64(0x0000000012345678, x9);
15058 #endif // VIXL_INCLUDE_SIMULATOR_AARCH64
15059
15060 TEARDOWN();
15061 }
15062
15063 #ifdef VIXL_NEGATIVE_TESTING
15064 TEST(system_pauth_negative_test) {
15065 SETUP_WITH_FEATURES(CPUFeatures::kPAuth);
15066 START();
15067
15068 // Test for an assert (independent of order).
15069 MUST_FAIL_WITH_MESSAGE(__ Pacia1716(),
15070 "Assertion failed "
15071 "(!GetScratchRegisterList()->IncludesAliasOf(");
15072
15073 // Test for x16 assert.
15074 {
15075 UseScratchRegisterScope temps(&masm);
15076 temps.Exclude(x17);
15077 temps.Include(x16);
15078 MUST_FAIL_WITH_MESSAGE(__ Pacia1716(),
15079 "Assertion failed "
15080 "(!GetScratchRegisterList()->IncludesAliasOf(x16))");
15081 }
15082
15083 // Test for x17 assert.
15084 {
15085 UseScratchRegisterScope temps(&masm);
15086 temps.Exclude(x16);
15087 temps.Include(x17);
15088 MUST_FAIL_WITH_MESSAGE(__ Pacia1716(),
15089 "Assertion failed "
15090 "(!GetScratchRegisterList()->IncludesAliasOf(x17))");
15091 }
15092
15093 // Repeat first test for other 1716 instructions.
15094 MUST_FAIL_WITH_MESSAGE(__ Pacib1716(),
15095 "Assertion failed "
15096 "(!GetScratchRegisterList()->IncludesAliasOf(");
15097 MUST_FAIL_WITH_MESSAGE(__ Autia1716(),
15098 "Assertion failed "
15099 "(!GetScratchRegisterList()->IncludesAliasOf(");
15100 MUST_FAIL_WITH_MESSAGE(__ Autib1716(),
15101 "Assertion failed "
15102 "(!GetScratchRegisterList()->IncludesAliasOf(");
15103
15104 END();
15105 TEARDOWN();
15106 }
15107 #endif // VIXL_NEGATIVE_TESTING
15108
15109
15110 TEST(system) {
15111 // RegisterDump::Dump uses NEON.
15112 SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kRAS);
15113 RegisterDump before;
15114
15115 START();
15116 before.Dump(&masm);
15117 __ Nop();
15118 __ Esb();
15119 __ Csdb();
15120 END();
15121
15122 RUN();
15123
15124 ASSERT_EQUAL_REGISTERS(before);
15125 ASSERT_EQUAL_NZCV(before.flags_nzcv());
15126
15127 TEARDOWN();
15128 }
15129
15130
15131 TEST(zero_dest) {
15132 // RegisterDump::Dump uses NEON.
15133 SETUP_WITH_FEATURES(CPUFeatures::kNEON);
15134 RegisterDump before;
15135
15136 START();
15137 // Preserve the stack pointer, in case we clobber it.
15138 __ Mov(x30, sp);
15139 // Initialize the other registers used in this test.
15140 uint64_t literal_base = 0x0100001000100101;
15141 __ Mov(x0, 0);
15142 __ Mov(x1, literal_base);
15143 for (unsigned i = 2; i < x30.GetCode(); i++) {
15144 __ Add(Register::GetXRegFromCode(i), Register::GetXRegFromCode(i - 1), x1);
15145 }
15146 before.Dump(&masm);
15147
15148 // All of these instructions should be NOPs in these forms, but have
15149 // alternate forms which can write into the stack pointer.
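// (For example, "add xzr, x0, x1" simply discards its result, but the
// immediate and extended-register forms of add treat register 31 as sp
// rather than xzr.)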
15150 {
15151 ExactAssemblyScope scope(&masm, 3 * 7 * kInstructionSize);
15152 __ add(xzr, x0, x1);
15153 __ add(xzr, x1, xzr);
15154 __ add(xzr, xzr, x1);
15155
15156 __ and_(xzr, x0, x2);
15157 __ and_(xzr, x2, xzr);
15158 __ and_(xzr, xzr, x2);
15159
15160 __ bic(xzr, x0, x3);
15161 __ bic(xzr, x3, xzr);
15162 __ bic(xzr, xzr, x3);
15163
15164 __ eon(xzr, x0, x4);
15165 __ eon(xzr, x4, xzr);
15166 __ eon(xzr, xzr, x4);
15167
15168 __ eor(xzr, x0, x5);
15169 __ eor(xzr, x5, xzr);
15170 __ eor(xzr, xzr, x5);
15171
15172 __ orr(xzr, x0, x6);
15173 __ orr(xzr, x6, xzr);
15174 __ orr(xzr, xzr, x6);
15175
15176 __ sub(xzr, x0, x7);
15177 __ sub(xzr, x7, xzr);
15178 __ sub(xzr, xzr, x7);
15179 }
15180
15181 // Swap the saved stack pointer with the real one. If sp was written
15182 // during the test, it will show up in x30. This is done because the test
15183 // framework assumes that sp will be valid at the end of the test.
15184 __ Mov(x29, x30);
15185 __ Mov(x30, sp);
15186 __ Mov(sp, x29);
15187 // We used x29 as a scratch register, so reset it to make sure it doesn't
15188 // trigger a test failure.
15189 __ Add(x29, x28, x1);
15190 END();
15191
15192 RUN();
15193
15194 ASSERT_EQUAL_REGISTERS(before);
15195 ASSERT_EQUAL_NZCV(before.flags_nzcv());
15196
15197 TEARDOWN();
15198 }
15199
15200
15201 TEST(zero_dest_setflags) {
15202 // RegisterDump::Dump uses NEON.
15203 SETUP_WITH_FEATURES(CPUFeatures::kNEON);
15204 RegisterDump before;
15205
15206 START();
15207 // Preserve the stack pointer, in case we clobber it.
15208 __ Mov(x30, sp);
15209 // Initialize the other registers used in this test.
15210 uint64_t literal_base = 0x0100001000100101;
15211 __ Mov(x0, 0);
15212 __ Mov(x1, literal_base);
15213 for (int i = 2; i < 30; i++) {
15214 __ Add(Register::GetXRegFromCode(i), Register::GetXRegFromCode(i - 1), x1);
15215 }
15216 before.Dump(&masm);
15217
15218 // All of these instructions should only write to the flags in these forms,
15219 // but have alternate forms which can write into the stack pointer.
15220 {
15221 ExactAssemblyScope scope(&masm, 6 * kInstructionSize);
15222 __ adds(xzr, x0, Operand(x1, UXTX));
15223 __ adds(xzr, x1, Operand(xzr, UXTX));
15224 __ adds(xzr, x1, 1234);
15225 __ adds(xzr, x0, x1);
15226 __ adds(xzr, x1, xzr);
15227 __ adds(xzr, xzr, x1);
15228 }
15229
15230 {
15231 ExactAssemblyScope scope(&masm, 5 * kInstructionSize);
15232 __ ands(xzr, x2, ~0xf);
15233 __ ands(xzr, xzr, ~0xf);
15234 __ ands(xzr, x0, x2);
15235 __ ands(xzr, x2, xzr);
15236 __ ands(xzr, xzr, x2);
15237 }
15238
15239 {
15240 ExactAssemblyScope scope(&masm, 5 * kInstructionSize);
15241 __ bics(xzr, x3, ~0xf);
15242 __ bics(xzr, xzr, ~0xf);
15243 __ bics(xzr, x0, x3);
15244 __ bics(xzr, x3, xzr);
15245 __ bics(xzr, xzr, x3);
15246 }
15247
15248 {
15249 ExactAssemblyScope scope(&masm, 6 * kInstructionSize);
15250 __ subs(xzr, x0, Operand(x3, UXTX));
15251 __ subs(xzr, x3, Operand(xzr, UXTX));
15252 __ subs(xzr, x3, 1234);
15253 __ subs(xzr, x0, x3);
15254 __ subs(xzr, x3, xzr);
15255 __ subs(xzr, xzr, x3);
15256 }
15257
15258 // Swap the saved stack pointer with the real one. If sp was written
15259 // during the test, it will show up in x30. This is done because the test
15260 // framework assumes that sp will be valid at the end of the test.
15261 __ Mov(x29, x30);
15262 __ Mov(x30, sp);
15263 __ Mov(sp, x29);
15264 // We used x29 as a scratch register, so reset it to make sure it doesn't
15265 // trigger a test failure.
15266 __ Add(x29, x28, x1);
15267 END();
15268
15269 RUN();
15270
15271 ASSERT_EQUAL_REGISTERS(before);
15272
15273 TEARDOWN();
15274 }
15275
15276
15277 TEST(stack_pointer_override) {
15278 // This test generates some stack maintenance code, but the test only checks
15279 // the reported state.
15280 SETUP();
15281 START();
15282
15283 // The default stack pointer in VIXL is sp.
15284 VIXL_CHECK(sp.Is(__ StackPointer()));
15285 __ SetStackPointer(x0);
15286 VIXL_CHECK(x0.Is(__ StackPointer()));
15287 __ SetStackPointer(x28);
15288 VIXL_CHECK(x28.Is(__ StackPointer()));
15289 __ SetStackPointer(sp);
15290 VIXL_CHECK(sp.Is(__ StackPointer()));
15291
15292 END();
15293 RUN();
15294 TEARDOWN();
15295 }
15296
15297
15298 TEST(peek_poke_simple) {
15299 SETUP();
15300 START();
15301
15302 static const RegList x0_to_x3 =
15303 x0.GetBit() | x1.GetBit() | x2.GetBit() | x3.GetBit();
15304 static const RegList x10_to_x13 =
15305 x10.GetBit() | x11.GetBit() | x12.GetBit() | x13.GetBit();
15306
15307 // The literal base is chosen to have two useful properties:
15308 // * When multiplied by small values (such as a register index), this value
15309 // is clearly readable in the result.
15310 // * The value is not formed from repeating fixed-size smaller values, so it
15311 // can be used to detect endianness-related errors.
15312 uint64_t literal_base = 0x0100001000100101;
15313
15314 // Initialize the registers.
15315 __ Mov(x0, literal_base);
15316 __ Add(x1, x0, x0);
15317 __ Add(x2, x1, x0);
15318 __ Add(x3, x2, x0);
15319
15320 __ Claim(32);
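// Claim moves the stack pointer down by 32 bytes; the Pokes and Peeks below
// then address that space at byte offsets from sp.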
15321
15322 // Simple exchange.
15323 // After this test:
15324 // x0-x3 should be unchanged.
15325 // w10-w13 should contain the lower words of x0-x3.
15326 __ Poke(x0, 0);
15327 __ Poke(x1, 8);
15328 __ Poke(x2, 16);
15329 __ Poke(x3, 24);
15330 Clobber(&masm, x0_to_x3);
15331 __ Peek(x0, 0);
15332 __ Peek(x1, 8);
15333 __ Peek(x2, 16);
15334 __ Peek(x3, 24);
15335
15336 __ Poke(w0, 0);
15337 __ Poke(w1, 4);
15338 __ Poke(w2, 8);
15339 __ Poke(w3, 12);
15340 Clobber(&masm, x10_to_x13);
15341 __ Peek(w10, 0);
15342 __ Peek(w11, 4);
15343 __ Peek(w12, 8);
15344 __ Peek(w13, 12);
15345
15346 __ Drop(32);
15347
15348 END();
15349 RUN();
15350
15351 ASSERT_EQUAL_64(literal_base * 1, x0);
15352 ASSERT_EQUAL_64(literal_base * 2, x1);
15353 ASSERT_EQUAL_64(literal_base * 3, x2);
15354 ASSERT_EQUAL_64(literal_base * 4, x3);
15355
15356 ASSERT_EQUAL_64((literal_base * 1) & 0xffffffff, x10);
15357 ASSERT_EQUAL_64((literal_base * 2) & 0xffffffff, x11);
15358 ASSERT_EQUAL_64((literal_base * 3) & 0xffffffff, x12);
15359 ASSERT_EQUAL_64((literal_base * 4) & 0xffffffff, x13);
15360
15361 TEARDOWN();
15362 }
15363
15364
15365 TEST(peek_poke_unaligned) {
15366 SETUP();
15367 START();
15368
15369 // The literal base is chosen to have two useful properties:
15370 // * When multiplied by small values (such as a register index), this value
15371 // is clearly readable in the result.
15372 // * The value is not formed from repeating fixed-size smaller values, so it
15373 // can be used to detect endianness-related errors.
15374 uint64_t literal_base = 0x0100001000100101;
15375
15376 // Initialize the registers.
15377 __ Mov(x0, literal_base);
15378 __ Add(x1, x0, x0);
15379 __ Add(x2, x1, x0);
15380 __ Add(x3, x2, x0);
15381 __ Add(x4, x3, x0);
15382 __ Add(x5, x4, x0);
15383 __ Add(x6, x5, x0);
15384
15385 __ Claim(32);
15386
15387 // Unaligned exchanges.
15388 // After this test:
15389 // x0-x6 should be unchanged.
15390 // w10-w12 should contain the lower words of x0-x2.
15391 __ Poke(x0, 1);
15392 Clobber(&masm, x0.GetBit());
15393 __ Peek(x0, 1);
15394 __ Poke(x1, 2);
15395 Clobber(&masm, x1.GetBit());
15396 __ Peek(x1, 2);
15397 __ Poke(x2, 3);
15398 Clobber(&masm, x2.GetBit());
15399 __ Peek(x2, 3);
15400 __ Poke(x3, 4);
15401 Clobber(&masm, x3.GetBit());
15402 __ Peek(x3, 4);
15403 __ Poke(x4, 5);
15404 Clobber(&masm, x4.GetBit());
15405 __ Peek(x4, 5);
15406 __ Poke(x5, 6);
15407 Clobber(&masm, x5.GetBit());
15408 __ Peek(x5, 6);
15409 __ Poke(x6, 7);
15410 Clobber(&masm, x6.GetBit());
15411 __ Peek(x6, 7);
15412
15413 __ Poke(w0, 1);
15414 Clobber(&masm, w10.GetBit());
15415 __ Peek(w10, 1);
15416 __ Poke(w1, 2);
15417 Clobber(&masm, w11.GetBit());
15418 __ Peek(w11, 2);
15419 __ Poke(w2, 3);
15420 Clobber(&masm, w12.GetBit());
15421 __ Peek(w12, 3);
15422
15423 __ Drop(32);
15424
15425 END();
15426 RUN();
15427
15428 ASSERT_EQUAL_64(literal_base * 1, x0);
15429 ASSERT_EQUAL_64(literal_base * 2, x1);
15430 ASSERT_EQUAL_64(literal_base * 3, x2);
15431 ASSERT_EQUAL_64(literal_base * 4, x3);
15432 ASSERT_EQUAL_64(literal_base * 5, x4);
15433 ASSERT_EQUAL_64(literal_base * 6, x5);
15434 ASSERT_EQUAL_64(literal_base * 7, x6);
15435
15436 ASSERT_EQUAL_64((literal_base * 1) & 0xffffffff, x10);
15437 ASSERT_EQUAL_64((literal_base * 2) & 0xffffffff, x11);
15438 ASSERT_EQUAL_64((literal_base * 3) & 0xffffffff, x12);
15439
15440 TEARDOWN();
15441 }
15442
15443
15444 TEST(peek_poke_endianness) {
15445 SETUP();
15446 START();
15447
15448 // The literal base is chosen to have two useful properties:
15449 // * When multiplied by small values (such as a register index), this value
15450 // is clearly readable in the result.
15451 // * The value is not formed from repeating fixed-size smaller values, so it
15452 // can be used to detect endianness-related errors.
15453 uint64_t literal_base = 0x0100001000100101;
15454
15455 // Initialize the registers.
15456 __ Mov(x0, literal_base);
15457 __ Add(x1, x0, x0);
15458
15459 __ Claim(32);
15460
15461 // Endianness tests.
15462 // After this section:
15463 // x4 should match x0[31:0]:x0[63:32]
15464 // w5 should match w1[15:0]:w1[31:16]
15465 __ Poke(x0, 0);
15466 __ Poke(x0, 8);
15467 __ Peek(x4, 4);
15468
15469 __ Poke(w1, 0);
15470 __ Poke(w1, 4);
15471 __ Peek(w5, 2);
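// On a little-endian target, the two copies of x0 at offsets 0 and 8 make
// the doubleword at offset 4 read as x0's high word followed by x0's low
// word, which is what the x4 peek observes.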
15472
15473 __ Drop(32);
15474
15475 END();
15476 RUN();
15477
15478 uint64_t x0_expected = literal_base * 1;
15479 uint64_t x1_expected = literal_base * 2;
15480 uint64_t x4_expected = (x0_expected << 32) | (x0_expected >> 32);
15481 uint64_t x5_expected =
15482 ((x1_expected << 16) & 0xffff0000) | ((x1_expected >> 16) & 0x0000ffff);
15483
15484 ASSERT_EQUAL_64(x0_expected, x0);
15485 ASSERT_EQUAL_64(x1_expected, x1);
15486 ASSERT_EQUAL_64(x4_expected, x4);
15487 ASSERT_EQUAL_64(x5_expected, x5);
15488
15489 TEARDOWN();
15490 }
15491
15492
15493 TEST(peek_poke_mixed) {
15494 SETUP();
15495 START();
15496
15497 // Acquire all temps from the MacroAssembler. They are used arbitrarily below.
15498 UseScratchRegisterScope temps(&masm);
15499 temps.ExcludeAll();
15500
15501 // The literal base is chosen to have two useful properties:
15502 // * When multiplied by small values (such as a register index), this value
15503 // is clearly readable in the result.
15504 // * The value is not formed from repeating fixed-size smaller values, so it
15505 // can be used to detect endianness-related errors.
15506 uint64_t literal_base = 0x0100001000100101;
15507
15508 // Initialize the registers.
15509 __ Mov(x0, literal_base);
15510 __ Add(x1, x0, x0);
15511 __ Add(x2, x1, x0);
15512 __ Add(x3, x2, x0);
15513
15514 __ Claim(32);
15515
15516 // Mix with other stack operations.
15517 // After this section:
15518 // x0-x3 should be unchanged.
15519 // x6 should match x1[31:0]:x0[63:32]
15520 // w7 should match x1[15:0]:x0[63:48]
15521 __ Poke(x1, 8);
15522 __ Poke(x0, 0);
15523 {
15524 VIXL_ASSERT(__ StackPointer().Is(sp));
15525 __ Mov(x4, __ StackPointer());
15526 __ SetStackPointer(x4);
15527
15528 __ Poke(wzr, 0); // Clobber the space we're about to drop.
15529 __ Drop(4);
15530 __ Peek(x6, 0);
15531 __ Claim(8);
15532 __ Peek(w7, 10);
15533 __ Poke(x3, 28);
15534 __ Poke(xzr, 0); // Clobber the space we're about to drop.
15535 __ Drop(8);
15536 __ Poke(x2, 12);
15537 __ Push(w0);
15538
15539 __ Mov(sp, __ StackPointer());
15540 __ SetStackPointer(sp);
15541 }
15542
15543 __ Pop(x0, x1, x2, x3);
15544
15545 END();
15546 RUN();
15547
15548 uint64_t x0_expected = literal_base * 1;
15549 uint64_t x1_expected = literal_base * 2;
15550 uint64_t x2_expected = literal_base * 3;
15551 uint64_t x3_expected = literal_base * 4;
15552 uint64_t x6_expected = (x1_expected << 32) | (x0_expected >> 32);
15553 uint64_t x7_expected =
15554 ((x1_expected << 16) & 0xffff0000) | ((x0_expected >> 48) & 0x0000ffff);
15555
15556 ASSERT_EQUAL_64(x0_expected, x0);
15557 ASSERT_EQUAL_64(x1_expected, x1);
15558 ASSERT_EQUAL_64(x2_expected, x2);
15559 ASSERT_EQUAL_64(x3_expected, x3);
15560 ASSERT_EQUAL_64(x6_expected, x6);
15561 ASSERT_EQUAL_64(x7_expected, x7);
15562
15563 TEARDOWN();
15564 }
15565
15566
15567 TEST(peek_poke_reglist) {
15568 SETUP_WITH_FEATURES(CPUFeatures::kFP);
15569
15570 START();
15571
15572 // Acquire all temps from the MacroAssembler. They are used arbitrarily below.
15573 UseScratchRegisterScope temps(&masm);
15574 temps.ExcludeAll();
15575
15576 // The literal base is chosen to have two useful properties:
15577 // * When multiplied by small values (such as a register index), this value
15578 // is clearly readable in the result.
15579 // * The value is not formed from repeating fixed-size smaller values, so it
15580 // can be used to detect endianness-related errors.
15581 uint64_t base = 0x0100001000100101;
15582
15583 // Initialize the registers.
15584 __ Mov(x1, base);
15585 __ Add(x2, x1, x1);
15586 __ Add(x3, x2, x1);
15587 __ Add(x4, x3, x1);
15588
15589 CPURegList list_1(x1, x2, x3, x4);
15590 CPURegList list_2(x11, x12, x13, x14);
15591 int list_1_size = list_1.GetTotalSizeInBytes();
15592
15593 __ Claim(2 * list_1_size);
15594
15595 __ PokeCPURegList(list_1, 0);
15596 __ PokeXRegList(list_1.GetList(), list_1_size);
15597 __ PeekCPURegList(list_2, 2 * kXRegSizeInBytes);
15598 __ PeekXRegList(x15.GetBit(), kWRegSizeInBytes);
15599 __ PeekWRegList(w16.GetBit() | w17.GetBit(), 3 * kXRegSizeInBytes);
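// The x15 peek is W-aligned rather than X-aligned, so it deliberately
// straddles the stored copies of x1 and x2; the w16/w17 peeks similarly
// split the stored copy of x4 across two W registers.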
15600
15601 __ Drop(2 * list_1_size);
15602
15603
15604 uint64_t base_d = 0x1010010001000010;
15605
15606 // Initialize the registers.
15607 __ Mov(x1, base_d);
15608 __ Add(x2, x1, x1);
15609 __ Add(x3, x2, x1);
15610 __ Add(x4, x3, x1);
15611 __ Fmov(d1, x1);
15612 __ Fmov(d2, x2);
15613 __ Fmov(d3, x3);
15614 __ Fmov(d4, x4);
15615
15616 CPURegList list_d_1(d1, d2, d3, d4);
15617 CPURegList list_d_2(d11, d12, d13, d14);
15618 int list_d_1_size = list_d_1.GetTotalSizeInBytes();
15619
15620 __ Claim(2 * list_d_1_size);
15621
15622 __ PokeCPURegList(list_d_1, 0);
15623 __ PokeDRegList(list_d_1.GetList(), list_d_1_size);
15624 __ PeekCPURegList(list_d_2, 2 * kDRegSizeInBytes);
15625 __ PeekDRegList(d15.GetBit(), kSRegSizeInBytes);
15626 __ PeekSRegList(s16.GetBit() | s17.GetBit(), 3 * kDRegSizeInBytes);
15627
15628 __ Drop(2 * list_d_1_size);
15629
15630
15631 END();
15632 RUN();
15633
15634 ASSERT_EQUAL_64(3 * base, x11);
15635 ASSERT_EQUAL_64(4 * base, x12);
15636 ASSERT_EQUAL_64(1 * base, x13);
15637 ASSERT_EQUAL_64(2 * base, x14);
15638 ASSERT_EQUAL_64(((1 * base) >> kWRegSize) | ((2 * base) << kWRegSize), x15);
15639 ASSERT_EQUAL_64(2 * base, x14);
15640 ASSERT_EQUAL_32((4 * base) & kWRegMask, w16);
15641 ASSERT_EQUAL_32((4 * base) >> kWRegSize, w17);
15642
15643 ASSERT_EQUAL_FP64(RawbitsToDouble(3 * base_d), d11);
15644 ASSERT_EQUAL_FP64(RawbitsToDouble(4 * base_d), d12);
15645 ASSERT_EQUAL_FP64(RawbitsToDouble(1 * base_d), d13);
15646 ASSERT_EQUAL_FP64(RawbitsToDouble(2 * base_d), d14);
15647 ASSERT_EQUAL_FP64(RawbitsToDouble((base_d >> kSRegSize) |
15648 ((2 * base_d) << kSRegSize)),
15649 d15);
15650 ASSERT_EQUAL_FP64(RawbitsToDouble(2 * base_d), d14);
15651 ASSERT_EQUAL_FP32(RawbitsToFloat((4 * base_d) & kSRegMask), s16);
15652 ASSERT_EQUAL_FP32(RawbitsToFloat((4 * base_d) >> kSRegSize), s17);
15653
15654 TEARDOWN();
15655 }
15656
15657
15658 TEST(load_store_reglist) {
15659 SETUP_WITH_FEATURES(CPUFeatures::kFP);
15660
15661 START();
15662
15663 // The literal base is chosen to have two useful properties:
15664 // * When multiplied by small values (such as a register index), this value
15665 // is clearly readable in the result.
15666 // * The value is not formed from repeating fixed-size smaller values, so it
15667 // can be used to detect endianness-related errors.
15668 uint64_t high_base = UINT32_C(0x01000010);
15669 uint64_t low_base = UINT32_C(0x00100101);
15670 uint64_t base = (high_base << 32) | low_base;
15671 uint64_t array[21];
15672 memset(array, 0, sizeof(array));
15673
15674 // Initialize the registers.
15675 __ Mov(x1, base);
15676 __ Add(x2, x1, x1);
15677 __ Add(x3, x2, x1);
15678 __ Add(x4, x3, x1);
15679 __ Fmov(d1, x1);
15680 __ Fmov(d2, x2);
15681 __ Fmov(d3, x3);
15682 __ Fmov(d4, x4);
15683 __ Fmov(d5, x1);
15684 __ Fmov(d6, x2);
15685 __ Fmov(d7, x3);
15686 __ Fmov(d8, x4);
15687
15688 Register reg_base = x20;
15689 Register reg_index = x21;
15690 int size_stored = 0;
15691
15692 __ Mov(reg_base, reinterpret_cast<uintptr_t>(&array));
15693
15694 // Test aligned accesses.
15695 CPURegList list_src(w1, w2, w3, w4);
15696 CPURegList list_dst(w11, w12, w13, w14);
15697 CPURegList list_fp_src_1(d1, d2, d3, d4);
15698 CPURegList list_fp_dst_1(d11, d12, d13, d14);
15699
15700 __ StoreCPURegList(list_src, MemOperand(reg_base, 0 * sizeof(uint64_t)));
15701 __ LoadCPURegList(list_dst, MemOperand(reg_base, 0 * sizeof(uint64_t)));
15702 size_stored += 4 * kWRegSizeInBytes;
15703
15704 __ Mov(reg_index, size_stored);
15705 __ StoreCPURegList(list_src, MemOperand(reg_base, reg_index));
15706 __ LoadCPURegList(list_dst, MemOperand(reg_base, reg_index));
15707 size_stored += 4 * kWRegSizeInBytes;
15708
15709 __ StoreCPURegList(list_fp_src_1, MemOperand(reg_base, size_stored));
15710 __ LoadCPURegList(list_fp_dst_1, MemOperand(reg_base, size_stored));
15711 size_stored += 4 * kDRegSizeInBytes;
15712
15713 __ Mov(reg_index, size_stored);
15714 __ StoreCPURegList(list_fp_src_1, MemOperand(reg_base, reg_index));
15715 __ LoadCPURegList(list_fp_dst_1, MemOperand(reg_base, reg_index));
15716 size_stored += 4 * kDRegSizeInBytes;
15717
15718 // Test unaligned accesses.
15719 CPURegList list_fp_src_2(d5, d6, d7, d8);
15720 CPURegList list_fp_dst_2(d15, d16, d17, d18);
15721
15722 __ Str(wzr, MemOperand(reg_base, size_stored));
15723 size_stored += 1 * kWRegSizeInBytes;
15724 __ StoreCPURegList(list_fp_src_2, MemOperand(reg_base, size_stored));
15725 __ LoadCPURegList(list_fp_dst_2, MemOperand(reg_base, size_stored));
15726 size_stored += 4 * kDRegSizeInBytes;
15727
15728 __ Mov(reg_index, size_stored);
15729 __ StoreCPURegList(list_fp_src_2, MemOperand(reg_base, reg_index));
15730 __ LoadCPURegList(list_fp_dst_2, MemOperand(reg_base, reg_index));
15731
15732 END();
15733 RUN();
15734
15735 VIXL_CHECK(array[0] == (1 * low_base) + (2 * low_base << kWRegSize));
15736 VIXL_CHECK(array[1] == (3 * low_base) + (4 * low_base << kWRegSize));
15737 VIXL_CHECK(array[2] == (1 * low_base) + (2 * low_base << kWRegSize));
15738 VIXL_CHECK(array[3] == (3 * low_base) + (4 * low_base << kWRegSize));
15739 VIXL_CHECK(array[4] == 1 * base);
15740 VIXL_CHECK(array[5] == 2 * base);
15741 VIXL_CHECK(array[6] == 3 * base);
15742 VIXL_CHECK(array[7] == 4 * base);
15743 VIXL_CHECK(array[8] == 1 * base);
15744 VIXL_CHECK(array[9] == 2 * base);
15745 VIXL_CHECK(array[10] == 3 * base);
15746 VIXL_CHECK(array[11] == 4 * base);
15747 VIXL_CHECK(array[12] == ((1 * low_base) << kSRegSize));
15748 VIXL_CHECK(array[13] == (((2 * low_base) << kSRegSize) | (1 * high_base)));
15749 VIXL_CHECK(array[14] == (((3 * low_base) << kSRegSize) | (2 * high_base)));
15750 VIXL_CHECK(array[15] == (((4 * low_base) << kSRegSize) | (3 * high_base)));
15751 VIXL_CHECK(array[16] == (((1 * low_base) << kSRegSize) | (4 * high_base)));
15752 VIXL_CHECK(array[17] == (((2 * low_base) << kSRegSize) | (1 * high_base)));
15753 VIXL_CHECK(array[18] == (((3 * low_base) << kSRegSize) | (2 * high_base)));
15754 VIXL_CHECK(array[19] == (((4 * low_base) << kSRegSize) | (3 * high_base)));
15755 VIXL_CHECK(array[20] == (4 * high_base));
15756
15757 ASSERT_EQUAL_64(1 * low_base, x11);
15758 ASSERT_EQUAL_64(2 * low_base, x12);
15759 ASSERT_EQUAL_64(3 * low_base, x13);
15760 ASSERT_EQUAL_64(4 * low_base, x14);
15761 ASSERT_EQUAL_FP64(RawbitsToDouble(1 * base), d11);
15762 ASSERT_EQUAL_FP64(RawbitsToDouble(2 * base), d12);
15763 ASSERT_EQUAL_FP64(RawbitsToDouble(3 * base), d13);
15764 ASSERT_EQUAL_FP64(RawbitsToDouble(4 * base), d14);
15765 ASSERT_EQUAL_FP64(RawbitsToDouble(1 * base), d15);
15766 ASSERT_EQUAL_FP64(RawbitsToDouble(2 * base), d16);
15767 ASSERT_EQUAL_FP64(RawbitsToDouble(3 * base), d17);
15768 ASSERT_EQUAL_FP64(RawbitsToDouble(4 * base), d18);
15769
15770 TEARDOWN();
15771 }
15772
15773
15774 // This enum is used only as an argument to the push-pop test helpers.
15775 enum PushPopMethod {
15776 // Push or Pop using the Push and Pop methods, with blocks of up to four
15777 // registers. (Smaller blocks will be used if necessary.)
15778 PushPopByFour,
15779
15780 // Use Push<Size>RegList and Pop<Size>RegList to transfer the registers.
15781 PushPopRegList
15782 };
15783
15784
15785 // For the PushPop* tests, use the maximum number of registers that the test
15786 // supports (where a reg_count argument would otherwise be provided).
15787 static int const kPushPopUseMaxRegCount = -1;
15788
15789 // Test a simple push-pop pattern:
15790 // * Claim <claim> bytes to set the stack alignment.
15791 // * Push <reg_count> registers with size <reg_size>.
15792 // * Clobber the register contents.
15793 // * Pop <reg_count> registers to restore the original contents.
15794 // * Drop <claim> bytes to restore the original stack pointer.
15795 //
15796 // Different push and pop methods can be specified independently to test for
15797 // proper word-endian behaviour.
15798 static void PushPopSimpleHelper(int reg_count,
15799 int claim,
15800 int reg_size,
15801 PushPopMethod push_method,
15802 PushPopMethod pop_method) {
15803 SETUP();
15804
15805 START();
15806
15807 // Arbitrarily pick a register to use as a stack pointer.
15808 const Register& stack_pointer = x20;
15809 const RegList allowed = ~stack_pointer.GetBit();
15810 if (reg_count == kPushPopUseMaxRegCount) {
15811 reg_count = CountSetBits(allowed, kNumberOfRegisters);
15812 }
15813 // Work out which registers to use, based on reg_size.
15814 Register r[kNumberOfRegisters];
15815 Register x[kNumberOfRegisters];
15816 RegList list =
15817 PopulateRegisterArray(NULL, x, r, reg_size, reg_count, allowed);
15818
15819 // Acquire all temps from the MacroAssembler. They are used arbitrarily below.
15820 UseScratchRegisterScope temps(&masm);
15821 temps.ExcludeAll();
15822
15823 // The literal base is chosen to have two useful properties:
15824 // * When multiplied by small values (such as a register index), this value
15825 // is clearly readable in the result.
15826 // * The value is not formed from repeating fixed-size smaller values, so it
15827 // can be used to detect endianness-related errors.
15828 uint64_t literal_base = 0x0100001000100101;
15829
15830 {
15831 VIXL_ASSERT(__ StackPointer().Is(sp));
15832 __ Mov(stack_pointer, __ StackPointer());
15833 __ SetStackPointer(stack_pointer);
15834
15835 int i;
15836
15837 // Initialize the registers.
15838 for (i = 0; i < reg_count; i++) {
15839 // Always write into the X register, to ensure that the upper word is
15840 // properly ignored by Push when testing W registers.
15841 __ Mov(x[i], literal_base * i);
15842 }
15843
15844 // Claim memory first, as requested.
15845 __ Claim(claim);
15846
15847 switch (push_method) {
15848 case PushPopByFour:
15849 // Push high-numbered registers first (to the highest addresses).
15850 for (i = reg_count; i >= 4; i -= 4) {
15851 __ Push(r[i - 1], r[i - 2], r[i - 3], r[i - 4]);
15852 }
15853 // Finish off the leftovers.
15854 switch (i) {
15855 case 3:
15856 __ Push(r[2], r[1], r[0]);
15857 break;
15858 case 2:
15859 __ Push(r[1], r[0]);
15860 break;
15861 case 1:
15862 __ Push(r[0]);
15863 break;
15864 default:
15865 VIXL_ASSERT(i == 0);
15866 break;
15867 }
15868 break;
15869 case PushPopRegList:
15870 __ PushSizeRegList(list, reg_size);
15871 break;
15872 }
15873
15874 // Clobber all the registers, to ensure that they get repopulated by Pop.
15875 Clobber(&masm, list);
15876
15877 switch (pop_method) {
15878 case PushPopByFour:
15879 // Pop low-numbered registers first (from the lowest addresses).
15880 for (i = 0; i <= (reg_count - 4); i += 4) {
15881 __ Pop(r[i], r[i + 1], r[i + 2], r[i + 3]);
15882 }
15883 // Finish off the leftovers.
15884 switch (reg_count - i) {
15885 case 3:
15886 __ Pop(r[i], r[i + 1], r[i + 2]);
15887 break;
15888 case 2:
15889 __ Pop(r[i], r[i + 1]);
15890 break;
15891 case 1:
15892 __ Pop(r[i]);
15893 break;
15894 default:
15895 VIXL_ASSERT(i == reg_count);
15896 break;
15897 }
15898 break;
15899 case PushPopRegList:
15900 __ PopSizeRegList(list, reg_size);
15901 break;
15902 }
15903
15904 // Drop memory to restore stack_pointer.
15905 __ Drop(claim);
15906
15907 __ Mov(sp, __ StackPointer());
15908 __ SetStackPointer(sp);
15909 }
15910
15911 END();
15912
15913 RUN();
15914
15915 // Check that the register contents were preserved.
15916 // Always use ASSERT_EQUAL_64, even when testing W registers, so we can test
15917 // that the upper word was properly cleared by Pop.
15918 literal_base &= (0xffffffffffffffff >> (64 - reg_size));
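// For W registers this masks literal_base down to its low 32 bits, since
// only the low word of each register was pushed and popped.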
15919 for (int i = 0; i < reg_count; i++) {
15920 if (x[i].Is(xzr)) {
15921 ASSERT_EQUAL_64(0, x[i]);
15922 } else {
15923 ASSERT_EQUAL_64(literal_base * i, x[i]);
15924 }
15925 }
15926
15927 TEARDOWN();
15928 }
15929
15930
15931 TEST(push_pop_xreg_simple_32) {
15932 for (int claim = 0; claim <= 8; claim++) {
15933 for (int count = 0; count <= 8; count++) {
15934 PushPopSimpleHelper(count,
15935 claim,
15936 kWRegSize,
15937 PushPopByFour,
15938 PushPopByFour);
15939 PushPopSimpleHelper(count,
15940 claim,
15941 kWRegSize,
15942 PushPopByFour,
15943 PushPopRegList);
15944 PushPopSimpleHelper(count,
15945 claim,
15946 kWRegSize,
15947 PushPopRegList,
15948 PushPopByFour);
15949 PushPopSimpleHelper(count,
15950 claim,
15951 kWRegSize,
15952 PushPopRegList,
15953 PushPopRegList);
15954 }
15955 // Test with the maximum number of registers.
15956 PushPopSimpleHelper(kPushPopUseMaxRegCount,
15957 claim,
15958 kWRegSize,
15959 PushPopByFour,
15960 PushPopByFour);
15961 PushPopSimpleHelper(kPushPopUseMaxRegCount,
15962 claim,
15963 kWRegSize,
15964 PushPopByFour,
15965 PushPopRegList);
15966 PushPopSimpleHelper(kPushPopUseMaxRegCount,
15967 claim,
15968 kWRegSize,
15969 PushPopRegList,
15970 PushPopByFour);
15971 PushPopSimpleHelper(kPushPopUseMaxRegCount,
15972 claim,
15973 kWRegSize,
15974 PushPopRegList,
15975 PushPopRegList);
15976 }
15977 }
15978
15979
15980 TEST(push_pop_xreg_simple_64) {
15981 for (int claim = 0; claim <= 8; claim++) {
15982 for (int count = 0; count <= 8; count++) {
15983 PushPopSimpleHelper(count,
15984 claim,
15985 kXRegSize,
15986 PushPopByFour,
15987 PushPopByFour);
15988 PushPopSimpleHelper(count,
15989 claim,
15990 kXRegSize,
15991 PushPopByFour,
15992 PushPopRegList);
15993 PushPopSimpleHelper(count,
15994 claim,
15995 kXRegSize,
15996 PushPopRegList,
15997 PushPopByFour);
15998 PushPopSimpleHelper(count,
15999 claim,
16000 kXRegSize,
16001 PushPopRegList,
16002 PushPopRegList);
16003 }
16004 // Test with the maximum number of registers.
16005 PushPopSimpleHelper(kPushPopUseMaxRegCount,
16006 claim,
16007 kXRegSize,
16008 PushPopByFour,
16009 PushPopByFour);
16010 PushPopSimpleHelper(kPushPopUseMaxRegCount,
16011 claim,
16012 kXRegSize,
16013 PushPopByFour,
16014 PushPopRegList);
16015 PushPopSimpleHelper(kPushPopUseMaxRegCount,
16016 claim,
16017 kXRegSize,
16018 PushPopRegList,
16019 PushPopByFour);
16020 PushPopSimpleHelper(kPushPopUseMaxRegCount,
16021 claim,
16022 kXRegSize,
16023 PushPopRegList,
16024 PushPopRegList);
16025 }
16026 }
16027
16028 // For the PushPopFP* tests, use the maximum number of registers that the test
16029 // supports (where a reg_count argument would otherwise be provided).
16030 static int const kPushPopFPUseMaxRegCount = -1;
16031
16032 // Test a simple push-pop pattern:
16033 // * Claim <claim> bytes to set the stack alignment.
16034 // * Push <reg_count> FP registers with size <reg_size>.
16035 // * Clobber the register contents.
16036 // * Pop <reg_count> FP registers to restore the original contents.
16037 // * Drop <claim> bytes to restore the original stack pointer.
16038 //
16039 // Different push and pop methods can be specified independently to test for
16040 // proper word-endian behaviour.
16041 static void PushPopFPSimpleHelper(int reg_count,
16042 int claim,
16043 int reg_size,
16044 PushPopMethod push_method,
16045 PushPopMethod pop_method) {
16046 SETUP_WITH_FEATURES((reg_count == 0) ? CPUFeatures::kNone : CPUFeatures::kFP);
16047
16048 START();
16049
16050 // We can use any floating-point register. None of them are reserved for
16051 // debug code, for example.
16052 static RegList const allowed = ~0;
16053 if (reg_count == kPushPopFPUseMaxRegCount) {
16054 reg_count = CountSetBits(allowed, kNumberOfFPRegisters);
16055 }
16056 // Work out which registers to use, based on reg_size.
16057 FPRegister v[kNumberOfRegisters];
16058 FPRegister d[kNumberOfRegisters];
16059 RegList list =
16060 PopulateFPRegisterArray(NULL, d, v, reg_size, reg_count, allowed);
16061
16062 // Arbitrarily pick a register to use as a stack pointer.
16063 const Register& stack_pointer = x10;
16064
16065 // Acquire all temps from the MacroAssembler. They are used arbitrarily below.
16066 UseScratchRegisterScope temps(&masm);
16067 temps.ExcludeAll();
16068
16069 // The literal base is chosen to have two useful properties:
16070 // * When multiplied (using an integer) by small values (such as a register
16071 // index), this value is clearly readable in the result.
16072 // * The value is not formed from repeating fixed-size smaller values, so it
16073 // can be used to detect endianness-related errors.
16074 // * It is never a floating-point NaN, and will therefore always compare
16075 // equal to itself.
16076 uint64_t literal_base = 0x0100001000100101;
16077
16078 {
16079 VIXL_ASSERT(__ StackPointer().Is(sp));
16080 __ Mov(stack_pointer, __ StackPointer());
16081 __ SetStackPointer(stack_pointer);
16082
16083 int i;
16084
16085 // Initialize the registers, using X registers to load the literal.
16086 __ Mov(x0, 0);
16087 __ Mov(x1, literal_base);
16088 for (i = 0; i < reg_count; i++) {
16089 // Always write into the D register, to ensure that the upper word is
16090 // properly ignored by Push when testing S registers.
16091 __ Fmov(d[i], x0);
16092 // Calculate the next literal.
16093 __ Add(x0, x0, x1);
16094 }
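// Each d[i] now holds the bit pattern (literal_base * i) reinterpreted as a
// double; Fmov copies raw bits, and the pattern is never a NaN.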
16095
16096 // Claim memory first, as requested.
16097 __ Claim(claim);
16098
16099 switch (push_method) {
16100 case PushPopByFour:
16101 // Push high-numbered registers first (to the highest addresses).
16102 for (i = reg_count; i >= 4; i -= 4) {
16103 __ Push(v[i - 1], v[i - 2], v[i - 3], v[i - 4]);
16104 }
16105 // Finish off the leftovers.
16106 switch (i) {
16107 case 3:
16108 __ Push(v[2], v[1], v[0]);
16109 break;
16110 case 2:
16111 __ Push(v[1], v[0]);
16112 break;
16113 case 1:
16114 __ Push(v[0]);
16115 break;
16116 default:
16117 VIXL_ASSERT(i == 0);
16118 break;
16119 }
16120 break;
16121 case PushPopRegList:
16122 __ PushSizeRegList(list, reg_size, CPURegister::kVRegister);
16123 break;
16124 }
16125
16126 // Clobber all the registers, to ensure that they get repopulated by Pop.
16127 ClobberFP(&masm, list);
16128
16129 switch (pop_method) {
16130 case PushPopByFour:
16131 // Pop low-numbered registers first (from the lowest addresses).
16132 for (i = 0; i <= (reg_count - 4); i += 4) {
16133 __ Pop(v[i], v[i + 1], v[i + 2], v[i + 3]);
16134 }
16135 // Finish off the leftovers.
16136 switch (reg_count - i) {
16137 case 3:
16138 __ Pop(v[i], v[i + 1], v[i + 2]);
16139 break;
16140 case 2:
16141 __ Pop(v[i], v[i + 1]);
16142 break;
16143 case 1:
16144 __ Pop(v[i]);
16145 break;
16146 default:
16147 VIXL_ASSERT(i == reg_count);
16148 break;
16149 }
16150 break;
16151 case PushPopRegList:
16152 __ PopSizeRegList(list, reg_size, CPURegister::kVRegister);
16153 break;
16154 }
16155
16156 // Drop memory to restore the stack pointer.
16157 __ Drop(claim);
16158
16159 __ Mov(sp, __ StackPointer());
16160 __ SetStackPointer(sp);
16161 }
16162
16163 END();
16164
16165 RUN();
16166
16167 // Check that the register contents were preserved.
16168 // Always use ASSERT_EQUAL_FP64, even when testing S registers, so we can
16169 // test that the upper word was properly cleared by Pop.
16170 literal_base &= (0xffffffffffffffff >> (64 - reg_size));
16171 for (int i = 0; i < reg_count; i++) {
16172 uint64_t literal = literal_base * i;
16173 double expected;
16174 memcpy(&expected, &literal, sizeof(expected));
16175 ASSERT_EQUAL_FP64(expected, d[i]);
16176 }
16177
16178 TEARDOWN();
16179 }
16180
16181
16182 TEST(push_pop_fp_xreg_simple_32) {
16183 for (int claim = 0; claim <= 8; claim++) {
16184 for (int count = 0; count <= 8; count++) {
16185 PushPopFPSimpleHelper(count,
16186 claim,
16187 kSRegSize,
16188 PushPopByFour,
16189 PushPopByFour);
16190 PushPopFPSimpleHelper(count,
16191 claim,
16192 kSRegSize,
16193 PushPopByFour,
16194 PushPopRegList);
16195 PushPopFPSimpleHelper(count,
16196 claim,
16197 kSRegSize,
16198 PushPopRegList,
16199 PushPopByFour);
16200 PushPopFPSimpleHelper(count,
16201 claim,
16202 kSRegSize,
16203 PushPopRegList,
16204 PushPopRegList);
16205 }
16206 // Test with the maximum number of registers.
16207 PushPopFPSimpleHelper(kPushPopFPUseMaxRegCount,
16208 claim,
16209 kSRegSize,
16210 PushPopByFour,
16211 PushPopByFour);
16212 PushPopFPSimpleHelper(kPushPopFPUseMaxRegCount,
16213 claim,
16214 kSRegSize,
16215 PushPopByFour,
16216 PushPopRegList);
16217 PushPopFPSimpleHelper(kPushPopFPUseMaxRegCount,
16218 claim,
16219 kSRegSize,
16220 PushPopRegList,
16221 PushPopByFour);
16222 PushPopFPSimpleHelper(kPushPopFPUseMaxRegCount,
16223 claim,
16224 kSRegSize,
16225 PushPopRegList,
16226 PushPopRegList);
16227 }
16228 }
16229
16230
16231 TEST(push_pop_fp_xreg_simple_64) {
16232 for (int claim = 0; claim <= 8; claim++) {
16233 for (int count = 0; count <= 8; count++) {
16234 PushPopFPSimpleHelper(count,
16235 claim,
16236 kDRegSize,
16237 PushPopByFour,
16238 PushPopByFour);
16239 PushPopFPSimpleHelper(count,
16240 claim,
16241 kDRegSize,
16242 PushPopByFour,
16243 PushPopRegList);
16244 PushPopFPSimpleHelper(count,
16245 claim,
16246 kDRegSize,
16247 PushPopRegList,
16248 PushPopByFour);
16249 PushPopFPSimpleHelper(count,
16250 claim,
16251 kDRegSize,
16252 PushPopRegList,
16253 PushPopRegList);
16254 }
16255 // Test with the maximum number of registers.
16256 PushPopFPSimpleHelper(kPushPopFPUseMaxRegCount,
16257 claim,
16258 kDRegSize,
16259 PushPopByFour,
16260 PushPopByFour);
16261 PushPopFPSimpleHelper(kPushPopFPUseMaxRegCount,
16262 claim,
16263 kDRegSize,
16264 PushPopByFour,
16265 PushPopRegList);
16266 PushPopFPSimpleHelper(kPushPopFPUseMaxRegCount,
16267 claim,
16268 kDRegSize,
16269 PushPopRegList,
16270 PushPopByFour);
16271 PushPopFPSimpleHelper(kPushPopFPUseMaxRegCount,
16272 claim,
16273 kDRegSize,
16274 PushPopRegList,
16275 PushPopRegList);
16276 }
16277 }
16278
16279
16280 // Push and pop data using an overlapping combination of Push/Pop and
16281 // RegList-based methods.
16282 static void PushPopMixedMethodsHelper(int claim, int reg_size) {
16283 SETUP();
16284
16285 // Arbitrarily pick a register to use as a stack pointer.
16286 const Register& stack_pointer = x5;
16287 const RegList allowed = ~stack_pointer.GetBit();
16288 // Work out which registers to use, based on reg_size.
16289 Register r[10];
16290 Register x[10];
16291 PopulateRegisterArray(NULL, x, r, reg_size, 10, allowed);
16292
16293 // Calculate some handy register lists.
16294 RegList r0_to_r3 = 0;
16295 for (int i = 0; i <= 3; i++) {
16296 r0_to_r3 |= x[i].GetBit();
16297 }
16298 RegList r4_to_r5 = 0;
16299 for (int i = 4; i <= 5; i++) {
16300 r4_to_r5 |= x[i].GetBit();
16301 }
16302 RegList r6_to_r9 = 0;
16303 for (int i = 6; i <= 9; i++) {
16304 r6_to_r9 |= x[i].GetBit();
16305 }
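  // For illustration only: PopulateRegisterArray chooses the actual
  // registers, but if x[0..9] happened to map to x0..x9, the lists would be
  //   r0_to_r3 == 0b1111, r4_to_r5 == 0b110000, r6_to_r9 == 0b1111000000.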
16306
16307 // Acquire all temps from the MacroAssembler. They are used arbitrarily below.
16308 UseScratchRegisterScope temps(&masm);
16309 temps.ExcludeAll();
16310
16311 // The literal base is chosen to have two useful properties:
16312 // * When multiplied by small values (such as a register index), this value
16313 // is clearly readable in the result.
16314 // * The value is not formed from repeating fixed-size smaller values, so it
16315 // can be used to detect endianness-related errors.
16316 uint64_t literal_base = 0x0100001000100101;
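  // For example, literal_base * 2 is 0x0200002000200202 and literal_base * 3
  // is 0x0300003000300303, so the register index is visible in every field.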
16317
16318 START();
16319 {
16320 VIXL_ASSERT(__ StackPointer().Is(sp));
16321 __ Mov(stack_pointer, __ StackPointer());
16322 __ SetStackPointer(stack_pointer);
16323
16324 // Claim memory first, as requested.
16325 __ Claim(claim);
16326
16327 __ Mov(x[3], literal_base * 3);
16328 __ Mov(x[2], literal_base * 2);
16329 __ Mov(x[1], literal_base * 1);
16330 __ Mov(x[0], literal_base * 0);
16331
16332 __ PushSizeRegList(r0_to_r3, reg_size);
16333 __ Push(r[3], r[2]);
16334
16335 Clobber(&masm, r0_to_r3);
16336 __ PopSizeRegList(r0_to_r3, reg_size);
16337
16338 __ Push(r[2], r[1], r[3], r[0]);
16339
16340 Clobber(&masm, r4_to_r5);
16341 __ Pop(r[4], r[5]);
16342 Clobber(&masm, r6_to_r9);
16343 __ Pop(r[6], r[7], r[8], r[9]);
16344
16345 // Drop memory to restore stack_pointer.
16346 __ Drop(claim);
16347
16348 __ Mov(sp, __ StackPointer());
16349 __ SetStackPointer(sp);
16350 }
16351
16352 END();
16353
16354 RUN();
16355
16356 // Always use ASSERT_EQUAL_64, even when testing W registers, so we can test
16357 // that the upper word was properly cleared by Pop.
16358 literal_base &= (0xffffffffffffffff >> (64 - reg_size));
16359
16360 ASSERT_EQUAL_64(literal_base * 3, x[9]);
16361 ASSERT_EQUAL_64(literal_base * 2, x[8]);
16362 ASSERT_EQUAL_64(literal_base * 0, x[7]);
16363 ASSERT_EQUAL_64(literal_base * 3, x[6]);
16364 ASSERT_EQUAL_64(literal_base * 1, x[5]);
16365 ASSERT_EQUAL_64(literal_base * 2, x[4]);
16366
16367 TEARDOWN();
16368 }
16369
16370
16371 TEST(push_pop_xreg_mixed_methods_64) {
16372 for (int claim = 0; claim <= 8; claim++) {
16373 PushPopMixedMethodsHelper(claim, kXRegSize);
16374 }
16375 }
16376
16377
16378 TEST(push_pop_xreg_mixed_methods_32) {
16379 for (int claim = 0; claim <= 8; claim++) {
16380 PushPopMixedMethodsHelper(claim, kWRegSize);
16381 }
16382 }
16383
16384
16385 // Push and pop data using overlapping X- and W-sized quantities.
16386 static void PushPopWXOverlapHelper(int reg_count, int claim) {
16387 SETUP();
16388
16389 // Arbitrarily pick a register to use as a stack pointer.
16390 const Register& stack_pointer = x10;
16391 const RegList allowed = ~stack_pointer.GetBit();
16392 if (reg_count == kPushPopUseMaxRegCount) {
16393 reg_count = CountSetBits(allowed, kNumberOfRegisters);
16394 }
16395 // Work out which registers to use, based on reg_size.
16396 Register w[kNumberOfRegisters];
16397 Register x[kNumberOfRegisters];
16398 RegList list = PopulateRegisterArray(w, x, NULL, 0, reg_count, allowed);
16399
16400 // The number of W-sized slots we expect to pop. When we pop, we alternate
16401 // between W and X registers, so we need reg_count*1.5 W-sized slots.
16402 int const requested_w_slots = reg_count + reg_count / 2;
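  // Worked example: for reg_count = 5 the pops below alternate
  //   W, X, W, X, W  ->  1 + 2 + 1 + 2 + 1 = 7 W-sized slots,
  // which matches 5 + 5 / 2 (with integer division).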
16403
16404 // Track what _should_ be on the stack, using W-sized slots.
16405 static int const kMaxWSlots = kNumberOfRegisters + kNumberOfRegisters / 2;
16406 uint32_t stack[kMaxWSlots];
16407 for (int i = 0; i < kMaxWSlots; i++) {
16408 stack[i] = 0xdeadbeef;
16409 }
16410
16411 // Acquire all temps from the MacroAssembler. They are used arbitrarily below.
16412 UseScratchRegisterScope temps(&masm);
16413 temps.ExcludeAll();
16414
16415 // The literal base is chosen to have two useful properties:
16416 // * When multiplied by small values (such as a register index), this value
16417 // is clearly readable in the result.
16418 // * The value is not formed from repeating fixed-size smaller values, so it
16419 // can be used to detect endianness-related errors.
16420 static uint64_t const literal_base = 0x0100001000100101;
16421 static uint64_t const literal_base_hi = literal_base >> 32;
16422 static uint64_t const literal_base_lo = literal_base & 0xffffffff;
16423 static uint64_t const literal_base_w = literal_base & 0xffffffff;
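  // With the base above, literal_base_hi is 0x01000010 and literal_base_lo
  // (and literal_base_w) is 0x00100101.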
16424
16425 START();
16426 {
16427 VIXL_ASSERT(__ StackPointer().Is(sp));
16428 __ Mov(stack_pointer, __ StackPointer());
16429 __ SetStackPointer(stack_pointer);
16430
16431 // Initialize the registers.
16432 for (int i = 0; i < reg_count; i++) {
16433 // Always write into the X register, to ensure that the upper word is
16434 // properly ignored by Push when testing W registers.
16435 __ Mov(x[i], literal_base * i);
16436 }
16437
16438 // Claim memory first, as requested.
16439 __ Claim(claim);
16440
16441 // The push-pop pattern is as follows:
16442 // Push: Pop:
16443 // x[0](hi) -> w[0]
16444 // x[0](lo) -> x[1](hi)
16445 // w[1] -> x[1](lo)
16446 // w[1] -> w[2]
16447 // x[2](hi) -> x[2](hi)
16448 // x[2](lo) -> x[2](lo)
16449 // x[2](hi) -> w[3]
16450 // x[2](lo) -> x[4](hi)
16451 // x[2](hi) -> x[4](lo)
16452 // x[2](lo) -> w[5]
16453 // w[3] -> x[5](hi)
16454 // w[3] -> x[6](lo)
16455 // w[3] -> w[7]
16456 // w[3] -> x[8](hi)
16457 // x[4](hi) -> x[8](lo)
16458 // x[4](lo) -> w[9]
16459 // ... pattern continues ...
16460 //
16461 // That is, registers are pushed starting with the lower numbers,
16462 // alternating between x and w registers, and pushing i%4+1 copies of each,
16463 // where i is the register number.
16464     // Registers are popped starting with the higher numbers, one at a
16465     // time, alternating between x and w registers.
16466 //
16467 // This pattern provides a wide variety of alignment effects and overlaps.
16468
16469 // ---- Push ----
16470
16471 int active_w_slots = 0;
16472 for (int i = 0; active_w_slots < requested_w_slots; i++) {
16473 VIXL_ASSERT(i < reg_count);
16474 // In order to test various arguments to PushMultipleTimes, and to try to
16475 // exercise different alignment and overlap effects, we push each
16476 // register a different number of times.
16477 int times = i % 4 + 1;
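      // For example: i = 0 pushes one copy, i = 1 pushes two, i = 3 pushes
      // four, and i = 4 wraps around to one copy again.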
16478 if (i & 1) {
16479 // Push odd-numbered registers as W registers.
16480 __ PushMultipleTimes(times, w[i]);
16481 // Fill in the expected stack slots.
16482 for (int j = 0; j < times; j++) {
16483 if (w[i].Is(wzr)) {
16484 // The zero register always writes zeroes.
16485 stack[active_w_slots++] = 0;
16486 } else {
16487 stack[active_w_slots++] = literal_base_w * i;
16488 }
16489 }
16490 } else {
16491 // Push even-numbered registers as X registers.
16492 __ PushMultipleTimes(times, x[i]);
16493 // Fill in the expected stack slots.
16494 for (int j = 0; j < times; j++) {
16495 if (x[i].Is(xzr)) {
16496 // The zero register always writes zeroes.
16497 stack[active_w_slots++] = 0;
16498 stack[active_w_slots++] = 0;
16499 } else {
16500 stack[active_w_slots++] = literal_base_hi * i;
16501 stack[active_w_slots++] = literal_base_lo * i;
16502 }
16503 }
16504 }
16505 }
16506 // Because we were pushing several registers at a time, we probably pushed
16507 // more than we needed to.
16508 if (active_w_slots > requested_w_slots) {
16509 __ Drop((active_w_slots - requested_w_slots) * kWRegSizeInBytes);
16510 // Bump the number of active W-sized slots back to where it should be,
16511 // and fill the empty space with a dummy value.
16512 do {
16513 stack[active_w_slots--] = 0xdeadbeef;
16514 } while (active_w_slots > requested_w_slots);
16515 }
16516
16517 // ---- Pop ----
16518
16519 Clobber(&masm, list);
16520
16521 // If popping an even number of registers, the first one will be X-sized.
16522 // Otherwise, the first one will be W-sized.
16523 bool next_is_64 = !(reg_count & 1);
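    // For example: reg_count = 4 pops x[3] first, whereas reg_count = 5 pops
    // w[4] first, so the alternation consumes the W-sized slots exactly.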
16524 for (int i = reg_count - 1; i >= 0; i--) {
16525 if (next_is_64) {
16526 __ Pop(x[i]);
16527 active_w_slots -= 2;
16528 } else {
16529 __ Pop(w[i]);
16530 active_w_slots -= 1;
16531 }
16532 next_is_64 = !next_is_64;
16533 }
16534 VIXL_ASSERT(active_w_slots == 0);
16535
16536 // Drop memory to restore stack_pointer.
16537 __ Drop(claim);
16538
16539 __ Mov(sp, __ StackPointer());
16540 __ SetStackPointer(sp);
16541 }
16542
16543 END();
16544
16545 RUN();
16546
16547 int slot = 0;
16548 for (int i = 0; i < reg_count; i++) {
16549 // Even-numbered registers were written as W registers.
16550 // Odd-numbered registers were written as X registers.
16551 bool expect_64 = (i & 1);
16552 uint64_t expected;
16553
16554 if (expect_64) {
16555 uint64_t hi = stack[slot++];
16556 uint64_t lo = stack[slot++];
16557 expected = (hi << 32) | lo;
16558 } else {
16559 expected = stack[slot++];
16560 }
16561
16562 // Always use ASSERT_EQUAL_64, even when testing W registers, so we can
16563 // test that the upper word was properly cleared by Pop.
16564 if (x[i].Is(xzr)) {
16565 ASSERT_EQUAL_64(0, x[i]);
16566 } else {
16567 ASSERT_EQUAL_64(expected, x[i]);
16568 }
16569 }
16570 VIXL_ASSERT(slot == requested_w_slots);
16571
16572 TEARDOWN();
16573 }
16574
16575
16576 TEST(push_pop_xreg_wx_overlap) {
16577 for (int claim = 0; claim <= 8; claim++) {
16578 for (int count = 1; count <= 8; count++) {
16579 PushPopWXOverlapHelper(count, claim);
16580 }
16581 // Test with the maximum number of registers.
16582 PushPopWXOverlapHelper(kPushPopUseMaxRegCount, claim);
16583 }
16584 }
16585
16586
16587 TEST(push_pop_sp) {
16588 SETUP();
16589
16590 START();
16591
16592 VIXL_ASSERT(sp.Is(__ StackPointer()));
16593
16594 // Acquire all temps from the MacroAssembler. They are used arbitrarily below.
16595 UseScratchRegisterScope temps(&masm);
16596 temps.ExcludeAll();
16597
16598 __ Mov(x3, 0x3333333333333333);
16599 __ Mov(x2, 0x2222222222222222);
16600 __ Mov(x1, 0x1111111111111111);
16601 __ Mov(x0, 0x0000000000000000);
16602 __ Claim(2 * kXRegSizeInBytes);
16603 __ PushXRegList(x0.GetBit() | x1.GetBit() | x2.GetBit() | x3.GetBit());
16604 __ Push(x3, x2);
16605 __ PopXRegList(x0.GetBit() | x1.GetBit() | x2.GetBit() | x3.GetBit());
16606 __ Push(x2, x1, x3, x0);
16607 __ Pop(x4, x5);
16608 __ Pop(x6, x7, x8, x9);
16609
16610 __ Claim(2 * kXRegSizeInBytes);
16611 __ PushWRegList(w0.GetBit() | w1.GetBit() | w2.GetBit() | w3.GetBit());
16612 __ Push(w3, w1, w2, w0);
16613 __ PopWRegList(w10.GetBit() | w11.GetBit() | w12.GetBit() | w13.GetBit());
16614 __ Pop(w14, w15, w16, w17);
16615
16616 __ Claim(2 * kXRegSizeInBytes);
16617 __ Push(w2, w2, w1, w1);
16618 __ Push(x3, x3);
16619 __ Pop(w18, w19, w20, w21);
16620 __ Pop(x22, x23);
16621
16622 __ Claim(2 * kXRegSizeInBytes);
16623 __ PushXRegList(x1.GetBit() | x22.GetBit());
16624 __ PopXRegList(x24.GetBit() | x26.GetBit());
16625
16626 __ Claim(2 * kXRegSizeInBytes);
16627 __ PushWRegList(w1.GetBit() | w2.GetBit() | w4.GetBit() | w22.GetBit());
16628 __ PopWRegList(w25.GetBit() | w27.GetBit() | w28.GetBit() | w29.GetBit());
16629
16630 __ Claim(2 * kXRegSizeInBytes);
16631 __ PushXRegList(0);
16632 __ PopXRegList(0);
16633 __ PushXRegList(0xffffffff);
16634 __ PopXRegList(0xffffffff);
16635 __ Drop(12 * kXRegSizeInBytes);
16636 END();
16637
16638 RUN();
16639
16640 ASSERT_EQUAL_64(0x1111111111111111, x3);
16641 ASSERT_EQUAL_64(0x0000000000000000, x2);
16642 ASSERT_EQUAL_64(0x3333333333333333, x1);
16643 ASSERT_EQUAL_64(0x2222222222222222, x0);
16644 ASSERT_EQUAL_64(0x3333333333333333, x9);
16645 ASSERT_EQUAL_64(0x2222222222222222, x8);
16646 ASSERT_EQUAL_64(0x0000000000000000, x7);
16647 ASSERT_EQUAL_64(0x3333333333333333, x6);
16648 ASSERT_EQUAL_64(0x1111111111111111, x5);
16649 ASSERT_EQUAL_64(0x2222222222222222, x4);
16650
16651 ASSERT_EQUAL_32(0x11111111U, w13);
16652 ASSERT_EQUAL_32(0x33333333U, w12);
16653 ASSERT_EQUAL_32(0x00000000U, w11);
16654 ASSERT_EQUAL_32(0x22222222U, w10);
16655 ASSERT_EQUAL_32(0x11111111U, w17);
16656 ASSERT_EQUAL_32(0x00000000U, w16);
16657 ASSERT_EQUAL_32(0x33333333U, w15);
16658 ASSERT_EQUAL_32(0x22222222U, w14);
16659
16660 ASSERT_EQUAL_32(0x11111111U, w18);
16661 ASSERT_EQUAL_32(0x11111111U, w19);
16662 ASSERT_EQUAL_32(0x11111111U, w20);
16663 ASSERT_EQUAL_32(0x11111111U, w21);
16664 ASSERT_EQUAL_64(0x3333333333333333, x22);
16665 ASSERT_EQUAL_64(0x0000000000000000, x23);
16666
16667 ASSERT_EQUAL_64(0x3333333333333333, x24);
16668 ASSERT_EQUAL_64(0x3333333333333333, x26);
16669
16670 ASSERT_EQUAL_32(0x33333333U, w25);
16671 ASSERT_EQUAL_32(0x00000000U, w27);
16672 ASSERT_EQUAL_32(0x22222222U, w28);
16673 ASSERT_EQUAL_32(0x33333333U, w29);
16674 TEARDOWN();
16675 }
16676
16677
16678 TEST(printf) {
16679 // RegisterDump::Dump uses NEON.
16680 // Printf uses FP to cast FP arguments to doubles.
16681 SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);
16682
16683 START();
16684
16685 char const* test_plain_string = "Printf with no arguments.\n";
16686 char const* test_substring = "'This is a substring.'";
16687 RegisterDump before;
16688
16689 // Initialize x29 to the value of the stack pointer. We will use x29 as a
16690 // temporary stack pointer later, and initializing it in this way allows the
16691 // RegisterDump check to pass.
16692 __ Mov(x29, __ StackPointer());
16693
16694 // Test simple integer arguments.
16695 __ Mov(x0, 1234);
16696 __ Mov(x1, 0x1234);
16697
16698 // Test simple floating-point arguments.
16699 __ Fmov(d0, 1.234);
16700
16701 // Test pointer (string) arguments.
16702 __ Mov(x2, reinterpret_cast<uintptr_t>(test_substring));
16703
16704 // Test the maximum number of arguments, and sign extension.
16705 __ Mov(w3, 0xffffffff);
16706 __ Mov(w4, 0xffffffff);
16707 __ Mov(x5, 0xffffffffffffffff);
16708 __ Mov(x6, 0xffffffffffffffff);
16709 __ Fmov(s1, 1.234);
16710 __ Fmov(s2, 2.345);
16711 __ Fmov(d3, 3.456);
16712 __ Fmov(d4, 4.567);
16713
16714 // Test printing callee-saved registers.
16715 __ Mov(x28, 0x123456789abcdef);
16716 __ Fmov(d10, 42.0);
16717
16718 // Test with three arguments.
16719 __ Mov(x10, 3);
16720 __ Mov(x11, 40);
16721 __ Mov(x12, 500);
16722
16723 // A single character.
16724 __ Mov(w13, 'x');
16725
16726 // Check that we don't clobber any registers.
16727 before.Dump(&masm);
16728
16729 __ Printf(test_plain_string); // NOLINT(runtime/printf)
16730 __ Printf("x0: %" PRId64 ", x1: 0x%08" PRIx64 "\n", x0, x1);
16731 __ Printf("w5: %" PRId32 ", x5: %" PRId64 "\n", w5, x5);
16732 __ Printf("d0: %f\n", d0);
16733 __ Printf("Test %%s: %s\n", x2);
16734 __ Printf("w3(uint32): %" PRIu32 "\nw4(int32): %" PRId32
16735 "\n"
16736 "x5(uint64): %" PRIu64 "\nx6(int64): %" PRId64 "\n",
16737 w3,
16738 w4,
16739 x5,
16740 x6);
16741 __ Printf("%%f: %f\n%%g: %g\n%%e: %e\n%%E: %E\n", s1, s2, d3, d4);
16742 __ Printf("0x%" PRIx32 ", 0x%" PRIx64 "\n", w28, x28);
16743 __ Printf("%g\n", d10);
16744 __ Printf("%%%%%s%%%c%%\n", x2, w13);
16745
16746 // Print the stack pointer (sp).
16747 __ Printf("StackPointer(sp): 0x%016" PRIx64 ", 0x%08" PRIx32 "\n",
16748 __ StackPointer(),
16749 __ StackPointer().W());
16750
16751 // Test with a different stack pointer.
16752 const Register old_stack_pointer = __ StackPointer();
16753 __ Mov(x29, old_stack_pointer);
16754 __ SetStackPointer(x29);
16755 // Print the stack pointer (not sp).
16756 __ Printf("StackPointer(not sp): 0x%016" PRIx64 ", 0x%08" PRIx32 "\n",
16757 __ StackPointer(),
16758 __ StackPointer().W());
16759 __ Mov(old_stack_pointer, __ StackPointer());
16760 __ SetStackPointer(old_stack_pointer);
16761
16762 // Test with three arguments.
16763 __ Printf("3=%u, 4=%u, 5=%u\n", x10, x11, x12);
16764
16765 // Mixed argument types.
16766 __ Printf("w3: %" PRIu32 ", s1: %f, x5: %" PRIu64 ", d3: %f\n",
16767 w3,
16768 s1,
16769 x5,
16770 d3);
16771 __ Printf("s1: %f, d3: %f, w3: %" PRId32 ", x5: %" PRId64 "\n",
16772 s1,
16773 d3,
16774 w3,
16775 x5);
16776
16777 END();
16778 RUN();
16779
16780 // We cannot easily test the output of the Printf sequences, and because
16781 // Printf preserves all registers by default, we can't look at the number of
16782 // bytes that were printed. However, the printf_no_preserve test should check
16783 // that, and here we just test that we didn't clobber any registers.
16784 ASSERT_EQUAL_REGISTERS(before);
16785
16786 TEARDOWN();
16787 }
16788
16789
16790 TEST(printf_no_preserve) {
16791 // PrintfNoPreserve uses FP to cast FP arguments to doubles.
16792 SETUP_WITH_FEATURES(CPUFeatures::kFP);
16793
16794 START();
16795
16796 char const* test_plain_string = "Printf with no arguments.\n";
16797 char const* test_substring = "'This is a substring.'";
16798
16799 __ PrintfNoPreserve(test_plain_string);
16800 __ Mov(x19, x0);
16801
16802 // Test simple integer arguments.
16803 __ Mov(x0, 1234);
16804 __ Mov(x1, 0x1234);
16805 __ PrintfNoPreserve("x0: %" PRId64 ", x1: 0x%08" PRIx64 "\n", x0, x1);
16806 __ Mov(x20, x0);
16807
16808 // Test simple floating-point arguments.
16809 __ Fmov(d0, 1.234);
16810 __ PrintfNoPreserve("d0: %f\n", d0);
16811 __ Mov(x21, x0);
16812
16813 // Test pointer (string) arguments.
16814 __ Mov(x2, reinterpret_cast<uintptr_t>(test_substring));
16815 __ PrintfNoPreserve("Test %%s: %s\n", x2);
16816 __ Mov(x22, x0);
16817
16818 // Test the maximum number of arguments, and sign extension.
16819 __ Mov(w3, 0xffffffff);
16820 __ Mov(w4, 0xffffffff);
16821 __ Mov(x5, 0xffffffffffffffff);
16822 __ Mov(x6, 0xffffffffffffffff);
16823 __ PrintfNoPreserve("w3(uint32): %" PRIu32 "\nw4(int32): %" PRId32
16824 "\n"
16825 "x5(uint64): %" PRIu64 "\nx6(int64): %" PRId64 "\n",
16826 w3,
16827 w4,
16828 x5,
16829 x6);
16830 __ Mov(x23, x0);
16831
16832 __ Fmov(s1, 1.234);
16833 __ Fmov(s2, 2.345);
16834 __ Fmov(d3, 3.456);
16835 __ Fmov(d4, 4.567);
16836 __ PrintfNoPreserve("%%f: %f\n%%g: %g\n%%e: %e\n%%E: %E\n", s1, s2, d3, d4);
16837 __ Mov(x24, x0);
16838
16839 // Test printing callee-saved registers.
16840 __ Mov(x28, 0x123456789abcdef);
16841 __ PrintfNoPreserve("0x%" PRIx32 ", 0x%" PRIx64 "\n", w28, x28);
16842 __ Mov(x25, x0);
16843
16844 __ Fmov(d10, 42.0);
16845 __ PrintfNoPreserve("%g\n", d10);
16846 __ Mov(x26, x0);
16847
16848 // Test with a different stack pointer.
16849 const Register old_stack_pointer = __ StackPointer();
16850 __ Mov(x29, old_stack_pointer);
16851 __ SetStackPointer(x29);
16852 // Print the stack pointer (not sp).
16853 __ PrintfNoPreserve("StackPointer(not sp): 0x%016" PRIx64 ", 0x%08" PRIx32
16854 "\n",
16855 __ StackPointer(),
16856 __ StackPointer().W());
16857 __ Mov(x27, x0);
16858 __ Mov(old_stack_pointer, __ StackPointer());
16859 __ SetStackPointer(old_stack_pointer);
16860
16861 // Test with three arguments.
16862 __ Mov(x3, 3);
16863 __ Mov(x4, 40);
16864 __ Mov(x5, 500);
16865 __ PrintfNoPreserve("3=%u, 4=%u, 5=%u\n", x3, x4, x5);
16866 __ Mov(x28, x0);
16867
16868 // Mixed argument types.
16869 __ Mov(w3, 0xffffffff);
16870 __ Fmov(s1, 1.234);
16871 __ Mov(x5, 0xffffffffffffffff);
16872 __ Fmov(d3, 3.456);
16873 __ PrintfNoPreserve("w3: %" PRIu32 ", s1: %f, x5: %" PRIu64 ", d3: %f\n",
16874 w3,
16875 s1,
16876 x5,
16877 d3);
16878 __ Mov(x29, x0);
16879
16880 END();
16881 RUN();
16882
16883 // We cannot easily test the exact output of the Printf sequences, but we can
16884 // use the return code to check that the string length was correct.
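  // For example, "x0: 1234, x1: 0x00001234\n" is 24 visible characters plus
  // the newline, giving the expected return value of 25 checked below.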
16885
16886 // Printf with no arguments.
16887 ASSERT_EQUAL_64(strlen(test_plain_string), x19);
16888 // x0: 1234, x1: 0x00001234
16889 ASSERT_EQUAL_64(25, x20);
16890 // d0: 1.234000
16891 ASSERT_EQUAL_64(13, x21);
16892 // Test %s: 'This is a substring.'
16893 ASSERT_EQUAL_64(32, x22);
16894 // w3(uint32): 4294967295
16895 // w4(int32): -1
16896 // x5(uint64): 18446744073709551615
16897 // x6(int64): -1
16898 ASSERT_EQUAL_64(23 + 14 + 33 + 14, x23);
16899 // %f: 1.234000
16900 // %g: 2.345
16901 // %e: 3.456000e+00
16902 // %E: 4.567000E+00
16903 ASSERT_EQUAL_64(13 + 10 + 17 + 17, x24);
16904 // 0x89abcdef, 0x123456789abcdef
16905 ASSERT_EQUAL_64(30, x25);
16906 // 42
16907 ASSERT_EQUAL_64(3, x26);
16908 // StackPointer(not sp): 0x00007fb037ae2370, 0x37ae2370
16909 // Note: This is an example value, but the field width is fixed here so the
16910 // string length is still predictable.
16911 ASSERT_EQUAL_64(53, x27);
16912 // 3=3, 4=40, 5=500
16913 ASSERT_EQUAL_64(17, x28);
16914 // w3: 4294967295, s1: 1.234000, x5: 18446744073709551615, d3: 3.456000
16915 ASSERT_EQUAL_64(69, x29);
16916
16917 TEARDOWN();
16918 }
16919
16920
16921 #ifndef VIXL_INCLUDE_SIMULATOR_AARCH64
16922 TEST(trace) {
16923 // The Trace helper should not generate any code unless the simulator is being
16924 // used.
16925 SETUP();
16926 START();
16927
16928 Label start;
16929 __ Bind(&start);
16930 __ Trace(LOG_ALL, TRACE_ENABLE);
16931 __ Trace(LOG_ALL, TRACE_DISABLE);
16932 VIXL_CHECK(__ GetSizeOfCodeGeneratedSince(&start) == 0);
16933
16934 END();
16935 TEARDOWN();
16936 }
16937 #endif
16938
16939
16940 #ifndef VIXL_INCLUDE_SIMULATOR_AARCH64
16941 TEST(log) {
16942 // The Log helper should not generate any code unless the simulator is being
16943 // used.
16944 SETUP();
16945 START();
16946
16947 Label start;
16948 __ Bind(&start);
16949 __ Log(LOG_ALL);
16950 VIXL_CHECK(__ GetSizeOfCodeGeneratedSince(&start) == 0);
16951
16952 END();
16953 TEARDOWN();
16954 }
16955 #endif
16956
16957
16958 TEST(blr_lr) {
16959   // A simple test to check that the simulator correctly handles "blr lr".
16960 SETUP();
16961
16962 START();
16963 Label target;
16964 Label end;
16965
16966 __ Mov(x0, 0x0);
16967 __ Adr(lr, &target);
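  // Adr materialises the PC-relative address of `target` in lr, so the Blr
  // below must read lr (to branch) before overwriting it with the return
  // address.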
16968
16969 __ Blr(lr);
16970 __ Mov(x0, 0xdeadbeef);
16971 __ B(&end);
16972
16973 __ Bind(&target);
16974 __ Mov(x0, 0xc001c0de);
16975
16976 __ Bind(&end);
16977 END();
16978
16979 RUN();
16980
16981 ASSERT_EQUAL_64(0xc001c0de, x0);
16982
16983 TEARDOWN();
16984 }
16985
16986
16987 TEST(barriers) {
16988   // Generate all supported barriers. This is just a smoke test.
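  // Each (domain, type) pair below selects a distinct barrier option; for
  // example, Dmb(FullSystem, BarrierAll) encodes the architectural "dmb sy".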
16989 SETUP();
16990
16991 START();
16992
16993 // DMB
16994 __ Dmb(FullSystem, BarrierAll);
16995 __ Dmb(FullSystem, BarrierReads);
16996 __ Dmb(FullSystem, BarrierWrites);
16997 __ Dmb(FullSystem, BarrierOther);
16998
16999 __ Dmb(InnerShareable, BarrierAll);
17000 __ Dmb(InnerShareable, BarrierReads);
17001 __ Dmb(InnerShareable, BarrierWrites);
17002 __ Dmb(InnerShareable, BarrierOther);
17003
17004 __ Dmb(NonShareable, BarrierAll);
17005 __ Dmb(NonShareable, BarrierReads);
17006 __ Dmb(NonShareable, BarrierWrites);
17007 __ Dmb(NonShareable, BarrierOther);
17008
17009 __ Dmb(OuterShareable, BarrierAll);
17010 __ Dmb(OuterShareable, BarrierReads);
17011 __ Dmb(OuterShareable, BarrierWrites);
17012 __ Dmb(OuterShareable, BarrierOther);
17013
17014 // DSB
17015 __ Dsb(FullSystem, BarrierAll);
17016 __ Dsb(FullSystem, BarrierReads);
17017 __ Dsb(FullSystem, BarrierWrites);
17018 __ Dsb(FullSystem, BarrierOther);
17019
17020 __ Dsb(InnerShareable, BarrierAll);
17021 __ Dsb(InnerShareable, BarrierReads);
17022 __ Dsb(InnerShareable, BarrierWrites);
17023 __ Dsb(InnerShareable, BarrierOther);
17024
17025 __ Dsb(NonShareable, BarrierAll);
17026 __ Dsb(NonShareable, BarrierReads);
17027 __ Dsb(NonShareable, BarrierWrites);
17028 __ Dsb(NonShareable, BarrierOther);
17029
17030 __ Dsb(OuterShareable, BarrierAll);
17031 __ Dsb(OuterShareable, BarrierReads);
17032 __ Dsb(OuterShareable, BarrierWrites);
17033 __ Dsb(OuterShareable, BarrierOther);
17034
17035 // ISB
17036 __ Isb();
17037
17038 END();
17039
17040 RUN();
17041
17042 TEARDOWN();
17043 }
17044
17045
17046 TEST(process_nan_double) {
17047 // Make sure that NaN propagation works correctly.
17048 double sn = RawbitsToDouble(0x7ff5555511111111);
17049 double qn = RawbitsToDouble(0x7ffaaaaa11111111);
17050 VIXL_ASSERT(IsSignallingNaN(sn));
17051 VIXL_ASSERT(IsQuietNaN(qn));
17052
17053 // The input NaNs after passing through ProcessNaN.
17054 double sn_proc = RawbitsToDouble(0x7ffd555511111111);
17055 double qn_proc = qn;
17056 VIXL_ASSERT(IsQuietNaN(sn_proc));
17057 VIXL_ASSERT(IsQuietNaN(qn_proc));
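  // Quieting a double NaN sets the top fraction bit (bit 51), which is why
  // sn (0x7ff55555...) becomes sn_proc (0x7ffd5555...) above.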
17058
17059 SETUP_WITH_FEATURES(CPUFeatures::kFP);
17060
17061 START();
17062
17063 // Execute a number of instructions which all use ProcessNaN, and check that
17064 // they all handle the NaN correctly.
17065 __ Fmov(d0, sn);
17066 __ Fmov(d10, qn);
17067
17068 // Operations that always propagate NaNs unchanged, even signalling NaNs.
17069 // - Signalling NaN
17070 __ Fmov(d1, d0);
17071 __ Fabs(d2, d0);
17072 __ Fneg(d3, d0);
17073 // - Quiet NaN
17074 __ Fmov(d11, d10);
17075 __ Fabs(d12, d10);
17076 __ Fneg(d13, d10);
17077
17078 // Operations that use ProcessNaN.
17079 // - Signalling NaN
17080 __ Fsqrt(d4, d0);
17081 __ Frinta(d5, d0);
17082 __ Frintn(d6, d0);
17083 __ Frintz(d7, d0);
17084 // - Quiet NaN
17085 __ Fsqrt(d14, d10);
17086 __ Frinta(d15, d10);
17087 __ Frintn(d16, d10);
17088 __ Frintz(d17, d10);
17089
17090 // The behaviour of fcvt is checked in TEST(fcvt_sd).
17091
17092 END();
17093 RUN();
17094
17095 uint64_t qn_raw = DoubleToRawbits(qn);
17096 uint64_t sn_raw = DoubleToRawbits(sn);
17097
17098 // - Signalling NaN
17099 ASSERT_EQUAL_FP64(sn, d1);
17100 ASSERT_EQUAL_FP64(RawbitsToDouble(sn_raw & ~kDSignMask), d2);
17101 ASSERT_EQUAL_FP64(RawbitsToDouble(sn_raw ^ kDSignMask), d3);
17102 // - Quiet NaN
17103 ASSERT_EQUAL_FP64(qn, d11);
17104 ASSERT_EQUAL_FP64(RawbitsToDouble(qn_raw & ~kDSignMask), d12);
17105 ASSERT_EQUAL_FP64(RawbitsToDouble(qn_raw ^ kDSignMask), d13);
17106
17107 // - Signalling NaN
17108 ASSERT_EQUAL_FP64(sn_proc, d4);
17109 ASSERT_EQUAL_FP64(sn_proc, d5);
17110 ASSERT_EQUAL_FP64(sn_proc, d6);
17111 ASSERT_EQUAL_FP64(sn_proc, d7);
17112 // - Quiet NaN
17113 ASSERT_EQUAL_FP64(qn_proc, d14);
17114 ASSERT_EQUAL_FP64(qn_proc, d15);
17115 ASSERT_EQUAL_FP64(qn_proc, d16);
17116 ASSERT_EQUAL_FP64(qn_proc, d17);
17117
17118 TEARDOWN();
17119 }
17120
17121
17122 TEST(process_nan_float) {
17123 // Make sure that NaN propagation works correctly.
17124 float sn = RawbitsToFloat(0x7f951111);
17125 float qn = RawbitsToFloat(0x7fea1111);
17126 VIXL_ASSERT(IsSignallingNaN(sn));
17127 VIXL_ASSERT(IsQuietNaN(qn));
17128
17129 // The input NaNs after passing through ProcessNaN.
17130 float sn_proc = RawbitsToFloat(0x7fd51111);
17131 float qn_proc = qn;
17132 VIXL_ASSERT(IsQuietNaN(sn_proc));
17133 VIXL_ASSERT(IsQuietNaN(qn_proc));
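  // For floats the quiet bit is fraction bit 22, so 0x7f951111 becomes
  // 0x7fd51111 when quieted.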
17134
17135 SETUP_WITH_FEATURES(CPUFeatures::kFP);
17136
17137 START();
17138
17139 // Execute a number of instructions which all use ProcessNaN, and check that
17140 // they all handle the NaN correctly.
17141 __ Fmov(s0, sn);
17142 __ Fmov(s10, qn);
17143
17144 // Operations that always propagate NaNs unchanged, even signalling NaNs.
17145 // - Signalling NaN
17146 __ Fmov(s1, s0);
17147 __ Fabs(s2, s0);
17148 __ Fneg(s3, s0);
17149 // - Quiet NaN
17150 __ Fmov(s11, s10);
17151 __ Fabs(s12, s10);
17152 __ Fneg(s13, s10);
17153
17154 // Operations that use ProcessNaN.
17155 // - Signalling NaN
17156 __ Fsqrt(s4, s0);
17157 __ Frinta(s5, s0);
17158 __ Frintn(s6, s0);
17159 __ Frintz(s7, s0);
17160 // - Quiet NaN
17161 __ Fsqrt(s14, s10);
17162 __ Frinta(s15, s10);
17163 __ Frintn(s16, s10);
17164 __ Frintz(s17, s10);
17165
17166 // The behaviour of fcvt is checked in TEST(fcvt_sd).
17167
17168 END();
17169 RUN();
17170
17171 uint32_t qn_raw = FloatToRawbits(qn);
17172 uint32_t sn_raw = FloatToRawbits(sn);
17173
17174 // - Signalling NaN
17175 ASSERT_EQUAL_FP32(sn, s1);
17176 ASSERT_EQUAL_FP32(RawbitsToFloat(sn_raw & ~kSSignMask), s2);
17177 ASSERT_EQUAL_FP32(RawbitsToFloat(sn_raw ^ kSSignMask), s3);
17178 // - Quiet NaN
17179 ASSERT_EQUAL_FP32(qn, s11);
17180 ASSERT_EQUAL_FP32(RawbitsToFloat(qn_raw & ~kSSignMask), s12);
17181 ASSERT_EQUAL_FP32(RawbitsToFloat(qn_raw ^ kSSignMask), s13);
17182
17183 // - Signalling NaN
17184 ASSERT_EQUAL_FP32(sn_proc, s4);
17185 ASSERT_EQUAL_FP32(sn_proc, s5);
17186 ASSERT_EQUAL_FP32(sn_proc, s6);
17187 ASSERT_EQUAL_FP32(sn_proc, s7);
17188 // - Quiet NaN
17189 ASSERT_EQUAL_FP32(qn_proc, s14);
17190 ASSERT_EQUAL_FP32(qn_proc, s15);
17191 ASSERT_EQUAL_FP32(qn_proc, s16);
17192 ASSERT_EQUAL_FP32(qn_proc, s17);
17193
17194 TEARDOWN();
17195 }
17196
17197 // TODO: TEST(process_nan_half) {}
17198
17199 static void ProcessNaNsHelper(double n, double m, double expected) {
17200 VIXL_ASSERT(IsNaN(n) || IsNaN(m));
17201 VIXL_ASSERT(IsNaN(expected));
17202
17203 SETUP_WITH_FEATURES(CPUFeatures::kFP);
17204
17205 START();
17206
17207 // Execute a number of instructions which all use ProcessNaNs, and check that
17208 // they all propagate NaNs correctly.
17209 __ Fmov(d0, n);
17210 __ Fmov(d1, m);
17211
17212 __ Fadd(d2, d0, d1);
17213 __ Fsub(d3, d0, d1);
17214 __ Fmul(d4, d0, d1);
17215 __ Fdiv(d5, d0, d1);
17216 __ Fmax(d6, d0, d1);
17217 __ Fmin(d7, d0, d1);
17218
17219 END();
17220 RUN();
17221
17222 ASSERT_EQUAL_FP64(expected, d2);
17223 ASSERT_EQUAL_FP64(expected, d3);
17224 ASSERT_EQUAL_FP64(expected, d4);
17225 ASSERT_EQUAL_FP64(expected, d5);
17226 ASSERT_EQUAL_FP64(expected, d6);
17227 ASSERT_EQUAL_FP64(expected, d7);
17228
17229 TEARDOWN();
17230 }
17231
17232
17233 TEST(process_nans_double) {
17234 // Make sure that NaN propagation works correctly.
17235 double sn = RawbitsToDouble(0x7ff5555511111111);
17236 double sm = RawbitsToDouble(0x7ff5555522222222);
17237 double qn = RawbitsToDouble(0x7ffaaaaa11111111);
17238 double qm = RawbitsToDouble(0x7ffaaaaa22222222);
17239 VIXL_ASSERT(IsSignallingNaN(sn));
17240 VIXL_ASSERT(IsSignallingNaN(sm));
17241 VIXL_ASSERT(IsQuietNaN(qn));
17242 VIXL_ASSERT(IsQuietNaN(qm));
17243
17244 // The input NaNs after passing through ProcessNaN.
17245 double sn_proc = RawbitsToDouble(0x7ffd555511111111);
17246 double sm_proc = RawbitsToDouble(0x7ffd555522222222);
17247 double qn_proc = qn;
17248 double qm_proc = qm;
17249 VIXL_ASSERT(IsQuietNaN(sn_proc));
17250 VIXL_ASSERT(IsQuietNaN(sm_proc));
17251 VIXL_ASSERT(IsQuietNaN(qn_proc));
17252 VIXL_ASSERT(IsQuietNaN(qm_proc));
17253
17254 // Quiet NaNs are propagated.
17255 ProcessNaNsHelper(qn, 0, qn_proc);
17256 ProcessNaNsHelper(0, qm, qm_proc);
17257 ProcessNaNsHelper(qn, qm, qn_proc);
17258
17259 // Signalling NaNs are propagated, and made quiet.
17260 ProcessNaNsHelper(sn, 0, sn_proc);
17261 ProcessNaNsHelper(0, sm, sm_proc);
17262 ProcessNaNsHelper(sn, sm, sn_proc);
17263
17264 // Signalling NaNs take precedence over quiet NaNs.
17265 ProcessNaNsHelper(sn, qm, sn_proc);
17266 ProcessNaNsHelper(qn, sm, sm_proc);
17267 ProcessNaNsHelper(sn, sm, sn_proc);
17268 }
17269
17270
17271 static void ProcessNaNsHelper(float n, float m, float expected) {
17272 VIXL_ASSERT(IsNaN(n) || IsNaN(m));
17273 VIXL_ASSERT(IsNaN(expected));
17274
17275 SETUP_WITH_FEATURES(CPUFeatures::kFP);
17276
17277 START();
17278
17279 // Execute a number of instructions which all use ProcessNaNs, and check that
17280 // they all propagate NaNs correctly.
17281 __ Fmov(s0, n);
17282 __ Fmov(s1, m);
17283
17284 __ Fadd(s2, s0, s1);
17285 __ Fsub(s3, s0, s1);
17286 __ Fmul(s4, s0, s1);
17287 __ Fdiv(s5, s0, s1);
17288 __ Fmax(s6, s0, s1);
17289 __ Fmin(s7, s0, s1);
17290
17291 END();
17292 RUN();
17293
17294 ASSERT_EQUAL_FP32(expected, s2);
17295 ASSERT_EQUAL_FP32(expected, s3);
17296 ASSERT_EQUAL_FP32(expected, s4);
17297 ASSERT_EQUAL_FP32(expected, s5);
17298 ASSERT_EQUAL_FP32(expected, s6);
17299 ASSERT_EQUAL_FP32(expected, s7);
17300
17301 TEARDOWN();
17302 }
17303
17304
17305 TEST(process_nans_float) {
17306 // Make sure that NaN propagation works correctly.
17307 float sn = RawbitsToFloat(0x7f951111);
17308 float sm = RawbitsToFloat(0x7f952222);
17309 float qn = RawbitsToFloat(0x7fea1111);
17310 float qm = RawbitsToFloat(0x7fea2222);
17311 VIXL_ASSERT(IsSignallingNaN(sn));
17312 VIXL_ASSERT(IsSignallingNaN(sm));
17313 VIXL_ASSERT(IsQuietNaN(qn));
17314 VIXL_ASSERT(IsQuietNaN(qm));
17315
17316 // The input NaNs after passing through ProcessNaN.
17317 float sn_proc = RawbitsToFloat(0x7fd51111);
17318 float sm_proc = RawbitsToFloat(0x7fd52222);
17319 float qn_proc = qn;
17320 float qm_proc = qm;
17321 VIXL_ASSERT(IsQuietNaN(sn_proc));
17322 VIXL_ASSERT(IsQuietNaN(sm_proc));
17323 VIXL_ASSERT(IsQuietNaN(qn_proc));
17324 VIXL_ASSERT(IsQuietNaN(qm_proc));
17325
17326 // Quiet NaNs are propagated.
17327 ProcessNaNsHelper(qn, 0, qn_proc);
17328 ProcessNaNsHelper(0, qm, qm_proc);
17329 ProcessNaNsHelper(qn, qm, qn_proc);
17330
17331 // Signalling NaNs are propagated, and made quiet.
17332 ProcessNaNsHelper(sn, 0, sn_proc);
17333 ProcessNaNsHelper(0, sm, sm_proc);
17334 ProcessNaNsHelper(sn, sm, sn_proc);
17335
17336 // Signalling NaNs take precedence over quiet NaNs.
17337 ProcessNaNsHelper(sn, qm, sn_proc);
17338 ProcessNaNsHelper(qn, sm, sm_proc);
17339 ProcessNaNsHelper(sn, sm, sn_proc);
17340 }
17341
17342
17343 static void ProcessNaNsHelper(Float16 n, Float16 m, Float16 expected) {
17344 VIXL_ASSERT(IsNaN(n) || IsNaN(m));
17345 VIXL_ASSERT(IsNaN(expected));
17346
17347 SETUP_WITH_FEATURES(CPUFeatures::kFP, CPUFeatures::kFPHalf);
17348
17349 START();
17350
17351 // Execute a number of instructions which all use ProcessNaNs, and check that
17352 // they all propagate NaNs correctly.
17353 __ Fmov(h0, n);
17354 __ Fmov(h1, m);
17355
17356 __ Fadd(h2, h0, h1);
17357 __ Fsub(h3, h0, h1);
17358 __ Fmul(h4, h0, h1);
17359 __ Fdiv(h5, h0, h1);
17360 __ Fmax(h6, h0, h1);
17361 __ Fmin(h7, h0, h1);
17362
17363 END();
17364
17365 #ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
17366 RUN();
17367 ASSERT_EQUAL_FP16(expected, h2);
17368 ASSERT_EQUAL_FP16(expected, h3);
17369 ASSERT_EQUAL_FP16(expected, h4);
17370 ASSERT_EQUAL_FP16(expected, h5);
17371 ASSERT_EQUAL_FP16(expected, h6);
17372 ASSERT_EQUAL_FP16(expected, h7);
17373 #else
17374 USE(expected);
17375 #endif
17376
17377 TEARDOWN();
17378 }
17379
17380
17381 TEST(process_nans_half) {
17382 // Make sure that NaN propagation works correctly.
17383 Float16 sn(RawbitsToFloat16(0x7c11));
17384 Float16 sm(RawbitsToFloat16(0xfc22));
17385 Float16 qn(RawbitsToFloat16(0x7e33));
17386 Float16 qm(RawbitsToFloat16(0xfe44));
17387 VIXL_ASSERT(IsSignallingNaN(sn));
17388 VIXL_ASSERT(IsSignallingNaN(sm));
17389 VIXL_ASSERT(IsQuietNaN(qn));
17390 VIXL_ASSERT(IsQuietNaN(qm));
17391
17392 // The input NaNs after passing through ProcessNaN.
17393 Float16 sn_proc(RawbitsToFloat16(0x7e11));
17394 Float16 sm_proc(RawbitsToFloat16(0xfe22));
17395 Float16 qn_proc = qn;
17396 Float16 qm_proc = qm;
17397 VIXL_ASSERT(IsQuietNaN(sn_proc));
17398 VIXL_ASSERT(IsQuietNaN(sm_proc));
17399 VIXL_ASSERT(IsQuietNaN(qn_proc));
17400 VIXL_ASSERT(IsQuietNaN(qm_proc));
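  // For half precision the quiet bit is fraction bit 9, so 0x7c11 becomes
  // 0x7e11 (and 0xfc22 becomes 0xfe22) when quieted.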
17401
17402 // Quiet NaNs are propagated.
17403 ProcessNaNsHelper(qn, Float16(), qn_proc);
17404 ProcessNaNsHelper(Float16(), qm, qm_proc);
17405 ProcessNaNsHelper(qn, qm, qn_proc);
17406
17407 // Signalling NaNs are propagated, and made quiet.
17408 ProcessNaNsHelper(sn, Float16(), sn_proc);
17409 ProcessNaNsHelper(Float16(), sm, sm_proc);
17410 ProcessNaNsHelper(sn, sm, sn_proc);
17411
17412 // Signalling NaNs take precedence over quiet NaNs.
17413 ProcessNaNsHelper(sn, qm, sn_proc);
17414 ProcessNaNsHelper(qn, sm, sm_proc);
17415 ProcessNaNsHelper(sn, sm, sn_proc);
17416 }
17417
17418
17419 static void DefaultNaNHelper(float n, float m, float a) {
17420 VIXL_ASSERT(IsNaN(n) || IsNaN(m) || IsNaN(a));
17421
17422 bool test_1op = IsNaN(n);
17423 bool test_2op = IsNaN(n) || IsNaN(m);
17424
17425 SETUP_WITH_FEATURES(CPUFeatures::kFP);
17426 START();
17427
17428 // Enable Default-NaN mode in the FPCR.
17429 __ Mrs(x0, FPCR);
17430 __ Orr(x1, x0, DN_mask);
17431 __ Msr(FPCR, x1);
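  // DN_mask sets FPCR.DN (bit 25); while it is set, every operation that
  // would produce a NaN returns the default quiet NaN instead.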
17432
17433 // Execute a number of instructions which all use ProcessNaNs, and check that
17434 // they all produce the default NaN.
17435 __ Fmov(s0, n);
17436 __ Fmov(s1, m);
17437 __ Fmov(s2, a);
17438
17439 if (test_1op) {
17440 // Operations that always propagate NaNs unchanged, even signalling NaNs.
17441 __ Fmov(s10, s0);
17442 __ Fabs(s11, s0);
17443 __ Fneg(s12, s0);
17444
17445 // Operations that use ProcessNaN.
17446 __ Fsqrt(s13, s0);
17447 __ Frinta(s14, s0);
17448 __ Frintn(s15, s0);
17449 __ Frintz(s16, s0);
17450
17451 // Fcvt usually has special NaN handling, but it respects default-NaN mode.
17452 __ Fcvt(d17, s0);
17453 }
17454
17455 if (test_2op) {
17456 __ Fadd(s18, s0, s1);
17457 __ Fsub(s19, s0, s1);
17458 __ Fmul(s20, s0, s1);
17459 __ Fdiv(s21, s0, s1);
17460 __ Fmax(s22, s0, s1);
17461 __ Fmin(s23, s0, s1);
17462 }
17463
17464 __ Fmadd(s24, s0, s1, s2);
17465 __ Fmsub(s25, s0, s1, s2);
17466 __ Fnmadd(s26, s0, s1, s2);
17467 __ Fnmsub(s27, s0, s1, s2);
17468
17469 // Restore FPCR.
17470 __ Msr(FPCR, x0);
17471
17472 END();
17473 RUN();
17474
17475 if (test_1op) {
17476 uint32_t n_raw = FloatToRawbits(n);
17477 ASSERT_EQUAL_FP32(n, s10);
17478 ASSERT_EQUAL_FP32(RawbitsToFloat(n_raw & ~kSSignMask), s11);
17479 ASSERT_EQUAL_FP32(RawbitsToFloat(n_raw ^ kSSignMask), s12);
17480 ASSERT_EQUAL_FP32(kFP32DefaultNaN, s13);
17481 ASSERT_EQUAL_FP32(kFP32DefaultNaN, s14);
17482 ASSERT_EQUAL_FP32(kFP32DefaultNaN, s15);
17483 ASSERT_EQUAL_FP32(kFP32DefaultNaN, s16);
17484 ASSERT_EQUAL_FP64(kFP64DefaultNaN, d17);
17485 }
17486
17487 if (test_2op) {
17488 ASSERT_EQUAL_FP32(kFP32DefaultNaN, s18);
17489 ASSERT_EQUAL_FP32(kFP32DefaultNaN, s19);
17490 ASSERT_EQUAL_FP32(kFP32DefaultNaN, s20);
17491 ASSERT_EQUAL_FP32(kFP32DefaultNaN, s21);
17492 ASSERT_EQUAL_FP32(kFP32DefaultNaN, s22);
17493 ASSERT_EQUAL_FP32(kFP32DefaultNaN, s23);
17494 }
17495
17496 ASSERT_EQUAL_FP32(kFP32DefaultNaN, s24);
17497 ASSERT_EQUAL_FP32(kFP32DefaultNaN, s25);
17498 ASSERT_EQUAL_FP32(kFP32DefaultNaN, s26);
17499 ASSERT_EQUAL_FP32(kFP32DefaultNaN, s27);
17500
17501 TEARDOWN();
17502 }
17503
17504
17505 TEST(default_nan_float) {
17506 float sn = RawbitsToFloat(0x7f951111);
17507 float sm = RawbitsToFloat(0x7f952222);
17508 float sa = RawbitsToFloat(0x7f95aaaa);
17509 float qn = RawbitsToFloat(0x7fea1111);
17510 float qm = RawbitsToFloat(0x7fea2222);
17511 float qa = RawbitsToFloat(0x7feaaaaa);
17512 VIXL_ASSERT(IsSignallingNaN(sn));
17513 VIXL_ASSERT(IsSignallingNaN(sm));
17514 VIXL_ASSERT(IsSignallingNaN(sa));
17515 VIXL_ASSERT(IsQuietNaN(qn));
17516 VIXL_ASSERT(IsQuietNaN(qm));
17517 VIXL_ASSERT(IsQuietNaN(qa));
17518
17519 // - Signalling NaNs
17520 DefaultNaNHelper(sn, 0.0f, 0.0f);
17521 DefaultNaNHelper(0.0f, sm, 0.0f);
17522 DefaultNaNHelper(0.0f, 0.0f, sa);
17523 DefaultNaNHelper(sn, sm, 0.0f);
17524 DefaultNaNHelper(0.0f, sm, sa);
17525 DefaultNaNHelper(sn, 0.0f, sa);
17526 DefaultNaNHelper(sn, sm, sa);
17527 // - Quiet NaNs
17528 DefaultNaNHelper(qn, 0.0f, 0.0f);
17529 DefaultNaNHelper(0.0f, qm, 0.0f);
17530 DefaultNaNHelper(0.0f, 0.0f, qa);
17531 DefaultNaNHelper(qn, qm, 0.0f);
17532 DefaultNaNHelper(0.0f, qm, qa);
17533 DefaultNaNHelper(qn, 0.0f, qa);
17534 DefaultNaNHelper(qn, qm, qa);
17535 // - Mixed NaNs
17536 DefaultNaNHelper(qn, sm, sa);
17537 DefaultNaNHelper(sn, qm, sa);
17538 DefaultNaNHelper(sn, sm, qa);
17539 DefaultNaNHelper(qn, qm, sa);
17540 DefaultNaNHelper(sn, qm, qa);
17541 DefaultNaNHelper(qn, sm, qa);
17542 DefaultNaNHelper(qn, qm, qa);
17543 }
17544
17545
17546 static void DefaultNaNHelper(double n, double m, double a) {
17547 VIXL_ASSERT(IsNaN(n) || IsNaN(m) || IsNaN(a));
17548
17549 bool test_1op = IsNaN(n);
17550 bool test_2op = IsNaN(n) || IsNaN(m);
17551
17552 SETUP_WITH_FEATURES(CPUFeatures::kFP);
17553
17554 START();
17555
17556 // Enable Default-NaN mode in the FPCR.
17557 __ Mrs(x0, FPCR);
17558 __ Orr(x1, x0, DN_mask);
17559 __ Msr(FPCR, x1);
17560
17561 // Execute a number of instructions which all use ProcessNaNs, and check that
17562 // they all produce the default NaN.
17563 __ Fmov(d0, n);
17564 __ Fmov(d1, m);
17565 __ Fmov(d2, a);
17566
17567 if (test_1op) {
17568 // Operations that always propagate NaNs unchanged, even signalling NaNs.
17569 __ Fmov(d10, d0);
17570 __ Fabs(d11, d0);
17571 __ Fneg(d12, d0);
17572
17573 // Operations that use ProcessNaN.
17574 __ Fsqrt(d13, d0);
17575 __ Frinta(d14, d0);
17576 __ Frintn(d15, d0);
17577 __ Frintz(d16, d0);
17578
17579 // Fcvt usually has special NaN handling, but it respects default-NaN mode.
17580 __ Fcvt(s17, d0);
17581 }
17582
17583 if (test_2op) {
17584 __ Fadd(d18, d0, d1);
17585 __ Fsub(d19, d0, d1);
17586 __ Fmul(d20, d0, d1);
17587 __ Fdiv(d21, d0, d1);
17588 __ Fmax(d22, d0, d1);
17589 __ Fmin(d23, d0, d1);
17590 }
17591
17592 __ Fmadd(d24, d0, d1, d2);
17593 __ Fmsub(d25, d0, d1, d2);
17594 __ Fnmadd(d26, d0, d1, d2);
17595 __ Fnmsub(d27, d0, d1, d2);
17596
17597 // Restore FPCR.
17598 __ Msr(FPCR, x0);
17599
17600 END();
17601 RUN();
17602
17603 if (test_1op) {
17604 uint64_t n_raw = DoubleToRawbits(n);
17605 ASSERT_EQUAL_FP64(n, d10);
17606 ASSERT_EQUAL_FP64(RawbitsToDouble(n_raw & ~kDSignMask), d11);
17607 ASSERT_EQUAL_FP64(RawbitsToDouble(n_raw ^ kDSignMask), d12);
17608 ASSERT_EQUAL_FP64(kFP64DefaultNaN, d13);
17609 ASSERT_EQUAL_FP64(kFP64DefaultNaN, d14);
17610 ASSERT_EQUAL_FP64(kFP64DefaultNaN, d15);
17611 ASSERT_EQUAL_FP64(kFP64DefaultNaN, d16);
17612 ASSERT_EQUAL_FP32(kFP32DefaultNaN, s17);
17613 }
17614
17615 if (test_2op) {
17616 ASSERT_EQUAL_FP64(kFP64DefaultNaN, d18);
17617 ASSERT_EQUAL_FP64(kFP64DefaultNaN, d19);
17618 ASSERT_EQUAL_FP64(kFP64DefaultNaN, d20);
17619 ASSERT_EQUAL_FP64(kFP64DefaultNaN, d21);
17620 ASSERT_EQUAL_FP64(kFP64DefaultNaN, d22);
17621 ASSERT_EQUAL_FP64(kFP64DefaultNaN, d23);
17622 }
17623
17624 ASSERT_EQUAL_FP64(kFP64DefaultNaN, d24);
17625 ASSERT_EQUAL_FP64(kFP64DefaultNaN, d25);
17626 ASSERT_EQUAL_FP64(kFP64DefaultNaN, d26);
17627 ASSERT_EQUAL_FP64(kFP64DefaultNaN, d27);
17628
17629 TEARDOWN();
17630 }
17631
17632
17633 TEST(default_nan_double) {
17634 double sn = RawbitsToDouble(0x7ff5555511111111);
17635 double sm = RawbitsToDouble(0x7ff5555522222222);
17636 double sa = RawbitsToDouble(0x7ff55555aaaaaaaa);
17637 double qn = RawbitsToDouble(0x7ffaaaaa11111111);
17638 double qm = RawbitsToDouble(0x7ffaaaaa22222222);
17639 double qa = RawbitsToDouble(0x7ffaaaaaaaaaaaaa);
17640 VIXL_ASSERT(IsSignallingNaN(sn));
17641 VIXL_ASSERT(IsSignallingNaN(sm));
17642 VIXL_ASSERT(IsSignallingNaN(sa));
17643 VIXL_ASSERT(IsQuietNaN(qn));
17644 VIXL_ASSERT(IsQuietNaN(qm));
17645 VIXL_ASSERT(IsQuietNaN(qa));
17646
17647 // - Signalling NaNs
17648 DefaultNaNHelper(sn, 0.0, 0.0);
17649 DefaultNaNHelper(0.0, sm, 0.0);
17650 DefaultNaNHelper(0.0, 0.0, sa);
17651 DefaultNaNHelper(sn, sm, 0.0);
17652 DefaultNaNHelper(0.0, sm, sa);
17653 DefaultNaNHelper(sn, 0.0, sa);
17654 DefaultNaNHelper(sn, sm, sa);
17655 // - Quiet NaNs
17656 DefaultNaNHelper(qn, 0.0, 0.0);
17657 DefaultNaNHelper(0.0, qm, 0.0);
17658 DefaultNaNHelper(0.0, 0.0, qa);
17659 DefaultNaNHelper(qn, qm, 0.0);
17660 DefaultNaNHelper(0.0, qm, qa);
17661 DefaultNaNHelper(qn, 0.0, qa);
17662 DefaultNaNHelper(qn, qm, qa);
17663 // - Mixed NaNs
17664 DefaultNaNHelper(qn, sm, sa);
17665 DefaultNaNHelper(sn, qm, sa);
17666 DefaultNaNHelper(sn, sm, qa);
17667 DefaultNaNHelper(qn, qm, sa);
17668 DefaultNaNHelper(sn, qm, qa);
17669 DefaultNaNHelper(qn, sm, qa);
17670 DefaultNaNHelper(qn, qm, qa);
17671 }
17672
17673
17674 TEST(ldar_stlr) {
17675 // The middle value is read, modified, and written. The padding exists only to
17676 // check for over-write.
17677 uint8_t b[] = {0, 0x12, 0};
17678 uint16_t h[] = {0, 0x1234, 0};
17679 uint32_t w[] = {0, 0x12345678, 0};
17680 uint64_t x[] = {0, 0x123456789abcdef0, 0};
17681
17682 SETUP();
17683 START();
17684
17685 __ Mov(x10, reinterpret_cast<uintptr_t>(&b[1]));
17686 __ Ldarb(w0, MemOperand(x10));
17687 __ Add(w0, w0, 1);
17688 __ Stlrb(w0, MemOperand(x10));
17689
17690 __ Mov(x10, reinterpret_cast<uintptr_t>(&h[1]));
17691 __ Ldarh(w0, MemOperand(x10));
17692 __ Add(w0, w0, 1);
17693 __ Stlrh(w0, MemOperand(x10));
17694
17695 __ Mov(x10, reinterpret_cast<uintptr_t>(&w[1]));
17696 __ Ldar(w0, MemOperand(x10));
17697 __ Add(w0, w0, 1);
17698 __ Stlr(w0, MemOperand(x10));
17699
17700 __ Mov(x10, reinterpret_cast<uintptr_t>(&x[1]));
17701 __ Ldar(x0, MemOperand(x10));
17702 __ Add(x0, x0, 1);
17703 __ Stlr(x0, MemOperand(x10));
17704
17705 END();
17706 RUN();
17707
17708 ASSERT_EQUAL_32(0x13, b[1]);
17709 ASSERT_EQUAL_32(0x1235, h[1]);
17710 ASSERT_EQUAL_32(0x12345679, w[1]);
17711 ASSERT_EQUAL_64(0x123456789abcdef1, x[1]);
17712
17713 // Check for over-write.
17714 ASSERT_EQUAL_32(0, b[0]);
17715 ASSERT_EQUAL_32(0, b[2]);
17716 ASSERT_EQUAL_32(0, h[0]);
17717 ASSERT_EQUAL_32(0, h[2]);
17718 ASSERT_EQUAL_32(0, w[0]);
17719 ASSERT_EQUAL_32(0, w[2]);
17720 ASSERT_EQUAL_64(0, x[0]);
17721 ASSERT_EQUAL_64(0, x[2]);
17722
17723 TEARDOWN();
17724 }
17725
17726
17727 TEST(ldlar_stllr) {
17728 // The middle value is read, modified, and written. The padding exists only to
17729 // check for over-write.
17730 uint8_t b[] = {0, 0x12, 0};
17731 uint16_t h[] = {0, 0x1234, 0};
17732 uint32_t w[] = {0, 0x12345678, 0};
17733 uint64_t x[] = {0, 0x123456789abcdef0, 0};
17734
17735 SETUP_WITH_FEATURES(CPUFeatures::kLORegions);
17736
17737 START();
17738
17739 __ Mov(x10, reinterpret_cast<uintptr_t>(&b[1]));
17740 __ Ldlarb(w0, MemOperand(x10));
17741 __ Add(w0, w0, 1);
17742 __ Stllrb(w0, MemOperand(x10));
17743
17744 __ Mov(x10, reinterpret_cast<uintptr_t>(&h[1]));
17745 __ Ldlarh(w0, MemOperand(x10));
17746 __ Add(w0, w0, 1);
17747 __ Stllrh(w0, MemOperand(x10));
17748
17749 __ Mov(x10, reinterpret_cast<uintptr_t>(&w[1]));
17750 __ Ldlar(w0, MemOperand(x10));
17751 __ Add(w0, w0, 1);
17752 __ Stllr(w0, MemOperand(x10));
17753
17754 __ Mov(x10, reinterpret_cast<uintptr_t>(&x[1]));
17755 __ Ldlar(x0, MemOperand(x10));
17756 __ Add(x0, x0, 1);
17757 __ Stllr(x0, MemOperand(x10));
17758
17759 END();
17760
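// Only run and check the results under the simulator, since LORegion support
// cannot be assumed on real hardware.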
17761 #ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
17762 RUN();
17763
17764 ASSERT_EQUAL_32(0x13, b[1]);
17765 ASSERT_EQUAL_32(0x1235, h[1]);
17766 ASSERT_EQUAL_32(0x12345679, w[1]);
17767 ASSERT_EQUAL_64(0x123456789abcdef1, x[1]);
17768
17769 // Check for over-write.
17770 ASSERT_EQUAL_32(0, b[0]);
17771 ASSERT_EQUAL_32(0, b[2]);
17772 ASSERT_EQUAL_32(0, h[0]);
17773 ASSERT_EQUAL_32(0, h[2]);
17774 ASSERT_EQUAL_32(0, w[0]);
17775 ASSERT_EQUAL_32(0, w[2]);
17776 ASSERT_EQUAL_64(0, x[0]);
17777 ASSERT_EQUAL_64(0, x[2]);
17778 #endif // VIXL_INCLUDE_SIMULATOR_AARCH64
17779
17780 TEARDOWN();
17781 }
17782
17783
17784 TEST(ldxr_stxr) {
17785 // The middle value is read, modified, and written. The padding exists only to
17786 // check for over-write.
17787 uint8_t b[] = {0, 0x12, 0};
17788 uint16_t h[] = {0, 0x1234, 0};
17789 uint32_t w[] = {0, 0x12345678, 0};
17790 uint64_t x[] = {0, 0x123456789abcdef0, 0};
17791
17792 // As above, but get suitably-aligned values for ldxp and stxp.
17793 uint32_t wp_data[] = {0, 0, 0, 0, 0};
17794 uint32_t* wp = AlignUp(wp_data + 1, kWRegSizeInBytes * 2) - 1;
17795 wp[1] = 0x12345678; // wp[1] is 64-bit-aligned.
17796 wp[2] = 0x87654321;
17797 uint64_t xp_data[] = {0, 0, 0, 0, 0};
17798 uint64_t* xp = AlignUp(xp_data + 1, kXRegSizeInBytes * 2) - 1;
17799 xp[1] = 0x123456789abcdef0; // xp[1] is 128-bit-aligned.
17800 xp[2] = 0x0fedcba987654321;
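  // AlignUp rounds (xp_data + 1) up to a 16-byte boundary; stepping back one
  // element then makes xp[1] (rather than xp[0]) the aligned slot. The same
  // trick gives the 8-byte-aligned wp[1] above.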
17801
17802 SETUP();
17803 START();
17804
17805 __ Mov(x10, reinterpret_cast<uintptr_t>(&b[1]));
17806 Label try_b;
17807 __ Bind(&try_b);
17808 __ Ldxrb(w0, MemOperand(x10));
17809 __ Add(w0, w0, 1);
17810 __ Stxrb(w5, w0, MemOperand(x10));
17811 __ Cbnz(w5, &try_b);
17812
17813 __ Mov(x10, reinterpret_cast<uintptr_t>(&h[1]));
17814 Label try_h;
17815 __ Bind(&try_h);
17816 __ Ldxrh(w0, MemOperand(x10));
17817 __ Add(w0, w0, 1);
17818 __ Stxrh(w5, w0, MemOperand(x10));
17819 __ Cbnz(w5, &try_h);
17820
17821 __ Mov(x10, reinterpret_cast<uintptr_t>(&w[1]));
17822 Label try_w;
17823 __ Bind(&try_w);
17824 __ Ldxr(w0, MemOperand(x10));
17825 __ Add(w0, w0, 1);
17826 __ Stxr(w5, w0, MemOperand(x10));
17827 __ Cbnz(w5, &try_w);
17828
17829 __ Mov(x10, reinterpret_cast<uintptr_t>(&x[1]));
17830 Label try_x;
17831 __ Bind(&try_x);
17832 __ Ldxr(x0, MemOperand(x10));
17833 __ Add(x0, x0, 1);
17834 __ Stxr(w5, x0, MemOperand(x10));
17835 __ Cbnz(w5, &try_x);
17836
17837 __ Mov(x10, reinterpret_cast<uintptr_t>(&wp[1]));
17838 Label try_wp;
17839 __ Bind(&try_wp);
17840 __ Ldxp(w0, w1, MemOperand(x10));
17841 __ Add(w0, w0, 1);
17842 __ Add(w1, w1, 1);
17843 __ Stxp(w5, w0, w1, MemOperand(x10));
17844 __ Cbnz(w5, &try_wp);
17845
17846 __ Mov(x10, reinterpret_cast<uintptr_t>(&xp[1]));
17847 Label try_xp;
17848 __ Bind(&try_xp);
17849 __ Ldxp(x0, x1, MemOperand(x10));
17850 __ Add(x0, x0, 1);
17851 __ Add(x1, x1, 1);
17852 __ Stxp(w5, x0, x1, MemOperand(x10));
17853 __ Cbnz(w5, &try_xp);
17854
17855 END();
17856 RUN();
17857
17858 ASSERT_EQUAL_32(0x13, b[1]);
17859 ASSERT_EQUAL_32(0x1235, h[1]);
17860 ASSERT_EQUAL_32(0x12345679, w[1]);
17861 ASSERT_EQUAL_64(0x123456789abcdef1, x[1]);
17862 ASSERT_EQUAL_32(0x12345679, wp[1]);
17863 ASSERT_EQUAL_32(0x87654322, wp[2]);
17864 ASSERT_EQUAL_64(0x123456789abcdef1, xp[1]);
17865 ASSERT_EQUAL_64(0x0fedcba987654322, xp[2]);
17866
17867 // Check for over-write.
17868 ASSERT_EQUAL_32(0, b[0]);
17869 ASSERT_EQUAL_32(0, b[2]);
17870 ASSERT_EQUAL_32(0, h[0]);
17871 ASSERT_EQUAL_32(0, h[2]);
17872 ASSERT_EQUAL_32(0, w[0]);
17873 ASSERT_EQUAL_32(0, w[2]);
17874 ASSERT_EQUAL_64(0, x[0]);
17875 ASSERT_EQUAL_64(0, x[2]);
17876 ASSERT_EQUAL_32(0, wp[0]);
17877 ASSERT_EQUAL_32(0, wp[3]);
17878 ASSERT_EQUAL_64(0, xp[0]);
17879 ASSERT_EQUAL_64(0, xp[3]);
17880
17881 TEARDOWN();
17882 }
17883
17884
17885 TEST(ldaxr_stlxr) {
17886 // The middle value is read, modified, and written. The padding exists only to
17887 // check for over-write.
17888 uint8_t b[] = {0, 0x12, 0};
17889 uint16_t h[] = {0, 0x1234, 0};
17890 uint32_t w[] = {0, 0x12345678, 0};
17891 uint64_t x[] = {0, 0x123456789abcdef0, 0};
17892
17893   // As above, but get suitably-aligned values for ldaxp and stlxp.
17894 uint32_t wp_data[] = {0, 0, 0, 0, 0};
17895 uint32_t* wp = AlignUp(wp_data + 1, kWRegSizeInBytes * 2) - 1;
17896 wp[1] = 0x12345678; // wp[1] is 64-bit-aligned.
17897 wp[2] = 0x87654321;
17898 uint64_t xp_data[] = {0, 0, 0, 0, 0};
17899 uint64_t* xp = AlignUp(xp_data + 1, kXRegSizeInBytes * 2) - 1;
17900 xp[1] = 0x123456789abcdef0; // xp[1] is 128-bit-aligned.
17901 xp[2] = 0x0fedcba987654321;
17902
17903 SETUP();
17904 START();
17905
17906 __ Mov(x10, reinterpret_cast<uintptr_t>(&b[1]));
17907 Label try_b;
17908 __ Bind(&try_b);
17909 __ Ldaxrb(w0, MemOperand(x10));
17910 __ Add(w0, w0, 1);
17911 __ Stlxrb(w5, w0, MemOperand(x10));
17912 __ Cbnz(w5, &try_b);
17913
17914 __ Mov(x10, reinterpret_cast<uintptr_t>(&h[1]));
17915 Label try_h;
17916 __ Bind(&try_h);
17917 __ Ldaxrh(w0, MemOperand(x10));
17918 __ Add(w0, w0, 1);
17919 __ Stlxrh(w5, w0, MemOperand(x10));
17920 __ Cbnz(w5, &try_h);
17921
17922 __ Mov(x10, reinterpret_cast<uintptr_t>(&w[1]));
17923 Label try_w;
17924 __ Bind(&try_w);
17925 __ Ldaxr(w0, MemOperand(x10));
17926 __ Add(w0, w0, 1);
17927 __ Stlxr(w5, w0, MemOperand(x10));
17928 __ Cbnz(w5, &try_w);
17929
17930 __ Mov(x10, reinterpret_cast<uintptr_t>(&x[1]));
17931 Label try_x;
17932 __ Bind(&try_x);
17933 __ Ldaxr(x0, MemOperand(x10));
17934 __ Add(x0, x0, 1);
17935 __ Stlxr(w5, x0, MemOperand(x10));
17936 __ Cbnz(w5, &try_x);
17937
17938 __ Mov(x10, reinterpret_cast<uintptr_t>(&wp[1]));
17939 Label try_wp;
17940 __ Bind(&try_wp);
17941 __ Ldaxp(w0, w1, MemOperand(x10));
17942 __ Add(w0, w0, 1);
17943 __ Add(w1, w1, 1);
17944 __ Stlxp(w5, w0, w1, MemOperand(x10));
17945 __ Cbnz(w5, &try_wp);
17946
17947 __ Mov(x10, reinterpret_cast<uintptr_t>(&xp[1]));
17948 Label try_xp;
17949 __ Bind(&try_xp);
17950 __ Ldaxp(x0, x1, MemOperand(x10));
17951 __ Add(x0, x0, 1);
17952 __ Add(x1, x1, 1);
17953 __ Stlxp(w5, x0, x1, MemOperand(x10));
17954 __ Cbnz(w5, &try_xp);
17955
17956 END();
17957 RUN();
17958
17959 ASSERT_EQUAL_32(0x13, b[1]);
17960 ASSERT_EQUAL_32(0x1235, h[1]);
17961 ASSERT_EQUAL_32(0x12345679, w[1]);
17962 ASSERT_EQUAL_64(0x123456789abcdef1, x[1]);
17963 ASSERT_EQUAL_32(0x12345679, wp[1]);
17964 ASSERT_EQUAL_32(0x87654322, wp[2]);
17965 ASSERT_EQUAL_64(0x123456789abcdef1, xp[1]);
17966 ASSERT_EQUAL_64(0x0fedcba987654322, xp[2]);
17967
17968 // Check for over-write.
17969 ASSERT_EQUAL_32(0, b[0]);
17970 ASSERT_EQUAL_32(0, b[2]);
17971 ASSERT_EQUAL_32(0, h[0]);
17972 ASSERT_EQUAL_32(0, h[2]);
17973 ASSERT_EQUAL_32(0, w[0]);
17974 ASSERT_EQUAL_32(0, w[2]);
17975 ASSERT_EQUAL_64(0, x[0]);
17976 ASSERT_EQUAL_64(0, x[2]);
17977 ASSERT_EQUAL_32(0, wp[0]);
17978 ASSERT_EQUAL_32(0, wp[3]);
17979 ASSERT_EQUAL_64(0, xp[0]);
17980 ASSERT_EQUAL_64(0, xp[3]);
17981
17982 TEARDOWN();
17983 }
17984
17985
17986 TEST(clrex) {
17987 // This data should never be written.
17988 uint64_t data[] = {0, 0, 0};
17989 uint64_t* data_aligned = AlignUp(data, kXRegSizeInBytes * 2);
17990
17991 SETUP();
17992 START();
17993
17994 __ Mov(x10, reinterpret_cast<uintptr_t>(data_aligned));
17995 __ Mov(w6, 0);
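  // w6 accumulates the store-exclusive status results. Because Clrex clears
  // the exclusive monitor after each load, every store below should fail and
  // add 1 to w6.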
17996
17997 __ Ldxrb(w0, MemOperand(x10));
17998 __ Clrex();
17999 __ Add(w0, w0, 1);
18000 __ Stxrb(w5, w0, MemOperand(x10));
18001 __ Add(w6, w6, w5);
18002
18003 __ Ldxrh(w0, MemOperand(x10));
18004 __ Clrex();
18005 __ Add(w0, w0, 1);
18006 __ Stxrh(w5, w0, MemOperand(x10));
18007 __ Add(w6, w6, w5);
18008
18009 __ Ldxr(w0, MemOperand(x10));
18010 __ Clrex();
18011 __ Add(w0, w0, 1);
18012 __ Stxr(w5, w0, MemOperand(x10));
18013 __ Add(w6, w6, w5);
18014
18015 __ Ldxr(x0, MemOperand(x10));
18016 __ Clrex();
18017 __ Add(x0, x0, 1);
18018 __ Stxr(w5, x0, MemOperand(x10));
18019 __ Add(w6, w6, w5);
18020
18021 __ Ldxp(w0, w1, MemOperand(x10));
18022 __ Clrex();
18023 __ Add(w0, w0, 1);
18024 __ Add(w1, w1, 1);
18025 __ Stxp(w5, w0, w1, MemOperand(x10));
18026 __ Add(w6, w6, w5);
18027
18028 __ Ldxp(x0, x1, MemOperand(x10));
18029 __ Clrex();
18030 __ Add(x0, x0, 1);
18031 __ Add(x1, x1, 1);
18032 __ Stxp(w5, x0, x1, MemOperand(x10));
18033 __ Add(w6, w6, w5);
18034
18035 // Acquire-release variants.
18036
18037 __ Ldaxrb(w0, MemOperand(x10));
18038 __ Clrex();
18039 __ Add(w0, w0, 1);
18040 __ Stlxrb(w5, w0, MemOperand(x10));
18041 __ Add(w6, w6, w5);
18042
18043 __ Ldaxrh(w0, MemOperand(x10));
18044 __ Clrex();
18045 __ Add(w0, w0, 1);
18046 __ Stlxrh(w5, w0, MemOperand(x10));
18047 __ Add(w6, w6, w5);
18048
18049 __ Ldaxr(w0, MemOperand(x10));
18050 __ Clrex();
18051 __ Add(w0, w0, 1);
18052 __ Stlxr(w5, w0, MemOperand(x10));
18053 __ Add(w6, w6, w5);
18054
18055 __ Ldaxr(x0, MemOperand(x10));
18056 __ Clrex();
18057 __ Add(x0, x0, 1);
18058 __ Stlxr(w5, x0, MemOperand(x10));
18059 __ Add(w6, w6, w5);
18060
18061 __ Ldaxp(w0, w1, MemOperand(x10));
18062 __ Clrex();
18063 __ Add(w0, w0, 1);
18064 __ Add(w1, w1, 1);
18065 __ Stlxp(w5, w0, w1, MemOperand(x10));
18066 __ Add(w6, w6, w5);
18067
18068 __ Ldaxp(x0, x1, MemOperand(x10));
18069 __ Clrex();
18070 __ Add(x0, x0, 1);
18071 __ Add(x1, x1, 1);
18072 __ Stlxp(w5, x0, x1, MemOperand(x10));
18073 __ Add(w6, w6, w5);
18074
18075 END();
18076 RUN();
18077
18078 // None of the 12 store-exclusives should have succeeded.
18079 ASSERT_EQUAL_32(12, w6);
18080
18081 ASSERT_EQUAL_64(0, data[0]);
18082 ASSERT_EQUAL_64(0, data[1]);
18083 ASSERT_EQUAL_64(0, data[2]);
18084
18085 TEARDOWN();
18086 }
18087
18088
18089 #ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
18090 // Check that the simulator occasionally makes store-exclusive operations fail.
18091 TEST(ldxr_stxr_fail) {
18092 uint64_t data[] = {0, 0, 0};
18093 uint64_t* data_aligned = AlignUp(data, kXRegSizeInBytes * 2);
18094
18095 // Impose a hard limit on the number of attempts, so the test cannot hang.
18096 static const uint64_t kWatchdog = 10000;
18097 Label done;
18098
18099 SETUP();
18100 START();
18101
18102 __ Mov(x10, reinterpret_cast<uintptr_t>(data_aligned));
18103 __ Mov(x11, kWatchdog);
18104
18105 // This loop is the opposite of what we normally do with ldxr and stxr; we
18106 // keep trying until we fail (or the watchdog counter runs out).
18107 Label try_b;
18108 __ Bind(&try_b);
18109 __ Ldxrb(w0, MemOperand(x10));
18110 __ Stxrb(w5, w0, MemOperand(x10));
18111 // Check the watchdog counter.
18112 __ Sub(x11, x11, 1);
18113 __ Cbz(x11, &done);
18114 // Check the exclusive-store result.
18115 __ Cbz(w5, &try_b);
18116
18117 Label try_h;
18118 __ Bind(&try_h);
18119 __ Ldxrh(w0, MemOperand(x10));
18120 __ Stxrh(w5, w0, MemOperand(x10));
18121 __ Sub(x11, x11, 1);
18122 __ Cbz(x11, &done);
18123 __ Cbz(w5, &try_h);
18124
18125 Label try_w;
18126 __ Bind(&try_w);
18127 __ Ldxr(w0, MemOperand(x10));
18128 __ Stxr(w5, w0, MemOperand(x10));
18129 __ Sub(x11, x11, 1);
18130 __ Cbz(x11, &done);
18131 __ Cbz(w5, &try_w);
18132
18133 Label try_x;
18134 __ Bind(&try_x);
18135 __ Ldxr(x0, MemOperand(x10));
18136 __ Stxr(w5, x0, MemOperand(x10));
18137 __ Sub(x11, x11, 1);
18138 __ Cbz(x11, &done);
18139 __ Cbz(w5, &try_x);
18140
18141 Label try_wp;
18142 __ Bind(&try_wp);
18143 __ Ldxp(w0, w1, MemOperand(x10));
18144 __ Stxp(w5, w0, w1, MemOperand(x10));
18145 __ Sub(x11, x11, 1);
18146 __ Cbz(x11, &done);
18147 __ Cbz(w5, &try_wp);
18148
18149 Label try_xp;
18150 __ Bind(&try_xp);
18151 __ Ldxp(x0, x1, MemOperand(x10));
18152 __ Stxp(w5, x0, x1, MemOperand(x10));
18153 __ Sub(x11, x11, 1);
18154 __ Cbz(x11, &done);
18155 __ Cbz(w5, &try_xp);
18156
18157 __ Bind(&done);
18158 // Trigger an error if x11 (watchdog) is zero.
18159 __ Cmp(x11, 0);
18160 __ Cset(x12, eq);
18161
18162 END();
18163 RUN();
18164
18165 // Check that the watchdog counter didn't run out.
18166 ASSERT_EQUAL_64(0, x12);
18167
18168 TEARDOWN();
18169 }
18170 #endif


#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
// Check that the simulator occasionally makes store-exclusive operations fail.
TEST(ldaxr_stlxr_fail) {
  uint64_t data[] = {0, 0, 0};
  uint64_t* data_aligned = AlignUp(data, kXRegSizeInBytes * 2);

  // Impose a hard limit on the number of attempts, so the test cannot hang.
  static const uint64_t kWatchdog = 10000;
  Label done;

  SETUP();
  START();

  __ Mov(x10, reinterpret_cast<uintptr_t>(data_aligned));
  __ Mov(x11, kWatchdog);

  // This loop is the opposite of what we normally do with ldaxr and stlxr; we
  // keep trying until we fail (or the watchdog counter runs out).
  Label try_b;
  __ Bind(&try_b);
  __ Ldaxrb(w0, MemOperand(x10));
  __ Stlxrb(w5, w0, MemOperand(x10));
  // Check the watchdog counter.
  __ Sub(x11, x11, 1);
  __ Cbz(x11, &done);
  // Check the exclusive-store result.
  __ Cbz(w5, &try_b);

  Label try_h;
  __ Bind(&try_h);
  __ Ldaxrh(w0, MemOperand(x10));
  __ Stlxrh(w5, w0, MemOperand(x10));
  __ Sub(x11, x11, 1);
  __ Cbz(x11, &done);
  __ Cbz(w5, &try_h);

  Label try_w;
  __ Bind(&try_w);
  __ Ldaxr(w0, MemOperand(x10));
  __ Stlxr(w5, w0, MemOperand(x10));
  __ Sub(x11, x11, 1);
  __ Cbz(x11, &done);
  __ Cbz(w5, &try_w);

  Label try_x;
  __ Bind(&try_x);
  __ Ldaxr(x0, MemOperand(x10));
  __ Stlxr(w5, x0, MemOperand(x10));
  __ Sub(x11, x11, 1);
  __ Cbz(x11, &done);
  __ Cbz(w5, &try_x);

  Label try_wp;
  __ Bind(&try_wp);
  __ Ldaxp(w0, w1, MemOperand(x10));
  __ Stlxp(w5, w0, w1, MemOperand(x10));
  __ Sub(x11, x11, 1);
  __ Cbz(x11, &done);
  __ Cbz(w5, &try_wp);

  Label try_xp;
  __ Bind(&try_xp);
  __ Ldaxp(x0, x1, MemOperand(x10));
  __ Stlxp(w5, x0, x1, MemOperand(x10));
  __ Sub(x11, x11, 1);
  __ Cbz(x11, &done);
  __ Cbz(w5, &try_xp);

  __ Bind(&done);
  // Flag an error (x12 = 1) if the watchdog counter (x11) ran out.
  __ Cmp(x11, 0);
  __ Cset(x12, eq);

  END();
  RUN();

  // Check that the watchdog counter didn't run out.
  ASSERT_EQUAL_64(0, x12);

  TEARDOWN();
}
#endif

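// Cas(rs, rt, [xn]) atomically compares the value at [xn] with rs: if they
// are equal, rt is stored to [xn]; either way, rs receives the value that was
// in memory. In each pair of instructions below, the first uses a
// non-matching expected value (so memory is unchanged) and the second uses a
// matching one (so memory is updated to 0xffffffff).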
TEST(cas_casa_casl_casal_w) {
  uint64_t data1[] = {0x01234567, 0};
  uint64_t data2[] = {0x01234567, 0};
  uint64_t data3[] = {0x01234567, 0};
  uint64_t data4[] = {0x01234567, 0};
  uint64_t data5[] = {0x01234567, 0};
  uint64_t data6[] = {0x01234567, 0};
  uint64_t data7[] = {0x01234567, 0};
  uint64_t data8[] = {0x01234567, 0};

  uint64_t* data1_aligned = AlignUp(data1, kXRegSizeInBytes * 2);
  uint64_t* data2_aligned = AlignUp(data2, kXRegSizeInBytes * 2);
  uint64_t* data3_aligned = AlignUp(data3, kXRegSizeInBytes * 2);
  uint64_t* data4_aligned = AlignUp(data4, kXRegSizeInBytes * 2);
  uint64_t* data5_aligned = AlignUp(data5, kXRegSizeInBytes * 2);
  uint64_t* data6_aligned = AlignUp(data6, kXRegSizeInBytes * 2);
  uint64_t* data7_aligned = AlignUp(data7, kXRegSizeInBytes * 2);
  uint64_t* data8_aligned = AlignUp(data8, kXRegSizeInBytes * 2);

  SETUP_WITH_FEATURES(CPUFeatures::kAtomics);

  START();

  __ Mov(x21, reinterpret_cast<uintptr_t>(data1_aligned));
  __ Mov(x22, reinterpret_cast<uintptr_t>(data2_aligned));
  __ Mov(x23, reinterpret_cast<uintptr_t>(data3_aligned));
  __ Mov(x24, reinterpret_cast<uintptr_t>(data4_aligned));
  __ Mov(x25, reinterpret_cast<uintptr_t>(data5_aligned));
  __ Mov(x26, reinterpret_cast<uintptr_t>(data6_aligned));
  __ Mov(x27, reinterpret_cast<uintptr_t>(data7_aligned));
  __ Mov(x28, reinterpret_cast<uintptr_t>(data8_aligned));

  __ Mov(x0, 0xffffffff);

  __ Mov(x1, 0x76543210);
  __ Mov(x2, 0x01234567);
  __ Mov(x3, 0x76543210);
  __ Mov(x4, 0x01234567);
  __ Mov(x5, 0x76543210);
  __ Mov(x6, 0x01234567);
  __ Mov(x7, 0x76543210);
  __ Mov(x8, 0x01234567);

  __ Cas(w1, w0, MemOperand(x21));
  __ Cas(w2, w0, MemOperand(x22));
  __ Casa(w3, w0, MemOperand(x23));
  __ Casa(w4, w0, MemOperand(x24));
  __ Casl(w5, w0, MemOperand(x25));
  __ Casl(w6, w0, MemOperand(x26));
  __ Casal(w7, w0, MemOperand(x27));
  __ Casal(w8, w0, MemOperand(x28));

  END();

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
  RUN();

  ASSERT_EQUAL_64(0x01234567, x1);
  ASSERT_EQUAL_64(0x01234567, x2);
  ASSERT_EQUAL_64(0x01234567, x3);
  ASSERT_EQUAL_64(0x01234567, x4);
  ASSERT_EQUAL_64(0x01234567, x5);
  ASSERT_EQUAL_64(0x01234567, x6);
  ASSERT_EQUAL_64(0x01234567, x7);
  ASSERT_EQUAL_64(0x01234567, x8);

  ASSERT_EQUAL_64(0x01234567, data1[0]);
  ASSERT_EQUAL_64(0xffffffff, data2[0]);
  ASSERT_EQUAL_64(0x01234567, data3[0]);
  ASSERT_EQUAL_64(0xffffffff, data4[0]);
  ASSERT_EQUAL_64(0x01234567, data5[0]);
  ASSERT_EQUAL_64(0xffffffff, data6[0]);
  ASSERT_EQUAL_64(0x01234567, data7[0]);
  ASSERT_EQUAL_64(0xffffffff, data8[0]);
#endif  // VIXL_INCLUDE_SIMULATOR_AARCH64

  TEARDOWN();
}

TEST(cas_casa_casl_casal_x) {
  uint64_t data1[] = {0x0123456789abcdef, 0};
  uint64_t data2[] = {0x0123456789abcdef, 0};
  uint64_t data3[] = {0x0123456789abcdef, 0};
  uint64_t data4[] = {0x0123456789abcdef, 0};
  uint64_t data5[] = {0x0123456789abcdef, 0};
  uint64_t data6[] = {0x0123456789abcdef, 0};
  uint64_t data7[] = {0x0123456789abcdef, 0};
  uint64_t data8[] = {0x0123456789abcdef, 0};

  uint64_t* data1_aligned = AlignUp(data1, kXRegSizeInBytes * 2);
  uint64_t* data2_aligned = AlignUp(data2, kXRegSizeInBytes * 2);
  uint64_t* data3_aligned = AlignUp(data3, kXRegSizeInBytes * 2);
  uint64_t* data4_aligned = AlignUp(data4, kXRegSizeInBytes * 2);
  uint64_t* data5_aligned = AlignUp(data5, kXRegSizeInBytes * 2);
  uint64_t* data6_aligned = AlignUp(data6, kXRegSizeInBytes * 2);
  uint64_t* data7_aligned = AlignUp(data7, kXRegSizeInBytes * 2);
  uint64_t* data8_aligned = AlignUp(data8, kXRegSizeInBytes * 2);

  SETUP_WITH_FEATURES(CPUFeatures::kAtomics);

  START();

  __ Mov(x21, reinterpret_cast<uintptr_t>(data1_aligned));
  __ Mov(x22, reinterpret_cast<uintptr_t>(data2_aligned));
  __ Mov(x23, reinterpret_cast<uintptr_t>(data3_aligned));
  __ Mov(x24, reinterpret_cast<uintptr_t>(data4_aligned));
  __ Mov(x25, reinterpret_cast<uintptr_t>(data5_aligned));
  __ Mov(x26, reinterpret_cast<uintptr_t>(data6_aligned));
  __ Mov(x27, reinterpret_cast<uintptr_t>(data7_aligned));
  __ Mov(x28, reinterpret_cast<uintptr_t>(data8_aligned));

  __ Mov(x0, 0xffffffffffffffff);

  __ Mov(x1, 0xfedcba9876543210);
  __ Mov(x2, 0x0123456789abcdef);
  __ Mov(x3, 0xfedcba9876543210);
  __ Mov(x4, 0x0123456789abcdef);
  __ Mov(x5, 0xfedcba9876543210);
  __ Mov(x6, 0x0123456789abcdef);
  __ Mov(x7, 0xfedcba9876543210);
  __ Mov(x8, 0x0123456789abcdef);

  __ Cas(x1, x0, MemOperand(x21));
  __ Cas(x2, x0, MemOperand(x22));
  __ Casa(x3, x0, MemOperand(x23));
  __ Casa(x4, x0, MemOperand(x24));
  __ Casl(x5, x0, MemOperand(x25));
  __ Casl(x6, x0, MemOperand(x26));
  __ Casal(x7, x0, MemOperand(x27));
  __ Casal(x8, x0, MemOperand(x28));

  END();

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
  RUN();

  ASSERT_EQUAL_64(0x0123456789abcdef, x1);
  ASSERT_EQUAL_64(0x0123456789abcdef, x2);
  ASSERT_EQUAL_64(0x0123456789abcdef, x3);
  ASSERT_EQUAL_64(0x0123456789abcdef, x4);
  ASSERT_EQUAL_64(0x0123456789abcdef, x5);
  ASSERT_EQUAL_64(0x0123456789abcdef, x6);
  ASSERT_EQUAL_64(0x0123456789abcdef, x7);
  ASSERT_EQUAL_64(0x0123456789abcdef, x8);

  ASSERT_EQUAL_64(0x0123456789abcdef, data1[0]);
  ASSERT_EQUAL_64(0xffffffffffffffff, data2[0]);
  ASSERT_EQUAL_64(0x0123456789abcdef, data3[0]);
  ASSERT_EQUAL_64(0xffffffffffffffff, data4[0]);
  ASSERT_EQUAL_64(0x0123456789abcdef, data5[0]);
  ASSERT_EQUAL_64(0xffffffffffffffff, data6[0]);
  ASSERT_EQUAL_64(0x0123456789abcdef, data7[0]);
  ASSERT_EQUAL_64(0xffffffffffffffff, data8[0]);
#endif  // VIXL_INCLUDE_SIMULATOR_AARCH64

  TEARDOWN();
}

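// The -b forms compare and store only the least-significant byte; the value
// loaded back into rs is zero-extended, which is why every result register
// below is expected to hold just 0x00000067.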
TEST(casb_casab_caslb_casalb) {
  uint64_t data1[] = {0x01234567, 0};
  uint64_t data2[] = {0x01234567, 0};
  uint64_t data3[] = {0x01234567, 0};
  uint64_t data4[] = {0x01234567, 0};
  uint64_t data5[] = {0x01234567, 0};
  uint64_t data6[] = {0x01234567, 0};
  uint64_t data7[] = {0x01234567, 0};
  uint64_t data8[] = {0x01234567, 0};

  uint64_t* data1_aligned = AlignUp(data1, kXRegSizeInBytes * 2);
  uint64_t* data2_aligned = AlignUp(data2, kXRegSizeInBytes * 2);
  uint64_t* data3_aligned = AlignUp(data3, kXRegSizeInBytes * 2);
  uint64_t* data4_aligned = AlignUp(data4, kXRegSizeInBytes * 2);
  uint64_t* data5_aligned = AlignUp(data5, kXRegSizeInBytes * 2);
  uint64_t* data6_aligned = AlignUp(data6, kXRegSizeInBytes * 2);
  uint64_t* data7_aligned = AlignUp(data7, kXRegSizeInBytes * 2);
  uint64_t* data8_aligned = AlignUp(data8, kXRegSizeInBytes * 2);

  SETUP_WITH_FEATURES(CPUFeatures::kAtomics);

  START();

  __ Mov(x21, reinterpret_cast<uintptr_t>(data1_aligned));
  __ Mov(x22, reinterpret_cast<uintptr_t>(data2_aligned));
  __ Mov(x23, reinterpret_cast<uintptr_t>(data3_aligned));
  __ Mov(x24, reinterpret_cast<uintptr_t>(data4_aligned));
  __ Mov(x25, reinterpret_cast<uintptr_t>(data5_aligned));
  __ Mov(x26, reinterpret_cast<uintptr_t>(data6_aligned));
  __ Mov(x27, reinterpret_cast<uintptr_t>(data7_aligned));
  __ Mov(x28, reinterpret_cast<uintptr_t>(data8_aligned));

  __ Mov(x0, 0xffffffff);

  __ Mov(x1, 0x76543210);
  __ Mov(x2, 0x01234567);
  __ Mov(x3, 0x76543210);
  __ Mov(x4, 0x01234567);
  __ Mov(x5, 0x76543210);
  __ Mov(x6, 0x01234567);
  __ Mov(x7, 0x76543210);
  __ Mov(x8, 0x01234567);

  __ Casb(w1, w0, MemOperand(x21));
  __ Casb(w2, w0, MemOperand(x22));
  __ Casab(w3, w0, MemOperand(x23));
  __ Casab(w4, w0, MemOperand(x24));
  __ Caslb(w5, w0, MemOperand(x25));
  __ Caslb(w6, w0, MemOperand(x26));
  __ Casalb(w7, w0, MemOperand(x27));
  __ Casalb(w8, w0, MemOperand(x28));

  END();

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
  RUN();

  ASSERT_EQUAL_64(0x00000067, x1);
  ASSERT_EQUAL_64(0x00000067, x2);
  ASSERT_EQUAL_64(0x00000067, x3);
  ASSERT_EQUAL_64(0x00000067, x4);
  ASSERT_EQUAL_64(0x00000067, x5);
  ASSERT_EQUAL_64(0x00000067, x6);
  ASSERT_EQUAL_64(0x00000067, x7);
  ASSERT_EQUAL_64(0x00000067, x8);

  ASSERT_EQUAL_64(0x01234567, data1[0]);
  ASSERT_EQUAL_64(0x012345ff, data2[0]);
  ASSERT_EQUAL_64(0x01234567, data3[0]);
  ASSERT_EQUAL_64(0x012345ff, data4[0]);
  ASSERT_EQUAL_64(0x01234567, data5[0]);
  ASSERT_EQUAL_64(0x012345ff, data6[0]);
  ASSERT_EQUAL_64(0x01234567, data7[0]);
  ASSERT_EQUAL_64(0x012345ff, data8[0]);
#endif  // VIXL_INCLUDE_SIMULATOR_AARCH64

  TEARDOWN();
}

TEST(cash_casah_caslh_casalh) {
  uint64_t data1[] = {0x01234567, 0};
  uint64_t data2[] = {0x01234567, 0};
  uint64_t data3[] = {0x01234567, 0};
  uint64_t data4[] = {0x01234567, 0};
  uint64_t data5[] = {0x01234567, 0};
  uint64_t data6[] = {0x01234567, 0};
  uint64_t data7[] = {0x01234567, 0};
  uint64_t data8[] = {0x01234567, 0};

  uint64_t* data1_aligned = AlignUp(data1, kXRegSizeInBytes * 2);
  uint64_t* data2_aligned = AlignUp(data2, kXRegSizeInBytes * 2);
  uint64_t* data3_aligned = AlignUp(data3, kXRegSizeInBytes * 2);
  uint64_t* data4_aligned = AlignUp(data4, kXRegSizeInBytes * 2);
  uint64_t* data5_aligned = AlignUp(data5, kXRegSizeInBytes * 2);
  uint64_t* data6_aligned = AlignUp(data6, kXRegSizeInBytes * 2);
  uint64_t* data7_aligned = AlignUp(data7, kXRegSizeInBytes * 2);
  uint64_t* data8_aligned = AlignUp(data8, kXRegSizeInBytes * 2);

  SETUP_WITH_FEATURES(CPUFeatures::kAtomics);

  START();

  __ Mov(x21, reinterpret_cast<uintptr_t>(data1_aligned));
  __ Mov(x22, reinterpret_cast<uintptr_t>(data2_aligned));
  __ Mov(x23, reinterpret_cast<uintptr_t>(data3_aligned));
  __ Mov(x24, reinterpret_cast<uintptr_t>(data4_aligned));
  __ Mov(x25, reinterpret_cast<uintptr_t>(data5_aligned));
  __ Mov(x26, reinterpret_cast<uintptr_t>(data6_aligned));
  __ Mov(x27, reinterpret_cast<uintptr_t>(data7_aligned));
  __ Mov(x28, reinterpret_cast<uintptr_t>(data8_aligned));

  __ Mov(x0, 0xffffffff);

  __ Mov(x1, 0x76543210);
  __ Mov(x2, 0x01234567);
  __ Mov(x3, 0x76543210);
  __ Mov(x4, 0x01234567);
  __ Mov(x5, 0x76543210);
  __ Mov(x6, 0x01234567);
  __ Mov(x7, 0x76543210);
  __ Mov(x8, 0x01234567);

  __ Cash(w1, w0, MemOperand(x21));
  __ Cash(w2, w0, MemOperand(x22));
  __ Casah(w3, w0, MemOperand(x23));
  __ Casah(w4, w0, MemOperand(x24));
  __ Caslh(w5, w0, MemOperand(x25));
  __ Caslh(w6, w0, MemOperand(x26));
  __ Casalh(w7, w0, MemOperand(x27));
  __ Casalh(w8, w0, MemOperand(x28));

  END();

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
  RUN();

  ASSERT_EQUAL_64(0x00004567, x1);
  ASSERT_EQUAL_64(0x00004567, x2);
  ASSERT_EQUAL_64(0x00004567, x3);
  ASSERT_EQUAL_64(0x00004567, x4);
  ASSERT_EQUAL_64(0x00004567, x5);
  ASSERT_EQUAL_64(0x00004567, x6);
  ASSERT_EQUAL_64(0x00004567, x7);
  ASSERT_EQUAL_64(0x00004567, x8);

  ASSERT_EQUAL_64(0x01234567, data1[0]);
  ASSERT_EQUAL_64(0x0123ffff, data2[0]);
  ASSERT_EQUAL_64(0x01234567, data3[0]);
  ASSERT_EQUAL_64(0x0123ffff, data4[0]);
  ASSERT_EQUAL_64(0x01234567, data5[0]);
  ASSERT_EQUAL_64(0x0123ffff, data6[0]);
  ASSERT_EQUAL_64(0x01234567, data7[0]);
  ASSERT_EQUAL_64(0x0123ffff, data8[0]);
#endif  // VIXL_INCLUDE_SIMULATOR_AARCH64

  TEARDOWN();
}

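// Casp operates on a consecutively-numbered (even, odd) register pair and
// compares twice the register size at once: each {wN, wN+1} pair below is
// compared with the 64-bit value at the corresponding address.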
TEST(casp_caspa_caspl_caspal) {
  uint64_t data1[] = {0x89abcdef01234567, 0};
  uint64_t data2[] = {0x89abcdef01234567, 0};
  uint64_t data3[] = {0x89abcdef01234567, 0};
  uint64_t data4[] = {0x89abcdef01234567, 0};
  uint64_t data5[] = {0x89abcdef01234567, 0};
  uint64_t data6[] = {0x89abcdef01234567, 0};
  uint64_t data7[] = {0x89abcdef01234567, 0};
  uint64_t data8[] = {0x89abcdef01234567, 0};

  uint64_t* data1_aligned = AlignUp(data1, kXRegSizeInBytes * 2);
  uint64_t* data2_aligned = AlignUp(data2, kXRegSizeInBytes * 2);
  uint64_t* data3_aligned = AlignUp(data3, kXRegSizeInBytes * 2);
  uint64_t* data4_aligned = AlignUp(data4, kXRegSizeInBytes * 2);
  uint64_t* data5_aligned = AlignUp(data5, kXRegSizeInBytes * 2);
  uint64_t* data6_aligned = AlignUp(data6, kXRegSizeInBytes * 2);
  uint64_t* data7_aligned = AlignUp(data7, kXRegSizeInBytes * 2);
  uint64_t* data8_aligned = AlignUp(data8, kXRegSizeInBytes * 2);

  SETUP_WITH_FEATURES(CPUFeatures::kAtomics);

  START();

  __ Mov(x21, reinterpret_cast<uintptr_t>(data1_aligned));
  __ Mov(x22, reinterpret_cast<uintptr_t>(data2_aligned));
  __ Mov(x23, reinterpret_cast<uintptr_t>(data3_aligned));
  __ Mov(x24, reinterpret_cast<uintptr_t>(data4_aligned));
  __ Mov(x25, reinterpret_cast<uintptr_t>(data5_aligned));
  __ Mov(x26, reinterpret_cast<uintptr_t>(data6_aligned));
  __ Mov(x27, reinterpret_cast<uintptr_t>(data7_aligned));
  __ Mov(x28, reinterpret_cast<uintptr_t>(data8_aligned));

  __ Mov(x0, 0xffffffff);
  __ Mov(x1, 0xffffffff);

  __ Mov(x2, 0x76543210);
  __ Mov(x3, 0xfedcba98);
  __ Mov(x4, 0x89abcdef);
  __ Mov(x5, 0x01234567);

  __ Mov(x6, 0x76543210);
  __ Mov(x7, 0xfedcba98);
  __ Mov(x8, 0x89abcdef);
  __ Mov(x9, 0x01234567);

  __ Mov(x10, 0x76543210);
  __ Mov(x11, 0xfedcba98);
  __ Mov(x12, 0x89abcdef);
  __ Mov(x13, 0x01234567);

  __ Mov(x14, 0x76543210);
  __ Mov(x15, 0xfedcba98);
  __ Mov(x16, 0x89abcdef);
  __ Mov(x17, 0x01234567);

  __ Casp(w2, w3, w0, w1, MemOperand(x21));
  __ Casp(w4, w5, w0, w1, MemOperand(x22));
  __ Caspa(w6, w7, w0, w1, MemOperand(x23));
  __ Caspa(w8, w9, w0, w1, MemOperand(x24));
  __ Caspl(w10, w11, w0, w1, MemOperand(x25));
  __ Caspl(w12, w13, w0, w1, MemOperand(x26));
  __ Caspal(w14, w15, w0, w1, MemOperand(x27));
  __ Caspal(w16, w17, w0, w1, MemOperand(x28));

  END();

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
  RUN();

  ASSERT_EQUAL_64(0x89abcdef, x2);
  ASSERT_EQUAL_64(0x01234567, x3);
  ASSERT_EQUAL_64(0x89abcdef, x4);
  ASSERT_EQUAL_64(0x01234567, x5);
  ASSERT_EQUAL_64(0x89abcdef, x6);
  ASSERT_EQUAL_64(0x01234567, x7);
  ASSERT_EQUAL_64(0x89abcdef, x8);
  ASSERT_EQUAL_64(0x01234567, x9);
  ASSERT_EQUAL_64(0x89abcdef, x10);
  ASSERT_EQUAL_64(0x01234567, x11);
  ASSERT_EQUAL_64(0x89abcdef, x12);
  ASSERT_EQUAL_64(0x01234567, x13);
  ASSERT_EQUAL_64(0x89abcdef, x14);
  ASSERT_EQUAL_64(0x01234567, x15);
  ASSERT_EQUAL_64(0x89abcdef, x16);
  ASSERT_EQUAL_64(0x01234567, x17);

  ASSERT_EQUAL_64(0x89abcdef01234567, data1[0]);
  ASSERT_EQUAL_64(0xffffffffffffffff, data2[0]);
  ASSERT_EQUAL_64(0x89abcdef01234567, data3[0]);
  ASSERT_EQUAL_64(0xffffffffffffffff, data4[0]);
  ASSERT_EQUAL_64(0x89abcdef01234567, data5[0]);
  ASSERT_EQUAL_64(0xffffffffffffffff, data6[0]);
  ASSERT_EQUAL_64(0x89abcdef01234567, data7[0]);
  ASSERT_EQUAL_64(0xffffffffffffffff, data8[0]);
#endif  // VIXL_INCLUDE_SIMULATOR_AARCH64

  TEARDOWN();
}


typedef void (MacroAssembler::*AtomicMemoryLoadSignature)(
    const Register& rs, const Register& rt, const MemOperand& src);
typedef void (MacroAssembler::*AtomicMemoryStoreSignature)(
    const Register& rs, const MemOperand& src);
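// These match the signatures of the MacroAssembler's atomic memory
// operations, so a test can select an instruction at runtime; for example
// (illustrative only):
//   AtomicMemoryLoadSignature fn = &MacroAssembler::Ldadd;
//   (masm.*fn)(w0, w10, MemOperand(x20));  // Equivalent to masm.Ldadd(...).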

void AtomicMemoryWHelper(AtomicMemoryLoadSignature* load_funcs,
                         AtomicMemoryStoreSignature* store_funcs,
                         uint64_t arg1,
                         uint64_t arg2,
                         uint64_t expected,
                         uint64_t result_mask) {
  uint64_t data0[] __attribute__((aligned(kXRegSizeInBytes * 2))) = {arg2, 0};
  uint64_t data1[] __attribute__((aligned(kXRegSizeInBytes * 2))) = {arg2, 0};
  uint64_t data2[] __attribute__((aligned(kXRegSizeInBytes * 2))) = {arg2, 0};
  uint64_t data3[] __attribute__((aligned(kXRegSizeInBytes * 2))) = {arg2, 0};
  uint64_t data4[] __attribute__((aligned(kXRegSizeInBytes * 2))) = {arg2, 0};
  uint64_t data5[] __attribute__((aligned(kXRegSizeInBytes * 2))) = {arg2, 0};

  SETUP_WITH_FEATURES(CPUFeatures::kAtomics);
  START();

  __ Mov(x20, reinterpret_cast<uintptr_t>(data0));
  __ Mov(x21, reinterpret_cast<uintptr_t>(data1));
  __ Mov(x22, reinterpret_cast<uintptr_t>(data2));
  __ Mov(x23, reinterpret_cast<uintptr_t>(data3));

  __ Mov(x0, arg1);
  __ Mov(x1, arg1);
  __ Mov(x2, arg1);
  __ Mov(x3, arg1);

  (masm.*(load_funcs[0]))(w0, w10, MemOperand(x20));
  (masm.*(load_funcs[1]))(w1, w11, MemOperand(x21));
  (masm.*(load_funcs[2]))(w2, w12, MemOperand(x22));
  (masm.*(load_funcs[3]))(w3, w13, MemOperand(x23));

  if (store_funcs != NULL) {
    __ Mov(x24, reinterpret_cast<uintptr_t>(data4));
    __ Mov(x25, reinterpret_cast<uintptr_t>(data5));
    __ Mov(x4, arg1);
    __ Mov(x5, arg1);

    (masm.*(store_funcs[0]))(w4, MemOperand(x24));
    (masm.*(store_funcs[1]))(w5, MemOperand(x25));
  }

  END();

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
  RUN();

  uint64_t stored_value = arg2 & result_mask;
  ASSERT_EQUAL_64(stored_value, x10);
  ASSERT_EQUAL_64(stored_value, x11);
  ASSERT_EQUAL_64(stored_value, x12);
  ASSERT_EQUAL_64(stored_value, x13);

  // The data fields already contain arg2, so only the bits selected by
  // result_mask are overwritten.
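  // For example, with the byte forms of 'add' (see atomic_memory_add below),
  // arg2 = 0x0200002000200202 and only the low byte becomes 0x01 + 0x02, so
  // final_expected is 0x0200002000200203.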
  uint64_t final_expected = (arg2 & ~result_mask) | (expected & result_mask);
  ASSERT_EQUAL_64(final_expected, data0[0]);
  ASSERT_EQUAL_64(final_expected, data1[0]);
  ASSERT_EQUAL_64(final_expected, data2[0]);
  ASSERT_EQUAL_64(final_expected, data3[0]);

  if (store_funcs != NULL) {
    ASSERT_EQUAL_64(final_expected, data4[0]);
    ASSERT_EQUAL_64(final_expected, data5[0]);
  }
#else
  USE(expected, result_mask);
#endif  // VIXL_INCLUDE_SIMULATOR_AARCH64

  TEARDOWN();
}

void AtomicMemoryXHelper(AtomicMemoryLoadSignature* load_funcs,
                         AtomicMemoryStoreSignature* store_funcs,
                         uint64_t arg1,
                         uint64_t arg2,
                         uint64_t expected) {
  uint64_t data0[] __attribute__((aligned(kXRegSizeInBytes * 2))) = {arg2, 0};
  uint64_t data1[] __attribute__((aligned(kXRegSizeInBytes * 2))) = {arg2, 0};
  uint64_t data2[] __attribute__((aligned(kXRegSizeInBytes * 2))) = {arg2, 0};
  uint64_t data3[] __attribute__((aligned(kXRegSizeInBytes * 2))) = {arg2, 0};
  uint64_t data4[] __attribute__((aligned(kXRegSizeInBytes * 2))) = {arg2, 0};
  uint64_t data5[] __attribute__((aligned(kXRegSizeInBytes * 2))) = {arg2, 0};

  SETUP_WITH_FEATURES(CPUFeatures::kAtomics);
  START();

  __ Mov(x20, reinterpret_cast<uintptr_t>(data0));
  __ Mov(x21, reinterpret_cast<uintptr_t>(data1));
  __ Mov(x22, reinterpret_cast<uintptr_t>(data2));
  __ Mov(x23, reinterpret_cast<uintptr_t>(data3));

  __ Mov(x0, arg1);
  __ Mov(x1, arg1);
  __ Mov(x2, arg1);
  __ Mov(x3, arg1);

  (masm.*(load_funcs[0]))(x0, x10, MemOperand(x20));
  (masm.*(load_funcs[1]))(x1, x11, MemOperand(x21));
  (masm.*(load_funcs[2]))(x2, x12, MemOperand(x22));
  (masm.*(load_funcs[3]))(x3, x13, MemOperand(x23));

  if (store_funcs != NULL) {
    __ Mov(x24, reinterpret_cast<uintptr_t>(data4));
    __ Mov(x25, reinterpret_cast<uintptr_t>(data5));
    __ Mov(x4, arg1);
    __ Mov(x5, arg1);

    (masm.*(store_funcs[0]))(x4, MemOperand(x24));
    (masm.*(store_funcs[1]))(x5, MemOperand(x25));
  }

  END();

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
  RUN();

  ASSERT_EQUAL_64(arg2, x10);
  ASSERT_EQUAL_64(arg2, x11);
  ASSERT_EQUAL_64(arg2, x12);
  ASSERT_EQUAL_64(arg2, x13);

  ASSERT_EQUAL_64(expected, data0[0]);
  ASSERT_EQUAL_64(expected, data1[0]);
  ASSERT_EQUAL_64(expected, data2[0]);
  ASSERT_EQUAL_64(expected, data3[0]);

  if (store_funcs != NULL) {
    ASSERT_EQUAL_64(expected, data4[0]);
    ASSERT_EQUAL_64(expected, data5[0]);
  }
#else
  USE(expected);
#endif  // VIXL_INCLUDE_SIMULATOR_AARCH64

  TEARDOWN();
}

// clang-format off
#define MAKE_LOADS(NAME)           \
    {&MacroAssembler::Ld##NAME,    \
     &MacroAssembler::Ld##NAME##a, \
     &MacroAssembler::Ld##NAME##l, \
     &MacroAssembler::Ld##NAME##al}
#define MAKE_STORES(NAME) \
    {&MacroAssembler::St##NAME, &MacroAssembler::St##NAME##l}

#define MAKE_B_LOADS(NAME)          \
    {&MacroAssembler::Ld##NAME##b,  \
     &MacroAssembler::Ld##NAME##ab, \
     &MacroAssembler::Ld##NAME##lb, \
     &MacroAssembler::Ld##NAME##alb}
#define MAKE_B_STORES(NAME) \
    {&MacroAssembler::St##NAME##b, &MacroAssembler::St##NAME##lb}

#define MAKE_H_LOADS(NAME)          \
    {&MacroAssembler::Ld##NAME##h,  \
     &MacroAssembler::Ld##NAME##ah, \
     &MacroAssembler::Ld##NAME##lh, \
     &MacroAssembler::Ld##NAME##alh}
#define MAKE_H_STORES(NAME) \
    {&MacroAssembler::St##NAME##h, &MacroAssembler::St##NAME##lh}
// clang-format on
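
// For example, MAKE_LOADS(add) expands to
//   {&MacroAssembler::Ldadd, &MacroAssembler::Ldadda,
//    &MacroAssembler::Ldaddl, &MacroAssembler::Ldaddal}
// covering the plain, acquire, release and acquire-release forms.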

TEST(atomic_memory_add) {
  AtomicMemoryLoadSignature loads[] = MAKE_LOADS(add);
  AtomicMemoryStoreSignature stores[] = MAKE_STORES(add);
  AtomicMemoryLoadSignature b_loads[] = MAKE_B_LOADS(add);
  AtomicMemoryStoreSignature b_stores[] = MAKE_B_STORES(add);
  AtomicMemoryLoadSignature h_loads[] = MAKE_H_LOADS(add);
  AtomicMemoryStoreSignature h_stores[] = MAKE_H_STORES(add);

  // The arguments are chosen to have two useful properties:
  //  * When multiplied by small values (such as a register index), each value
  //    remains clearly readable in the result.
  //  * The values are not formed from repeating fixed-size smaller values, so
  //    they can be used to detect endianness-related errors.
  uint64_t arg1 = 0x0100001000100101;
  uint64_t arg2 = 0x0200002000200202;
  uint64_t expected = arg1 + arg2;
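  // No byte of arg1 + arg2 produces a carry, so the byte, halfword and word
  // slices of the 64-bit sum equal the slice-wise sums checked by the masked
  // W helpers below.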

  AtomicMemoryWHelper(b_loads, b_stores, arg1, arg2, expected, kByteMask);
  AtomicMemoryWHelper(h_loads, h_stores, arg1, arg2, expected, kHalfWordMask);
  AtomicMemoryWHelper(loads, stores, arg1, arg2, expected, kWordMask);
  AtomicMemoryXHelper(loads, stores, arg1, arg2, expected);
}

TEST(atomic_memory_clr) {
  AtomicMemoryLoadSignature loads[] = MAKE_LOADS(clr);
  AtomicMemoryStoreSignature stores[] = MAKE_STORES(clr);
  AtomicMemoryLoadSignature b_loads[] = MAKE_B_LOADS(clr);
  AtomicMemoryStoreSignature b_stores[] = MAKE_B_STORES(clr);
  AtomicMemoryLoadSignature h_loads[] = MAKE_H_LOADS(clr);
  AtomicMemoryStoreSignature h_stores[] = MAKE_H_STORES(clr);

  uint64_t arg1 = 0x0300003000300303;
  uint64_t arg2 = 0x0500005000500505;
  uint64_t expected = arg2 & ~arg1;

  AtomicMemoryWHelper(b_loads, b_stores, arg1, arg2, expected, kByteMask);
  AtomicMemoryWHelper(h_loads, h_stores, arg1, arg2, expected, kHalfWordMask);
  AtomicMemoryWHelper(loads, stores, arg1, arg2, expected, kWordMask);
  AtomicMemoryXHelper(loads, stores, arg1, arg2, expected);
}

TEST(atomic_memory_eor) {
  AtomicMemoryLoadSignature loads[] = MAKE_LOADS(eor);
  AtomicMemoryStoreSignature stores[] = MAKE_STORES(eor);
  AtomicMemoryLoadSignature b_loads[] = MAKE_B_LOADS(eor);
  AtomicMemoryStoreSignature b_stores[] = MAKE_B_STORES(eor);
  AtomicMemoryLoadSignature h_loads[] = MAKE_H_LOADS(eor);
  AtomicMemoryStoreSignature h_stores[] = MAKE_H_STORES(eor);

  uint64_t arg1 = 0x0300003000300303;
  uint64_t arg2 = 0x0500005000500505;
  uint64_t expected = arg1 ^ arg2;

  AtomicMemoryWHelper(b_loads, b_stores, arg1, arg2, expected, kByteMask);
  AtomicMemoryWHelper(h_loads, h_stores, arg1, arg2, expected, kHalfWordMask);
  AtomicMemoryWHelper(loads, stores, arg1, arg2, expected, kWordMask);
  AtomicMemoryXHelper(loads, stores, arg1, arg2, expected);
}

TEST(atomic_memory_set) {
  AtomicMemoryLoadSignature loads[] = MAKE_LOADS(set);
  AtomicMemoryStoreSignature stores[] = MAKE_STORES(set);
  AtomicMemoryLoadSignature b_loads[] = MAKE_B_LOADS(set);
  AtomicMemoryStoreSignature b_stores[] = MAKE_B_STORES(set);
  AtomicMemoryLoadSignature h_loads[] = MAKE_H_LOADS(set);
  AtomicMemoryStoreSignature h_stores[] = MAKE_H_STORES(set);

  uint64_t arg1 = 0x0300003000300303;
  uint64_t arg2 = 0x0500005000500505;
  uint64_t expected = arg1 | arg2;

  AtomicMemoryWHelper(b_loads, b_stores, arg1, arg2, expected, kByteMask);
  AtomicMemoryWHelper(h_loads, h_stores, arg1, arg2, expected, kHalfWordMask);
  AtomicMemoryWHelper(loads, stores, arg1, arg2, expected, kWordMask);
  AtomicMemoryXHelper(loads, stores, arg1, arg2, expected);
}

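// In the four min/max tests below, arg1 has the sign bit set at every
// operation size (byte, halfword, word and doubleword), so it compares as
// negative for the signed variants but as large for the unsigned ones:
// Smax and Umin select arg2, while Smin and Umax select arg1.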
TEST(atomic_memory_smax) {
  AtomicMemoryLoadSignature loads[] = MAKE_LOADS(smax);
  AtomicMemoryStoreSignature stores[] = MAKE_STORES(smax);
  AtomicMemoryLoadSignature b_loads[] = MAKE_B_LOADS(smax);
  AtomicMemoryStoreSignature b_stores[] = MAKE_B_STORES(smax);
  AtomicMemoryLoadSignature h_loads[] = MAKE_H_LOADS(smax);
  AtomicMemoryStoreSignature h_stores[] = MAKE_H_STORES(smax);

  uint64_t arg1 = 0x8100000080108181;
  uint64_t arg2 = 0x0100001000100101;
  uint64_t expected = 0x0100001000100101;

  AtomicMemoryWHelper(b_loads, b_stores, arg1, arg2, expected, kByteMask);
  AtomicMemoryWHelper(h_loads, h_stores, arg1, arg2, expected, kHalfWordMask);
  AtomicMemoryWHelper(loads, stores, arg1, arg2, expected, kWordMask);
  AtomicMemoryXHelper(loads, stores, arg1, arg2, expected);
}

TEST(atomic_memory_smin) {
  AtomicMemoryLoadSignature loads[] = MAKE_LOADS(smin);
  AtomicMemoryStoreSignature stores[] = MAKE_STORES(smin);
  AtomicMemoryLoadSignature b_loads[] = MAKE_B_LOADS(smin);
  AtomicMemoryStoreSignature b_stores[] = MAKE_B_STORES(smin);
  AtomicMemoryLoadSignature h_loads[] = MAKE_H_LOADS(smin);
  AtomicMemoryStoreSignature h_stores[] = MAKE_H_STORES(smin);

  uint64_t arg1 = 0x8100000080108181;
  uint64_t arg2 = 0x0100001000100101;
  uint64_t expected = 0x8100000080108181;

  AtomicMemoryWHelper(b_loads, b_stores, arg1, arg2, expected, kByteMask);
  AtomicMemoryWHelper(h_loads, h_stores, arg1, arg2, expected, kHalfWordMask);
  AtomicMemoryWHelper(loads, stores, arg1, arg2, expected, kWordMask);
  AtomicMemoryXHelper(loads, stores, arg1, arg2, expected);
}

TEST(atomic_memory_umax) {
  AtomicMemoryLoadSignature loads[] = MAKE_LOADS(umax);
  AtomicMemoryStoreSignature stores[] = MAKE_STORES(umax);
  AtomicMemoryLoadSignature b_loads[] = MAKE_B_LOADS(umax);
  AtomicMemoryStoreSignature b_stores[] = MAKE_B_STORES(umax);
  AtomicMemoryLoadSignature h_loads[] = MAKE_H_LOADS(umax);
  AtomicMemoryStoreSignature h_stores[] = MAKE_H_STORES(umax);

  uint64_t arg1 = 0x8100000080108181;
  uint64_t arg2 = 0x0100001000100101;
  uint64_t expected = 0x8100000080108181;

  AtomicMemoryWHelper(b_loads, b_stores, arg1, arg2, expected, kByteMask);
  AtomicMemoryWHelper(h_loads, h_stores, arg1, arg2, expected, kHalfWordMask);
  AtomicMemoryWHelper(loads, stores, arg1, arg2, expected, kWordMask);
  AtomicMemoryXHelper(loads, stores, arg1, arg2, expected);
}

TEST(atomic_memory_umin) {
  AtomicMemoryLoadSignature loads[] = MAKE_LOADS(umin);
  AtomicMemoryStoreSignature stores[] = MAKE_STORES(umin);
  AtomicMemoryLoadSignature b_loads[] = MAKE_B_LOADS(umin);
  AtomicMemoryStoreSignature b_stores[] = MAKE_B_STORES(umin);
  AtomicMemoryLoadSignature h_loads[] = MAKE_H_LOADS(umin);
  AtomicMemoryStoreSignature h_stores[] = MAKE_H_STORES(umin);

  uint64_t arg1 = 0x8100000080108181;
  uint64_t arg2 = 0x0100001000100101;
  uint64_t expected = 0x0100001000100101;

  AtomicMemoryWHelper(b_loads, b_stores, arg1, arg2, expected, kByteMask);
  AtomicMemoryWHelper(h_loads, h_stores, arg1, arg2, expected, kHalfWordMask);
  AtomicMemoryWHelper(loads, stores, arg1, arg2, expected, kWordMask);
  AtomicMemoryXHelper(loads, stores, arg1, arg2, expected);
}

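// Swp unconditionally stores rs and returns the previous memory contents in
// rt, so the loaded values are arg2 and the final memory contents are arg1.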
TEST(atomic_memory_swp) {
  AtomicMemoryLoadSignature loads[] = {&MacroAssembler::Swp,
                                       &MacroAssembler::Swpa,
                                       &MacroAssembler::Swpl,
                                       &MacroAssembler::Swpal};
  AtomicMemoryLoadSignature b_loads[] = {&MacroAssembler::Swpb,
                                         &MacroAssembler::Swpab,
                                         &MacroAssembler::Swplb,
                                         &MacroAssembler::Swpalb};
  AtomicMemoryLoadSignature h_loads[] = {&MacroAssembler::Swph,
                                         &MacroAssembler::Swpah,
                                         &MacroAssembler::Swplh,
                                         &MacroAssembler::Swpalh};

  uint64_t arg1 = 0x0100001000100101;
  uint64_t arg2 = 0x0200002000200202;
  uint64_t expected = 0x0100001000100101;

  // The SWP functions have signatures equivalent to the atomic memory LD
  // functions, so we can use the same helpers, just without the ST aliases.
  AtomicMemoryWHelper(b_loads, NULL, arg1, arg2, expected, kByteMask);
  AtomicMemoryWHelper(h_loads, NULL, arg1, arg2, expected, kHalfWordMask);
  AtomicMemoryWHelper(loads, NULL, arg1, arg2, expected, kWordMask);
  AtomicMemoryXHelper(loads, NULL, arg1, arg2, expected);
}


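// Ldaprb/Ldaprh/Ldapr are the RCpc load-acquire forms: like Ldar they have
// acquire semantics, but with the weaker release-consistent
// processor-consistent ordering with respect to earlier Stlr stores.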
TEST(ldaprb_ldaprh_ldapr) {
  uint64_t data0[] = {0x1010101010101010, 0};
  uint64_t data1[] = {0x1010101010101010, 0};
  uint64_t data2[] = {0x1010101010101010, 0};
  uint64_t data3[] = {0x1010101010101010, 0};

  uint64_t* data0_aligned = AlignUp(data0, kXRegSizeInBytes * 2);
  uint64_t* data1_aligned = AlignUp(data1, kXRegSizeInBytes * 2);
  uint64_t* data2_aligned = AlignUp(data2, kXRegSizeInBytes * 2);
  uint64_t* data3_aligned = AlignUp(data3, kXRegSizeInBytes * 2);

  SETUP_WITH_FEATURES(CPUFeatures::kRCpc);
  START();

  __ Mov(x20, reinterpret_cast<uintptr_t>(data0_aligned));
  __ Mov(x21, reinterpret_cast<uintptr_t>(data1_aligned));
  __ Mov(x22, reinterpret_cast<uintptr_t>(data2_aligned));
  __ Mov(x23, reinterpret_cast<uintptr_t>(data3_aligned));

  __ Ldaprb(w0, MemOperand(x20));
  __ Ldaprh(w1, MemOperand(x21));
  __ Ldapr(w2, MemOperand(x22));
  __ Ldapr(x3, MemOperand(x23));

  END();

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
  RUN();
  ASSERT_EQUAL_64(0x10, x0);
  ASSERT_EQUAL_64(0x1010, x1);
  ASSERT_EQUAL_64(0x10101010, x2);
  ASSERT_EQUAL_64(0x1010101010101010, x3);
#endif

  TEARDOWN();
}

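// The following load/store tests exercise tagged addresses. With AArch64's
// top-byte-ignore, bits 63:56 of a data address are ignored by the hardware;
// CPU::SetPointerTag plants a tag in those bits, and the tests check (via
// kAddressTagOffset and kAddressTagWidth) that loads and stores neither fault
// on nor clobber the tag.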
TEST(load_store_tagged_immediate_offset) {
  uint64_t tags[] = {0x00, 0x1, 0x55, 0xff};
  int tag_count = sizeof(tags) / sizeof(tags[0]);

  const int kMaxDataLength = 160;

  for (int i = 0; i < tag_count; i++) {
    unsigned char src[kMaxDataLength];
    uint64_t src_raw = reinterpret_cast<uint64_t>(src);
    uint64_t src_tag = tags[i];
    uint64_t src_tagged = CPU::SetPointerTag(src_raw, src_tag);

    for (int k = 0; k < kMaxDataLength; k++) {
      src[k] = k + 1;
    }

    for (int j = 0; j < tag_count; j++) {
      unsigned char dst[kMaxDataLength];
      uint64_t dst_raw = reinterpret_cast<uint64_t>(dst);
      uint64_t dst_tag = tags[j];
      uint64_t dst_tagged = CPU::SetPointerTag(dst_raw, dst_tag);

      memset(dst, 0, kMaxDataLength);

      SETUP_WITH_FEATURES(CPUFeatures::kNEON);
      START();

      __ Mov(x0, src_tagged);
      __ Mov(x1, dst_tagged);

      int offset = 0;

      // Scaled-immediate offsets.
      {
        ExactAssemblyScope scope(&masm, 2 * kInstructionSize);
        __ ldp(q0, q1, MemOperand(x0, offset));
        __ stp(q0, q1, MemOperand(x1, offset));
      }
      offset += 2 * kQRegSizeInBytes;

      {
        ExactAssemblyScope scope(&masm, 2 * kInstructionSize);
        __ ldp(x2, x3, MemOperand(x0, offset));
        __ stp(x2, x3, MemOperand(x1, offset));
      }
      offset += 2 * kXRegSizeInBytes;

      {
        ExactAssemblyScope scope(&masm, 2 * kInstructionSize);
        __ ldpsw(x2, x3, MemOperand(x0, offset));
        __ stp(w2, w3, MemOperand(x1, offset));
      }
      offset += 2 * kWRegSizeInBytes;

      {
        ExactAssemblyScope scope(&masm, 2 * kInstructionSize);
        __ ldp(d0, d1, MemOperand(x0, offset));
        __ stp(d0, d1, MemOperand(x1, offset));
      }
      offset += 2 * kDRegSizeInBytes;

      {
        ExactAssemblyScope scope(&masm, 2 * kInstructionSize);
        __ ldp(w2, w3, MemOperand(x0, offset));
        __ stp(w2, w3, MemOperand(x1, offset));
      }
      offset += 2 * kWRegSizeInBytes;

      {
        ExactAssemblyScope scope(&masm, 2 * kInstructionSize);
        __ ldp(s0, s1, MemOperand(x0, offset));
        __ stp(s0, s1, MemOperand(x1, offset));
      }
      offset += 2 * kSRegSizeInBytes;

      {
        ExactAssemblyScope scope(&masm, 2 * kInstructionSize);
        __ ldr(x2, MemOperand(x0, offset), RequireScaledOffset);
        __ str(x2, MemOperand(x1, offset), RequireScaledOffset);
      }
      offset += kXRegSizeInBytes;

      {
        ExactAssemblyScope scope(&masm, 2 * kInstructionSize);
        __ ldr(d0, MemOperand(x0, offset), RequireScaledOffset);
        __ str(d0, MemOperand(x1, offset), RequireScaledOffset);
      }
      offset += kDRegSizeInBytes;

      {
        ExactAssemblyScope scope(&masm, 2 * kInstructionSize);
        __ ldr(w2, MemOperand(x0, offset), RequireScaledOffset);
        __ str(w2, MemOperand(x1, offset), RequireScaledOffset);
      }
      offset += kWRegSizeInBytes;

      {
        ExactAssemblyScope scope(&masm, 2 * kInstructionSize);
        __ ldr(s0, MemOperand(x0, offset), RequireScaledOffset);
        __ str(s0, MemOperand(x1, offset), RequireScaledOffset);
      }
      offset += kSRegSizeInBytes;

      {
        ExactAssemblyScope scope(&masm, 2 * kInstructionSize);
        __ ldrh(w2, MemOperand(x0, offset), RequireScaledOffset);
        __ strh(w2, MemOperand(x1, offset), RequireScaledOffset);
      }
      offset += 2;

      {
        ExactAssemblyScope scope(&masm, 2 * kInstructionSize);
        __ ldrsh(w2, MemOperand(x0, offset), RequireScaledOffset);
        __ strh(w2, MemOperand(x1, offset), RequireScaledOffset);
      }
      offset += 2;

      {
        ExactAssemblyScope scope(&masm, 2 * kInstructionSize);
        __ ldrb(w2, MemOperand(x0, offset), RequireScaledOffset);
        __ strb(w2, MemOperand(x1, offset), RequireScaledOffset);
      }
      offset += 1;

      {
        ExactAssemblyScope scope(&masm, 2 * kInstructionSize);
        __ ldrsb(w2, MemOperand(x0, offset), RequireScaledOffset);
        __ strb(w2, MemOperand(x1, offset), RequireScaledOffset);
      }
      offset += 1;

      // Unscaled-immediate offsets.

      {
        ExactAssemblyScope scope(&masm, 2 * kInstructionSize);
        __ ldur(x2, MemOperand(x0, offset), RequireUnscaledOffset);
        __ stur(x2, MemOperand(x1, offset), RequireUnscaledOffset);
      }
      offset += kXRegSizeInBytes;

      {
        ExactAssemblyScope scope(&masm, 2 * kInstructionSize);
        __ ldur(d0, MemOperand(x0, offset), RequireUnscaledOffset);
        __ stur(d0, MemOperand(x1, offset), RequireUnscaledOffset);
      }
      offset += kDRegSizeInBytes;

      {
        ExactAssemblyScope scope(&masm, 2 * kInstructionSize);
        __ ldur(w2, MemOperand(x0, offset), RequireUnscaledOffset);
        __ stur(w2, MemOperand(x1, offset), RequireUnscaledOffset);
      }
      offset += kWRegSizeInBytes;

      {
        ExactAssemblyScope scope(&masm, 2 * kInstructionSize);
        __ ldur(s0, MemOperand(x0, offset), RequireUnscaledOffset);
        __ stur(s0, MemOperand(x1, offset), RequireUnscaledOffset);
      }
      offset += kSRegSizeInBytes;

      {
        ExactAssemblyScope scope(&masm, 2 * kInstructionSize);
        __ ldurh(w2, MemOperand(x0, offset), RequireUnscaledOffset);
        __ sturh(w2, MemOperand(x1, offset), RequireUnscaledOffset);
      }
      offset += 2;

      {
        ExactAssemblyScope scope(&masm, 2 * kInstructionSize);
        __ ldursh(w2, MemOperand(x0, offset), RequireUnscaledOffset);
        __ sturh(w2, MemOperand(x1, offset), RequireUnscaledOffset);
      }
      offset += 2;

      {
        ExactAssemblyScope scope(&masm, 2 * kInstructionSize);
        __ ldurb(w2, MemOperand(x0, offset), RequireUnscaledOffset);
        __ sturb(w2, MemOperand(x1, offset), RequireUnscaledOffset);
      }
      offset += 1;

      {
        ExactAssemblyScope scope(&masm, 2 * kInstructionSize);
        __ ldursb(w2, MemOperand(x0, offset), RequireUnscaledOffset);
        __ sturb(w2, MemOperand(x1, offset), RequireUnscaledOffset);
      }
      offset += 1;

      // Extract the tag (so we can test that it was preserved correctly).
      __ Ubfx(x0, x0, kAddressTagOffset, kAddressTagWidth);
      __ Ubfx(x1, x1, kAddressTagOffset, kAddressTagWidth);

      VIXL_ASSERT(kMaxDataLength >= offset);

      END();
      RUN();

      ASSERT_EQUAL_64(src_tag, x0);
      ASSERT_EQUAL_64(dst_tag, x1);

      for (int k = 0; k < offset; k++) {
        VIXL_CHECK(src[k] == dst[k]);
      }

      TEARDOWN();
    }
  }
}


TEST(load_store_tagged_immediate_preindex) {
  uint64_t tags[] = {0x00, 0x1, 0x55, 0xff};
  int tag_count = sizeof(tags) / sizeof(tags[0]);

  const int kMaxDataLength = 128;

  for (int i = 0; i < tag_count; i++) {
    unsigned char src[kMaxDataLength];
    uint64_t src_raw = reinterpret_cast<uint64_t>(src);
    uint64_t src_tag = tags[i];
    uint64_t src_tagged = CPU::SetPointerTag(src_raw, src_tag);

    for (int k = 0; k < kMaxDataLength; k++) {
      src[k] = k + 1;
    }

    for (int j = 0; j < tag_count; j++) {
      unsigned char dst[kMaxDataLength];
      uint64_t dst_raw = reinterpret_cast<uint64_t>(dst);
      uint64_t dst_tag = tags[j];
      uint64_t dst_tagged = CPU::SetPointerTag(dst_raw, dst_tag);

      for (int k = 0; k < kMaxDataLength; k++) {
        dst[k] = 0;
      }

      SETUP_WITH_FEATURES(CPUFeatures::kNEON);
      START();

      // Each MemOperand must apply a pre-index equal to the size of the
      // previous access.

      // Start with a non-zero preindex.
      int preindex = 62 * kXRegSizeInBytes;
      int data_length = 0;

      __ Mov(x0, src_tagged - preindex);
      __ Mov(x1, dst_tagged - preindex);

      {
        ExactAssemblyScope scope(&masm, 2 * kInstructionSize);
        __ ldp(q0, q1, MemOperand(x0, preindex, PreIndex));
        __ stp(q0, q1, MemOperand(x1, preindex, PreIndex));
      }
      preindex = 2 * kQRegSizeInBytes;
      data_length = preindex;

      {
        ExactAssemblyScope scope(&masm, 2 * kInstructionSize);
        __ ldp(x2, x3, MemOperand(x0, preindex, PreIndex));
        __ stp(x2, x3, MemOperand(x1, preindex, PreIndex));
      }
      preindex = 2 * kXRegSizeInBytes;
      data_length += preindex;

      {
        ExactAssemblyScope scope(&masm, 2 * kInstructionSize);
        __ ldpsw(x2, x3, MemOperand(x0, preindex, PreIndex));
        __ stp(w2, w3, MemOperand(x1, preindex, PreIndex));
      }
      preindex = 2 * kWRegSizeInBytes;
      data_length += preindex;

      {
        ExactAssemblyScope scope(&masm, 2 * kInstructionSize);
        __ ldp(d0, d1, MemOperand(x0, preindex, PreIndex));
        __ stp(d0, d1, MemOperand(x1, preindex, PreIndex));
      }
      preindex = 2 * kDRegSizeInBytes;
      data_length += preindex;

      {
        ExactAssemblyScope scope(&masm, 2 * kInstructionSize);
        __ ldp(w2, w3, MemOperand(x0, preindex, PreIndex));
        __ stp(w2, w3, MemOperand(x1, preindex, PreIndex));
      }
      preindex = 2 * kWRegSizeInBytes;
      data_length += preindex;

      {
        ExactAssemblyScope scope(&masm, 2 * kInstructionSize);
        __ ldp(s0, s1, MemOperand(x0, preindex, PreIndex));
        __ stp(s0, s1, MemOperand(x1, preindex, PreIndex));
      }
      preindex = 2 * kSRegSizeInBytes;
      data_length += preindex;

      {
        ExactAssemblyScope scope(&masm, 2 * kInstructionSize);
        __ ldr(x2, MemOperand(x0, preindex, PreIndex));
        __ str(x2, MemOperand(x1, preindex, PreIndex));
      }
      preindex = kXRegSizeInBytes;
      data_length += preindex;

      {
        ExactAssemblyScope scope(&masm, 2 * kInstructionSize);
        __ ldr(d0, MemOperand(x0, preindex, PreIndex));
        __ str(d0, MemOperand(x1, preindex, PreIndex));
      }
      preindex = kDRegSizeInBytes;
      data_length += preindex;

      {
        ExactAssemblyScope scope(&masm, 2 * kInstructionSize);
        __ ldr(w2, MemOperand(x0, preindex, PreIndex));
        __ str(w2, MemOperand(x1, preindex, PreIndex));
      }
      preindex = kWRegSizeInBytes;
      data_length += preindex;

      {
        ExactAssemblyScope scope(&masm, 2 * kInstructionSize);
        __ ldr(s0, MemOperand(x0, preindex, PreIndex));
        __ str(s0, MemOperand(x1, preindex, PreIndex));
      }
      preindex = kSRegSizeInBytes;
      data_length += preindex;

      {
        ExactAssemblyScope scope(&masm, 2 * kInstructionSize);
        __ ldrh(w2, MemOperand(x0, preindex, PreIndex));
        __ strh(w2, MemOperand(x1, preindex, PreIndex));
      }
      preindex = 2;
      data_length += preindex;

      {
        ExactAssemblyScope scope(&masm, 2 * kInstructionSize);
        __ ldrsh(w2, MemOperand(x0, preindex, PreIndex));
        __ strh(w2, MemOperand(x1, preindex, PreIndex));
      }
      preindex = 2;
      data_length += preindex;

      {
        ExactAssemblyScope scope(&masm, 2 * kInstructionSize);
        __ ldrb(w2, MemOperand(x0, preindex, PreIndex));
        __ strb(w2, MemOperand(x1, preindex, PreIndex));
      }
      preindex = 1;
      data_length += preindex;

      {
        ExactAssemblyScope scope(&masm, 2 * kInstructionSize);
        __ ldrsb(w2, MemOperand(x0, preindex, PreIndex));
        __ strb(w2, MemOperand(x1, preindex, PreIndex));
      }
      preindex = 1;
      data_length += preindex;

      VIXL_ASSERT(kMaxDataLength >= data_length);

      END();
      RUN();

      // Check that the preindex was correctly applied in each operation, and
      // that the tag was preserved.
      ASSERT_EQUAL_64(src_tagged + data_length - preindex, x0);
      ASSERT_EQUAL_64(dst_tagged + data_length - preindex, x1);

      for (int k = 0; k < data_length; k++) {
        VIXL_CHECK(src[k] == dst[k]);
      }

      TEARDOWN();
    }
  }
}


TEST(load_store_tagged_immediate_postindex) {
  uint64_t tags[] = {0x00, 0x1, 0x55, 0xff};
  int tag_count = sizeof(tags) / sizeof(tags[0]);

  const int kMaxDataLength = 128;

  for (int i = 0; i < tag_count; i++) {
    unsigned char src[kMaxDataLength];
    uint64_t src_raw = reinterpret_cast<uint64_t>(src);
    uint64_t src_tag = tags[i];
    uint64_t src_tagged = CPU::SetPointerTag(src_raw, src_tag);

    for (int k = 0; k < kMaxDataLength; k++) {
      src[k] = k + 1;
    }

    for (int j = 0; j < tag_count; j++) {
      unsigned char dst[kMaxDataLength];
      uint64_t dst_raw = reinterpret_cast<uint64_t>(dst);
      uint64_t dst_tag = tags[j];
      uint64_t dst_tagged = CPU::SetPointerTag(dst_raw, dst_tag);

      for (int k = 0; k < kMaxDataLength; k++) {
        dst[k] = 0;
      }

      SETUP_WITH_FEATURES(CPUFeatures::kNEON);
      START();

      int postindex = 2 * kXRegSizeInBytes;
      int data_length = 0;

      __ Mov(x0, src_tagged);
      __ Mov(x1, dst_tagged);

      {
        ExactAssemblyScope scope(&masm, 2 * kInstructionSize);
        __ ldp(x2, x3, MemOperand(x0, postindex, PostIndex));
        __ stp(x2, x3, MemOperand(x1, postindex, PostIndex));
      }
      data_length = postindex;

      postindex = 2 * kQRegSizeInBytes;
      {
        ExactAssemblyScope scope(&masm, 2 * kInstructionSize);
        __ ldp(q0, q1, MemOperand(x0, postindex, PostIndex));
        __ stp(q0, q1, MemOperand(x1, postindex, PostIndex));
      }
      data_length += postindex;

      postindex = 2 * kWRegSizeInBytes;
      {
        ExactAssemblyScope scope(&masm, 2 * kInstructionSize);
        __ ldpsw(x2, x3, MemOperand(x0, postindex, PostIndex));
        __ stp(w2, w3, MemOperand(x1, postindex, PostIndex));
      }
      data_length += postindex;

      postindex = 2 * kDRegSizeInBytes;
      {
        ExactAssemblyScope scope(&masm, 2 * kInstructionSize);
        __ ldp(d0, d1, MemOperand(x0, postindex, PostIndex));
        __ stp(d0, d1, MemOperand(x1, postindex, PostIndex));
      }
      data_length += postindex;

      postindex = 2 * kWRegSizeInBytes;
      {
        ExactAssemblyScope scope(&masm, 2 * kInstructionSize);
        __ ldp(w2, w3, MemOperand(x0, postindex, PostIndex));
        __ stp(w2, w3, MemOperand(x1, postindex, PostIndex));
      }
      data_length += postindex;

      postindex = 2 * kSRegSizeInBytes;
      {
        ExactAssemblyScope scope(&masm, 2 * kInstructionSize);
        __ ldp(s0, s1, MemOperand(x0, postindex, PostIndex));
        __ stp(s0, s1, MemOperand(x1, postindex, PostIndex));
      }
      data_length += postindex;

      postindex = kXRegSizeInBytes;
      {
        ExactAssemblyScope scope(&masm, 2 * kInstructionSize);
        __ ldr(x2, MemOperand(x0, postindex, PostIndex));
        __ str(x2, MemOperand(x1, postindex, PostIndex));
      }
      data_length += postindex;

      postindex = kDRegSizeInBytes;
      {
        ExactAssemblyScope scope(&masm, 2 * kInstructionSize);
        __ ldr(d0, MemOperand(x0, postindex, PostIndex));
        __ str(d0, MemOperand(x1, postindex, PostIndex));
      }
      data_length += postindex;

      postindex = kWRegSizeInBytes;
      {
        ExactAssemblyScope scope(&masm, 2 * kInstructionSize);
        __ ldr(w2, MemOperand(x0, postindex, PostIndex));
        __ str(w2, MemOperand(x1, postindex, PostIndex));
      }
      data_length += postindex;

      postindex = kSRegSizeInBytes;
      {
        ExactAssemblyScope scope(&masm, 2 * kInstructionSize);
        __ ldr(s0, MemOperand(x0, postindex, PostIndex));
        __ str(s0, MemOperand(x1, postindex, PostIndex));
      }
      data_length += postindex;

      postindex = 2;
      {
        ExactAssemblyScope scope(&masm, 2 * kInstructionSize);
        __ ldrh(w2, MemOperand(x0, postindex, PostIndex));
        __ strh(w2, MemOperand(x1, postindex, PostIndex));
      }
      data_length += postindex;

      postindex = 2;
      {
        ExactAssemblyScope scope(&masm, 2 * kInstructionSize);
        __ ldrsh(w2, MemOperand(x0, postindex, PostIndex));
        __ strh(w2, MemOperand(x1, postindex, PostIndex));
      }
      data_length += postindex;

      postindex = 1;
      {
        ExactAssemblyScope scope(&masm, 2 * kInstructionSize);
        __ ldrb(w2, MemOperand(x0, postindex, PostIndex));
        __ strb(w2, MemOperand(x1, postindex, PostIndex));
      }
      data_length += postindex;

      postindex = 1;
      {
        ExactAssemblyScope scope(&masm, 2 * kInstructionSize);
        __ ldrsb(w2, MemOperand(x0, postindex, PostIndex));
        __ strb(w2, MemOperand(x1, postindex, PostIndex));
      }
      data_length += postindex;

      VIXL_ASSERT(kMaxDataLength >= data_length);

      END();
      RUN();

      // Check that the postindex was correctly applied in each operation, and
      // that the tag was preserved.
      ASSERT_EQUAL_64(src_tagged + data_length, x0);
      ASSERT_EQUAL_64(dst_tagged + data_length, x1);

      for (int k = 0; k < data_length; k++) {
        VIXL_CHECK(src[k] == dst[k]);
      }

      TEARDOWN();
    }
  }
}
19599
19600
TEST(load_store_tagged_register_offset)19601 TEST(load_store_tagged_register_offset) {
19602 uint64_t tags[] = {0x00, 0x1, 0x55, 0xff};
19603 int tag_count = sizeof(tags) / sizeof(tags[0]);
19604
19605 const int kMaxDataLength = 128;
19606
19607 for (int i = 0; i < tag_count; i++) {
19608 unsigned char src[kMaxDataLength];
19609 uint64_t src_raw = reinterpret_cast<uint64_t>(src);
19610 uint64_t src_tag = tags[i];
19611 uint64_t src_tagged = CPU::SetPointerTag(src_raw, src_tag);
19612
19613 for (int k = 0; k < kMaxDataLength; k++) {
19614 src[k] = k + 1;
19615 }
19616
19617 for (int j = 0; j < tag_count; j++) {
19618 unsigned char dst[kMaxDataLength];
19619 uint64_t dst_raw = reinterpret_cast<uint64_t>(dst);
19620 uint64_t dst_tag = tags[j];
19621 uint64_t dst_tagged = CPU::SetPointerTag(dst_raw, dst_tag);
19622
19623 // Also tag the offset register; the operation should still succeed.
19624 for (int o = 0; o < tag_count; o++) {
19625 uint64_t offset_base = CPU::SetPointerTag(UINT64_C(0), tags[o]);
19626 int data_length = 0;
19627
19628 for (int k = 0; k < kMaxDataLength; k++) {
19629 dst[k] = 0;
19630 }
19631
19632 SETUP_WITH_FEATURES(CPUFeatures::kNEON);
19633 START();
19634
19635 __ Mov(x0, src_tagged);
19636 __ Mov(x1, dst_tagged);
19637
19638 __ Mov(x10, offset_base + data_length);
19639 {
19640 ExactAssemblyScope scope(&masm, 2 * kInstructionSize);
19641 __ ldr(x2, MemOperand(x0, x10));
19642 __ str(x2, MemOperand(x1, x10));
19643 }
19644 data_length += kXRegSizeInBytes;
19645
19646 __ Mov(x10, offset_base + data_length);
19647 {
19648 ExactAssemblyScope scope(&masm, 2 * kInstructionSize);
19649 __ ldr(d0, MemOperand(x0, x10));
19650 __ str(d0, MemOperand(x1, x10));
19651 }
19652 data_length += kDRegSizeInBytes;
19653
19654 __ Mov(x10, offset_base + data_length);
19655 {
19656 ExactAssemblyScope scope(&masm, 2 * kInstructionSize);
19657 __ ldr(w2, MemOperand(x0, x10));
19658 __ str(w2, MemOperand(x1, x10));
19659 }
19660 data_length += kWRegSizeInBytes;
19661
19662 __ Mov(x10, offset_base + data_length);
19663 {
19664 ExactAssemblyScope scope(&masm, 2 * kInstructionSize);
19665 __ ldr(s0, MemOperand(x0, x10));
19666 __ str(s0, MemOperand(x1, x10));
19667 }
19668 data_length += kSRegSizeInBytes;
19669
19670 __ Mov(x10, offset_base + data_length);
19671 {
19672 ExactAssemblyScope scope(&masm, 2 * kInstructionSize);
19673 __ ldrh(w2, MemOperand(x0, x10));
19674 __ strh(w2, MemOperand(x1, x10));
19675 }
19676 data_length += 2;
19677
19678 __ Mov(x10, offset_base + data_length);
19679 {
19680 ExactAssemblyScope scope(&masm, 2 * kInstructionSize);
19681 __ ldrsh(w2, MemOperand(x0, x10));
19682 __ strh(w2, MemOperand(x1, x10));
19683 }
19684 data_length += 2;
19685
19686 __ Mov(x10, offset_base + data_length);
19687 {
19688 ExactAssemblyScope scope(&masm, 2 * kInstructionSize);
19689 __ ldrb(w2, MemOperand(x0, x10));
19690 __ strb(w2, MemOperand(x1, x10));
19691 }
19692 data_length += 1;
19693
19694 __ Mov(x10, offset_base + data_length);
19695 {
19696 ExactAssemblyScope scope(&masm, 2 * kInstructionSize);
19697 __ ldrsb(w2, MemOperand(x0, x10));
19698 __ strb(w2, MemOperand(x1, x10));
19699 }
19700 data_length += 1;
19701
19702 VIXL_ASSERT(kMaxDataLength >= data_length);
19703
19704 END();
19705 RUN();
19706
        // Check that the loads and stores didn't modify the base and offset
        // registers, and that the tags were preserved.
        ASSERT_EQUAL_64(src_tagged, x0);
        ASSERT_EQUAL_64(dst_tagged, x1);
        ASSERT_EQUAL_64(offset_base + data_length - 1, x10);

        for (int k = 0; k < data_length; k++) {
          VIXL_CHECK(src[k] == dst[k]);
        }

        TEARDOWN();
      }
    }
  }
}
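
// The tests above and below build tagged pointers with CPU::SetPointerTag.
// A minimal sketch of that arithmetic, assuming the usual top-byte layout
// described by kAddressTagOffset and kAddressTagMask (SetTagForIllustration
// is a hypothetical helper for illustration only, not the VIXL API):
inline uint64_t SetTagForIllustration(uint64_t ptr, uint64_t tag) {
  // Replace bits [63:56] of `ptr` with the eight tag bits; the addressable
  // part of the pointer is untouched.
  return (ptr & ~kAddressTagMask) |
         ((tag << kAddressTagOffset) & kAddressTagMask);
}
// For example, SetTagForIllustration(0x00007f0012345678, 0x55) returns
// 0x55007f0012345678.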


TEST(load_store_tagged_register_postindex) {
  uint64_t src[] = {0x0706050403020100, 0x0f0e0d0c0b0a0908};
  uint64_t tags[] = {0x00, 0x1, 0x55, 0xff};
  int tag_count = sizeof(tags) / sizeof(tags[0]);

  for (int j = 0; j < tag_count; j++) {
    for (int i = 0; i < tag_count; i++) {
      SETUP_WITH_FEATURES(CPUFeatures::kNEON);

      uint64_t src_base = reinterpret_cast<uint64_t>(src);
      uint64_t src_tagged = CPU::SetPointerTag(src_base, tags[i]);
      uint64_t offset_tagged = CPU::SetPointerTag(UINT64_C(0), tags[j]);

      START();
      __ Mov(x10, src_tagged);
      __ Mov(x11, offset_tagged);
      __ Ld1(v0.V16B(), MemOperand(x10, x11, PostIndex));
      // TODO: add other instructions (ld2-4, st1-4) as they become available.
      END();

      RUN();

      ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q0);
      ASSERT_EQUAL_64(src_tagged + offset_tagged, x10);

      TEARDOWN();
    }
  }
}


TEST(branch_tagged) {
  SETUP();
  START();

  Label loop, loop_entry, done;
  __ Adr(x0, &loop);
  __ Mov(x1, 0);
  __ B(&loop_entry);

  __ Bind(&loop);
  __ Add(x1, x1, 1);  // Count successful jumps.

  // Advance to the next tag, then bail out if we've come back around to tag 0.
  __ Add(x0, x0, UINT64_C(1) << kAddressTagOffset);
  __ Tst(x0, kAddressTagMask);
  __ B(eq, &done);

  __ Bind(&loop_entry);
  __ Br(x0);

  __ Bind(&done);

  END();
  RUN();

  ASSERT_EQUAL_64(1 << kAddressTagWidth, x1);

  TEARDOWN();
}
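
// The loop above branches back to `loop` once for every possible value of
// the x0 top-byte tag, incrementing x1 each time, so on exit x1 should hold
// 1 << kAddressTagWidth = 2^8 = 256 successful tagged branches. The same
// counting scheme is reused by the Blr and Adr/Adrp variants below.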


TEST(branch_and_link_tagged) {
  SETUP();
  START();

  Label loop, loop_entry, done;
  __ Adr(x0, &loop);
  __ Mov(x1, 0);
  __ B(&loop_entry);

  __ Bind(&loop);

  // Bail out (before counting a successful jump) if lr appears to be tagged.
  __ Tst(lr, kAddressTagMask);
  __ B(ne, &done);

  __ Add(x1, x1, 1);  // Count successful jumps.

  // Advance to the next tag, then bail out if we've come back around to tag 0.
  __ Add(x0, x0, UINT64_C(1) << kAddressTagOffset);
  __ Tst(x0, kAddressTagMask);
  __ B(eq, &done);

  __ Bind(&loop_entry);
  __ Blr(x0);

  __ Bind(&done);

  END();
  RUN();

  ASSERT_EQUAL_64(1 << kAddressTagWidth, x1);

  TEARDOWN();
}


TEST(branch_tagged_and_adr_adrp) {
  SETUP_CUSTOM(kPageSize, PageOffsetDependentCode);
  START();

  Label loop, loop_entry, done;
  __ Adr(x0, &loop);
  __ Mov(x1, 0);
  __ B(&loop_entry);

  __ Bind(&loop);

  // Bail out (before counting a successful jump) if `adr x10, ...` is tagged.
  __ Adr(x10, &done);
  __ Tst(x10, kAddressTagMask);
  __ B(ne, &done);

  // Bail out (before counting a successful jump) if `adrp x11, ...` is tagged.
  __ Adrp(x11, &done);
  __ Tst(x11, kAddressTagMask);
  __ B(ne, &done);

  __ Add(x1, x1, 1);  // Count successful iterations.

  // Advance to the next tag, then bail out if we've come back around to tag 0.
  __ Add(x0, x0, UINT64_C(1) << kAddressTagOffset);
  __ Tst(x0, kAddressTagMask);
  __ B(eq, &done);

  __ Bind(&loop_entry);
  __ Br(x0);

  __ Bind(&done);

  END();
  RUN_CUSTOM();

  ASSERT_EQUAL_64(1 << kAddressTagWidth, x1);

  TEARDOWN_CUSTOM();
}

TEST(neon_3same_addp) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa);
  __ Movi(v1.V2D(), 0x000055aaff55ff00, 0xaa55ff55555500ff);
  __ Addp(v16.V16B(), v0.V16B(), v1.V16B());

  END();

  RUN();
  ASSERT_EQUAL_128(0x00ff54ffff54aaff, 0xffffffffffffffff, q16);
  TEARDOWN();
}

TEST(neon_3same_sqdmulh_sqrdmulh) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x0000000000000000, 0x0000040004008000);
  __ Movi(v1.V2D(), 0x0000000000000000, 0x0000002000108000);
  __ Movi(v2.V2D(), 0x0400000080000000, 0x0400000080000000);
  __ Movi(v3.V2D(), 0x0000002080000000, 0x0000001080000000);

  __ Sqdmulh(v16.V4H(), v0.V4H(), v1.V4H());
  __ Sqdmulh(v17.V4S(), v2.V4S(), v3.V4S());
  __ Sqdmulh(h18, h0, h1);
  __ Sqdmulh(s19, s2, s3);

  __ Sqrdmulh(v20.V4H(), v0.V4H(), v1.V4H());
  __ Sqrdmulh(v21.V4S(), v2.V4S(), v3.V4S());
  __ Sqrdmulh(h22, h0, h1);
  __ Sqrdmulh(s23, s2, s3);

  END();

  RUN();
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000100007fff, q16);
  ASSERT_EQUAL_128(0x000000017fffffff, 0x000000007fffffff, q17);
  ASSERT_EQUAL_128(0, 0x7fff, q18);
  ASSERT_EQUAL_128(0, 0x7fffffff, q19);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000100017fff, q20);
  ASSERT_EQUAL_128(0x000000017fffffff, 0x000000017fffffff, q21);
  ASSERT_EQUAL_128(0, 0x7fff, q22);
  ASSERT_EQUAL_128(0, 0x7fffffff, q23);
  TEARDOWN();
}
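
// A worked example of the arithmetic checked above, assuming the usual
// definitions of these instructions: for 16-bit elements, SQDMULH computes
// SignedSat((2 * a * b) >> 16) and SQRDMULH adds a rounding constant first,
// SignedSat((2 * a * b + 0x8000) >> 16). For a = 0x0400 (1024) and
// b = 0x0020 (32), 2 * 1024 * 32 = 0x10000, so both forms yield 0x0001. For
// a = b = 0x8000 (-32768), 2 * (-32768)^2 = 2^31 overflows int32, so the
// result saturates to 0x7fff, as the q16 and q20 expectations show.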

TEST(neon_byelement_sqdmulh_sqrdmulh) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x0000000000000000, 0x0000040004008000);
  __ Movi(v1.V2D(), 0x0000000000000000, 0x0000002000108000);
  __ Movi(v2.V2D(), 0x0400000080000000, 0x0400000080000000);
  __ Movi(v3.V2D(), 0x0000002080000000, 0x0000001080000000);

  __ Sqdmulh(v16.V4H(), v0.V4H(), v1.H(), 1);
  __ Sqdmulh(v17.V4S(), v2.V4S(), v3.S(), 1);
  __ Sqdmulh(h18, h0, v1.H(), 0);
  __ Sqdmulh(s19, s2, v3.S(), 0);

  __ Sqrdmulh(v20.V4H(), v0.V4H(), v1.H(), 1);
  __ Sqrdmulh(v21.V4S(), v2.V4S(), v3.S(), 1);
  __ Sqrdmulh(h22, h0, v1.H(), 0);
  __ Sqrdmulh(s23, s2, v3.S(), 0);

  END();

  RUN();
  ASSERT_EQUAL_128(0x0000000000000000, 0x000000000000fff0, q16);
  ASSERT_EQUAL_128(0x00000000fffffff0, 0x00000000fffffff0, q17);
  ASSERT_EQUAL_128(0, 0x7fff, q18);
  ASSERT_EQUAL_128(0, 0x7fffffff, q19);
  ASSERT_EQUAL_128(0x0000000000000000, 0x000000010001fff0, q20);
  ASSERT_EQUAL_128(0x00000001fffffff0, 0x00000001fffffff0, q21);
  ASSERT_EQUAL_128(0, 0x7fff, q22);
  ASSERT_EQUAL_128(0, 0x7fffffff, q23);
  TEARDOWN();
}

TEST(neon_3same_sqrdmlah) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kRDM);

  START();

  __ Movi(v0.V2D(), 0x0000000000000000, 0x0000040004008000);
  __ Movi(v1.V2D(), 0x0000000000000000, 0x0000002000108000);
  __ Movi(v2.V2D(), 0x0400000080000000, 0x0400000080000000);
  __ Movi(v3.V2D(), 0x0000002080000000, 0x0000001080000000);

  __ Movi(v16.V2D(), 0x0000040004008000, 0x0000040004008000);
  __ Movi(v17.V2D(), 0x0000000000000000, 0x0000002000108000);
  __ Movi(v18.V2D(), 0x0400000080000000, 0x0400000080000000);
  __ Movi(v19.V2D(), 0x0000002080000000, 0x0000001080000000);

  __ Sqrdmlah(v16.V4H(), v0.V4H(), v1.V4H());
  __ Sqrdmlah(v17.V4S(), v2.V4S(), v3.V4S());
  __ Sqrdmlah(h18, h0, h1);
  __ Sqrdmlah(s19, s2, s3);

  END();

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
  RUN();
  ASSERT_EQUAL_128(0, 0x0000040104010000, q16);
  ASSERT_EQUAL_128(0x000000017fffffff, 0x000000217fffffff, q17);
  ASSERT_EQUAL_128(0, 0x7fff, q18);
  ASSERT_EQUAL_128(0, 0, q19);
#endif
  TEARDOWN();
}
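
// A sketch of the accumulating form, assuming the architectural definition:
// for 16-bit elements, SQRDMLAH computes
// SignedSat(((acc << 16) + 2 * a * b + 0x8000) >> 16), with no intermediate
// saturation of the product. For acc = 0x0400, a = 0x0400 and b = 0x0010:
// (0x04000000 + 0x8000 + 0x8000) >> 16 = 0x0401, matching the q16 lanes
// above. SQRDMLSH (below) is identical except that the product is subtracted.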

TEST(neon_byelement_sqrdmlah) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kRDM);

  START();

  __ Movi(v0.V2D(), 0x0000000000000000, 0x0000040004008000);
  __ Movi(v1.V2D(), 0x0000000000000000, 0x0000002000108000);
  __ Movi(v2.V2D(), 0x0400000080000000, 0x0400000080000000);
  __ Movi(v3.V2D(), 0x0000002080000000, 0x0000001080000000);

  __ Movi(v16.V2D(), 0x0000040004008000, 0x0000040004008000);
  __ Movi(v17.V2D(), 0x0000000000000000, 0x0000002000108000);
  __ Movi(v18.V2D(), 0x0400000080000000, 0x0400000080000000);
  __ Movi(v19.V2D(), 0x0000002080000000, 0x0000001080000000);

  __ Sqrdmlah(v16.V4H(), v0.V4H(), v1.H(), 1);
  __ Sqrdmlah(v17.V4S(), v2.V4S(), v3.S(), 1);
  __ Sqrdmlah(h18, h0, v1.H(), 0);
  __ Sqrdmlah(s19, s2, v3.S(), 0);

  END();

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
  RUN();
  ASSERT_EQUAL_128(0, 0x0000040104018000, q16);
  ASSERT_EQUAL_128(0x00000001fffffff0, 0x0000002100107ff0, q17);
  ASSERT_EQUAL_128(0, 0x7fff, q18);
  ASSERT_EQUAL_128(0, 0, q19);
#endif
  TEARDOWN();
}

TEST(neon_3same_sqrdmlsh) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kRDM);

  START();

  __ Movi(v0.V2D(), 0x0000000000000000, 0x0000040004000500);
  __ Movi(v1.V2D(), 0x0000000000000000, 0x0000002000100080);
  __ Movi(v2.V2D(), 0x0400000080000000, 0x0400000080000000);
  __ Movi(v3.V2D(), 0x0000002080000000, 0x0000001080000000);

  __ Movi(v16.V2D(), 0x4000400040004000, 0x4000400040004000);
  __ Movi(v17.V2D(), 0x4000400040004000, 0x4000400040004000);
  __ Movi(v18.V2D(), 0x4000400040004000, 0x4000400040004000);
  __ Movi(v19.V2D(), 0x4000400040004000, 0x4000400040004000);

  __ Sqrdmlsh(v16.V4H(), v0.V4H(), v1.V4H());
  __ Sqrdmlsh(v17.V4S(), v2.V4S(), v3.V4S());
  __ Sqrdmlsh(h18, h0, h1);
  __ Sqrdmlsh(s19, s2, s3);

  END();

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
  RUN();
  ASSERT_EQUAL_128(0, 0x40003fff40003ffb, q16);
  ASSERT_EQUAL_128(0x40003fffc0004000, 0x40004000c0004000, q17);
  ASSERT_EQUAL_128(0, 0x3ffb, q18);
  ASSERT_EQUAL_128(0, 0xc0004000, q19);
#endif
  TEARDOWN();
}

TEST(neon_byelement_sqrdmlsh) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kRDM);

  START();

  __ Movi(v0.V2D(), 0x0000000000000000, 0x0000040004008000);
  __ Movi(v1.V2D(), 0x0000000000000000, 0x0000002000108000);
  __ Movi(v2.V2D(), 0x0400000080000000, 0x0400000080000000);
  __ Movi(v3.V2D(), 0x0000002080000000, 0x0000001080000000);

  __ Movi(v16.V2D(), 0x4000400040004000, 0x4000400040004000);
  __ Movi(v17.V2D(), 0x4000400040004000, 0x4000400040004000);
  __ Movi(v18.V2D(), 0x4000400040004000, 0x4000400040004000);
  __ Movi(v19.V2D(), 0x4000400040004000, 0x4000400040004000);

  __ Sqrdmlsh(v16.V4H(), v0.V4H(), v1.H(), 1);
  __ Sqrdmlsh(v17.V4S(), v2.V4S(), v3.S(), 1);
  __ Sqrdmlsh(h18, h0, v1.H(), 0);
  __ Sqrdmlsh(s19, s2, v3.S(), 0);

  END();

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
  RUN();
  ASSERT_EQUAL_128(0, 0x4000400040004010, q16);
  ASSERT_EQUAL_128(0x4000400040004010, 0x4000400040004010, q17);
  ASSERT_EQUAL_128(0, 0xc000, q18);
  ASSERT_EQUAL_128(0, 0xc0004000, q19);
#endif
  TEARDOWN();
}

TEST(neon_3same_sdot_udot) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kDotProduct);

  START();

  __ Movi(v0.V2D(), 0x7122712271227122, 0x7122712271227122);
  __ Movi(v1.V2D(), 0xe245e245f245f245, 0xe245e245f245f245);
  __ Movi(v2.V2D(), 0x3939393900000000, 0x3939393900000000);

  __ Movi(v16.V2D(), 0x0000400000004000, 0x0000400000004000);
  __ Movi(v17.V2D(), 0x0000400000004000, 0x0000400000004000);
  __ Movi(v18.V2D(), 0x0000400000004000, 0x0000400000004000);
  __ Movi(v19.V2D(), 0x0000400000004000, 0x0000400000004000);

  __ Sdot(v16.V4S(), v0.V16B(), v1.V16B());
  __ Sdot(v17.V2S(), v1.V8B(), v2.V8B());

  __ Udot(v18.V4S(), v0.V16B(), v1.V16B());
  __ Udot(v19.V2S(), v1.V8B(), v2.V8B());

  END();

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
  RUN();
  ASSERT_EQUAL_128(0x000037d8000045f8, 0x000037d8000045f8, q16);
  ASSERT_EQUAL_128(0, 0x0000515e00004000, q17);
  ASSERT_EQUAL_128(0x000119d8000127f8, 0x000119d8000127f8, q18);
  ASSERT_EQUAL_128(0, 0x0000c35e00004000, q19);
#endif
  TEARDOWN();
}
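
// Each 32-bit lane of an SDOT/UDOT result is the accumulator plus the dot
// product of four byte pairs. Checking the lowest q16 lane above by hand:
// the v0 bytes {0x22, 0x71, 0x22, 0x71} and v1 bytes {0x45, 0xf2, 0x45, 0xf2}
// give, interpreted as signed values, 2 * (34 * 69 + 113 * -14) = 1528 =
// 0x5f8, and 0x4000 + 0x5f8 = 0x45f8.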

TEST(neon_byelement_sdot_udot) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kDotProduct);

  START();

  __ Movi(v0.V2D(), 0x7122712271227122, 0x7122712271227122);
  __ Movi(v1.V2D(), 0xe245e245f245f245, 0xe245e245f245f245);
  __ Movi(v2.V2D(), 0x3939393900000000, 0x3939393900000000);

  __ Movi(v16.V2D(), 0x0000400000004000, 0x0000400000004000);
  __ Movi(v17.V2D(), 0x0000400000004000, 0x0000400000004000);
  __ Movi(v18.V2D(), 0x0000400000004000, 0x0000400000004000);
  __ Movi(v19.V2D(), 0x0000400000004000, 0x0000400000004000);

  __ Sdot(v16.V4S(), v0.V16B(), v1.S4B(), 1);
  __ Sdot(v17.V2S(), v1.V8B(), v2.S4B(), 1);

  __ Udot(v18.V4S(), v0.V16B(), v1.S4B(), 1);
  __ Udot(v19.V2S(), v1.V8B(), v2.S4B(), 1);

  END();

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
  RUN();
  ASSERT_EQUAL_128(0x000037d8000037d8, 0x000037d8000037d8, q16);
  ASSERT_EQUAL_128(0, 0x0000515e0000587e, q17);
  ASSERT_EQUAL_128(0x000119d8000119d8, 0x000119d8000119d8, q18);
  ASSERT_EQUAL_128(0, 0x0000c35e0000ca7e, q19);
#endif
  TEARDOWN();
}


TEST(neon_2regmisc_saddlp) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);

  __ Saddlp(v16.V8H(), v0.V16B());
  __ Saddlp(v17.V4H(), v0.V8B());

  __ Saddlp(v18.V4S(), v0.V8H());
  __ Saddlp(v19.V2S(), v0.V4H());

  __ Saddlp(v20.V2D(), v0.V4S());
  __ Saddlp(v21.V1D(), v0.V2S());

  END();

  RUN();
  ASSERT_EQUAL_128(0x0080ffffff010080, 0xff01ffff0080ff01, q16);
  ASSERT_EQUAL_128(0x0000000000000000, 0xff01ffff0080ff01, q17);
  ASSERT_EQUAL_128(0x0000800000000081, 0xffff7f81ffff8200, q18);
  ASSERT_EQUAL_128(0x0000000000000000, 0xffff7f81ffff8200, q19);
  ASSERT_EQUAL_128(0x0000000000818000, 0xffffffff82017f81, q20);
  ASSERT_EQUAL_128(0x0000000000000000, 0xffffffff82017f81, q21);
  TEARDOWN();
}
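
// SADDLP (and UADDLP below) add adjacent pairs of elements and widen: each
// 16-bit result lane is the sum of two neighbouring byte lanes. For the
// lowest pair above, 0x81 and 0x80, the signed sum is -127 + -128 = -255 =
// 0xff01 (the low lane of q16), while the unsigned sum is 0x81 + 0x80 =
// 0x0101 (the low lane of the UADDLP result).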

TEST(neon_2regmisc_uaddlp) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);

  __ Uaddlp(v16.V8H(), v0.V16B());
  __ Uaddlp(v17.V4H(), v0.V8B());

  __ Uaddlp(v18.V4S(), v0.V8H());
  __ Uaddlp(v19.V2S(), v0.V4H());

  __ Uaddlp(v20.V2D(), v0.V4S());
  __ Uaddlp(v21.V1D(), v0.V2S());

  END();

  RUN();
  ASSERT_EQUAL_128(0x008000ff01010080, 0x010100ff00800101, q16);
  ASSERT_EQUAL_128(0x0000000000000000, 0x010100ff00800101, q17);
  ASSERT_EQUAL_128(0x0000800000010081, 0x00017f8100008200, q18);
  ASSERT_EQUAL_128(0x0000000000000000, 0x00017f8100008200, q19);
  ASSERT_EQUAL_128(0x0000000100818000, 0x0000000082017f81, q20);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000082017f81, q21);
  TEARDOWN();
}

TEST(neon_2regmisc_sadalp) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  __ Mov(v16.V16B(), v1.V16B());
  __ Mov(v17.V16B(), v1.V16B());
  __ Sadalp(v16.V8H(), v0.V16B());
  __ Sadalp(v17.V4H(), v0.V8B());

  __ Mov(v18.V16B(), v2.V16B());
  __ Mov(v19.V16B(), v2.V16B());
  __ Sadalp(v18.V4S(), v1.V8H());
  __ Sadalp(v19.V2S(), v1.V4H());

  __ Mov(v20.V16B(), v3.V16B());
  __ Mov(v21.V16B(), v4.V16B());
  __ Sadalp(v20.V2D(), v2.V4S());
  __ Sadalp(v21.V1D(), v2.V2S());

  END();

  RUN();
  ASSERT_EQUAL_128(0x80808000ff000080, 0xff00ffff00817f00, q16);
  ASSERT_EQUAL_128(0x0000000000000000, 0xff00ffff00817f00, q17);
  ASSERT_EQUAL_128(0x7fff0001fffffffe, 0xffffffff80007fff, q18);
  ASSERT_EQUAL_128(0x0000000000000000, 0xffffffff80007fff, q19);
  ASSERT_EQUAL_128(0x7fffffff80000000, 0x800000007ffffffe, q20);
  ASSERT_EQUAL_128(0x0000000000000000, 0x000000007fffffff, q21);
  TEARDOWN();
}

TEST(neon_2regmisc_uadalp) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  __ Mov(v16.V16B(), v1.V16B());
  __ Mov(v17.V16B(), v1.V16B());
  __ Uadalp(v16.V8H(), v0.V16B());
  __ Uadalp(v17.V4H(), v0.V8B());

  __ Mov(v18.V16B(), v2.V16B());
  __ Mov(v19.V16B(), v2.V16B());
  __ Uadalp(v18.V4S(), v1.V8H());
  __ Uadalp(v19.V2S(), v1.V4H());

  __ Mov(v20.V16B(), v3.V16B());
  __ Mov(v21.V16B(), v4.V16B());
  __ Uadalp(v20.V2D(), v2.V4S());
  __ Uadalp(v21.V1D(), v2.V2S());

  END();

  RUN();
  ASSERT_EQUAL_128(0x8080810001000080, 0x010000ff00818100, q16);
  ASSERT_EQUAL_128(0x0000000000000000, 0x010000ff00818100, q17);
  ASSERT_EQUAL_128(0x800100010000fffe, 0x0000ffff80007fff, q18);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000ffff80007fff, q19);
  ASSERT_EQUAL_128(0x8000000180000000, 0x800000007ffffffe, q20);
  ASSERT_EQUAL_128(0x0000000000000000, 0x000000007fffffff, q21);
  TEARDOWN();
}
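
// SADALP/UADALP are the accumulating variants: each widened pairwise sum is
// added to the existing destination lane rather than replacing it, which is
// why the destinations are seeded with Mov before each instruction above.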

TEST(neon_3same_mul) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa);
  __ Movi(v1.V2D(), 0x000055aaff55ff00, 0xaa55ff55555500ff);
  __ Movi(v16.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);
  __ Movi(v17.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);

  __ Mla(v16.V16B(), v0.V16B(), v1.V16B());
  __ Mls(v17.V16B(), v0.V16B(), v1.V16B());
  __ Mul(v18.V16B(), v0.V16B(), v1.V16B());

  END();

  RUN();
  ASSERT_EQUAL_128(0x0102757605b1b208, 0x5f0a61450db90f56, q16);
  ASSERT_EQUAL_128(0x01029192055b5c08, 0xb30ab5d30d630faa, q17);
  ASSERT_EQUAL_128(0x0000727200abab00, 0x5600563900ab0056, q18);
  TEARDOWN();
}


TEST(neon_3same_absdiff) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa);
  __ Movi(v1.V2D(), 0x000055aaff55ff00, 0xaa55ff55555500ff);
  __ Movi(v16.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);
  __ Movi(v17.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);

  __ Saba(v16.V16B(), v0.V16B(), v1.V16B());
  __ Uaba(v17.V16B(), v0.V16B(), v1.V16B());
  __ Sabd(v18.V16B(), v0.V16B(), v1.V16B());
  __ Uabd(v19.V16B(), v0.V16B(), v1.V16B());

  END();

  RUN();
  ASSERT_EQUAL_128(0x0202aeaf065c5d5e, 0x5e5f600c62646455, q16);
  ASSERT_EQUAL_128(0x0002585904b0b1b2, 0x5e5f600c62b86455, q17);
  ASSERT_EQUAL_128(0x0100abab01565656, 0x5555550055565555, q18);
  ASSERT_EQUAL_128(0xff005555ffaaaaaa, 0x5555550055aa5555, q19);
  TEARDOWN();
}
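
// SABD/UABD compute the element-wise absolute difference under a signed or
// unsigned interpretation, and SABA/UABA accumulate it. For the top byte
// lanes above, a = 0xff and b = 0x00: signed, |-1 - 0| = 1 (the 0x01 in
// q18); unsigned, |255 - 0| = 255 (the 0xff in q19).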


TEST(neon_byelement_mul) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa);
  __ Movi(v1.V2D(), 0x000155aaff55ff00, 0xaa55ff55555500ff);

  __ Mul(v16.V4H(), v0.V4H(), v1.H(), 0);
  __ Mul(v17.V8H(), v0.V8H(), v1.H(), 7);
  __ Mul(v18.V2S(), v0.V2S(), v1.S(), 0);
  __ Mul(v19.V4S(), v0.V4S(), v1.S(), 3);

  __ Movi(v20.V2D(), 0x0000000000000000, 0x0001000200030004);
  __ Movi(v21.V2D(), 0x0005000600070008, 0x0001000200030004);
  __ Mla(v20.V4H(), v0.V4H(), v1.H(), 0);
  __ Mla(v21.V8H(), v0.V8H(), v1.H(), 7);

  __ Movi(v22.V2D(), 0x0000000000000000, 0x0000000200000004);
  __ Movi(v23.V2D(), 0x0000000600000008, 0x0000000200000004);
  __ Mla(v22.V2S(), v0.V2S(), v1.S(), 0);
  __ Mla(v23.V4S(), v0.V4S(), v1.S(), 3);

  __ Movi(v24.V2D(), 0x0000000000000000, 0x0100aaabfe015456);
  __ Movi(v25.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa);
  __ Mls(v24.V4H(), v0.V4H(), v1.H(), 0);
  __ Mls(v25.V8H(), v0.V8H(), v1.H(), 7);

  __ Movi(v26.V2D(), 0x0000000000000000, 0xc8e2aaabe1c85456);
  __ Movi(v27.V2D(), 0x39545572c6aa54e4, 0x39545572c6aa54e4);
  __ Mls(v26.V2S(), v0.V2S(), v1.S(), 0);
  __ Mls(v27.V4S(), v0.V4S(), v1.S(), 3);

  END();

  RUN();
  ASSERT_EQUAL_128(0x0000000000000000, 0x0100aaabfe015456, q16);
  ASSERT_EQUAL_128(0xff00aa5500ff55aa, 0xff00aa5500ff55aa, q17);
  ASSERT_EQUAL_128(0x0000000000000000, 0xc8e2aaabe1c85456, q18);
  ASSERT_EQUAL_128(0x39545572c6aa54e4, 0x39545572c6aa54e4, q19);

  ASSERT_EQUAL_128(0x0000000000000000, 0x0101aaadfe04545a, q20);
  ASSERT_EQUAL_128(0xff05aa5b010655b2, 0xff01aa57010255ae, q21);
  ASSERT_EQUAL_128(0x0000000000000000, 0xc8e2aaade1c8545a, q22);
  ASSERT_EQUAL_128(0x39545578c6aa54ec, 0x39545574c6aa54e8, q23);

  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q24);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q25);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q26);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q27);
  TEARDOWN();
}
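
// In the by-element forms above, the final two arguments select a single
// lane of the second operand, which is multiplied into every lane of the
// first. For example, Mul(v16.V4H(), v0.V4H(), v1.H(), 0) uses lane 0 of v1
// (0x00ff) throughout: the low lane is (0x55aa * 0x00ff) & 0xffff = 0x5456,
// the low halfword of q16.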


TEST(neon_byelement_mull) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0xaa55ff55555500ff, 0xff00aa5500ff55aa);
  __ Movi(v1.V2D(), 0x000155aaff55ff00, 0xaa55ff55555500ff);

  __ Smull(v16.V4S(), v0.V4H(), v1.H(), 7);
  __ Smull2(v17.V4S(), v0.V8H(), v1.H(), 0);
  __ Umull(v18.V4S(), v0.V4H(), v1.H(), 7);
  __ Umull2(v19.V4S(), v0.V8H(), v1.H(), 0);

  __ Movi(v20.V2D(), 0x0000000100000002, 0x0000000200000001);
  __ Movi(v21.V2D(), 0x0000000100000002, 0x0000000200000001);
  __ Movi(v22.V2D(), 0x0000000100000002, 0x0000000200000001);
  __ Movi(v23.V2D(), 0x0000000100000002, 0x0000000200000001);

  __ Smlal(v20.V4S(), v0.V4H(), v1.H(), 7);
  __ Smlal2(v21.V4S(), v0.V8H(), v1.H(), 0);
  __ Umlal(v22.V4S(), v0.V4H(), v1.H(), 7);
  __ Umlal2(v23.V4S(), v0.V8H(), v1.H(), 0);

  __ Movi(v24.V2D(), 0xffffff00ffffaa55, 0x000000ff000055aa);
  __ Movi(v25.V2D(), 0xffaaaaabffff55ab, 0x0054ffab0000fe01);
  __ Movi(v26.V2D(), 0x0000ff000000aa55, 0x000000ff000055aa);
  __ Movi(v27.V2D(), 0x00a9aaab00fe55ab, 0x0054ffab0000fe01);

  __ Smlsl(v24.V4S(), v0.V4H(), v1.H(), 7);
  __ Smlsl2(v25.V4S(), v0.V8H(), v1.H(), 0);
  __ Umlsl(v26.V4S(), v0.V4H(), v1.H(), 7);
  __ Umlsl2(v27.V4S(), v0.V8H(), v1.H(), 0);

  END();

  RUN();

  ASSERT_EQUAL_128(0xffffff00ffffaa55, 0x000000ff000055aa, q16);
  ASSERT_EQUAL_128(0xffaaaaabffff55ab, 0x0054ffab0000fe01, q17);
  ASSERT_EQUAL_128(0x0000ff000000aa55, 0x000000ff000055aa, q18);
  ASSERT_EQUAL_128(0x00a9aaab00fe55ab, 0x0054ffab0000fe01, q19);

  ASSERT_EQUAL_128(0xffffff01ffffaa57, 0x00000101000055ab, q20);
  ASSERT_EQUAL_128(0xffaaaaacffff55ad, 0x0054ffad0000fe02, q21);
  ASSERT_EQUAL_128(0x0000ff010000aa57, 0x00000101000055ab, q22);
  ASSERT_EQUAL_128(0x00a9aaac00fe55ad, 0x0054ffad0000fe02, q23);

  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q24);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q25);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q26);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q27);

  TEARDOWN();
}


TEST(neon_byelement_sqdmull) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0xaa55ff55555500ff, 0xff00aa5500ff55aa);
  __ Movi(v1.V2D(), 0x000155aaff55ff00, 0xaa55ff55555500ff);

  __ Sqdmull(v16.V4S(), v0.V4H(), v1.H(), 7);
  __ Sqdmull2(v17.V4S(), v0.V8H(), v1.H(), 0);
  __ Sqdmull(s18, h0, v1.H(), 7);

  __ Movi(v20.V2D(), 0x0000000100000002, 0x0000000200000001);
  __ Movi(v21.V2D(), 0x0000000100000002, 0x0000000200000001);
  __ Movi(v22.V2D(), 0x0000000100000002, 0x0000000200000001);

  __ Sqdmlal(v20.V4S(), v0.V4H(), v1.H(), 7);
  __ Sqdmlal2(v21.V4S(), v0.V8H(), v1.H(), 0);
  __ Sqdmlal(s22, h0, v1.H(), 7);

  __ Movi(v24.V2D(), 0xfffffe00ffff54aa, 0x000001fe0000ab54);
  __ Movi(v25.V2D(), 0xff555556fffeab56, 0x00a9ff560001fc02);
  __ Movi(v26.V2D(), 0x0000000000000000, 0x000000000000ab54);

  __ Sqdmlsl(v24.V4S(), v0.V4H(), v1.H(), 7);
  __ Sqdmlsl2(v25.V4S(), v0.V8H(), v1.H(), 0);
  __ Sqdmlsl(s26, h0, v1.H(), 7);

  END();

  RUN();

  ASSERT_EQUAL_128(0xfffffe00ffff54aa, 0x000001fe0000ab54, q16);
  ASSERT_EQUAL_128(0xff555556fffeab56, 0x00a9ff560001fc02, q17);
  ASSERT_EQUAL_128(0, 0x0000ab54, q18);

  ASSERT_EQUAL_128(0xfffffe01ffff54ac, 0x000002000000ab55, q20);
  ASSERT_EQUAL_128(0xff555557fffeab58, 0x00a9ff580001fc03, q21);
  ASSERT_EQUAL_128(0, 0x0000ab55, q22);

  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q24);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q25);
  ASSERT_EQUAL_128(0, 0x00000000, q26);

  TEARDOWN();
}


TEST(neon_3diff_absdiff) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0xff00aa5500ff55ab, 0xff00aa5500ff55aa);
  __ Movi(v1.V2D(), 0x000055aaff55ff00, 0xaa55ff55555500ff);
  __ Movi(v16.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);
  __ Movi(v17.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);
  __ Movi(v18.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);
  __ Movi(v19.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);

  __ Sabal(v16.V8H(), v0.V8B(), v1.V8B());
  __ Uabal(v17.V8H(), v0.V8B(), v1.V8B());
  __ Sabal2(v18.V8H(), v0.V16B(), v1.V16B());
  __ Uabal2(v19.V8H(), v0.V16B(), v1.V16B());

  END();

  RUN();
  ASSERT_EQUAL_128(0x01570359055b0708, 0x095f0b620d630f55, q16);
  ASSERT_EQUAL_128(0x01570359055b0708, 0x095f0bb60d630f55, q17);
  ASSERT_EQUAL_128(0x0103030405b107b3, 0x090b0b620d640f55, q18);
  ASSERT_EQUAL_128(0x02010304055b075d, 0x0a090bb60db80fab, q19);
  TEARDOWN();
}


TEST(neon_3diff_sqdmull) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x7fff7fff80008000, 0x80007fff7fff8000);
  __ Movi(v1.V2D(), 0x80007fff7fff8000, 0x7fff7fff80008000);
  __ Movi(v2.V2D(), 0x800000007fffffff, 0x7fffffff80000000);
  __ Movi(v3.V2D(), 0x8000000080000000, 0x8000000080000000);

  __ Sqdmull(v16.V4S(), v0.V4H(), v1.V4H());
  __ Sqdmull2(v17.V4S(), v0.V8H(), v1.V8H());
  __ Sqdmull(v18.V2D(), v2.V2S(), v3.V2S());
  __ Sqdmull2(v19.V2D(), v2.V4S(), v3.V4S());
  __ Sqdmull(s20, h0, h1);
  __ Sqdmull(d21, s2, s3);

  END();

  RUN();
  ASSERT_EQUAL_128(0x800100007ffe0002, 0x800100007fffffff, q16);
  ASSERT_EQUAL_128(0x800100007ffe0002, 0x800100007fffffff, q17);
  ASSERT_EQUAL_128(0x8000000100000000, 0x7fffffffffffffff, q18);
  ASSERT_EQUAL_128(0x7fffffffffffffff, 0x8000000100000000, q19);
  ASSERT_EQUAL_128(0, 0x7fffffff, q20);
  ASSERT_EQUAL_128(0, 0x7fffffffffffffff, q21);
  TEARDOWN();
}
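
// SQDMULL widens as it multiplies: each result lane is SignedSat(2 * a * b)
// at twice the source element width. The only inputs that can saturate are
// a = b = 0x8000 (or 0x80000000 for word lanes), since 2 * (-32768)^2 = 2^31
// does not fit in an int32; it saturates to 0x7fffffff, visible in the low
// lanes of q16 and q18 above.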


TEST(neon_3diff_sqdmlal) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x7fff7fff80008000, 0x80007fff7fff8000);
  __ Movi(v1.V2D(), 0x80007fff7fff8000, 0x7fff7fff80008000);
  __ Movi(v2.V2D(), 0x800000007fffffff, 0x7fffffff80000000);
  __ Movi(v3.V2D(), 0x8000000080000000, 0x8000000080000000);

  __ Movi(v16.V2D(), 0xffffffff00000001, 0x8fffffff00000001);
  __ Movi(v17.V2D(), 0x00000001ffffffff, 0x00000001ffffffff);
  __ Movi(v18.V2D(), 0x8000000000000001, 0x0000000000000001);
  __ Movi(v19.V2D(), 0xffffffffffffffff, 0x7fffffffffffffff);
  __ Movi(v20.V2D(), 0, 0x00000001);
  __ Movi(v21.V2D(), 0, 0x00000001);

  __ Sqdmlal(v16.V4S(), v0.V4H(), v1.V4H());
  __ Sqdmlal2(v17.V4S(), v0.V8H(), v1.V8H());
  __ Sqdmlal(v18.V2D(), v2.V2S(), v3.V2S());
  __ Sqdmlal2(v19.V2D(), v2.V4S(), v3.V4S());
  __ Sqdmlal(s20, h0, h1);
  __ Sqdmlal(d21, s2, s3);

  END();

  RUN();
  ASSERT_EQUAL_128(0x8000ffff7ffe0003, 0x800000007fffffff, q16);
  ASSERT_EQUAL_128(0x800100017ffe0001, 0x800100017ffffffe, q17);
  ASSERT_EQUAL_128(0x8000000000000000, 0x7fffffffffffffff, q18);
  ASSERT_EQUAL_128(0x7ffffffffffffffe, 0x00000000ffffffff, q19);
  ASSERT_EQUAL_128(0, 0x7fffffff, q20);
  ASSERT_EQUAL_128(0, 0x7fffffffffffffff, q21);
  TEARDOWN();
}


TEST(neon_3diff_sqdmlsl) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x7fff7fff80008000, 0x80007fff7fff8000);
  __ Movi(v1.V2D(), 0x80007fff7fff8000, 0x7fff7fff80008000);
  __ Movi(v2.V2D(), 0x800000007fffffff, 0x7fffffff80000000);
  __ Movi(v3.V2D(), 0x8000000080000000, 0x8000000080000000);

  __ Movi(v16.V2D(), 0xffffffff00000001, 0x7ffffffe80000001);
  __ Movi(v17.V2D(), 0x00000001ffffffff, 0x7ffffffe00000001);
  __ Movi(v18.V2D(), 0x8000000000000001, 0x8000000000000001);
  __ Movi(v19.V2D(), 0xfffffffffffffffe, 0x7fffffffffffffff);
  __ Movi(v20.V2D(), 0, 0x00000001);
  __ Movi(v21.V2D(), 0, 0x00000001);

  __ Sqdmlsl(v16.V4S(), v0.V4H(), v1.V4H());
  __ Sqdmlsl2(v17.V4S(), v0.V8H(), v1.V8H());
  __ Sqdmlsl(v18.V2D(), v2.V2S(), v3.V2S());
  __ Sqdmlsl2(v19.V2D(), v2.V4S(), v3.V4S());
  __ Sqdmlsl(s20, h0, h1);
  __ Sqdmlsl(d21, s2, s3);

  END();

  RUN();
  ASSERT_EQUAL_128(0x7ffeffff8001ffff, 0x7fffffff80000000, q16);
  ASSERT_EQUAL_128(0x7fff00018001fffd, 0x7fffffff80000002, q17);
  ASSERT_EQUAL_128(0xffffffff00000001, 0x8000000000000000, q18);
  ASSERT_EQUAL_128(0x8000000000000000, 0x7fffffffffffffff, q19);
  ASSERT_EQUAL_128(0, 0x80000002, q20);
  ASSERT_EQUAL_128(0, 0x8000000000000002, q21);

  TEARDOWN();
}


TEST(neon_3diff_mla) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0xff00aa5500ff55ab, 0xff00aa5500ff55aa);
  __ Movi(v1.V2D(), 0x000055aaff55ff00, 0xaa55ff55555500ff);
  __ Movi(v16.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);
  __ Movi(v17.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);
  __ Movi(v18.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);
  __ Movi(v19.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);

  __ Smlal(v16.V8H(), v0.V8B(), v1.V8B());
  __ Umlal(v17.V8H(), v0.V8B(), v1.V8B());
  __ Smlal2(v18.V8H(), v0.V16B(), v1.V16B());
  __ Umlal2(v19.V8H(), v0.V16B(), v1.V16B());

  END();

  RUN();
  ASSERT_EQUAL_128(0x01580304055c2341, 0x090a0ab70d0e0f56, q16);
  ASSERT_EQUAL_128(0xaa580304ae5c2341, 0x090a5fb70d0eb856, q17);
  ASSERT_EQUAL_128(0x01020304e878ea7a, 0x090a0ab70cb90f00, q18);
  ASSERT_EQUAL_128(0x010203043d783f7a, 0x090a5fb761b90f00, q19);
  TEARDOWN();
}


TEST(neon_3diff_mls) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0xff00aa5500ff55ab, 0xff00aa5500ff55aa);
  __ Movi(v1.V2D(), 0x000055aaff55ff00, 0xaa55ff55555500ff);
  __ Movi(v16.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);
  __ Movi(v17.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);
  __ Movi(v18.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);
  __ Movi(v19.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);

  __ Smlsl(v16.V8H(), v0.V8B(), v1.V8B());
  __ Umlsl(v17.V8H(), v0.V8B(), v1.V8B());
  __ Smlsl2(v18.V8H(), v0.V16B(), v1.V16B());
  __ Umlsl2(v19.V8H(), v0.V16B(), v1.V16B());

  END();

  RUN();
  ASSERT_EQUAL_128(0x00ac030404b0eacf, 0x090a0b610d0e0eaa, q16);
  ASSERT_EQUAL_128(0x57ac03045bb0eacf, 0x090ab6610d0e65aa, q17);
  ASSERT_EQUAL_128(0x0102030421942396, 0x090a0b610d630f00, q18);
  ASSERT_EQUAL_128(0x01020304cc94ce96, 0x090ab661b8630f00, q19);
  TEARDOWN();
}


TEST(neon_3same_compare) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa);
  __ Movi(v1.V2D(), 0x000055aaff55ff00, 0xaa55ff55555500ff);

  __ Cmeq(v16.V16B(), v0.V16B(), v0.V16B());
  __ Cmeq(v17.V16B(), v0.V16B(), v1.V16B());
  __ Cmge(v18.V16B(), v0.V16B(), v0.V16B());
  __ Cmge(v19.V16B(), v0.V16B(), v1.V16B());
  __ Cmgt(v20.V16B(), v0.V16B(), v0.V16B());
  __ Cmgt(v21.V16B(), v0.V16B(), v1.V16B());
  __ Cmhi(v22.V16B(), v0.V16B(), v0.V16B());
  __ Cmhi(v23.V16B(), v0.V16B(), v1.V16B());
  __ Cmhs(v24.V16B(), v0.V16B(), v0.V16B());
  __ Cmhs(v25.V16B(), v0.V16B(), v1.V16B());

  END();

  RUN();
  ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, q16);
  ASSERT_EQUAL_128(0x00ff000000000000, 0x000000ff00000000, q17);
  ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, q18);
  ASSERT_EQUAL_128(0x00ff00ffff00ff00, 0xff0000ff0000ff00, q19);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q20);
  ASSERT_EQUAL_128(0x000000ffff00ff00, 0xff0000000000ff00, q21);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q22);
  ASSERT_EQUAL_128(0xff00ff0000ff00ff, 0xff00000000ffff00, q23);
  ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, q24);
  ASSERT_EQUAL_128(0xffffff0000ff00ff, 0xff0000ff00ffff00, q25);
  TEARDOWN();
}
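
// The integer compare family used above writes an all-ones mask per element
// where the condition holds and zero where it does not: CMEQ tests equality,
// CMGE/CMGT signed >= and >, and CMHS/CMHI their unsigned counterparts,
// which is why the same operands produce different masks in q19 (signed)
// versus q25 (unsigned).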


TEST(neon_3same_scalar_compare) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa);
  __ Movi(v1.V2D(), 0x000055aaff55ff00, 0xaa55ff55555500ff);

  __ Cmeq(d16, d0, d0);
  __ Cmeq(d17, d0, d1);
  __ Cmeq(d18, d1, d0);
  __ Cmge(d19, d0, d0);
  __ Cmge(d20, d0, d1);
  __ Cmge(d21, d1, d0);
  __ Cmgt(d22, d0, d0);
  __ Cmgt(d23, d0, d1);
  __ Cmhi(d24, d0, d0);
  __ Cmhi(d25, d0, d1);
  __ Cmhs(d26, d0, d0);
  __ Cmhs(d27, d0, d1);
  __ Cmhs(d28, d1, d0);

  END();

  RUN();

  ASSERT_EQUAL_128(0, 0xffffffffffffffff, q16);
  ASSERT_EQUAL_128(0, 0x0000000000000000, q17);
  ASSERT_EQUAL_128(0, 0x0000000000000000, q18);
  ASSERT_EQUAL_128(0, 0xffffffffffffffff, q19);
  ASSERT_EQUAL_128(0, 0xffffffffffffffff, q20);
  ASSERT_EQUAL_128(0, 0x0000000000000000, q21);
  ASSERT_EQUAL_128(0, 0x0000000000000000, q22);
  ASSERT_EQUAL_128(0, 0xffffffffffffffff, q23);
  ASSERT_EQUAL_128(0, 0x0000000000000000, q24);
  ASSERT_EQUAL_128(0, 0xffffffffffffffff, q25);
  ASSERT_EQUAL_128(0, 0xffffffffffffffff, q26);
  ASSERT_EQUAL_128(0, 0xffffffffffffffff, q27);
  ASSERT_EQUAL_128(0, 0x0000000000000000, q28);

  TEARDOWN();
}

TEST(neon_fcmeq_h) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();

  __ Movi(v0.V2D(), 0x0000000000000000, 0x0000000000000000);  // 0.
  __ Movi(v1.V2D(), 0xffffffffffffffff, 0xffffffffffffffff);  // NaN.
  __ Movi(v2.V2D(), 0xbc00bc00bc00bc00, 0xbc00bc00bc00bc00);  // -1.0.
  __ Movi(v3.V2D(), 0x3c003c003c003c00, 0x3c003c003c003c00);  // 1.0.

  __ Fcmeq(v4.V8H(), v0.V8H(), v0.V8H());
  __ Fcmeq(v5.V8H(), v1.V8H(), v0.V8H());
  __ Fcmeq(v6.V8H(), v2.V8H(), v0.V8H());
  __ Fcmeq(v7.V8H(), v3.V8H(), v0.V8H());
  __ Fcmeq(v8.V4H(), v0.V4H(), v0.V4H());
  __ Fcmeq(v9.V4H(), v1.V4H(), v0.V4H());
  __ Fcmeq(v10.V4H(), v2.V4H(), v0.V4H());
  __ Fcmeq(v11.V4H(), v3.V4H(), v0.V4H());

  END();

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
  RUN();

  ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, v4);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, v5);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, v6);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, v7);
  ASSERT_EQUAL_128(0, 0xffffffffffffffff, v8);
  ASSERT_EQUAL_128(0, 0x0000000000000000, v9);
  ASSERT_EQUAL_128(0, 0x0000000000000000, v10);
  ASSERT_EQUAL_128(0, 0x0000000000000000, v11);
#endif  // VIXL_INCLUDE_SIMULATOR_AARCH64

  TEARDOWN();
}

TEST(neon_fcmeq_h_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf,
                      CPUFeatures::kFPHalf);

  START();

  __ Fmov(h0, Float16(0.0));
  __ Fmov(h1, RawbitsToFloat16(0xffff));
  __ Fmov(h2, Float16(-1.0));
  __ Fmov(h3, Float16(1.0));
  __ Fcmeq(h4, h0, h0);
  __ Fcmeq(h5, h1, h0);
  __ Fcmeq(h6, h2, h0);
  __ Fcmeq(h7, h3, h0);

  END();

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
  RUN();

  ASSERT_EQUAL_FP16(RawbitsToFloat16(0xffff), h4);
  ASSERT_EQUAL_FP16(RawbitsToFloat16(0x0000), h5);
  ASSERT_EQUAL_FP16(RawbitsToFloat16(0x0000), h6);
  ASSERT_EQUAL_FP16(RawbitsToFloat16(0x0000), h7);
#endif  // VIXL_INCLUDE_SIMULATOR_AARCH64

  TEARDOWN();
}

TEST(neon_fcmge_h) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();

  __ Movi(v0.V2D(), 0x0000000000000000, 0x0000000000000000);  // 0.
  __ Movi(v1.V2D(), 0xffffffffffffffff, 0xffffffffffffffff);  // NaN.
  __ Movi(v2.V2D(), 0xbc00bc00bc00bc00, 0xbc00bc00bc00bc00);  // -1.0.
  __ Movi(v3.V2D(), 0x3c003c003c003c00, 0x3c003c003c003c00);  // 1.0.

  __ Fcmge(v4.V8H(), v0.V8H(), v0.V8H());
  __ Fcmge(v5.V8H(), v1.V8H(), v0.V8H());
  __ Fcmge(v6.V8H(), v2.V8H(), v0.V8H());
  __ Fcmge(v7.V8H(), v3.V8H(), v0.V8H());
  __ Fcmge(v8.V4H(), v0.V4H(), v0.V4H());
  __ Fcmge(v9.V4H(), v1.V4H(), v0.V4H());
  __ Fcmge(v10.V4H(), v2.V4H(), v0.V4H());
  __ Fcmge(v11.V4H(), v3.V4H(), v0.V4H());

  END();

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
  RUN();

  ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, v4);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, v5);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, v6);
  ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, v7);
  ASSERT_EQUAL_128(0, 0xffffffffffffffff, v8);
  ASSERT_EQUAL_128(0, 0x0000000000000000, v9);
  ASSERT_EQUAL_128(0, 0x0000000000000000, v10);
  ASSERT_EQUAL_128(0, 0xffffffffffffffff, v11);
#endif  // VIXL_INCLUDE_SIMULATOR_AARCH64

  TEARDOWN();
}

TEST(neon_fcmge_h_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf,
                      CPUFeatures::kFPHalf);

  START();

  __ Fmov(h0, Float16(0.0));
  __ Fmov(h1, RawbitsToFloat16(0xffff));
  __ Fmov(h2, Float16(-1.0));
  __ Fmov(h3, Float16(1.0));
  __ Fcmge(h4, h0, h0);
  __ Fcmge(h5, h1, h0);
  __ Fcmge(h6, h2, h0);
  __ Fcmge(h7, h3, h0);

  END();

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
  RUN();

  ASSERT_EQUAL_FP16(RawbitsToFloat16(0xffff), h4);
  ASSERT_EQUAL_FP16(RawbitsToFloat16(0x0000), h5);
  ASSERT_EQUAL_FP16(RawbitsToFloat16(0x0000), h6);
  ASSERT_EQUAL_FP16(RawbitsToFloat16(0xffff), h7);
#endif  // VIXL_INCLUDE_SIMULATOR_AARCH64

  TEARDOWN();
}

TEST(neon_fcmgt_h) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();

  __ Movi(v0.V2D(), 0x0000000000000000, 0x0000000000000000);  // 0.
  __ Movi(v1.V2D(), 0xffffffffffffffff, 0xffffffffffffffff);  // NaN.
  __ Movi(v2.V2D(), 0xbc00bc00bc00bc00, 0xbc00bc00bc00bc00);  // -1.0.
  __ Movi(v3.V2D(), 0x3c003c003c003c00, 0x3c003c003c003c00);  // 1.0.

  __ Fcmgt(v4.V8H(), v0.V8H(), v0.V8H());
  __ Fcmgt(v5.V8H(), v1.V8H(), v0.V8H());
  __ Fcmgt(v6.V8H(), v2.V8H(), v0.V8H());
  __ Fcmgt(v7.V8H(), v3.V8H(), v0.V8H());
  __ Fcmgt(v8.V4H(), v0.V4H(), v0.V4H());
  __ Fcmgt(v9.V4H(), v1.V4H(), v0.V4H());
  __ Fcmgt(v10.V4H(), v2.V4H(), v0.V4H());
  __ Fcmgt(v11.V4H(), v3.V4H(), v0.V4H());

  END();

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
  RUN();

  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, v4);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, v5);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, v6);
  ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, v7);
  ASSERT_EQUAL_128(0, 0x0000000000000000, v8);
  ASSERT_EQUAL_128(0, 0x0000000000000000, v9);
  ASSERT_EQUAL_128(0, 0x0000000000000000, v10);
  ASSERT_EQUAL_128(0, 0xffffffffffffffff, v11);
#endif  // VIXL_INCLUDE_SIMULATOR_AARCH64

  TEARDOWN();
}

TEST(neon_fcmgt_h_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf,
                      CPUFeatures::kFPHalf);

  START();

  __ Fmov(h0, Float16(0.0));
  __ Fmov(h1, RawbitsToFloat16(0xffff));
  __ Fmov(h2, Float16(-1.0));
  __ Fmov(h3, Float16(1.0));
  __ Fcmgt(h4, h0, h0);
  __ Fcmgt(h5, h1, h0);
  __ Fcmgt(h6, h2, h0);
  __ Fcmgt(h7, h3, h0);

  END();

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
  RUN();

  ASSERT_EQUAL_FP16(RawbitsToFloat16(0x0000), h4);
  ASSERT_EQUAL_FP16(RawbitsToFloat16(0x0000), h5);
  ASSERT_EQUAL_FP16(RawbitsToFloat16(0x0000), h6);
  ASSERT_EQUAL_FP16(RawbitsToFloat16(0xffff), h7);
#endif  // VIXL_INCLUDE_SIMULATOR_AARCH64

  TEARDOWN();
}

TEST(neon_facge_h) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();

  __ Movi(v0.V2D(), 0x0000000000000000, 0x0000000000000000);  // 0.
  __ Movi(v1.V2D(), 0xffffffffffffffff, 0xffffffffffffffff);  // NaN.
  __ Movi(v2.V2D(), 0xbc00bc00bc00bc00, 0xbc00bc00bc00bc00);  // -1.0.
  __ Movi(v3.V2D(), 0x3c003c003c003c00, 0x3c003c003c003c00);  // 1.0.

  __ Facge(v4.V8H(), v0.V8H(), v0.V8H());
  __ Facge(v5.V8H(), v1.V8H(), v0.V8H());
  __ Facge(v6.V8H(), v2.V8H(), v0.V8H());
  __ Facge(v7.V8H(), v3.V8H(), v0.V8H());
  __ Facge(v8.V4H(), v0.V4H(), v0.V4H());
  __ Facge(v9.V4H(), v1.V4H(), v0.V4H());
  __ Facge(v10.V4H(), v2.V4H(), v0.V4H());
  __ Facge(v11.V4H(), v3.V4H(), v0.V4H());

  END();

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
  RUN();

  ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, v4);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, v5);
  ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, v6);
  ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, v7);
  ASSERT_EQUAL_128(0, 0xffffffffffffffff, v8);
  ASSERT_EQUAL_128(0, 0x0000000000000000, v9);
  ASSERT_EQUAL_128(0, 0xffffffffffffffff, v10);
  ASSERT_EQUAL_128(0, 0xffffffffffffffff, v11);
#endif  // VIXL_INCLUDE_SIMULATOR_AARCH64

  TEARDOWN();
}

TEST(neon_facge_h_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf,
                      CPUFeatures::kFPHalf);

  START();

  __ Fmov(h0, Float16(0.0));
  __ Fmov(h1, RawbitsToFloat16(0xffff));
  __ Fmov(h2, Float16(-1.0));
  __ Fmov(h3, Float16(1.0));
  __ Facge(h4, h0, h0);
  __ Facge(h5, h1, h0);
  __ Facge(h6, h2, h0);
  __ Facge(h7, h3, h0);

  END();

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
  RUN();

  ASSERT_EQUAL_FP16(RawbitsToFloat16(0xffff), h4);
  ASSERT_EQUAL_FP16(RawbitsToFloat16(0x0000), h5);
  ASSERT_EQUAL_FP16(RawbitsToFloat16(0xffff), h6);
  ASSERT_EQUAL_FP16(RawbitsToFloat16(0xffff), h7);
#endif  // VIXL_INCLUDE_SIMULATOR_AARCH64

  TEARDOWN();
}

TEST(neon_facgt_h) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();

  __ Movi(v0.V2D(), 0x0000000000000000, 0x0000000000000000);  // 0.
  __ Movi(v1.V2D(), 0xffffffffffffffff, 0xffffffffffffffff);  // NaN.
  __ Movi(v2.V2D(), 0xbc00bc00bc00bc00, 0xbc00bc00bc00bc00);  // -1.0.
  __ Movi(v3.V2D(), 0x3c003c003c003c00, 0x3c003c003c003c00);  // 1.0.

  __ Facgt(v4.V8H(), v0.V8H(), v0.V8H());
  __ Facgt(v5.V8H(), v1.V8H(), v0.V8H());
  __ Facgt(v6.V8H(), v2.V8H(), v0.V8H());
  __ Facgt(v7.V8H(), v3.V8H(), v0.V8H());
  __ Facgt(v8.V4H(), v0.V4H(), v0.V4H());
  __ Facgt(v9.V4H(), v1.V4H(), v0.V4H());
  __ Facgt(v10.V4H(), v2.V4H(), v0.V4H());
  __ Facgt(v11.V4H(), v3.V4H(), v0.V4H());

  END();

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
  RUN();

  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, v4);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, v5);
  ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, v6);
  ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, v7);
  ASSERT_EQUAL_128(0, 0x0000000000000000, v8);
  ASSERT_EQUAL_128(0, 0x0000000000000000, v9);
  ASSERT_EQUAL_128(0, 0xffffffffffffffff, v10);
  ASSERT_EQUAL_128(0, 0xffffffffffffffff, v11);
#endif  // VIXL_INCLUDE_SIMULATOR_AARCH64

  TEARDOWN();
}

TEST(neon_facgt_h_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf,
                      CPUFeatures::kFPHalf);

  START();

  __ Fmov(h0, Float16(0.0));
  __ Fmov(h1, RawbitsToFloat16(0xffff));
  __ Fmov(h2, Float16(-1.0));
  __ Fmov(h3, Float16(1.0));
  __ Facgt(h4, h0, h0);
  __ Facgt(h5, h1, h0);
  __ Facgt(h6, h2, h0);
  __ Facgt(h7, h3, h0);

  END();

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
  RUN();

  ASSERT_EQUAL_FP16(RawbitsToFloat16(0x0000), h4);
  ASSERT_EQUAL_FP16(RawbitsToFloat16(0x0000), h5);
  ASSERT_EQUAL_FP16(RawbitsToFloat16(0xffff), h6);
  ASSERT_EQUAL_FP16(RawbitsToFloat16(0xffff), h7);
#endif  // VIXL_INCLUDE_SIMULATOR_AARCH64

  TEARDOWN();
}

TEST(neon_2regmisc_fcmeq) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);

  START();

  __ Movi(v0.V2D(), 0x0000000000000000, 0x0000000000000000);  // Zero.
  __ Movi(v1.V2D(), 0xffffffffffffffff, 0xffffffffffffffff);  // NaN.
  __ Movi(v2.V2D(), 0xbf800000bf800000, 0xbf800000bf800000);  // < 0.
  __ Movi(v3.V2D(), 0x3f8000003f800000, 0x3f8000003f800000);  // > 0.

  __ Fcmeq(s16, s0, 0.0);
  __ Fcmeq(s17, s1, 0.0);
  __ Fcmeq(s18, s2, 0.0);
  __ Fcmeq(d19, d0, 0.0);
  __ Fcmeq(d20, d1, 0.0);
  __ Fcmeq(d21, d2, 0.0);
  __ Fcmeq(v22.V2S(), v0.V2S(), 0.0);
  __ Fcmeq(v23.V4S(), v1.V4S(), 0.0);
  __ Fcmeq(v24.V2D(), v1.V2D(), 0.0);
  __ Fcmeq(v25.V2D(), v2.V2D(), 0.0);

  END();

  RUN();
  ASSERT_EQUAL_128(0, 0xffffffff, q16);
  ASSERT_EQUAL_128(0, 0x00000000, q17);
  ASSERT_EQUAL_128(0, 0x00000000, q18);
  ASSERT_EQUAL_128(0, 0xffffffffffffffff, q19);
  ASSERT_EQUAL_128(0, 0x0000000000000000, q20);
  ASSERT_EQUAL_128(0, 0x0000000000000000, q21);
  ASSERT_EQUAL_128(0, 0xffffffffffffffff, q22);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q23);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q24);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q25);
  TEARDOWN();
}
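
// These compare-with-zero forms follow IEEE 754 semantics: any comparison
// involving a NaN is false, so the all-NaN inputs (v1 here, and the 0xffff
// half-precision patterns above) always produce an all-zero mask, while
// Fcmeq with #0.0 would match both +0.0 and -0.0.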
21136
TEST(neon_2regmisc_fcmge)21137 TEST(neon_2regmisc_fcmge) {
21138 SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);
21139
21140 START();
21141
21142 __ Movi(v0.V2D(), 0x0000000000000000, 0x0000000000000000); // Zero.
21143 __ Movi(v1.V2D(), 0xffffffffffffffff, 0xffffffffffffffff); // Nan.
21144 __ Movi(v2.V2D(), 0xbf800000bf800000, 0xbf800000bf800000); // < 0.
21145 __ Movi(v3.V2D(), 0x3f8000003f800000, 0x3f8000003f800000); // > 0.
21146
21147 __ Fcmge(s16, s0, 0.0);
21148 __ Fcmge(s17, s1, 0.0);
21149 __ Fcmge(s18, s2, 0.0);
21150 __ Fcmge(d19, d0, 0.0);
21151 __ Fcmge(d20, d1, 0.0);
21152 __ Fcmge(d21, d3, 0.0);
21153 __ Fcmge(v22.V2S(), v0.V2S(), 0.0);
21154 __ Fcmge(v23.V4S(), v1.V4S(), 0.0);
21155 __ Fcmge(v24.V2D(), v1.V2D(), 0.0);
21156 __ Fcmge(v25.V2D(), v3.V2D(), 0.0);
21157
21158 END();
21159
21160 RUN();
21161 ASSERT_EQUAL_128(0, 0xffffffff, q16);
21162 ASSERT_EQUAL_128(0, 0x00000000, q17);
21163 ASSERT_EQUAL_128(0, 0x00000000, q18);
21164 ASSERT_EQUAL_128(0, 0xffffffffffffffff, q19);
21165 ASSERT_EQUAL_128(0, 0x0000000000000000, q20);
21166 ASSERT_EQUAL_128(0, 0xffffffffffffffff, q21);
21167 ASSERT_EQUAL_128(0, 0xffffffffffffffff, q22);
21168 ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q23);
21169 ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q24);
21170 ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, q25);
21171 TEARDOWN();
21172 }
21173
21174
TEST(neon_2regmisc_fcmgt)21175 TEST(neon_2regmisc_fcmgt) {
21176 SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);
21177
21178 START();
21179
21180 __ Movi(v0.V2D(), 0x0000000000000000, 0x0000000000000000); // Zero.
21181 __ Movi(v1.V2D(), 0xffffffffffffffff, 0xffffffffffffffff); // Nan.
21182 __ Movi(v2.V2D(), 0xbf800000bf800000, 0xbf800000bf800000); // < 0.
21183 __ Movi(v3.V2D(), 0x3f8000003f800000, 0x3f8000003f800000); // > 0.
21184
21185 __ Fcmgt(s16, s0, 0.0);
21186 __ Fcmgt(s17, s1, 0.0);
21187 __ Fcmgt(s18, s2, 0.0);
21188 __ Fcmgt(d19, d0, 0.0);
21189 __ Fcmgt(d20, d1, 0.0);
21190 __ Fcmgt(d21, d3, 0.0);
21191 __ Fcmgt(v22.V2S(), v0.V2S(), 0.0);
21192 __ Fcmgt(v23.V4S(), v1.V4S(), 0.0);
21193 __ Fcmgt(v24.V2D(), v1.V2D(), 0.0);
21194 __ Fcmgt(v25.V2D(), v3.V2D(), 0.0);
21195
21196 END();
21197
21198 RUN();
21199 ASSERT_EQUAL_128(0, 0x00000000, q16);
21200 ASSERT_EQUAL_128(0, 0x00000000, q17);
21201 ASSERT_EQUAL_128(0, 0x00000000, q18);
21202 ASSERT_EQUAL_128(0, 0x0000000000000000, q19);
21203 ASSERT_EQUAL_128(0, 0x0000000000000000, q20);
21204 ASSERT_EQUAL_128(0, 0xffffffffffffffff, q21);
21205 ASSERT_EQUAL_128(0, 0x0000000000000000, q22);
21206 ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q23);
21207 ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q24);
21208 ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, q25);
21209 TEARDOWN();
21210 }
21211
TEST(neon_2regmisc_fcmle)21212 TEST(neon_2regmisc_fcmle) {
21213 SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);
21214
21215 START();
21216
21217 __ Movi(v0.V2D(), 0x0000000000000000, 0x0000000000000000); // Zero.
21218 __ Movi(v1.V2D(), 0xffffffffffffffff, 0xffffffffffffffff); // Nan.
21219 __ Movi(v2.V2D(), 0xbf800000bf800000, 0xbf800000bf800000); // < 0.
21220 __ Movi(v3.V2D(), 0x3f8000003f800000, 0x3f8000003f800000); // > 0.
21221
21222 __ Fcmle(s16, s0, 0.0);
21223 __ Fcmle(s17, s1, 0.0);
21224 __ Fcmle(s18, s3, 0.0);
21225 __ Fcmle(d19, d0, 0.0);
21226 __ Fcmle(d20, d1, 0.0);
21227 __ Fcmle(d21, d2, 0.0);
21228 __ Fcmle(v22.V2S(), v0.V2S(), 0.0);
21229 __ Fcmle(v23.V4S(), v1.V4S(), 0.0);
21230 __ Fcmle(v24.V2D(), v1.V2D(), 0.0);
21231 __ Fcmle(v25.V2D(), v2.V2D(), 0.0);
21232
21233 END();
21234
21235 RUN();
21236 ASSERT_EQUAL_128(0, 0xffffffff, q16);
21237 ASSERT_EQUAL_128(0, 0x00000000, q17);
21238 ASSERT_EQUAL_128(0, 0x00000000, q18);
21239 ASSERT_EQUAL_128(0, 0xffffffffffffffff, q19);
21240 ASSERT_EQUAL_128(0, 0x0000000000000000, q20);
21241 ASSERT_EQUAL_128(0, 0xffffffffffffffff, q21);
21242 ASSERT_EQUAL_128(0, 0xffffffffffffffff, q22);
21243 ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q23);
21244 ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q24);
21245 ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, q25);
21246 TEARDOWN();
21247 }
21248
21249
TEST(neon_2regmisc_fcmlt)21250 TEST(neon_2regmisc_fcmlt) {
21251 SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);
21252
21253 START();
21254
21255 __ Movi(v0.V2D(), 0x0000000000000000, 0x0000000000000000); // Zero.
21256 __ Movi(v1.V2D(), 0xffffffffffffffff, 0xffffffffffffffff); // Nan.
21257 __ Movi(v2.V2D(), 0xbf800000bf800000, 0xbf800000bf800000); // < 0.
21258 __ Movi(v3.V2D(), 0x3f8000003f800000, 0x3f8000003f800000); // > 0.
21259
21260 __ Fcmlt(s16, s0, 0.0);
21261 __ Fcmlt(s17, s1, 0.0);
21262 __ Fcmlt(s18, s3, 0.0);
21263 __ Fcmlt(d19, d0, 0.0);
21264 __ Fcmlt(d20, d1, 0.0);
21265 __ Fcmlt(d21, d2, 0.0);
21266 __ Fcmlt(v22.V2S(), v0.V2S(), 0.0);
21267 __ Fcmlt(v23.V4S(), v1.V4S(), 0.0);
21268 __ Fcmlt(v24.V2D(), v1.V2D(), 0.0);
21269 __ Fcmlt(v25.V2D(), v2.V2D(), 0.0);
21270
21271 END();
21272
21273 RUN();
21274 ASSERT_EQUAL_128(0, 0x00000000, q16);
21275 ASSERT_EQUAL_128(0, 0x00000000, q17);
21276 ASSERT_EQUAL_128(0, 0x00000000, q18);
21277 ASSERT_EQUAL_128(0, 0x0000000000000000, q19);
21278 ASSERT_EQUAL_128(0, 0x0000000000000000, q20);
21279 ASSERT_EQUAL_128(0, 0xffffffffffffffff, q21);
21280 ASSERT_EQUAL_128(0, 0x0000000000000000, q22);
21281 ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q23);
21282 ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q24);
21283 ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, q25);
21284 TEARDOWN();
21285 }
21286
TEST(neon_2regmisc_cmeq)21287 TEST(neon_2regmisc_cmeq) {
21288 SETUP_WITH_FEATURES(CPUFeatures::kNEON);
21289
21290 START();
21291
21292 __ Movi(v0.V2D(), 0x0001000200030004, 0x0000000000000000);
21293 __ Movi(v1.V2D(), 0x000055aaff55ff00, 0x0000ff55555500ff);
21294
  __ Cmeq(v16.V8B(), v1.V8B(), 0);
  __ Cmeq(v17.V16B(), v1.V16B(), 0);
  __ Cmeq(v18.V4H(), v1.V4H(), 0);
  __ Cmeq(v19.V8H(), v1.V8H(), 0);
  __ Cmeq(v20.V2S(), v0.V2S(), 0);
  __ Cmeq(v21.V4S(), v0.V4S(), 0);
  __ Cmeq(d22, d0, 0);
  __ Cmeq(d23, d1, 0);
  __ Cmeq(v24.V2D(), v0.V2D(), 0);

  END();

  RUN();
  ASSERT_EQUAL_128(0x0000000000000000, 0xffff00000000ff00, q16);
  ASSERT_EQUAL_128(0xffff0000000000ff, 0xffff00000000ff00, q17);
  ASSERT_EQUAL_128(0x0000000000000000, 0xffff000000000000, q18);
  ASSERT_EQUAL_128(0xffff000000000000, 0xffff000000000000, q19);
  ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q20);
  ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q21);
  ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q22);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q23);
  ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q24);
  TEARDOWN();
}


TEST(neon_2regmisc_cmge) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0xff01000200030004, 0x0000000000000000);
  __ Movi(v1.V2D(), 0x000055aaff55ff00, 0x0000ff55555500ff);

  __ Cmge(v16.V8B(), v1.V8B(), 0);
  __ Cmge(v17.V16B(), v1.V16B(), 0);
  __ Cmge(v18.V4H(), v1.V4H(), 0);
  __ Cmge(v19.V8H(), v1.V8H(), 0);
  __ Cmge(v20.V2S(), v0.V2S(), 0);
  __ Cmge(v21.V4S(), v0.V4S(), 0);
  __ Cmge(d22, d0, 0);
  __ Cmge(d23, d1, 0);
  __ Cmge(v24.V2D(), v0.V2D(), 0);

  END();

  RUN();
  ASSERT_EQUAL_128(0x0000000000000000, 0xffff00ffffffff00, q16);
  ASSERT_EQUAL_128(0xffffff0000ff00ff, 0xffff00ffffffff00, q17);
  ASSERT_EQUAL_128(0x0000000000000000, 0xffff0000ffffffff, q18);
  ASSERT_EQUAL_128(0xffffffff00000000, 0xffff0000ffffffff, q19);
  ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q20);
  ASSERT_EQUAL_128(0x00000000ffffffff, 0xffffffffffffffff, q21);
  ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q22);
  ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q23);
  ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q24);
  TEARDOWN();
}


TEST(neon_2regmisc_cmlt) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x0001000200030004, 0xff00000000000000);
  __ Movi(v1.V2D(), 0x000055aaff55ff00, 0x0000ff55555500ff);

  __ Cmlt(v16.V8B(), v1.V8B(), 0);
  __ Cmlt(v17.V16B(), v1.V16B(), 0);
  __ Cmlt(v18.V4H(), v1.V4H(), 0);
  __ Cmlt(v19.V8H(), v1.V8H(), 0);
  __ Cmlt(v20.V2S(), v1.V2S(), 0);
  __ Cmlt(v21.V4S(), v1.V4S(), 0);
  __ Cmlt(d22, d0, 0);
  __ Cmlt(d23, d1, 0);
  __ Cmlt(v24.V2D(), v0.V2D(), 0);

  END();

  RUN();
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000ff00000000ff, q16);
  ASSERT_EQUAL_128(0x000000ffff00ff00, 0x0000ff00000000ff, q17);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000ffff00000000, q18);
  ASSERT_EQUAL_128(0x00000000ffffffff, 0x0000ffff00000000, q19);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q20);
  ASSERT_EQUAL_128(0x00000000ffffffff, 0x0000000000000000, q21);
  ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q22);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q23);
  ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q24);
  TEARDOWN();
}


TEST(neon_2regmisc_cmle) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x0001000200030004, 0x0000000000000000);
  __ Movi(v1.V2D(), 0x000055aaff55ff00, 0x0000ff55555500ff);

  __ Cmle(v16.V8B(), v1.V8B(), 0);
  __ Cmle(v17.V16B(), v1.V16B(), 0);
  __ Cmle(v18.V4H(), v1.V4H(), 0);
  __ Cmle(v19.V8H(), v1.V8H(), 0);
  __ Cmle(v20.V2S(), v1.V2S(), 0);
  __ Cmle(v21.V4S(), v1.V4S(), 0);
  __ Cmle(d22, d0, 0);
  __ Cmle(d23, d1, 0);
  __ Cmle(v24.V2D(), v0.V2D(), 0);

  END();

  RUN();
  ASSERT_EQUAL_128(0x0000000000000000, 0xffffff000000ffff, q16);
  ASSERT_EQUAL_128(0xffff00ffff00ffff, 0xffffff000000ffff, q17);
  ASSERT_EQUAL_128(0x0000000000000000, 0xffffffff00000000, q18);
  ASSERT_EQUAL_128(0xffff0000ffffffff, 0xffffffff00000000, q19);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q20);
  ASSERT_EQUAL_128(0x00000000ffffffff, 0x0000000000000000, q21);
  ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q22);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q23);
  ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q24);
  TEARDOWN();
}


TEST(neon_2regmisc_cmgt) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x0001000200030004, 0x0000000000000000);
  __ Movi(v1.V2D(), 0x000055aaff55ff00, 0x0000ff55555500ff);

  __ Cmgt(v16.V8B(), v1.V8B(), 0);
  __ Cmgt(v17.V16B(), v1.V16B(), 0);
  __ Cmgt(v18.V4H(), v1.V4H(), 0);
  __ Cmgt(v19.V8H(), v1.V8H(), 0);
  __ Cmgt(v20.V2S(), v0.V2S(), 0);
  __ Cmgt(v21.V4S(), v0.V4S(), 0);
  __ Cmgt(d22, d0, 0);
  __ Cmgt(d23, d1, 0);
  __ Cmgt(v24.V2D(), v0.V2D(), 0);

  END();

  RUN();
  ASSERT_EQUAL_128(0x0000000000000000, 0x000000ffffff0000, q16);
  ASSERT_EQUAL_128(0x0000ff0000ff0000, 0x000000ffffff0000, q17);
  ASSERT_EQUAL_128(0x0000000000000000, 0x00000000ffffffff, q18);
  ASSERT_EQUAL_128(0x0000ffff00000000, 0x00000000ffffffff, q19);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q20);
  ASSERT_EQUAL_128(0xffffffffffffffff, 0x0000000000000000, q21);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q22);
  ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q23);
  ASSERT_EQUAL_128(0xffffffffffffffff, 0x0000000000000000, q24);
  TEARDOWN();
}


TEST(neon_2regmisc_neg) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

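  // NEG wraps: negating the most negative representable value leaves it
  // unchanged.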
  __ Neg(v16.V8B(), v0.V8B());
  __ Neg(v17.V16B(), v0.V16B());
  __ Neg(v18.V4H(), v1.V4H());
  __ Neg(v19.V8H(), v1.V8H());
  __ Neg(v20.V2S(), v2.V2S());
  __ Neg(v21.V4S(), v2.V4S());
  __ Neg(d22, d3);
  __ Neg(v23.V2D(), v3.V2D());
  __ Neg(v24.V2D(), v4.V2D());

  END();

  RUN();
  ASSERT_EQUAL_128(0x0000000000000000, 0x807f0100ff81807f, q16);
  ASSERT_EQUAL_128(0x81ff00017f8081ff, 0x807f0100ff81807f, q17);
  ASSERT_EQUAL_128(0x0000000000000000, 0x00010000ffff8001, q18);
  ASSERT_EQUAL_128(0x80007fff00010000, 0x00010000ffff8001, q19);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000080000001, q20);
  ASSERT_EQUAL_128(0x8000000000000001, 0x0000000080000001, q21);
  ASSERT_EQUAL_128(0x0000000000000000, 0x8000000000000001, q22);
  ASSERT_EQUAL_128(0x7fffffffffffffff, 0x8000000000000001, q23);
  ASSERT_EQUAL_128(0x8000000000000000, 0x0000000000000000, q24);

  TEARDOWN();
}


TEST(neon_2regmisc_sqneg) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

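  // SQNEG saturates instead of wrapping: the most negative input produces
  // the most positive result.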
  __ Sqneg(v16.V8B(), v0.V8B());
  __ Sqneg(v17.V16B(), v0.V16B());
  __ Sqneg(v18.V4H(), v1.V4H());
  __ Sqneg(v19.V8H(), v1.V8H());
  __ Sqneg(v20.V2S(), v2.V2S());
  __ Sqneg(v21.V4S(), v2.V4S());
  __ Sqneg(v22.V2D(), v3.V2D());
  __ Sqneg(v23.V2D(), v4.V2D());

  __ Sqneg(b24, b0);
  __ Sqneg(h25, h1);
  __ Sqneg(s26, s2);
  __ Sqneg(d27, d3);

  END();

  RUN();
  ASSERT_EQUAL_128(0x0000000000000000, 0x7f7f0100ff817f7f, q16);
  ASSERT_EQUAL_128(0x81ff00017f7f81ff, 0x7f7f0100ff817f7f, q17);
  ASSERT_EQUAL_128(0x0000000000000000, 0x00010000ffff8001, q18);
  ASSERT_EQUAL_128(0x7fff7fff00010000, 0x00010000ffff8001, q19);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000080000001, q20);
  ASSERT_EQUAL_128(0x7fffffff00000001, 0x0000000080000001, q21);
  ASSERT_EQUAL_128(0x7fffffffffffffff, 0x8000000000000001, q22);
  ASSERT_EQUAL_128(0x7fffffffffffffff, 0x0000000000000000, q23);

  ASSERT_EQUAL_128(0, 0x7f, q24);
  ASSERT_EQUAL_128(0, 0x8001, q25);
  ASSERT_EQUAL_128(0, 0x80000001, q26);
  ASSERT_EQUAL_128(0, 0x8000000000000001, q27);

  TEARDOWN();
}


TEST(neon_2regmisc_abs) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  __ Abs(v16.V8B(), v0.V8B());
  __ Abs(v17.V16B(), v0.V16B());
  __ Abs(v18.V4H(), v1.V4H());
  __ Abs(v19.V8H(), v1.V8H());
  __ Abs(v20.V2S(), v2.V2S());
  __ Abs(v21.V4S(), v2.V4S());
  __ Abs(d22, d3);
  __ Abs(v23.V2D(), v3.V2D());
  __ Abs(v24.V2D(), v4.V2D());

  END();

  RUN();
  ASSERT_EQUAL_128(0x0000000000000000, 0x807f0100017f807f, q16);
  ASSERT_EQUAL_128(0x7f0100017f807f01, 0x807f0100017f807f, q17);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0001000000017fff, q18);
  ASSERT_EQUAL_128(0x80007fff00010000, 0x0001000000017fff, q19);
  ASSERT_EQUAL_128(0x0000000000000000, 0x000000007fffffff, q20);
  ASSERT_EQUAL_128(0x8000000000000001, 0x000000007fffffff, q21);
  ASSERT_EQUAL_128(0x0000000000000000, 0x7fffffffffffffff, q22);
  ASSERT_EQUAL_128(0x7fffffffffffffff, 0x7fffffffffffffff, q23);
  ASSERT_EQUAL_128(0x8000000000000000, 0x0000000000000000, q24);

  TEARDOWN();
}


TEST(neon_2regmisc_sqabs) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  __ Sqabs(v16.V8B(), v0.V8B());
  __ Sqabs(v17.V16B(), v0.V16B());
  __ Sqabs(v18.V4H(), v1.V4H());
  __ Sqabs(v19.V8H(), v1.V8H());
  __ Sqabs(v20.V2S(), v2.V2S());
  __ Sqabs(v21.V4S(), v2.V4S());
  __ Sqabs(v22.V2D(), v3.V2D());
  __ Sqabs(v23.V2D(), v4.V2D());

  __ Sqabs(b24, b0);
  __ Sqabs(h25, h1);
  __ Sqabs(s26, s2);
  __ Sqabs(d27, d3);

  END();

  RUN();
  ASSERT_EQUAL_128(0x0000000000000000, 0x7f7f0100017f7f7f, q16);
  ASSERT_EQUAL_128(0x7f0100017f7f7f01, 0x7f7f0100017f7f7f, q17);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0001000000017fff, q18);
  ASSERT_EQUAL_128(0x7fff7fff00010000, 0x0001000000017fff, q19);
  ASSERT_EQUAL_128(0x0000000000000000, 0x000000007fffffff, q20);
  ASSERT_EQUAL_128(0x7fffffff00000001, 0x000000007fffffff, q21);
  ASSERT_EQUAL_128(0x7fffffffffffffff, 0x7fffffffffffffff, q22);
  ASSERT_EQUAL_128(0x7fffffffffffffff, 0x0000000000000000, q23);

  ASSERT_EQUAL_128(0, 0x7f, q24);
  ASSERT_EQUAL_128(0, 0x7fff, q25);
  ASSERT_EQUAL_128(0, 0x7fffffff, q26);
  ASSERT_EQUAL_128(0, 0x7fffffffffffffff, q27);

  TEARDOWN();
}

TEST(neon_2regmisc_suqadd) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x017f8081ff00017f, 0x010080ff7f0180ff);

  __ Movi(v2.V2D(), 0x80008001ffff0000, 0xffff000000017ffd);
  __ Movi(v3.V2D(), 0xffff000080008001, 0x00017fffffff0001);

  __ Movi(v4.V2D(), 0x80000000fffffffe, 0xfffffff17ffffffe);
  __ Movi(v5.V2D(), 0xffffffff80000000, 0x7fffffff00000002);

  __ Movi(v6.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v7.V2D(), 0x8000000000000000, 0x8000000000000002);

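  // SUQADD adds the unsigned source elements to the signed accumulator and
  // saturates the result to the signed range.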
  __ Mov(v16.V2D(), v0.V2D());
  __ Mov(v17.V2D(), v0.V2D());
  __ Mov(v18.V2D(), v2.V2D());
  __ Mov(v19.V2D(), v2.V2D());
  __ Mov(v20.V2D(), v4.V2D());
  __ Mov(v21.V2D(), v4.V2D());
  __ Mov(v22.V2D(), v6.V2D());

  __ Mov(v23.V2D(), v0.V2D());
  __ Mov(v24.V2D(), v2.V2D());
  __ Mov(v25.V2D(), v4.V2D());
  __ Mov(v26.V2D(), v6.V2D());

  __ Suqadd(v16.V8B(), v1.V8B());
  __ Suqadd(v17.V16B(), v1.V16B());
  __ Suqadd(v18.V4H(), v3.V4H());
  __ Suqadd(v19.V8H(), v3.V8H());
  __ Suqadd(v20.V2S(), v5.V2S());
  __ Suqadd(v21.V4S(), v5.V4S());
  __ Suqadd(v22.V2D(), v7.V2D());

  __ Suqadd(b23, b1);
  __ Suqadd(h24, h3);
  __ Suqadd(s25, s5);
  __ Suqadd(d26, d7);

  END();

  RUN();
  ASSERT_EQUAL_128(0x0000000000000000, 0x81817f7f7f7f007f, q16);
  ASSERT_EQUAL_128(0x7f7f7f7f7f807f7f, 0x81817f7f7f7f007f, q17);
  ASSERT_EQUAL_128(0x0000000000000000, 0x00007fff7fff7ffe, q18);
  ASSERT_EQUAL_128(0x7fff80017fff7fff, 0x00007fff7fff7ffe, q19);
  ASSERT_EQUAL_128(0x0000000000000000, 0x7ffffff07fffffff, q20);
  ASSERT_EQUAL_128(0x7fffffff7ffffffe, 0x7ffffff07fffffff, q21);
  ASSERT_EQUAL_128(0x0000000000000001, 0x7fffffffffffffff, q22);

  ASSERT_EQUAL_128(0, 0x7f, q23);
  ASSERT_EQUAL_128(0, 0x7ffe, q24);
  ASSERT_EQUAL_128(0, 0x7fffffff, q25);
  ASSERT_EQUAL_128(0, 0x7fffffffffffffff, q26);
  TEARDOWN();
}

TEST(neon_2regmisc_usqadd) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f7ffe);
  __ Movi(v1.V2D(), 0x017f8081ff00017f, 0x010080ff7f018002);

  __ Movi(v2.V2D(), 0x80008001fffe0000, 0xffff000000017ffd);
  __ Movi(v3.V2D(), 0xffff000000028001, 0x00017fffffff0001);

  __ Movi(v4.V2D(), 0x80000000fffffffe, 0x00000001fffffffe);
  __ Movi(v5.V2D(), 0xffffffff80000000, 0xfffffffe00000002);

  __ Movi(v6.V2D(), 0x8000000000000002, 0x7fffffffffffffff);
  __ Movi(v7.V2D(), 0x7fffffffffffffff, 0x8000000000000000);

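  // USQADD is the converse: it adds the signed source elements to the
  // unsigned accumulator and saturates to the unsigned range.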
  __ Mov(v16.V2D(), v0.V2D());
  __ Mov(v17.V2D(), v0.V2D());
  __ Mov(v18.V2D(), v2.V2D());
  __ Mov(v19.V2D(), v2.V2D());
  __ Mov(v20.V2D(), v4.V2D());
  __ Mov(v21.V2D(), v4.V2D());
  __ Mov(v22.V2D(), v6.V2D());

  __ Mov(v23.V2D(), v0.V2D());
  __ Mov(v24.V2D(), v2.V2D());
  __ Mov(v25.V2D(), v4.V2D());
  __ Mov(v26.V2D(), v6.V2D());

  __ Usqadd(v16.V8B(), v1.V8B());
  __ Usqadd(v17.V16B(), v1.V16B());
  __ Usqadd(v18.V4H(), v3.V4H());
  __ Usqadd(v19.V8H(), v3.V8H());
  __ Usqadd(v20.V2S(), v5.V2S());
  __ Usqadd(v21.V4S(), v5.V4S());
  __ Usqadd(v22.V2D(), v7.V2D());

  __ Usqadd(b23, b1);
  __ Usqadd(h24, h3);
  __ Usqadd(s25, s5);
  __ Usqadd(d26, d7);

  END();

  RUN();
  ASSERT_EQUAL_128(0x0000000000000000, 0x81817f00808000ff, q16);
  ASSERT_EQUAL_128(0x8080008080808080, 0x81817f00808000ff, q17);
  ASSERT_EQUAL_128(0x0000000000000000, 0xffff7fff00007ffe, q18);
  ASSERT_EQUAL_128(0x7fff8001ffff0000, 0xffff7fff00007ffe, q19);
  ASSERT_EQUAL_128(0x0000000000000000, 0x00000000ffffffff, q20);
  ASSERT_EQUAL_128(0x7fffffff7ffffffe, 0x00000000ffffffff, q21);
  ASSERT_EQUAL_128(0xffffffffffffffff, 0x0000000000000000, q22);

  ASSERT_EQUAL_128(0, 0xff, q23);
  ASSERT_EQUAL_128(0, 0x7ffe, q24);
  ASSERT_EQUAL_128(0, 0xffffffff, q25);
  ASSERT_EQUAL_128(0, 0x0000000000000000, q26);
  TEARDOWN();
}


TEST(system_sys) {
  SETUP();
  const char* msg = "SYS test!";
  uintptr_t msg_addr = reinterpret_cast<uintptr_t>(msg);

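  // These Sys encodings (op1 = 3, Cn = 7, Cm = 5/10/11/14, op2 = 1) should
  // correspond to the cache maintenance aliases IC IVAU, DC CVAC, DC CVAU
  // and DC CIVAC.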
  START();
  __ Mov(x4, msg_addr);
  __ Sys(3, 0x7, 0x5, 1, x4);
  __ Mov(x3, x4);
  __ Sys(3, 0x7, 0xa, 1, x3);
  __ Mov(x2, x3);
  __ Sys(3, 0x7, 0xb, 1, x2);
  __ Mov(x1, x2);
  __ Sys(3, 0x7, 0xe, 1, x1);
  // TODO: Add tests to check ZVA equivalent.
  END();

  RUN();

  TEARDOWN();
}


TEST(system_ic) {
  SETUP();
  const char* msg = "IC test!";
  uintptr_t msg_addr = reinterpret_cast<uintptr_t>(msg);

  START();
  __ Mov(x11, msg_addr);
  __ Ic(IVAU, x11);
  END();

  RUN();

  TEARDOWN();
}


TEST(system_dc) {
  SETUP();
  const char* msg = "DC test!";
  uintptr_t msg_addr = reinterpret_cast<uintptr_t>(msg);

  START();
  __ Mov(x20, msg_addr);
  __ Dc(CVAC, x20);
  __ Mov(x21, x20);
  __ Dc(CVAU, x21);
  __ Mov(x22, x21);
  __ Dc(CIVAC, x22);
  // TODO: Add tests to check ZVA.
  END();

  RUN();

  TEARDOWN();
}


TEST(neon_2regmisc_xtn) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

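  // XTN narrows each wide element by truncation into the lower half of the
  // destination (clearing the upper half); XTN2 fills the upper half and
  // leaves the lower half intact.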
  __ Xtn(v16.V8B(), v0.V8H());
  __ Xtn2(v16.V16B(), v1.V8H());
  __ Xtn(v17.V4H(), v1.V4S());
  __ Xtn2(v17.V8H(), v2.V4S());
  __ Xtn(v18.V2S(), v3.V2D());
  __ Xtn2(v18.V4S(), v4.V2D());

  END();

  RUN();
  ASSERT_EQUAL_128(0x0001ff00ff0001ff, 0x01ff800181007f81, q16);
  ASSERT_EQUAL_128(0x0000ffff0000ffff, 0x8001000000007fff, q17);
  ASSERT_EQUAL_128(0x0000000000000000, 0x00000001ffffffff, q18);
  TEARDOWN();
}


TEST(neon_2regmisc_sqxtn) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x7f01007a81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

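  // SQXTN narrows with signed saturation, clamping out-of-range values to
  // the limits of the narrow type.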
  __ Sqxtn(v16.V8B(), v0.V8H());
  __ Sqxtn2(v16.V16B(), v1.V8H());
  __ Sqxtn(v17.V4H(), v1.V4S());
  __ Sqxtn2(v17.V8H(), v2.V4S());
  __ Sqxtn(v18.V2S(), v3.V2D());
  __ Sqxtn2(v18.V4S(), v4.V2D());
  __ Sqxtn(b19, h0);
  __ Sqxtn(h20, s0);
  __ Sqxtn(s21, d0);

  END();

  RUN();
  ASSERT_EQUAL_128(0x8080ff00ff00017f, 0x7f7a807f80807f80, q16);
  ASSERT_EQUAL_128(0x8000ffff00007fff, 0x8000800080007fff, q17);
  ASSERT_EQUAL_128(0x8000000000000000, 0x800000007fffffff, q18);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000080, q19);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000007fff, q20);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000080000000, q21);
  TEARDOWN();
}


TEST(neon_2regmisc_uqxtn) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x7f01007a81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  __ Uqxtn(v16.V8B(), v0.V8H());
  __ Uqxtn2(v16.V16B(), v1.V8H());
  __ Uqxtn(v17.V4H(), v1.V4S());
  __ Uqxtn2(v17.V8H(), v2.V4S());
  __ Uqxtn(v18.V2S(), v3.V2D());
  __ Uqxtn2(v18.V4S(), v4.V2D());
  __ Uqxtn(b19, h0);
  __ Uqxtn(h20, s0);
  __ Uqxtn(s21, d0);

  END();

  RUN();
  ASSERT_EQUAL_128(0xffffff00ff0001ff, 0xff7affffffffffff, q16);
  ASSERT_EQUAL_128(0xffffffff0000ffff, 0xffffffffffffffff, q17);
  ASSERT_EQUAL_128(0xffffffff00000000, 0xffffffffffffffff, q18);
  ASSERT_EQUAL_128(0x0000000000000000, 0x00000000000000ff, q19);
  ASSERT_EQUAL_128(0x0000000000000000, 0x000000000000ffff, q20);
  ASSERT_EQUAL_128(0x0000000000000000, 0x00000000ffffffff, q21);
  TEARDOWN();
}


TEST(neon_2regmisc_sqxtun) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x7f01007a81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

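  // SQXTUN treats the source as signed but saturates to the unsigned narrow
  // range, so negative inputs clamp to zero.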
  __ Sqxtun(v16.V8B(), v0.V8H());
  __ Sqxtun2(v16.V16B(), v1.V8H());
  __ Sqxtun(v17.V4H(), v1.V4S());
  __ Sqxtun2(v17.V8H(), v2.V4S());
  __ Sqxtun(v18.V2S(), v3.V2D());
  __ Sqxtun2(v18.V4S(), v4.V2D());
  __ Sqxtun(b19, h0);
  __ Sqxtun(h20, s0);
  __ Sqxtun(s21, d0);

  END();

  RUN();
  ASSERT_EQUAL_128(0x00000000000001ff, 0xff7a00ff0000ff00, q16);
  ASSERT_EQUAL_128(0x000000000000ffff, 0x000000000000ffff, q17);
  ASSERT_EQUAL_128(0x0000000000000000, 0x00000000ffffffff, q18);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q19);
  ASSERT_EQUAL_128(0x0000000000000000, 0x000000000000ffff, q20);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q21);
  TEARDOWN();
}

TEST(neon_3same_and) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa);
  __ Movi(v1.V2D(), 0x00aa55aaff55ff00, 0xaa55ff00555500ff);

  __ And(v16.V16B(), v0.V16B(), v0.V16B()); // self test
  __ And(v17.V16B(), v0.V16B(), v1.V16B()); // all combinations
  __ And(v24.V8B(), v0.V8B(), v0.V8B()); // self test
  __ And(v25.V8B(), v0.V8B(), v1.V8B()); // all combinations
  END();

  RUN();
  ASSERT_EQUAL_128(0xff00aa5500ff55aa, 0xff00aa5500ff55aa, q16);
  ASSERT_EQUAL_128(0x0000000000555500, 0xaa00aa00005500aa, q17);
  ASSERT_EQUAL_128(0, 0xff00aa5500ff55aa, q24);
  ASSERT_EQUAL_128(0, 0xaa00aa00005500aa, q25);
  TEARDOWN();
}

TEST(neon_3same_bic) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa);
  __ Movi(v1.V2D(), 0x00ffaa00aa55aaff, 0xffff005500ff00ff);

  __ Bic(v16.V16B(), v0.V16B(), v0.V16B()); // self test
  __ Bic(v17.V16B(), v0.V16B(), v1.V16B()); // all combinations
  __ Bic(v24.V8B(), v0.V8B(), v0.V8B()); // self test
  __ Bic(v25.V8B(), v0.V8B(), v1.V8B()); // all combinations
  END();

  RUN();
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q16);
  ASSERT_EQUAL_128(0xff00005500aa5500, 0x0000aa0000005500, q17);
  ASSERT_EQUAL_128(0, 0x0000000000000000, q24);
  ASSERT_EQUAL_128(0, 0x0000aa0000005500, q25);
  TEARDOWN();
}

TEST(neon_3same_orr) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa);
  __ Movi(v1.V2D(), 0x00aa55aaff55ff00, 0xaa55ff00555500ff);

  __ Orr(v16.V16B(), v0.V16B(), v0.V16B()); // self test
  __ Orr(v17.V16B(), v0.V16B(), v1.V16B()); // all combinations
  __ Orr(v24.V8B(), v0.V8B(), v0.V8B()); // self test
  __ Orr(v25.V8B(), v0.V8B(), v1.V8B()); // all combinations
  END();

  RUN();
  ASSERT_EQUAL_128(0xff00aa5500ff55aa, 0xff00aa5500ff55aa, q16);
  ASSERT_EQUAL_128(0xffaaffffffffffaa, 0xff55ff5555ff55ff, q17);
  ASSERT_EQUAL_128(0, 0xff00aa5500ff55aa, q24);
  ASSERT_EQUAL_128(0, 0xff55ff5555ff55ff, q25);
  TEARDOWN();
}

TEST(neon_3same_mov) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa);

  __ Mov(v16.V16B(), v0.V16B());
  __ Mov(v17.V8H(), v0.V8H());
  __ Mov(v18.V4S(), v0.V4S());
  __ Mov(v19.V2D(), v0.V2D());

  __ Mov(v24.V8B(), v0.V8B());
  __ Mov(v25.V4H(), v0.V4H());
  __ Mov(v26.V2S(), v0.V2S());
  END();

  RUN();

  ASSERT_EQUAL_128(0xff00aa5500ff55aa, 0xff00aa5500ff55aa, q16);
  ASSERT_EQUAL_128(0xff00aa5500ff55aa, 0xff00aa5500ff55aa, q17);
  ASSERT_EQUAL_128(0xff00aa5500ff55aa, 0xff00aa5500ff55aa, q18);
  ASSERT_EQUAL_128(0xff00aa5500ff55aa, 0xff00aa5500ff55aa, q19);

  ASSERT_EQUAL_128(0x0, 0xff00aa5500ff55aa, q24);
  ASSERT_EQUAL_128(0x0, 0xff00aa5500ff55aa, q25);
  ASSERT_EQUAL_128(0x0, 0xff00aa5500ff55aa, q26);

  TEARDOWN();
}

TEST(neon_3same_orn) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa);
  __ Movi(v1.V2D(), 0x00aa55aaff55ff00, 0xaa55ff00555500ff);

  __ Orn(v16.V16B(), v0.V16B(), v0.V16B()); // self test
  __ Orn(v17.V16B(), v0.V16B(), v1.V16B()); // all combinations
  __ Orn(v24.V8B(), v0.V8B(), v0.V8B()); // self test
  __ Orn(v25.V8B(), v0.V8B(), v1.V8B()); // all combinations
  END();

  RUN();
  ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, q16);
  ASSERT_EQUAL_128(0xff55aa5500ff55ff, 0xffaaaaffaaffffaa, q17);
  ASSERT_EQUAL_128(0, 0xffffffffffffffff, q24);
  ASSERT_EQUAL_128(0, 0xffaaaaffaaffffaa, q25);
  TEARDOWN();
}

TEST(neon_3same_eor) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa);
  __ Movi(v1.V2D(), 0x00ffaa00aa55aaff, 0xffff005500ff00ff);

  __ Eor(v16.V16B(), v0.V16B(), v0.V16B()); // self test
  __ Eor(v17.V16B(), v0.V16B(), v1.V16B()); // all combinations
  __ Eor(v24.V8B(), v0.V8B(), v0.V8B()); // self test
  __ Eor(v25.V8B(), v0.V8B(), v1.V8B()); // all combinations
  END();

  RUN();
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q16);
  ASSERT_EQUAL_128(0xffff0055aaaaff55, 0x00ffaa0000005555, q17);
  ASSERT_EQUAL_128(0, 0x0000000000000000, q24);
  ASSERT_EQUAL_128(0, 0x00ffaa0000005555, q25);
  TEARDOWN();
}

TEST(neon_3same_bif) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v16.V2D(), 0xffff0000ff00ffff, 0xffff00000000aaaa);
  __ Movi(v0.V2D(), 0xff00ff00ff005555, 0xaaaa5555aaaaaaaa);
  __ Movi(v1.V2D(), 0x00ff00ffff0055aa, 0x55aa55aa55aa55aa);

  __ Movi(v17.V2D(), 0x5555aa55cccccccc, 0x33333333f0f0f0f0);
  __ Movi(v2.V2D(), 0x555555aaff00ff00, 0xff00ff00ff00ff00);
  __ Movi(v3.V2D(), 0xaa55aa5500ffff00, 0x00ffff0000ffff00);

  __ Movi(v18.V2D(), 0, 0xf0f0f0f00f0f0f0f);
  __ Movi(v4.V2D(), 0, 0xf0f0f0f0f0f0f0f0);
  __ Movi(v5.V2D(), 0, 0x00ffff0000ffff00);

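  // BIF inserts bits of the first source into the destination where the
  // second source (the mask) is zero; BIT inserts where the mask is one;
  // BSL uses the destination itself as the mask to select between the two
  // sources.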
  __ Bif(v16.V16B(), v0.V16B(), v1.V16B());
  __ Bif(v17.V16B(), v2.V16B(), v3.V16B());
  __ Bif(v18.V8B(), v4.V8B(), v5.V8B());
  END();

  RUN();

  ASSERT_EQUAL_128(0xffffff00ff0055ff, 0xffaa0055aa00aaaa, q16);
  ASSERT_EQUAL_128(0x5555ffffffcccc00, 0xff333300fff0f000, q17);
  ASSERT_EQUAL_128(0, 0xf0f0f0f0f00f0ff0, q18);
  TEARDOWN();
}

TEST(neon_3same_bit) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v16.V2D(), 0xffff0000ff00ffff, 0xffff00000000aaaa);
  __ Movi(v0.V2D(), 0xff00ff00ff005555, 0xaaaa5555aaaaaaaa);
  __ Movi(v1.V2D(), 0x00ff00ffff0055aa, 0x55aa55aa55aa55aa);

  __ Movi(v17.V2D(), 0x5555aa55cccccccc, 0x33333333f0f0f0f0);
  __ Movi(v2.V2D(), 0x555555aaff00ff00, 0xff00ff00ff00ff00);
  __ Movi(v3.V2D(), 0xaa55aa5500ffff00, 0x00ffff0000ffff00);

  __ Movi(v18.V2D(), 0, 0xf0f0f0f00f0f0f0f);
  __ Movi(v4.V2D(), 0, 0xf0f0f0f0f0f0f0f0);
  __ Movi(v5.V2D(), 0, 0x00ffff0000ffff00);

  __ Bit(v16.V16B(), v0.V16B(), v1.V16B());
  __ Bit(v17.V16B(), v2.V16B(), v3.V16B());
  __ Bit(v18.V8B(), v4.V8B(), v5.V8B());
  END();

  RUN();

  ASSERT_EQUAL_128(0xff000000ff00ff55, 0xaaff550000aaaaaa, q16);
  ASSERT_EQUAL_128(0x55550000cc00ffcc, 0x3300ff33f000fff0, q17);
  ASSERT_EQUAL_128(0, 0xf0f0f0f00ff0f00f, q18);
  TEARDOWN();
}

TEST(neon_3same_bsl) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v16.V2D(), 0xffff0000ff00ffff, 0xffff00000000aaaa);
  __ Movi(v0.V2D(), 0xff00ff00ff005555, 0xaaaa5555aaaaaaaa);
  __ Movi(v1.V2D(), 0x00ff00ffff0055aa, 0x55aa55aa55aa55aa);

  __ Movi(v17.V2D(), 0x5555aa55cccccccc, 0x33333333f0f0f0f0);
  __ Movi(v2.V2D(), 0x555555aaff00ff00, 0xff00ff00ff00ff00);
  __ Movi(v3.V2D(), 0xaa55aa5500ffff00, 0x00ffff0000ffff00);

  __ Movi(v18.V2D(), 0, 0xf0f0f0f00f0f0f0f);
  __ Movi(v4.V2D(), 0, 0xf0f0f0f0f0f0f0f0);
  __ Movi(v5.V2D(), 0, 0x00ffff0000ffff00);

  __ Bsl(v16.V16B(), v0.V16B(), v1.V16B());
  __ Bsl(v17.V16B(), v2.V16B(), v3.V16B());
  __ Bsl(v18.V8B(), v4.V8B(), v5.V8B());
  END();

  RUN();

  ASSERT_EQUAL_128(0xff0000ffff005555, 0xaaaa55aa55aaffaa, q16);
  ASSERT_EQUAL_128(0xff550000cc33ff00, 0x33ccff00f00fff00, q17);
  ASSERT_EQUAL_128(0, 0xf0fffff000f0f000, q18);
  TEARDOWN();
}


TEST(neon_3same_smax) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0xaaaaaaaa55555555, 0xffffffff0000aa55);
  __ Movi(v1.V2D(), 0x55aa5555aaaaaaaa, 0x00000000ffaa55ff);

  __ Smax(v16.V8B(), v0.V8B(), v1.V8B());
  __ Smax(v18.V4H(), v0.V4H(), v1.V4H());
  __ Smax(v20.V2S(), v0.V2S(), v1.V2S());

  __ Smax(v17.V16B(), v0.V16B(), v1.V16B());
  __ Smax(v19.V8H(), v0.V8H(), v1.V8H());
  __ Smax(v21.V4S(), v0.V4S(), v1.V4S());
  END();

  RUN();

  ASSERT_EQUAL_128(0x0, 0x0000000000005555, q16);
  ASSERT_EQUAL_128(0x0, 0x00000000000055ff, q18);
  ASSERT_EQUAL_128(0x0, 0x000000000000aa55, q20);
  ASSERT_EQUAL_128(0x55aa555555555555, 0x0000000000005555, q17);
  ASSERT_EQUAL_128(0x55aa555555555555, 0x00000000000055ff, q19);
  ASSERT_EQUAL_128(0x55aa555555555555, 0x000000000000aa55, q21);
  TEARDOWN();
}


TEST(neon_3same_smaxp) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0xaaaaaaaa55555555, 0xffffffff0000aa55);
  __ Movi(v1.V2D(), 0x55aa5555aaaaaaaa, 0x00000000ffaa55ff);

  __ Smaxp(v16.V8B(), v0.V8B(), v1.V8B());
  __ Smaxp(v18.V4H(), v0.V4H(), v1.V4H());
  __ Smaxp(v20.V2S(), v0.V2S(), v1.V2S());

  __ Smaxp(v17.V16B(), v0.V16B(), v1.V16B());
  __ Smaxp(v19.V8H(), v0.V8H(), v1.V8H());
  __ Smaxp(v21.V4S(), v0.V4S(), v1.V4S());
  END();

  RUN();

  ASSERT_EQUAL_128(0x0, 0x0000ff55ffff0055, q16);
  ASSERT_EQUAL_128(0x0, 0x000055ffffff0000, q18);
  ASSERT_EQUAL_128(0x0, 0x000000000000aa55, q20);
  ASSERT_EQUAL_128(0x5555aaaa0000ff55, 0xaaaa5555ffff0055, q17);
  ASSERT_EQUAL_128(0x55aaaaaa000055ff, 0xaaaa5555ffff0000, q19);
  ASSERT_EQUAL_128(0x55aa555500000000, 0x555555550000aa55, q21);
  TEARDOWN();
}


TEST(neon_addp_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x0011223344aafe80, 0x00112233aabbfc00);
  __ Movi(v1.V2D(), 0x55aa5555aaaaaaaa, 0x00000000ffaa55ff);
  __ Movi(v2.V2D(), 0xaaaaaaaa66555555, 0xffffffff0000aa00);

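  // The scalar form of ADDP sums the two 64-bit lanes of each source vector.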
  __ Addp(d16, v0.V2D());
  __ Addp(d17, v1.V2D());
  __ Addp(d18, v2.V2D());

  END();

  RUN();

  ASSERT_EQUAL_128(0x0, 0x00224466ef66fa80, q16);
  ASSERT_EQUAL_128(0x0, 0x55aa5556aa5500a9, q17);
  ASSERT_EQUAL_128(0x0, 0xaaaaaaa96655ff55, q18);
  TEARDOWN();
}

TEST(neon_acrosslanes_addv) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x0011223344aafe80, 0x00112233aabbfc00);
  __ Movi(v1.V2D(), 0x55aa5555aaaaaaaa, 0x00000000ffaa55ff);
  __ Movi(v2.V2D(), 0xaaaaaaaa66555555, 0xffffffff0000aa00);

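  // ADDV sums every lane into a single scalar of the same element width;
  // the SADDLV/UADDLV variants in the following tests widen the result,
  // which avoids wrap-around in the accumulation.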
  __ Addv(b16, v0.V8B());
  __ Addv(b17, v0.V16B());
  __ Addv(h18, v1.V4H());
  __ Addv(h19, v1.V8H());
  __ Addv(s20, v2.V4S());

  END();

  RUN();

  ASSERT_EQUAL_128(0x0, 0xc7, q16);
  ASSERT_EQUAL_128(0x0, 0x99, q17);
  ASSERT_EQUAL_128(0x0, 0x55a9, q18);
  ASSERT_EQUAL_128(0x0, 0x55fc, q19);
  ASSERT_EQUAL_128(0x0, 0x1100a9fe, q20);
  TEARDOWN();
}


TEST(neon_acrosslanes_saddlv) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x0011223344aafe80, 0x00112233aabbfc00);
  __ Movi(v1.V2D(), 0x55aa5555aaaaaaaa, 0x00000000ffaa55ff);
  __ Movi(v2.V2D(), 0xaaaaaaaa66555555, 0xffffffff0000aa00);

  __ Saddlv(h16, v0.V8B());
  __ Saddlv(h17, v0.V16B());
  __ Saddlv(s18, v1.V4H());
  __ Saddlv(s19, v1.V8H());
  __ Saddlv(d20, v2.V4S());

  END();

  RUN();

  ASSERT_EQUAL_128(0x0, 0xffc7, q16);
  ASSERT_EQUAL_128(0x0, 0xff99, q17);
  ASSERT_EQUAL_128(0x0, 0x000055a9, q18);
  ASSERT_EQUAL_128(0x0, 0x000055fc, q19);
  ASSERT_EQUAL_128(0x0, 0x0000001100a9fe, q20);
  TEARDOWN();
}


TEST(neon_acrosslanes_uaddlv) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x0011223344aafe80, 0x00112233aabbfc00);
  __ Movi(v1.V2D(), 0x55aa5555aaaaaaaa, 0x00000000ffaa55ff);
  __ Movi(v2.V2D(), 0xaaaaaaaa66555555, 0xffffffff0000aa00);

  __ Uaddlv(h16, v0.V8B());
  __ Uaddlv(h17, v0.V16B());
  __ Uaddlv(s18, v1.V4H());
  __ Uaddlv(s19, v1.V8H());
  __ Uaddlv(d20, v2.V4S());

  END();

  RUN();

  ASSERT_EQUAL_128(0x0, 0x02c7, q16);
  ASSERT_EQUAL_128(0x0, 0x0599, q17);
  ASSERT_EQUAL_128(0x0, 0x000155a9, q18);
  ASSERT_EQUAL_128(0x0, 0x000355fc, q19);
  ASSERT_EQUAL_128(0x0, 0x000000021100a9fe, q20);
  TEARDOWN();
}


TEST(neon_acrosslanes_smaxv) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x0011223344aafe80, 0x00112233aabbfc00);
  __ Movi(v1.V2D(), 0x55aa5555aaaaaaaa, 0x00000000ffaa55ff);
  __ Movi(v2.V2D(), 0xaaaaaaaa66555555, 0xffffffff0000aa00);

  __ Smaxv(b16, v0.V8B());
  __ Smaxv(b17, v0.V16B());
  __ Smaxv(h18, v1.V4H());
  __ Smaxv(h19, v1.V8H());
  __ Smaxv(s20, v2.V4S());

  END();

  RUN();

  ASSERT_EQUAL_128(0x0, 0x33, q16);
  ASSERT_EQUAL_128(0x0, 0x44, q17);
  ASSERT_EQUAL_128(0x0, 0x55ff, q18);
  ASSERT_EQUAL_128(0x0, 0x55ff, q19);
  ASSERT_EQUAL_128(0x0, 0x66555555, q20);
  TEARDOWN();
}


TEST(neon_acrosslanes_sminv) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x0011223344aafe80, 0x00112233aabbfc00);
  __ Movi(v1.V2D(), 0xfffa5555aaaaaaaa, 0x00000000ffaa55ff);
  __ Movi(v2.V2D(), 0xaaaaaaaa66555555, 0xffffffff0000aa00);

  __ Sminv(b16, v0.V8B());
  __ Sminv(b17, v0.V16B());
  __ Sminv(h18, v1.V4H());
  __ Sminv(h19, v1.V8H());
  __ Sminv(s20, v2.V4S());

  END();

  RUN();

  ASSERT_EQUAL_128(0x0, 0xaa, q16);
  ASSERT_EQUAL_128(0x0, 0x80, q17);
  ASSERT_EQUAL_128(0x0, 0xffaa, q18);
  ASSERT_EQUAL_128(0x0, 0xaaaa, q19);
  ASSERT_EQUAL_128(0x0, 0xaaaaaaaa, q20);
  TEARDOWN();
}

TEST(neon_acrosslanes_umaxv) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x0011223344aafe80, 0x00112233aabbfc00);
  __ Movi(v1.V2D(), 0x55aa5555aaaaffab, 0x00000000ffaa55ff);
  __ Movi(v2.V2D(), 0xaaaaaaaa66555555, 0xffffffff0000aa00);

  __ Umaxv(b16, v0.V8B());
  __ Umaxv(b17, v0.V16B());
  __ Umaxv(h18, v1.V4H());
  __ Umaxv(h19, v1.V8H());
  __ Umaxv(s20, v2.V4S());

  END();

  RUN();

  ASSERT_EQUAL_128(0x0, 0xfc, q16);
  ASSERT_EQUAL_128(0x0, 0xfe, q17);
  ASSERT_EQUAL_128(0x0, 0xffaa, q18);
  ASSERT_EQUAL_128(0x0, 0xffab, q19);
  ASSERT_EQUAL_128(0x0, 0xffffffff, q20);
  TEARDOWN();
}


TEST(neon_acrosslanes_uminv) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x0011223344aafe80, 0x02112233aabbfc01);
  __ Movi(v1.V2D(), 0xfffa5555aaaa0000, 0x00010003ffaa55ff);
  __ Movi(v2.V2D(), 0xaaaaaaaa66555555, 0xffffffff0000aa00);

  __ Uminv(b16, v0.V8B());
  __ Uminv(b17, v0.V16B());
  __ Uminv(h18, v1.V4H());
  __ Uminv(h19, v1.V8H());
  __ Uminv(s20, v2.V4S());

  END();

  RUN();

  ASSERT_EQUAL_128(0x0, 0x01, q16);
  ASSERT_EQUAL_128(0x0, 0x00, q17);
  ASSERT_EQUAL_128(0x0, 0x0001, q18);
  ASSERT_EQUAL_128(0x0, 0x0000, q19);
  ASSERT_EQUAL_128(0x0, 0x0000aa00, q20);
  TEARDOWN();
}


TEST(neon_3same_smin) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0xaaaaaaaa55555555, 0xffffffff0000aa55);
  __ Movi(v1.V2D(), 0x55aa5555aaaaaaaa, 0x00000000ffaa55ff);

  __ Smin(v16.V8B(), v0.V8B(), v1.V8B());
  __ Smin(v18.V4H(), v0.V4H(), v1.V4H());
  __ Smin(v20.V2S(), v0.V2S(), v1.V2S());

  __ Smin(v17.V16B(), v0.V16B(), v1.V16B());
  __ Smin(v19.V8H(), v0.V8H(), v1.V8H());
  __ Smin(v21.V4S(), v0.V4S(), v1.V4S());
  END();

  RUN();

  ASSERT_EQUAL_128(0x0, 0xffffffffffaaaaff, q16);
  ASSERT_EQUAL_128(0x0, 0xffffffffffaaaa55, q18);
  ASSERT_EQUAL_128(0x0, 0xffffffffffaa55ff, q20);
  ASSERT_EQUAL_128(0xaaaaaaaaaaaaaaaa, 0xffffffffffaaaaff, q17);
  ASSERT_EQUAL_128(0xaaaaaaaaaaaaaaaa, 0xffffffffffaaaa55, q19);
  ASSERT_EQUAL_128(0xaaaaaaaaaaaaaaaa, 0xffffffffffaa55ff, q21);
  TEARDOWN();
}


TEST(neon_3same_umax) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0xaaaaaaaa55555555, 0xffffffff0000aa55);
  __ Movi(v1.V2D(), 0x55aa5555aaaaaaaa, 0x00000000ffaa55ff);

  __ Umax(v16.V8B(), v0.V8B(), v1.V8B());
  __ Umax(v18.V4H(), v0.V4H(), v1.V4H());
  __ Umax(v20.V2S(), v0.V2S(), v1.V2S());

  __ Umax(v17.V16B(), v0.V16B(), v1.V16B());
  __ Umax(v19.V8H(), v0.V8H(), v1.V8H());
  __ Umax(v21.V4S(), v0.V4S(), v1.V4S());
  END();

  RUN();

  ASSERT_EQUAL_128(0x0, 0xffffffffffaaaaff, q16);
  ASSERT_EQUAL_128(0x0, 0xffffffffffaaaa55, q18);
  ASSERT_EQUAL_128(0x0, 0xffffffffffaa55ff, q20);
  ASSERT_EQUAL_128(0xaaaaaaaaaaaaaaaa, 0xffffffffffaaaaff, q17);
  ASSERT_EQUAL_128(0xaaaaaaaaaaaaaaaa, 0xffffffffffaaaa55, q19);
  ASSERT_EQUAL_128(0xaaaaaaaaaaaaaaaa, 0xffffffffffaa55ff, q21);
  TEARDOWN();
}


TEST(neon_3same_umin) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0xaaaaaaaa55555555, 0xffffffff0000aa55);
  __ Movi(v1.V2D(), 0x55aa5555aaaaaaaa, 0x00000000ffaa55ff);

  __ Umin(v16.V8B(), v0.V8B(), v1.V8B());
  __ Umin(v18.V4H(), v0.V4H(), v1.V4H());
  __ Umin(v20.V2S(), v0.V2S(), v1.V2S());

  __ Umin(v17.V16B(), v0.V16B(), v1.V16B());
  __ Umin(v19.V8H(), v0.V8H(), v1.V8H());
  __ Umin(v21.V4S(), v0.V4S(), v1.V4S());
  END();

  RUN();

  ASSERT_EQUAL_128(0x0, 0x0000000000005555, q16);
  ASSERT_EQUAL_128(0x0, 0x00000000000055ff, q18);
  ASSERT_EQUAL_128(0x0, 0x000000000000aa55, q20);
  ASSERT_EQUAL_128(0x55aa555555555555, 0x0000000000005555, q17);
  ASSERT_EQUAL_128(0x55aa555555555555, 0x00000000000055ff, q19);
  ASSERT_EQUAL_128(0x55aa555555555555, 0x000000000000aa55, q21);
  TEARDOWN();
}


TEST(neon_3same_extra_fcadd) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP, CPUFeatures::kFcma);

  START();

  // (0i, 5) (d)
  __ Movi(v0.V2D(), 0x0, 0x4014000000000000);
  // (5i, 0) (d)
  __ Movi(v1.V2D(), 0x4014000000000000, 0x0);
  // (10i, 10) (d)
  __ Movi(v2.V2D(), 0x4024000000000000, 0x4024000000000000);
  // (5i, 5), (5i, 5) (f)
  __ Movi(v3.V2D(), 0x40A0000040A00000, 0x40A0000040A00000);
  // (5i, 5), (0i, 0) (f)
  __ Movi(v4.V2D(), 0x40A0000040A00000, 0x0);
  // 324567i, 16000 (f)
  __ Movi(v5.V2D(), 0x0, 0x489E7AE0467A0000);

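  // FCADD rotates the second operand before adding: a #90 rotation
  // multiplies each complex element by i and #270 by -i, so the 90/270
  // pairs below implement complex subtraction and addition.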
  // Subtraction (10, 10) - (5, 5) == (5, 5)
  __ Fcadd(v31.V2D(), v2.V2D(), v1.V2D(), 90);
  __ Fcadd(v31.V2D(), v31.V2D(), v0.V2D(), 270);

  // Addition (10, 10) + (5, 5) == (15, 15)
  __ Fcadd(v30.V2D(), v2.V2D(), v1.V2D(), 270);
  __ Fcadd(v30.V2D(), v30.V2D(), v0.V2D(), 90);

  // 2S
  __ Fcadd(v29.V2S(), v4.V2S(), v5.V2S(), 90);
  __ Fcadd(v28.V2S(), v4.V2S(), v5.V2S(), 270);

  // 4S
  __ Fcadd(v27.V4S(), v3.V4S(), v4.V4S(), 90);
  __ Fcadd(v26.V4S(), v3.V4S(), v4.V4S(), 270);

  END();

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
  RUN();
  ASSERT_EQUAL_128(0x4014000000000000, 0x4014000000000000, q31);
  ASSERT_EQUAL_128(0x402E000000000000, 0x402E000000000000, q30);
  ASSERT_EQUAL_128(0x0, 0x467a0000c89e7ae0, q29); // (16000i, -324567)
  ASSERT_EQUAL_128(0x0, 0xc67a0000489e7ae0, q28); // (-16000i, 324567)
  ASSERT_EQUAL_128(0x4120000000000000, 0x40A0000040A00000, q27);
  ASSERT_EQUAL_128(0x0000000041200000, 0x40A0000040A00000, q26);
#endif
  TEARDOWN();
}


TEST(neon_3same_extra_fcmla) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP, CPUFeatures::kFcma);

  START();

  __ Movi(v1.V2D(), 0x0, 0x40A0000040400000); // (5i, 3) (f)
  __ Movi(v2.V2D(), 0x0, 0x4040000040A00000); // (3i, 5) (f)

  __ Movi(v3.V2D(), 0x0, 0x4000000040400000); // (2i, 3) (f)
  __ Movi(v4.V2D(), 0x0, 0x40E000003F800000); // (7i, 1) (f)

  __ Movi(v5.V2D(), 0x0, 0x4000000040400000); // (2i, 3) (f)
  __ Movi(v6.V2D(), 0x0, 0x408000003F800000); // (4i, 1) (f)

  // (1.5i, 2.5), (31.5i, 1024) (f)
  __ Movi(v7.V2D(), 0x3FC0000040200000, 0x41FC000044800000);
  // (2048i, 412.75), (3645i, 0) (f)
  __ Movi(v8.V2D(), 0x4500000043CE6000, 0x4563D00000000000);
  // (2000i, 450,000) (d)
  __ Movi(v9.V2D(), 0x409F400000000000, 0x411B774000000000);
  // (30,000i, 1250) (d)
  __ Movi(v10.V2D(), 0x40DD4C0000000000, 0x4093880000000000);

  // DST
  __ Movi(v24.V2D(), 0x0, 0x0);
  __ Movi(v25.V2D(), 0x0, 0x0);
  __ Movi(v26.V2D(), 0x0, 0x0);
  __ Movi(v27.V2D(), 0x0, 0x0);
  __ Movi(v28.V2D(), 0x0, 0x0);
  __ Movi(v29.V2D(), 0x0, 0x0);
  __ Movi(v30.V2D(), 0x0, 0x0);
  __ Movi(v31.V2D(), 0x0, 0x0);

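  // FCMLA accumulates a partial complex product selected by the rotation;
  // issuing the #0 and #90 rotations back to back accumulates the full
  // product a * b, which is what the "Full calculations" pairs below rely
  // on.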
  // Full calculations
  __ Fcmla(v31.V2S(), v1.V2S(), v2.V2S(), 90);
  __ Fcmla(v31.V2S(), v1.V2S(), v2.V2S(), 0);

  __ Fcmla(v30.V2S(), v3.V2S(), v4.V2S(), 0);
  __ Fcmla(v30.V2S(), v3.V2S(), v4.V2S(), 90);

  __ Fcmla(v29.V2S(), v5.V2S(), v6.V2S(), 90);
  __ Fcmla(v29.V2S(), v5.V2S(), v6.V2S(), 0);

  __ Fcmla(v28.V2D(), v9.V2D(), v10.V2D(), 0);
  __ Fcmla(v28.V2D(), v9.V2D(), v10.V2D(), 90);

  // Partial checks
  __ Fcmla(v27.V2S(), v1.V2S(), v2.V2S(), 0);
  __ Fcmla(v26.V2S(), v2.V2S(), v1.V2S(), 0);

  __ Fcmla(v25.V4S(), v7.V4S(), v8.V4S(), 270);
  __ Fcmla(v24.V4S(), v7.V4S(), v8.V4S(), 180);

  END();

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
  RUN();

  ASSERT_EQUAL_128(0x0, 0x4208000000000000, q31); // (34i, 0)
  ASSERT_EQUAL_128(0x0, 0x41B80000C1300000, q30); // (23i, -11)
  ASSERT_EQUAL_128(0x0, 0x41600000C0A00000, q29); // (14i, -5)

  // (13502500000i, 502500000)
  ASSERT_EQUAL_128(0x4209267E65000000, 0x41BDF38AA0000000, q28);
  ASSERT_EQUAL_128(0x0, 0x4110000041700000, q27); // (9i, 15)
  ASSERT_EQUAL_128(0x0, 0x41C8000041700000, q26); // (25i, 15)
  // (512i, 1.031875E3), (373248i, 0)
  ASSERT_EQUAL_128(0xc41ac80045400000, 0x0000000047e040c0, q25);
  // (619.125i, -3072), (0i, -114817.5)
  ASSERT_EQUAL_128(0xc5a00000c480fc00, 0xca63d00000000000, q24);
#endif
  TEARDOWN();
}


TEST(neon_byelement_fcmla) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP, CPUFeatures::kFcma);

  START();

  // (5i, 3), (5i, 3) (f)
  __ Movi(v1.V2D(), 0x40A0000040400000, 0x40A0000040400000);
  // (3i, 5), (3i, 5) (f)
  __ Movi(v2.V2D(), 0x4040000040A00000, 0x4040000040A00000);
  // (7i, 1), (5i, 3) (f)
  __ Movi(v3.V2D(), 0x40E000003F800000, 0x40A0000040400000);
  // (4i, 1), (3i, 5) (f)
  __ Movi(v4.V2D(), 0x408000003F800000, 0x4040000040A00000);
  // (4i, 1), (7i, 1) (f)
  __ Movi(v5.V2D(), 0x408000003F800000, 0x40E000003F800000);
  // (2i, 3), (0, 0) (f)
  __ Movi(v6.V2D(), 0x4000000040400000, 0x0);

  // DST
  __ Movi(v22.V2D(), 0x0, 0x0);
  __ Movi(v23.V2D(), 0x0, 0x0);
  __ Movi(v24.V2D(), 0x0, 0x0);
  __ Movi(v25.V2D(), 0x0, 0x0);
  __ Movi(v26.V2D(), 0x0, 0x0);
  __ Movi(v27.V2D(), 0x0, 0x0);
  __ Movi(v28.V2D(), 0x0, 0x0);
  __ Movi(v29.V2D(), 0x0, 0x0);
  __ Movi(v30.V2D(), 0x0, 0x0);
  __ Movi(v31.V2D(), 0x0, 0x0);

  // Full calculation (pairs)
  __ Fcmla(v31.V4S(), v1.V4S(), v2.S(), 0, 90);
  __ Fcmla(v31.V4S(), v1.V4S(), v2.S(), 0, 0);
  __ Fcmla(v30.V4S(), v5.V4S(), v6.S(), 1, 90);
  __ Fcmla(v30.V4S(), v5.V4S(), v6.S(), 1, 0);

  // Rotations
  __ Fcmla(v29.V4S(), v3.V4S(), v4.S(), 1, 0);
  __ Fcmla(v28.V4S(), v3.V4S(), v4.S(), 1, 90);
  __ Fcmla(v27.V4S(), v3.V4S(), v4.S(), 1, 180);
  __ Fcmla(v26.V4S(), v3.V4S(), v4.S(), 1, 270);
  __ Fcmla(v25.V4S(), v3.V4S(), v4.S(), 0, 270);
  __ Fcmla(v24.V4S(), v3.V4S(), v4.S(), 0, 180);
  __ Fcmla(v23.V4S(), v3.V4S(), v4.S(), 0, 90);
  __ Fcmla(v22.V4S(), v3.V4S(), v4.S(), 0, 0);

  END();

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
  RUN();
  // (34i, 0), (34i, 0)
  ASSERT_EQUAL_128(0x4208000000000000, 0x4208000000000000, q31);
  // (14i, -5), (23i, -11)
  ASSERT_EQUAL_128(0x41600000C0A00000, 0x41B80000C1300000, q30);
  // (4i, 1), (12i, 3)
  ASSERT_EQUAL_128(0x408000003f800000, 0x4140000040400000, q29);
  // (7i, -28), (5i, -20)
  ASSERT_EQUAL_128(0x40e00000c1e00000, 0x40a00000c1a00000, q28);
  // (-4i, -1), (-12i, -3)
  ASSERT_EQUAL_128(0xc0800000bf800000, 0xc1400000c0400000, q27);
  // (-7i, 28), (-5i, 20)
  ASSERT_EQUAL_128(0xc0e0000041e00000, 0xc0a0000041a00000, q26);
  // (-35i, 21), (-25i, 15)
  ASSERT_EQUAL_128(0xc20c000041a80000, 0xc1c8000041700000, q25);
  // (-3i, -5), (-9i, -15)
  ASSERT_EQUAL_128(0xc0400000c0a00000, 0xc1100000c1700000, q24);
  // (35i, -21), (25i, -15)
  ASSERT_EQUAL_128(0x420c0000c1a80000, 0x41c80000c1700000, q23);
  // (3i, 5), (9i, 15)
  ASSERT_EQUAL_128(0x4040000040a00000, 0x4110000041700000, q22);
#endif

  TEARDOWN();
}


TEST(neon_2regmisc_mvn) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x00ff00ffff0055aa, 0x55aa55aa55aa55aa);

  __ Mvn(v16.V16B(), v0.V16B());
  __ Mvn(v17.V8H(), v0.V8H());
  __ Mvn(v18.V4S(), v0.V4S());
  __ Mvn(v19.V2D(), v0.V2D());

  __ Mvn(v24.V8B(), v0.V8B());
  __ Mvn(v25.V4H(), v0.V4H());
  __ Mvn(v26.V2S(), v0.V2S());

  END();

  RUN();

  ASSERT_EQUAL_128(0xff00ff0000ffaa55, 0xaa55aa55aa55aa55, q16);
  ASSERT_EQUAL_128(0xff00ff0000ffaa55, 0xaa55aa55aa55aa55, q17);
  ASSERT_EQUAL_128(0xff00ff0000ffaa55, 0xaa55aa55aa55aa55, q18);
  ASSERT_EQUAL_128(0xff00ff0000ffaa55, 0xaa55aa55aa55aa55, q19);

  ASSERT_EQUAL_128(0x0, 0xaa55aa55aa55aa55, q24);
  ASSERT_EQUAL_128(0x0, 0xaa55aa55aa55aa55, q25);
  ASSERT_EQUAL_128(0x0, 0xaa55aa55aa55aa55, q26);
  TEARDOWN();
}


TEST(neon_2regmisc_not) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x00ff00ffff0055aa, 0x55aa55aa55aa55aa);
  __ Movi(v1.V2D(), 0, 0x00ffff0000ffff00);

  __ Not(v16.V16B(), v0.V16B());
  __ Not(v17.V8B(), v1.V8B());
  END();

  RUN();

  ASSERT_EQUAL_128(0xff00ff0000ffaa55, 0xaa55aa55aa55aa55, q16);
  ASSERT_EQUAL_128(0x0, 0xff0000ffff0000ff, q17);
  TEARDOWN();
}


TEST(neon_2regmisc_cls_clz_cnt) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x0001020304050607, 0x08090a0b0c0d0e0f);
  __ Movi(v1.V2D(), 0xfedcba9876543210, 0x0123456789abcdef);

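  // CLS counts the leading bits that match the sign bit, not counting the
  // sign bit itself; CLZ counts leading zeros; CNT is a per-byte population
  // count.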
  __ Cls(v16.V8B(), v1.V8B());
  __ Cls(v17.V16B(), v1.V16B());
  __ Cls(v18.V4H(), v1.V4H());
  __ Cls(v19.V8H(), v1.V8H());
  __ Cls(v20.V2S(), v1.V2S());
  __ Cls(v21.V4S(), v1.V4S());

  __ Clz(v22.V8B(), v0.V8B());
  __ Clz(v23.V16B(), v0.V16B());
  __ Clz(v24.V4H(), v0.V4H());
  __ Clz(v25.V8H(), v0.V8H());
  __ Clz(v26.V2S(), v0.V2S());
  __ Clz(v27.V4S(), v0.V4S());

  __ Cnt(v28.V8B(), v0.V8B());
  __ Cnt(v29.V16B(), v1.V16B());

  END();

  RUN();

  ASSERT_EQUAL_128(0x0000000000000000, 0x0601000000000102, q16);
  ASSERT_EQUAL_128(0x0601000000000102, 0x0601000000000102, q17);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0006000000000001, q18);
  ASSERT_EQUAL_128(0x0006000000000001, 0x0006000000000001, q19);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000600000000, q20);
  ASSERT_EQUAL_128(0x0000000600000000, 0x0000000600000000, q21);

  ASSERT_EQUAL_128(0x0000000000000000, 0x0404040404040404, q22);
  ASSERT_EQUAL_128(0x0807060605050505, 0x0404040404040404, q23);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0004000400040004, q24);
  ASSERT_EQUAL_128(0x000f000600050005, 0x0004000400040004, q25);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000400000004, q26);
  ASSERT_EQUAL_128(0x0000000f00000005, 0x0000000400000004, q27);

  ASSERT_EQUAL_128(0x0000000000000000, 0x0102020302030304, q28);
  ASSERT_EQUAL_128(0x0705050305030301, 0x0103030503050507, q29);

  TEARDOWN();
}

TEST(neon_2regmisc_rev) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x0001020304050607, 0x08090a0b0c0d0e0f);
  __ Movi(v1.V2D(), 0xfedcba9876543210, 0x0123456789abcdef);

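  // REV16/REV32/REV64 reverse the byte order within each 16-, 32- or 64-bit
  // container; RBIT reverses the bit order within each byte.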
  __ Rev16(v16.V8B(), v0.V8B());
  __ Rev16(v17.V16B(), v0.V16B());

  __ Rev32(v18.V8B(), v0.V8B());
  __ Rev32(v19.V16B(), v0.V16B());
  __ Rev32(v20.V4H(), v0.V4H());
  __ Rev32(v21.V8H(), v0.V8H());

  __ Rev64(v22.V8B(), v0.V8B());
  __ Rev64(v23.V16B(), v0.V16B());
  __ Rev64(v24.V4H(), v0.V4H());
  __ Rev64(v25.V8H(), v0.V8H());
  __ Rev64(v26.V2S(), v0.V2S());
  __ Rev64(v27.V4S(), v0.V4S());

  __ Rbit(v28.V8B(), v1.V8B());
  __ Rbit(v29.V16B(), v1.V16B());

  END();

  RUN();

  ASSERT_EQUAL_128(0x0000000000000000, 0x09080b0a0d0c0f0e, q16);
  ASSERT_EQUAL_128(0x0100030205040706, 0x09080b0a0d0c0f0e, q17);

  ASSERT_EQUAL_128(0x0000000000000000, 0x0b0a09080f0e0d0c, q18);
  ASSERT_EQUAL_128(0x0302010007060504, 0x0b0a09080f0e0d0c, q19);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0a0b08090e0f0c0d, q20);
  ASSERT_EQUAL_128(0x0203000106070405, 0x0a0b08090e0f0c0d, q21);

  ASSERT_EQUAL_128(0x0000000000000000, 0x0f0e0d0c0b0a0908, q22);
  ASSERT_EQUAL_128(0x0706050403020100, 0x0f0e0d0c0b0a0908, q23);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0e0f0c0d0a0b0809, q24);
  ASSERT_EQUAL_128(0x0607040502030001, 0x0e0f0c0d0a0b0809, q25);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0c0d0e0f08090a0b, q26);
  ASSERT_EQUAL_128(0x0405060700010203, 0x0c0d0e0f08090a0b, q27);

  ASSERT_EQUAL_128(0x0000000000000000, 0x80c4a2e691d5b3f7, q28);
  ASSERT_EQUAL_128(0x7f3b5d196e2a4c08, 0x80c4a2e691d5b3f7, q29);

  TEARDOWN();
}


TEST(neon_sli) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x0001020304050607, 0x08090a0b0c0d0e0f);
  __ Movi(v1.V2D(), 0xfedcba9876543210, 0x0123456789abcdef);

  __ Mov(v16.V2D(), v0.V2D());
  __ Mov(v17.V2D(), v0.V2D());
  __ Mov(v18.V2D(), v0.V2D());
  __ Mov(v19.V2D(), v0.V2D());
  __ Mov(v20.V2D(), v0.V2D());
  __ Mov(v21.V2D(), v0.V2D());
  __ Mov(v22.V2D(), v0.V2D());
  __ Mov(v23.V2D(), v0.V2D());

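  // SLI shifts each source element left and inserts it into the
  // destination, preserving the destination's low bits below the shift
  // amount.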
  __ Sli(v16.V8B(), v1.V8B(), 4);
  __ Sli(v17.V16B(), v1.V16B(), 7);
  __ Sli(v18.V4H(), v1.V4H(), 8);
  __ Sli(v19.V8H(), v1.V8H(), 15);
  __ Sli(v20.V2S(), v1.V2S(), 0);
  __ Sli(v21.V4S(), v1.V4S(), 31);
  __ Sli(v22.V2D(), v1.V2D(), 48);

  __ Sli(d23, d1, 48);

  END();

  RUN();

  ASSERT_EQUAL_128(0x0000000000000000, 0x18395a7b9cbddeff, q16);
  ASSERT_EQUAL_128(0x0001020304050607, 0x88898a8b8c8d8e8f, q17);
  ASSERT_EQUAL_128(0x0000000000000000, 0x2309670bab0def0f, q18);
  ASSERT_EQUAL_128(0x0001020304050607, 0x88098a0b8c0d8e0f, q19);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0123456789abcdef, q20);
  ASSERT_EQUAL_128(0x0001020304050607, 0x88090a0b8c0d0e0f, q21);
  ASSERT_EQUAL_128(0x3210020304050607, 0xcdef0a0b0c0d0e0f, q22);

  ASSERT_EQUAL_128(0x0000000000000000, 0xcdef0a0b0c0d0e0f, q23);


  TEARDOWN();
}


TEST(neon_sri) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x0001020304050607, 0x08090a0b0c0d0e0f);
  __ Movi(v1.V2D(), 0xfedcba9876543210, 0x0123456789abcdef);

  __ Mov(v16.V2D(), v0.V2D());
  __ Mov(v17.V2D(), v0.V2D());
  __ Mov(v18.V2D(), v0.V2D());
  __ Mov(v19.V2D(), v0.V2D());
  __ Mov(v20.V2D(), v0.V2D());
  __ Mov(v21.V2D(), v0.V2D());
  __ Mov(v22.V2D(), v0.V2D());
  __ Mov(v23.V2D(), v0.V2D());

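  // SRI is the converse of SLI: it shifts each source element right and
  // inserts it, preserving the destination's high bits above the shift
  // amount.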
22947 __ Sri(v16.V8B(), v1.V8B(), 4);
22948 __ Sri(v17.V16B(), v1.V16B(), 7);
22949 __ Sri(v18.V4H(), v1.V4H(), 8);
22950 __ Sri(v19.V8H(), v1.V8H(), 15);
22951 __ Sri(v20.V2S(), v1.V2S(), 1);
22952 __ Sri(v21.V4S(), v1.V4S(), 31);
22953 __ Sri(v22.V2D(), v1.V2D(), 48);
22954
22955 __ Sri(d23, d1, 48);
22956
22957 END();
22958
22959 RUN();
22960
22961 ASSERT_EQUAL_128(0x0000000000000000, 0x00020406080a0c0e, q16);
22962 ASSERT_EQUAL_128(0x0101030304040606, 0x08080a0a0d0d0f0f, q17);
22963 ASSERT_EQUAL_128(0x0000000000000000, 0x08010a450c890ecd, q18);
22964 ASSERT_EQUAL_128(0x0001020304040606, 0x08080a0a0c0d0e0f, q19);
22965 ASSERT_EQUAL_128(0x0000000000000000, 0x0091a2b344d5e6f7, q20);
22966 ASSERT_EQUAL_128(0x0001020304050606, 0x08090a0a0c0d0e0f, q21);
22967 ASSERT_EQUAL_128(0x000102030405fedc, 0x08090a0b0c0d0123, q22);
22968
22969 ASSERT_EQUAL_128(0x0000000000000000, 0x08090a0b0c0d0123, q23);
22970
22971
22972 TEARDOWN();
22973 }
22974
22975
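// SHRN (shift right narrow) shifts each element right and halves the element
// size; the "2" variants write the upper half of the destination, leaving
// the lower half intact. The tests below also cover the rounding (RSHRN) and
// saturating (UQSHRN, SQSHRN, SQSHRUN, and their rounding forms) variants.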
TEST(neon_shrn) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  __ Shrn(v16.V8B(), v0.V8H(), 8);
  __ Shrn2(v16.V16B(), v1.V8H(), 1);
  __ Shrn(v17.V4H(), v1.V4S(), 16);
  __ Shrn2(v17.V8H(), v2.V4S(), 1);
  __ Shrn(v18.V2S(), v3.V2D(), 32);
  __ Shrn2(v18.V4S(), v3.V2D(), 1);

  END();

  RUN();
  ASSERT_EQUAL_128(0x0000ff00ff0000ff, 0x7f00817f80ff0180, q16);
  ASSERT_EQUAL_128(0x0000ffff0000ffff, 0x8000ffffffff0001, q17);
  ASSERT_EQUAL_128(0x00000000ffffffff, 0x800000007fffffff, q18);
  TEARDOWN();
}


TEST(neon_rshrn) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  __ Rshrn(v16.V8B(), v0.V8H(), 8);
  __ Rshrn2(v16.V16B(), v1.V8H(), 1);
  __ Rshrn(v17.V4H(), v1.V4S(), 16);
  __ Rshrn2(v17.V8H(), v2.V4S(), 1);
  __ Rshrn(v18.V2S(), v3.V2D(), 32);
  __ Rshrn2(v18.V4S(), v3.V2D(), 1);

  END();

  RUN();
  ASSERT_EQUAL_128(0x0001000000000100, 0x7f01827f81ff0181, q16);
  ASSERT_EQUAL_128(0x0000000000000000, 0x8001ffffffff0001, q17);
  ASSERT_EQUAL_128(0x0000000100000000, 0x8000000080000000, q18);
  TEARDOWN();
}


TEST(neon_uqshrn) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  __ Uqshrn(v16.V8B(), v0.V8H(), 8);
  __ Uqshrn2(v16.V16B(), v1.V8H(), 1);
  __ Uqshrn(v17.V4H(), v1.V4S(), 16);
  __ Uqshrn2(v17.V8H(), v2.V4S(), 1);
  __ Uqshrn(v18.V2S(), v3.V2D(), 32);
  __ Uqshrn2(v18.V4S(), v3.V2D(), 1);

  __ Uqshrn(b19, h0, 8);
  __ Uqshrn(h20, s1, 16);
  __ Uqshrn(s21, d3, 32);

  END();

  RUN();
  ASSERT_EQUAL_128(0xffffff00ff0000ff, 0x7f00817f80ff0180, q16);
  ASSERT_EQUAL_128(0xffffffff0000ffff, 0x8000ffffffff0001, q17);
  ASSERT_EQUAL_128(0xffffffffffffffff, 0x800000007fffffff, q18);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000080, q19);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000001, q20);
  ASSERT_EQUAL_128(0x0000000000000000, 0x000000007fffffff, q21);
  TEARDOWN();
}


TEST(neon_uqrshrn) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  __ Uqrshrn(v16.V8B(), v0.V8H(), 8);
  __ Uqrshrn2(v16.V16B(), v1.V8H(), 1);
  __ Uqrshrn(v17.V4H(), v1.V4S(), 16);
  __ Uqrshrn2(v17.V8H(), v2.V4S(), 1);
  __ Uqrshrn(v18.V2S(), v3.V2D(), 32);
  __ Uqrshrn2(v18.V4S(), v3.V2D(), 1);

  __ Uqrshrn(b19, h0, 8);
  __ Uqrshrn(h20, s1, 16);
  __ Uqrshrn(s21, d3, 32);

  END();

  RUN();
  ASSERT_EQUAL_128(0xffffff00ff0001ff, 0x7f01827f81ff0181, q16);
  ASSERT_EQUAL_128(0xffffffff0000ffff, 0x8001ffffffff0001, q17);
  ASSERT_EQUAL_128(0xffffffffffffffff, 0x8000000080000000, q18);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000081, q19);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000001, q20);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000080000000, q21);
  TEARDOWN();
}


TEST(neon_sqshrn) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  __ Sqshrn(v16.V8B(), v0.V8H(), 8);
  __ Sqshrn2(v16.V16B(), v1.V8H(), 1);
  __ Sqshrn(v17.V4H(), v1.V4S(), 16);
  __ Sqshrn2(v17.V8H(), v2.V4S(), 1);
  __ Sqshrn(v18.V2S(), v3.V2D(), 32);
  __ Sqshrn2(v18.V4S(), v3.V2D(), 1);

  __ Sqshrn(b19, h0, 8);
  __ Sqshrn(h20, s1, 16);
  __ Sqshrn(s21, d3, 32);

  END();

  RUN();
  ASSERT_EQUAL_128(0x8080ff00ff00007f, 0x7f00817f80ff0180, q16);
  ASSERT_EQUAL_128(0x8000ffff00007fff, 0x8000ffffffff0001, q17);
  ASSERT_EQUAL_128(0x800000007fffffff, 0x800000007fffffff, q18);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000080, q19);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000001, q20);
  ASSERT_EQUAL_128(0x0000000000000000, 0x000000007fffffff, q21);
  TEARDOWN();
}


TEST(neon_sqrshrn) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  __ Sqrshrn(v16.V8B(), v0.V8H(), 8);
  __ Sqrshrn2(v16.V16B(), v1.V8H(), 1);
  __ Sqrshrn(v17.V4H(), v1.V4S(), 16);
  __ Sqrshrn2(v17.V8H(), v2.V4S(), 1);
  __ Sqrshrn(v18.V2S(), v3.V2D(), 32);
  __ Sqrshrn2(v18.V4S(), v3.V2D(), 1);

  __ Sqrshrn(b19, h0, 8);
  __ Sqrshrn(h20, s1, 16);
  __ Sqrshrn(s21, d3, 32);

  END();

  RUN();
  ASSERT_EQUAL_128(0x808000000000017f, 0x7f01827f81ff0181, q16);
  ASSERT_EQUAL_128(0x8000000000007fff, 0x8001ffffffff0001, q17);
  ASSERT_EQUAL_128(0x800000007fffffff, 0x800000007fffffff, q18);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000081, q19);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000001, q20);
  ASSERT_EQUAL_128(0x0000000000000000, 0x000000007fffffff, q21);
  TEARDOWN();
}


TEST(neon_sqshrun) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  __ Sqshrun(v16.V8B(), v0.V8H(), 8);
  __ Sqshrun2(v16.V16B(), v1.V8H(), 1);
  __ Sqshrun(v17.V4H(), v1.V4S(), 16);
  __ Sqshrun2(v17.V8H(), v2.V4S(), 1);
  __ Sqshrun(v18.V2S(), v3.V2D(), 32);
  __ Sqshrun2(v18.V4S(), v3.V2D(), 1);

  __ Sqshrun(b19, h0, 8);
  __ Sqshrun(h20, s1, 16);
  __ Sqshrun(s21, d3, 32);

  END();

  RUN();
  ASSERT_EQUAL_128(0x00000000000000ff, 0x7f00007f00000100, q16);
  ASSERT_EQUAL_128(0x000000000000ffff, 0x0000000000000001, q17);
  ASSERT_EQUAL_128(0x00000000ffffffff, 0x000000007fffffff, q18);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q19);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000001, q20);
  ASSERT_EQUAL_128(0x0000000000000000, 0x000000007fffffff, q21);
  TEARDOWN();
}


TEST(neon_sqrshrun) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  __ Sqrshrun(v16.V8B(), v0.V8H(), 8);
  __ Sqrshrun2(v16.V16B(), v1.V8H(), 1);
  __ Sqrshrun(v17.V4H(), v1.V4S(), 16);
  __ Sqrshrun2(v17.V8H(), v2.V4S(), 1);
  __ Sqrshrun(v18.V2S(), v3.V2D(), 32);
  __ Sqrshrun2(v18.V4S(), v3.V2D(), 1);

  __ Sqrshrun(b19, h0, 8);
  __ Sqrshrun(h20, s1, 16);
  __ Sqrshrun(s21, d3, 32);

  END();

  RUN();
  ASSERT_EQUAL_128(0x00000000000001ff, 0x7f01007f00000100, q16);
  ASSERT_EQUAL_128(0x000000000000ffff, 0x0000000000000001, q17);
  ASSERT_EQUAL_128(0x00000000ffffffff, 0x0000000080000000, q18);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q19);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000001, q20);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000080000000, q21);
  TEARDOWN();
}

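// BIC (vector, immediate) clears, in each element, the bits set in an 8-bit
// immediate left-shifted by a multiple of 8.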
TEST(neon_modimm_bic) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v16.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
  __ Movi(v17.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
  __ Movi(v18.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
  __ Movi(v19.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
  __ Movi(v20.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
  __ Movi(v21.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
  __ Movi(v22.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
  __ Movi(v23.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
  __ Movi(v24.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
  __ Movi(v25.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
  __ Movi(v26.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
  __ Movi(v27.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);

  __ Bic(v16.V4H(), 0x00, 0);
  __ Bic(v17.V4H(), 0xff, 8);
  __ Bic(v18.V8H(), 0x00, 0);
  __ Bic(v19.V8H(), 0xff, 8);

  __ Bic(v20.V2S(), 0x00, 0);
  __ Bic(v21.V2S(), 0xff, 8);
  __ Bic(v22.V2S(), 0x00, 16);
  __ Bic(v23.V2S(), 0xff, 24);

  __ Bic(v24.V4S(), 0xff, 0);
  __ Bic(v25.V4S(), 0x00, 8);
  __ Bic(v26.V4S(), 0xff, 16);
  __ Bic(v27.V4S(), 0x00, 24);

  END();

  RUN();

  ASSERT_EQUAL_128(0x0, 0x5555ffff0000aaaa, q16);
  ASSERT_EQUAL_128(0x0, 0x005500ff000000aa, q17);
  ASSERT_EQUAL_128(0x00aaff55ff0055aa, 0x5555ffff0000aaaa, q18);
  ASSERT_EQUAL_128(0x00aa0055000000aa, 0x005500ff000000aa, q19);

  ASSERT_EQUAL_128(0x0, 0x5555ffff0000aaaa, q20);
  ASSERT_EQUAL_128(0x0, 0x555500ff000000aa, q21);
  ASSERT_EQUAL_128(0x0, 0x5555ffff0000aaaa, q22);
  ASSERT_EQUAL_128(0x0, 0x0055ffff0000aaaa, q23);

  ASSERT_EQUAL_128(0x00aaff00ff005500, 0x5555ff000000aa00, q24);
  ASSERT_EQUAL_128(0x00aaff55ff0055aa, 0x5555ffff0000aaaa, q25);
  ASSERT_EQUAL_128(0x0000ff55ff0055aa, 0x5500ffff0000aaaa, q26);
  ASSERT_EQUAL_128(0x00aaff55ff0055aa, 0x5555ffff0000aaaa, q27);

  TEARDOWN();
}

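// The Movi macro accepts arbitrary 16-, 32- and 64-bit immediates and
// synthesises them even when they cannot be encoded in a single MOVI
// instruction; the next three tests exercise this for each element size.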
TEST(neon_modimm_movi_16bit_any) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V4H(), 0xabab);
  __ Movi(v1.V4H(), 0xab00);
  __ Movi(v2.V4H(), 0xabff);
  __ Movi(v3.V8H(), 0x00ab);
  __ Movi(v4.V8H(), 0xffab);
  __ Movi(v5.V8H(), 0xabcd);

  END();

  RUN();

  ASSERT_EQUAL_128(0x0, 0xabababababababab, q0);
  ASSERT_EQUAL_128(0x0, 0xab00ab00ab00ab00, q1);
  ASSERT_EQUAL_128(0x0, 0xabffabffabffabff, q2);
  ASSERT_EQUAL_128(0x00ab00ab00ab00ab, 0x00ab00ab00ab00ab, q3);
  ASSERT_EQUAL_128(0xffabffabffabffab, 0xffabffabffabffab, q4);
  ASSERT_EQUAL_128(0xabcdabcdabcdabcd, 0xabcdabcdabcdabcd, q5);

  TEARDOWN();
}


TEST(neon_modimm_movi_32bit_any) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2S(), 0x000000ab);
  __ Movi(v1.V2S(), 0x0000ab00);
  __ Movi(v2.V4S(), 0x00ab0000);
  __ Movi(v3.V4S(), 0xab000000);

  __ Movi(v4.V2S(), 0xffffffab);
  __ Movi(v5.V2S(), 0xffffabff);
  __ Movi(v6.V4S(), 0xffabffff);
  __ Movi(v7.V4S(), 0xabffffff);

  __ Movi(v16.V2S(), 0x0000abff);
  __ Movi(v17.V2S(), 0x00abffff);
  __ Movi(v18.V4S(), 0xffab0000);
  __ Movi(v19.V4S(), 0xffffab00);

  __ Movi(v20.V4S(), 0xabababab);
  __ Movi(v21.V4S(), 0xabcdabcd);
  __ Movi(v22.V4S(), 0xabcdef01);
  __ Movi(v23.V4S(), 0x00ffff00);

  END();

  RUN();

  ASSERT_EQUAL_128(0x0, 0x000000ab000000ab, q0);
  ASSERT_EQUAL_128(0x0, 0x0000ab000000ab00, q1);
  ASSERT_EQUAL_128(0x00ab000000ab0000, 0x00ab000000ab0000, q2);
  ASSERT_EQUAL_128(0xab000000ab000000, 0xab000000ab000000, q3);

  ASSERT_EQUAL_128(0x0, 0xffffffabffffffab, q4);
  ASSERT_EQUAL_128(0x0, 0xffffabffffffabff, q5);
  ASSERT_EQUAL_128(0xffabffffffabffff, 0xffabffffffabffff, q6);
  ASSERT_EQUAL_128(0xabffffffabffffff, 0xabffffffabffffff, q7);

  ASSERT_EQUAL_128(0x0, 0x0000abff0000abff, q16);
  ASSERT_EQUAL_128(0x0, 0x00abffff00abffff, q17);
  ASSERT_EQUAL_128(0xffab0000ffab0000, 0xffab0000ffab0000, q18);
  ASSERT_EQUAL_128(0xffffab00ffffab00, 0xffffab00ffffab00, q19);

  ASSERT_EQUAL_128(0xabababababababab, 0xabababababababab, q20);
  ASSERT_EQUAL_128(0xabcdabcdabcdabcd, 0xabcdabcdabcdabcd, q21);
  ASSERT_EQUAL_128(0xabcdef01abcdef01, 0xabcdef01abcdef01, q22);
  ASSERT_EQUAL_128(0x00ffff0000ffff00, 0x00ffff0000ffff00, q23);
  TEARDOWN();
}


TEST(neon_modimm_movi_64bit_any) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V1D(), 0x00ffff0000ffffff);
  __ Movi(v1.V2D(), 0xabababababababab);
  __ Movi(v2.V2D(), 0xabcdabcdabcdabcd);
  __ Movi(v3.V2D(), 0xabcdef01abcdef01);
  __ Movi(v4.V1D(), 0xabcdef0123456789);
  __ Movi(v5.V2D(), 0xabcdef0123456789);

  END();

  RUN();

  ASSERT_EQUAL_64(0x00ffff0000ffffff, d0);
  ASSERT_EQUAL_128(0xabababababababab, 0xabababababababab, q1);
  ASSERT_EQUAL_128(0xabcdabcdabcdabcd, 0xabcdabcdabcdabcd, q2);
  ASSERT_EQUAL_128(0xabcdef01abcdef01, 0xabcdef01abcdef01, q3);
  ASSERT_EQUAL_64(0xabcdef0123456789, d4);
  ASSERT_EQUAL_128(0xabcdef0123456789, 0xabcdef0123456789, q5);

  TEARDOWN();
}

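// MOVI (vector, shifted immediate) replicates an 8-bit immediate shifted by
// LSL (zeros shifted in) or MSL (ones shifted in) into each element.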
TEST(neon_modimm_movi) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V8B(), 0xaa);
  __ Movi(v1.V16B(), 0x55);

  __ Movi(d2, 0x00ffff0000ffffff);
  __ Movi(v3.V2D(), 0x00ffff0000ffffff);

  __ Movi(v16.V4H(), 0x00, LSL, 0);
  __ Movi(v17.V4H(), 0xff, LSL, 8);
  __ Movi(v18.V8H(), 0x00, LSL, 0);
  __ Movi(v19.V8H(), 0xff, LSL, 8);

  __ Movi(v20.V2S(), 0x00, LSL, 0);
  __ Movi(v21.V2S(), 0xff, LSL, 8);
  __ Movi(v22.V2S(), 0x00, LSL, 16);
  __ Movi(v23.V2S(), 0xff, LSL, 24);

  __ Movi(v24.V4S(), 0xff, LSL, 0);
  __ Movi(v25.V4S(), 0x00, LSL, 8);
  __ Movi(v26.V4S(), 0xff, LSL, 16);
  __ Movi(v27.V4S(), 0x00, LSL, 24);

  __ Movi(v28.V2S(), 0xaa, MSL, 8);
  __ Movi(v29.V2S(), 0x55, MSL, 16);
  __ Movi(v30.V4S(), 0xff, MSL, 8);
  __ Movi(v31.V4S(), 0x00, MSL, 16);

  END();

  RUN();

  ASSERT_EQUAL_128(0x0, 0xaaaaaaaaaaaaaaaa, q0);
  ASSERT_EQUAL_128(0x5555555555555555, 0x5555555555555555, q1);

  ASSERT_EQUAL_128(0x0, 0x00ffff0000ffffff, q2);
  ASSERT_EQUAL_128(0x00ffff0000ffffff, 0x00ffff0000ffffff, q3);

  ASSERT_EQUAL_128(0x0, 0x0000000000000000, q16);
  ASSERT_EQUAL_128(0x0, 0xff00ff00ff00ff00, q17);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q18);
  ASSERT_EQUAL_128(0xff00ff00ff00ff00, 0xff00ff00ff00ff00, q19);

  ASSERT_EQUAL_128(0x0, 0x0000000000000000, q20);
  ASSERT_EQUAL_128(0x0, 0x0000ff000000ff00, q21);
  ASSERT_EQUAL_128(0x0, 0x0000000000000000, q22);
  ASSERT_EQUAL_128(0x0, 0xff000000ff000000, q23);

  ASSERT_EQUAL_128(0x000000ff000000ff, 0x000000ff000000ff, q24);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q25);
  ASSERT_EQUAL_128(0x00ff000000ff0000, 0x00ff000000ff0000, q26);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q27);

  ASSERT_EQUAL_128(0x0, 0x0000aaff0000aaff, q28);
  ASSERT_EQUAL_128(0x0, 0x0055ffff0055ffff, q29);
  ASSERT_EQUAL_128(0x0000ffff0000ffff, 0x0000ffff0000ffff, q30);
  ASSERT_EQUAL_128(0x0000ffff0000ffff, 0x0000ffff0000ffff, q31);

  TEARDOWN();
}

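// MVNI is the bitwise inverse of MOVI: each element receives the complement
// of the shifted immediate.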
TEST(neon_modimm_mvni) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Mvni(v16.V4H(), 0x00, LSL, 0);
  __ Mvni(v17.V4H(), 0xff, LSL, 8);
  __ Mvni(v18.V8H(), 0x00, LSL, 0);
  __ Mvni(v19.V8H(), 0xff, LSL, 8);

  __ Mvni(v20.V2S(), 0x00, LSL, 0);
  __ Mvni(v21.V2S(), 0xff, LSL, 8);
  __ Mvni(v22.V2S(), 0x00, LSL, 16);
  __ Mvni(v23.V2S(), 0xff, LSL, 24);

  __ Mvni(v24.V4S(), 0xff, LSL, 0);
  __ Mvni(v25.V4S(), 0x00, LSL, 8);
  __ Mvni(v26.V4S(), 0xff, LSL, 16);
  __ Mvni(v27.V4S(), 0x00, LSL, 24);

  __ Mvni(v28.V2S(), 0xaa, MSL, 8);
  __ Mvni(v29.V2S(), 0x55, MSL, 16);
  __ Mvni(v30.V4S(), 0xff, MSL, 8);
  __ Mvni(v31.V4S(), 0x00, MSL, 16);

  END();

  RUN();

  ASSERT_EQUAL_128(0x0, 0xffffffffffffffff, q16);
  ASSERT_EQUAL_128(0x0, 0x00ff00ff00ff00ff, q17);
  ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, q18);
  ASSERT_EQUAL_128(0x00ff00ff00ff00ff, 0x00ff00ff00ff00ff, q19);

  ASSERT_EQUAL_128(0x0, 0xffffffffffffffff, q20);
  ASSERT_EQUAL_128(0x0, 0xffff00ffffff00ff, q21);
  ASSERT_EQUAL_128(0x0, 0xffffffffffffffff, q22);
  ASSERT_EQUAL_128(0x0, 0x00ffffff00ffffff, q23);

  ASSERT_EQUAL_128(0xffffff00ffffff00, 0xffffff00ffffff00, q24);
  ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, q25);
  ASSERT_EQUAL_128(0xff00ffffff00ffff, 0xff00ffffff00ffff, q26);
  ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, q27);

  ASSERT_EQUAL_128(0x0, 0xffff5500ffff5500, q28);
  ASSERT_EQUAL_128(0x0, 0xffaa0000ffaa0000, q29);
  ASSERT_EQUAL_128(0xffff0000ffff0000, 0xffff0000ffff0000, q30);
  ASSERT_EQUAL_128(0xffff0000ffff0000, 0xffff0000ffff0000, q31);

  TEARDOWN();
}

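// ORR (vector, immediate) sets, in each element, the bits of an 8-bit
// immediate left-shifted by a multiple of 8.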
TEST(neon_modimm_orr) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v16.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
  __ Movi(v17.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
  __ Movi(v18.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
  __ Movi(v19.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
  __ Movi(v20.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
  __ Movi(v21.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
  __ Movi(v22.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
  __ Movi(v23.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
  __ Movi(v24.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
  __ Movi(v25.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
  __ Movi(v26.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
  __ Movi(v27.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);

  __ Orr(v16.V4H(), 0x00, 0);
  __ Orr(v17.V4H(), 0xff, 8);
  __ Orr(v18.V8H(), 0x00, 0);
  __ Orr(v19.V8H(), 0xff, 8);

  __ Orr(v20.V2S(), 0x00, 0);
  __ Orr(v21.V2S(), 0xff, 8);
  __ Orr(v22.V2S(), 0x00, 16);
  __ Orr(v23.V2S(), 0xff, 24);

  __ Orr(v24.V4S(), 0xff, 0);
  __ Orr(v25.V4S(), 0x00, 8);
  __ Orr(v26.V4S(), 0xff, 16);
  __ Orr(v27.V4S(), 0x00, 24);

  END();

  RUN();

  ASSERT_EQUAL_128(0x0, 0x5555ffff0000aaaa, q16);
  ASSERT_EQUAL_128(0x0, 0xff55ffffff00ffaa, q17);
  ASSERT_EQUAL_128(0x00aaff55ff0055aa, 0x5555ffff0000aaaa, q18);
  ASSERT_EQUAL_128(0xffaaff55ff00ffaa, 0xff55ffffff00ffaa, q19);

  ASSERT_EQUAL_128(0x0, 0x5555ffff0000aaaa, q20);
  ASSERT_EQUAL_128(0x0, 0x5555ffff0000ffaa, q21);
  ASSERT_EQUAL_128(0x0, 0x5555ffff0000aaaa, q22);
  ASSERT_EQUAL_128(0x0, 0xff55ffffff00aaaa, q23);

  ASSERT_EQUAL_128(0x00aaffffff0055ff, 0x5555ffff0000aaff, q24);
  ASSERT_EQUAL_128(0x00aaff55ff0055aa, 0x5555ffff0000aaaa, q25);
  ASSERT_EQUAL_128(0x00ffff55ffff55aa, 0x55ffffff00ffaaaa, q26);
  ASSERT_EQUAL_128(0x00aaff55ff0055aa, 0x5555ffff0000aaaa, q27);

  TEARDOWN();
}


// TODO: add arbitrary values once load literal to Q registers is supported.
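// FMOV (vector, immediate) directly encodes only a limited set of FP
// immediates; for values outside that set, the Fmov macro materialises the
// constant by other means.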
TEST(neon_modimm_fmov) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);

  // Immediates which can be encoded in the instructions.
  const float kOne = 1.0f;
  const float kPointFive = 0.5f;
  const double kMinusThirteen = -13.0;
  // Immediates which cannot be encoded in the instructions.
  const float kNonImmFP32 = 255.0f;
  const double kNonImmFP64 = 12.3456;

  START();
  __ Fmov(v11.V2S(), kOne);
  __ Fmov(v12.V4S(), kPointFive);
  __ Fmov(v22.V2D(), kMinusThirteen);
  __ Fmov(v13.V2S(), kNonImmFP32);
  __ Fmov(v14.V4S(), kNonImmFP32);
  __ Fmov(v23.V2D(), kNonImmFP64);
  __ Fmov(v1.V2S(), 0.0);
  __ Fmov(v2.V4S(), 0.0);
  __ Fmov(v3.V2D(), 0.0);
  __ Fmov(v4.V2S(), kFP32PositiveInfinity);
  __ Fmov(v5.V4S(), kFP32PositiveInfinity);
  __ Fmov(v6.V2D(), kFP64PositiveInfinity);
  END();

  RUN();

  const uint64_t kOne1S = FloatToRawbits(1.0);
  const uint64_t kOne2S = (kOne1S << 32) | kOne1S;
  const uint64_t kPointFive1S = FloatToRawbits(0.5);
  const uint64_t kPointFive2S = (kPointFive1S << 32) | kPointFive1S;
  const uint64_t kMinusThirteen1D = DoubleToRawbits(-13.0);
  const uint64_t kNonImmFP321S = FloatToRawbits(kNonImmFP32);
  const uint64_t kNonImmFP322S = (kNonImmFP321S << 32) | kNonImmFP321S;
  const uint64_t kNonImmFP641D = DoubleToRawbits(kNonImmFP64);
  const uint64_t kFP32Inf1S = FloatToRawbits(kFP32PositiveInfinity);
  const uint64_t kFP32Inf2S = (kFP32Inf1S << 32) | kFP32Inf1S;
  const uint64_t kFP64Inf1D = DoubleToRawbits(kFP64PositiveInfinity);

  ASSERT_EQUAL_128(0x0, kOne2S, q11);
  ASSERT_EQUAL_128(kPointFive2S, kPointFive2S, q12);
  ASSERT_EQUAL_128(kMinusThirteen1D, kMinusThirteen1D, q22);
  ASSERT_EQUAL_128(0x0, kNonImmFP322S, q13);
  ASSERT_EQUAL_128(kNonImmFP322S, kNonImmFP322S, q14);
  ASSERT_EQUAL_128(kNonImmFP641D, kNonImmFP641D, q23);
  ASSERT_EQUAL_128(0x0, 0x0, q1);
  ASSERT_EQUAL_128(0x0, 0x0, q2);
  ASSERT_EQUAL_128(0x0, 0x0, q3);
  ASSERT_EQUAL_128(0x0, kFP32Inf2S, q4);
  ASSERT_EQUAL_128(kFP32Inf2S, kFP32Inf2S, q5);
  ASSERT_EQUAL_128(kFP64Inf1D, kFP64Inf1D, q6);

  TEARDOWN();
}

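// TRN1/TRN2 interleave the even-/odd-numbered elements of the two sources,
// ZIP1/ZIP2 interleave their low/high halves, and UZP1/UZP2 extract the
// even-/odd-numbered elements.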
TEST(neon_perm) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x0001020304050607, 0x08090a0b0c0d0e0f);
  __ Movi(v1.V2D(), 0x1011121314151617, 0x18191a1b1c1d1e1f);

  __ Trn1(v16.V16B(), v0.V16B(), v1.V16B());
  __ Trn2(v17.V16B(), v0.V16B(), v1.V16B());
  __ Zip1(v18.V16B(), v0.V16B(), v1.V16B());
  __ Zip2(v19.V16B(), v0.V16B(), v1.V16B());
  __ Uzp1(v20.V16B(), v0.V16B(), v1.V16B());
  __ Uzp2(v21.V16B(), v0.V16B(), v1.V16B());

  END();

  RUN();

  ASSERT_EQUAL_128(0x1101130315051707, 0x19091b0b1d0d1f0f, q16);
  ASSERT_EQUAL_128(0x1000120214041606, 0x18081a0a1c0c1e0e, q17);
  ASSERT_EQUAL_128(0x180819091a0a1b0b, 0x1c0c1d0d1e0e1f0f, q18);
  ASSERT_EQUAL_128(0x1000110112021303, 0x1404150516061707, q19);
  ASSERT_EQUAL_128(0x11131517191b1d1f, 0x01030507090b0d0f, q20);
  ASSERT_EQUAL_128(0x10121416181a1c1e, 0x00020406080a0c0e, q21);

  TEARDOWN();
}

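// DUP (element) broadcasts one vector element to every element of the
// destination; the test after this one covers DUP from a general-purpose
// register.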
TEST(neon_copy_dup_element) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x0011223344556677, 0x8899aabbccddeeff);
  __ Movi(v1.V2D(), 0xffeddccbbaae9988, 0x7766554433221100);
  __ Movi(v2.V2D(), 0xffeddccbbaae9988, 0x0011223344556677);
  __ Movi(v3.V2D(), 0x7766554433221100, 0x8899aabbccddeeff);
  __ Movi(v4.V2D(), 0x7766554433221100, 0x0123456789abcdef);
  __ Movi(v5.V2D(), 0x0011223344556677, 0x0123456789abcdef);

  __ Dup(v16.V16B(), v0.B(), 0);
  __ Dup(v17.V8H(), v1.H(), 7);
  __ Dup(v18.V4S(), v1.S(), 3);
  __ Dup(v19.V2D(), v0.D(), 0);

  __ Dup(v20.V8B(), v0.B(), 0);
  __ Dup(v21.V4H(), v1.H(), 7);
  __ Dup(v22.V2S(), v1.S(), 3);

  __ Dup(v23.B(), v0.B(), 0);
  __ Dup(v24.H(), v1.H(), 7);
  __ Dup(v25.S(), v1.S(), 3);
  __ Dup(v26.D(), v0.D(), 0);

  __ Dup(v2.V16B(), v2.B(), 0);
  __ Dup(v3.V8H(), v3.H(), 7);
  __ Dup(v4.V4S(), v4.S(), 0);
  __ Dup(v5.V2D(), v5.D(), 1);

  END();

  RUN();

  ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, q16);
  ASSERT_EQUAL_128(0xffedffedffedffed, 0xffedffedffedffed, q17);
  ASSERT_EQUAL_128(0xffeddccbffeddccb, 0xffeddccbffeddccb, q18);
  ASSERT_EQUAL_128(0x8899aabbccddeeff, 0x8899aabbccddeeff, q19);

  ASSERT_EQUAL_128(0, 0xffffffffffffffff, q20);
  ASSERT_EQUAL_128(0, 0xffedffedffedffed, q21);
  ASSERT_EQUAL_128(0, 0xffeddccbffeddccb, q22);

  ASSERT_EQUAL_128(0, 0x00000000000000ff, q23);
  ASSERT_EQUAL_128(0, 0x000000000000ffed, q24);
  ASSERT_EQUAL_128(0, 0x00000000ffeddccb, q25);
  ASSERT_EQUAL_128(0, 0x8899aabbccddeeff, q26);

  ASSERT_EQUAL_128(0x7777777777777777, 0x7777777777777777, q2);
  ASSERT_EQUAL_128(0x7766776677667766, 0x7766776677667766, q3);
  ASSERT_EQUAL_128(0x89abcdef89abcdef, 0x89abcdef89abcdef, q4);
  ASSERT_EQUAL_128(0x0011223344556677, 0x0011223344556677, q5);
  TEARDOWN();
}


TEST(neon_copy_dup_general) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Mov(x0, 0x0011223344556677);

  __ Dup(v16.V16B(), w0);
  __ Dup(v17.V8H(), w0);
  __ Dup(v18.V4S(), w0);
  __ Dup(v19.V2D(), x0);

  __ Dup(v20.V8B(), w0);
  __ Dup(v21.V4H(), w0);
  __ Dup(v22.V2S(), w0);

  __ Dup(v2.V16B(), wzr);
  __ Dup(v3.V8H(), wzr);
  __ Dup(v4.V4S(), wzr);
  __ Dup(v5.V2D(), xzr);

  END();

  RUN();

  ASSERT_EQUAL_128(0x7777777777777777, 0x7777777777777777, q16);
  ASSERT_EQUAL_128(0x6677667766776677, 0x6677667766776677, q17);
  ASSERT_EQUAL_128(0x4455667744556677, 0x4455667744556677, q18);
  ASSERT_EQUAL_128(0x0011223344556677, 0x0011223344556677, q19);

  ASSERT_EQUAL_128(0, 0x7777777777777777, q20);
  ASSERT_EQUAL_128(0, 0x6677667766776677, q21);
  ASSERT_EQUAL_128(0, 0x4455667744556677, q22);

  ASSERT_EQUAL_128(0, 0, q2);
  ASSERT_EQUAL_128(0, 0, q3);
  ASSERT_EQUAL_128(0, 0, q4);
  ASSERT_EQUAL_128(0, 0, q5);
  TEARDOWN();
}

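// INS (element) copies a single element between vectors without disturbing
// the other destination elements. Mov (element) is an alias of Ins, and
// Ins (general) copies a value in from a general-purpose register.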
TEST(neon_copy_ins_element) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x0011223344556677, 0x8899aabbccddeeff);
  __ Movi(v1.V2D(), 0xffeddccbbaae9988, 0x7766554433221100);
  __ Movi(v16.V2D(), 0x0123456789abcdef, 0xfedcba9876543210);
  __ Movi(v17.V2D(), 0xfedcba9876543210, 0x0123456789abcdef);
  __ Movi(v18.V2D(), 0x0011223344556677, 0x8899aabbccddeeff);
  __ Movi(v19.V2D(), 0x0011223344556677, 0x8899aabbccddeeff);

  __ Movi(v2.V2D(), 0, 0x0011223344556677);
  __ Movi(v3.V2D(), 0, 0x8899aabbccddeeff);
  __ Movi(v4.V2D(), 0, 0x0123456789abcdef);
  __ Movi(v5.V2D(), 0, 0x0123456789abcdef);

  __ Ins(v16.V16B(), 15, v0.V16B(), 0);
  __ Ins(v17.V8H(), 0, v1.V8H(), 7);
  __ Ins(v18.V4S(), 3, v1.V4S(), 0);
  __ Ins(v19.V2D(), 1, v0.V2D(), 0);

  __ Ins(v2.V16B(), 2, v2.V16B(), 0);
  __ Ins(v3.V8H(), 0, v3.V8H(), 7);
  __ Ins(v4.V4S(), 3, v4.V4S(), 0);
  __ Ins(v5.V2D(), 0, v5.V2D(), 1);

  END();

  RUN();

  ASSERT_EQUAL_128(0xff23456789abcdef, 0xfedcba9876543210, q16);
  ASSERT_EQUAL_128(0xfedcba9876543210, 0x0123456789abffed, q17);
  ASSERT_EQUAL_128(0x3322110044556677, 0x8899aabbccddeeff, q18);
  ASSERT_EQUAL_128(0x8899aabbccddeeff, 0x8899aabbccddeeff, q19);

  ASSERT_EQUAL_128(0, 0x0011223344776677, q2);
  ASSERT_EQUAL_128(0, 0x8899aabbccdd0000, q3);
  ASSERT_EQUAL_128(0x89abcdef00000000, 0x0123456789abcdef, q4);
  ASSERT_EQUAL_128(0, 0, q5);
  TEARDOWN();
}


TEST(neon_copy_mov_element) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x0011223344556677, 0x8899aabbccddeeff);
  __ Movi(v1.V2D(), 0xffeddccbbaae9988, 0x7766554433221100);
  __ Movi(v16.V2D(), 0x0123456789abcdef, 0xfedcba9876543210);
  __ Movi(v17.V2D(), 0xfedcba9876543210, 0x0123456789abcdef);
  __ Movi(v18.V2D(), 0x0011223344556677, 0x8899aabbccddeeff);
  __ Movi(v19.V2D(), 0x0011223344556677, 0x8899aabbccddeeff);

  __ Movi(v2.V2D(), 0, 0x0011223344556677);
  __ Movi(v3.V2D(), 0, 0x8899aabbccddeeff);
  __ Movi(v4.V2D(), 0, 0x0123456789abcdef);
  __ Movi(v5.V2D(), 0, 0x0123456789abcdef);

  __ Mov(v16.V16B(), 15, v0.V16B(), 0);
  __ Mov(v17.V8H(), 0, v1.V8H(), 7);
  __ Mov(v18.V4S(), 3, v1.V4S(), 0);
  __ Mov(v19.V2D(), 1, v0.V2D(), 0);

  __ Mov(v2.V16B(), 2, v2.V16B(), 0);
  __ Mov(v3.V8H(), 0, v3.V8H(), 7);
  __ Mov(v4.V4S(), 3, v4.V4S(), 0);
  __ Mov(v5.V2D(), 0, v5.V2D(), 1);

  END();

  RUN();

  ASSERT_EQUAL_128(0xff23456789abcdef, 0xfedcba9876543210, q16);
  ASSERT_EQUAL_128(0xfedcba9876543210, 0x0123456789abffed, q17);
  ASSERT_EQUAL_128(0x3322110044556677, 0x8899aabbccddeeff, q18);
  ASSERT_EQUAL_128(0x8899aabbccddeeff, 0x8899aabbccddeeff, q19);

  ASSERT_EQUAL_128(0, 0x0011223344776677, q2);
  ASSERT_EQUAL_128(0, 0x8899aabbccdd0000, q3);
  ASSERT_EQUAL_128(0x89abcdef00000000, 0x0123456789abcdef, q4);
  ASSERT_EQUAL_128(0, 0, q5);
  TEARDOWN();
}

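// SMOV sign-extends a vector element into a general-purpose register; UMOV
// zero-extends it. Mov (element to general) is an alias of Umov for S- and
// D-sized elements.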
TEST(neon_copy_smov) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x0123456789abcdef, 0xfedcba9876543210);

  __ Smov(w0, v0.B(), 7);
  __ Smov(w1, v0.B(), 15);

  __ Smov(w2, v0.H(), 0);
  __ Smov(w3, v0.H(), 3);

  __ Smov(x4, v0.B(), 7);
  __ Smov(x5, v0.B(), 15);

  __ Smov(x6, v0.H(), 0);
  __ Smov(x7, v0.H(), 3);

  __ Smov(x16, v0.S(), 0);
  __ Smov(x17, v0.S(), 1);

  END();

  RUN();

  ASSERT_EQUAL_32(0xfffffffe, w0);
  ASSERT_EQUAL_32(0x00000001, w1);
  ASSERT_EQUAL_32(0x00003210, w2);
  ASSERT_EQUAL_32(0xfffffedc, w3);
  ASSERT_EQUAL_64(0xfffffffffffffffe, x4);
  ASSERT_EQUAL_64(0x0000000000000001, x5);
  ASSERT_EQUAL_64(0x0000000000003210, x6);
  ASSERT_EQUAL_64(0xfffffffffffffedc, x7);
  ASSERT_EQUAL_64(0x0000000076543210, x16);
  ASSERT_EQUAL_64(0xfffffffffedcba98, x17);

  TEARDOWN();
}


TEST(neon_copy_umov_mov) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x0123456789abcdef, 0xfedcba9876543210);

  __ Umov(w0, v0.B(), 15);
  __ Umov(w1, v0.H(), 0);
  __ Umov(w2, v0.S(), 3);
  __ Umov(x3, v0.D(), 1);

  __ Mov(w4, v0.S(), 3);
  __ Mov(x5, v0.D(), 1);

  END();

  RUN();

  ASSERT_EQUAL_32(0x00000001, w0);
  ASSERT_EQUAL_32(0x00003210, w1);
  ASSERT_EQUAL_32(0x01234567, w2);
  ASSERT_EQUAL_64(0x0123456789abcdef, x3);
  ASSERT_EQUAL_32(0x01234567, w4);
  ASSERT_EQUAL_64(0x0123456789abcdef, x5);

  TEARDOWN();
}


TEST(neon_copy_ins_general) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Mov(x0, 0x0011223344556677);
  __ Movi(v16.V2D(), 0x0123456789abcdef, 0xfedcba9876543210);
  __ Movi(v17.V2D(), 0xfedcba9876543210, 0x0123456789abcdef);
  __ Movi(v18.V2D(), 0x0011223344556677, 0x8899aabbccddeeff);
  __ Movi(v19.V2D(), 0x0011223344556677, 0x8899aabbccddeeff);

  __ Movi(v2.V2D(), 0, 0x0011223344556677);
  __ Movi(v3.V2D(), 0, 0x8899aabbccddeeff);
  __ Movi(v4.V2D(), 0, 0x0123456789abcdef);
  __ Movi(v5.V2D(), 0, 0x0123456789abcdef);

  __ Ins(v16.V16B(), 15, w0);
  __ Ins(v17.V8H(), 0, w0);
  __ Ins(v18.V4S(), 3, w0);
  __ Ins(v19.V2D(), 0, x0);

  __ Ins(v2.V16B(), 2, w0);
  __ Ins(v3.V8H(), 0, w0);
  __ Ins(v4.V4S(), 3, w0);
  __ Ins(v5.V2D(), 1, x0);

  END();

  RUN();

  ASSERT_EQUAL_128(0x7723456789abcdef, 0xfedcba9876543210, q16);
  ASSERT_EQUAL_128(0xfedcba9876543210, 0x0123456789ab6677, q17);
  ASSERT_EQUAL_128(0x4455667744556677, 0x8899aabbccddeeff, q18);
  ASSERT_EQUAL_128(0x0011223344556677, 0x0011223344556677, q19);

  ASSERT_EQUAL_128(0, 0x0011223344776677, q2);
  ASSERT_EQUAL_128(0, 0x8899aabbccdd6677, q3);
  ASSERT_EQUAL_128(0x4455667700000000, 0x0123456789abcdef, q4);
  ASSERT_EQUAL_128(0x0011223344556677, 0x0123456789abcdef, q5);
  TEARDOWN();
}

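// EXT extracts a vector from the byte-wise concatenation of two source
// vectors, starting at the given byte offset.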
TEST(neon_extract_ext) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x0011223344556677, 0x8899aabbccddeeff);
  __ Movi(v1.V2D(), 0xffeddccbbaae9988, 0x7766554433221100);

  __ Movi(v2.V2D(), 0, 0x0011223344556677);
  __ Movi(v3.V2D(), 0, 0x8899aabbccddeeff);

  __ Ext(v16.V16B(), v0.V16B(), v1.V16B(), 0);
  __ Ext(v17.V16B(), v0.V16B(), v1.V16B(), 15);
  __ Ext(v1.V16B(), v0.V16B(), v1.V16B(), 8);  // The destination is one of the sources.
  __ Ext(v0.V16B(), v0.V16B(), v0.V16B(), 8);  // All registers are the same.

  __ Ext(v18.V8B(), v2.V8B(), v3.V8B(), 0);
  __ Ext(v19.V8B(), v2.V8B(), v3.V8B(), 7);
  __ Ext(v2.V8B(), v2.V8B(), v3.V8B(), 4);  // The destination is one of the sources.
  __ Ext(v3.V8B(), v3.V8B(), v3.V8B(), 4);  // All registers are the same.

  END();

  RUN();

  ASSERT_EQUAL_128(0x0011223344556677, 0x8899aabbccddeeff, q16);
  ASSERT_EQUAL_128(0xeddccbbaae998877, 0x6655443322110000, q17);
  ASSERT_EQUAL_128(0x7766554433221100, 0x0011223344556677, q1);
  ASSERT_EQUAL_128(0x8899aabbccddeeff, 0x0011223344556677, q0);

  ASSERT_EQUAL_128(0, 0x0011223344556677, q18);
  ASSERT_EQUAL_128(0, 0x99aabbccddeeff00, q19);
  ASSERT_EQUAL_128(0, 0xccddeeff00112233, q2);
  ASSERT_EQUAL_128(0, 0xccddeeff8899aabb, q3);
  TEARDOWN();
}

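// UADDL adds the unsigned elements of the lower halves of two narrow vectors
// and produces a double-width result.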
TEST(neon_3different_uaddl) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x0000000000000000, 0x0000000000000000);
  __ Movi(v1.V2D(), 0, 0x00010280810e0fff);
  __ Movi(v2.V2D(), 0, 0x0101010101010101);

  __ Movi(v3.V2D(), 0x0000000000000000, 0x0000000000000000);
  __ Movi(v4.V2D(), 0x0000000000000000, 0x0000000000000000);
  __ Movi(v5.V2D(), 0, 0x0000000180008001);
  __ Movi(v6.V2D(), 0, 0x000e000ff000ffff);
  __ Movi(v7.V2D(), 0, 0x0001000100010001);

  __ Movi(v16.V2D(), 0x0000000000000000, 0x0000000000000000);
  __ Movi(v17.V2D(), 0x0000000000000000, 0x0000000000000000);
  __ Movi(v18.V2D(), 0, 0x0000000000000001);
  __ Movi(v19.V2D(), 0, 0x80000001ffffffff);
  __ Movi(v20.V2D(), 0, 0x0000000100000001);

  __ Uaddl(v0.V8H(), v1.V8B(), v2.V8B());

  __ Uaddl(v3.V4S(), v5.V4H(), v7.V4H());
  __ Uaddl(v4.V4S(), v6.V4H(), v7.V4H());

  __ Uaddl(v16.V2D(), v18.V2S(), v20.V2S());
  __ Uaddl(v17.V2D(), v19.V2S(), v20.V2S());


  END();

  RUN();

  ASSERT_EQUAL_128(0x0001000200030081, 0x0082000f00100100, q0);
  ASSERT_EQUAL_128(0x0000000100000002, 0x0000800100008002, q3);
  ASSERT_EQUAL_128(0x0000000f00000010, 0x0000f00100010000, q4);
  ASSERT_EQUAL_128(0x0000000000000001, 0x0000000000000002, q16);
  ASSERT_EQUAL_128(0x0000000080000002, 0x0000000100000000, q17);
  TEARDOWN();
}

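// ADDHN/SUBHN return the most-significant half of each sum/difference; the
// R-prefixed forms round before narrowing, and the "2" forms write the upper
// half of the destination.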
TEST(neon_3different_addhn_subhn) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  __ Addhn(v16.V8B(), v0.V8H(), v1.V8H());
  __ Addhn2(v16.V16B(), v2.V8H(), v3.V8H());
  __ Raddhn(v17.V8B(), v0.V8H(), v1.V8H());
  __ Raddhn2(v17.V16B(), v2.V8H(), v3.V8H());
  __ Subhn(v18.V8B(), v0.V8H(), v1.V8H());
  __ Subhn2(v18.V16B(), v2.V8H(), v3.V8H());
  __ Rsubhn(v19.V8B(), v0.V8H(), v1.V8H());
  __ Rsubhn2(v19.V16B(), v2.V8H(), v3.V8H());

  END();

  RUN();

  ASSERT_EQUAL_128(0x0000ff007fff7fff, 0xff81817f80ff0100, q16);
  ASSERT_EQUAL_128(0x0000000080008000, 0xff81817f81ff0201, q17);
  ASSERT_EQUAL_128(0x0000ffff80008000, 0xff80817f80ff0100, q18);
  ASSERT_EQUAL_128(0x0000000080008000, 0xff81827f81ff0101, q19);
  TEARDOWN();
}

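// Scalar integer ADD, SUB, SHL, USHR, SSHR, USHL and SSHL only support
// D-sized operands; USHL/SSHL take a signed shift amount from the second
// register, so negative values shift right.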
TEST(neon_d_only_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0xaaaaaaaaaaaaaaaa, 0xf0000000f000f0f0);
  __ Movi(v1.V2D(), 0x5555555555555555, 0x7fffffff7fff7f7f);
  __ Movi(v2.V2D(), 0xaaaaaaaaaaaaaaaa, 0x1000000010001010);
  __ Movi(v3.V2D(), 0xffffffffffffffff, 2);
  __ Movi(v4.V2D(), 0xffffffffffffffff, -2);

  __ Add(d16, d0, d0);
  __ Add(d17, d1, d1);
  __ Add(d18, d2, d2);
  __ Sub(d19, d0, d0);
  __ Sub(d20, d0, d1);
  __ Sub(d21, d1, d0);
  __ Ushl(d22, d0, d3);
  __ Ushl(d23, d0, d4);
  __ Sshl(d24, d0, d3);
  __ Sshl(d25, d0, d4);
  __ Ushr(d26, d0, 1);
  __ Sshr(d27, d0, 3);
  __ Shl(d28, d0, 0);
  __ Shl(d29, d0, 16);

  END();

  RUN();

  ASSERT_EQUAL_128(0, 0xe0000001e001e1e0, q16);
  ASSERT_EQUAL_128(0, 0xfffffffefffefefe, q17);
  ASSERT_EQUAL_128(0, 0x2000000020002020, q18);
  ASSERT_EQUAL_128(0, 0, q19);
  ASSERT_EQUAL_128(0, 0x7000000170017171, q20);
  ASSERT_EQUAL_128(0, 0x8ffffffe8ffe8e8f, q21);
  ASSERT_EQUAL_128(0, 0xc0000003c003c3c0, q22);
  ASSERT_EQUAL_128(0, 0x3c0000003c003c3c, q23);
  ASSERT_EQUAL_128(0, 0xc0000003c003c3c0, q24);
  ASSERT_EQUAL_128(0, 0xfc0000003c003c3c, q25);
  ASSERT_EQUAL_128(0, 0x7800000078007878, q26);
  ASSERT_EQUAL_128(0, 0xfe0000001e001e1e, q27);
  ASSERT_EQUAL_128(0, 0xf0000000f000f0f0, q28);
  ASSERT_EQUAL_128(0, 0x0000f000f0f00000, q29);

  TEARDOWN();
}

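// SQSHL and UQSHL (immediate) shift left and saturate to the signed or
// unsigned range of the element; SQSHLU shifts a signed value and saturates
// it to the unsigned range. The next three tests cover the scalar immediate
// forms.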
TEST(neon_sqshl_imm_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x0, 0x7f);
  __ Movi(v1.V2D(), 0x0, 0x80);
  __ Movi(v2.V2D(), 0x0, 0x01);
  __ Sqshl(b16, b0, 1);
  __ Sqshl(b17, b1, 1);
  __ Sqshl(b18, b2, 1);

  __ Movi(v0.V2D(), 0x0, 0x7fff);
  __ Movi(v1.V2D(), 0x0, 0x8000);
  __ Movi(v2.V2D(), 0x0, 0x0001);
  __ Sqshl(h19, h0, 1);
  __ Sqshl(h20, h1, 1);
  __ Sqshl(h21, h2, 1);

  __ Movi(v0.V2D(), 0x0, 0x7fffffff);
  __ Movi(v1.V2D(), 0x0, 0x80000000);
  __ Movi(v2.V2D(), 0x0, 0x00000001);
  __ Sqshl(s22, s0, 1);
  __ Sqshl(s23, s1, 1);
  __ Sqshl(s24, s2, 1);

  __ Movi(v0.V2D(), 0x0, 0x7fffffffffffffff);
  __ Movi(v1.V2D(), 0x0, 0x8000000000000000);
  __ Movi(v2.V2D(), 0x0, 0x0000000000000001);
  __ Sqshl(d25, d0, 1);
  __ Sqshl(d26, d1, 1);
  __ Sqshl(d27, d2, 1);

  END();

  RUN();

  ASSERT_EQUAL_128(0, 0x7f, q16);
  ASSERT_EQUAL_128(0, 0x80, q17);
  ASSERT_EQUAL_128(0, 0x02, q18);

  ASSERT_EQUAL_128(0, 0x7fff, q19);
  ASSERT_EQUAL_128(0, 0x8000, q20);
  ASSERT_EQUAL_128(0, 0x0002, q21);

  ASSERT_EQUAL_128(0, 0x7fffffff, q22);
  ASSERT_EQUAL_128(0, 0x80000000, q23);
  ASSERT_EQUAL_128(0, 0x00000002, q24);

  ASSERT_EQUAL_128(0, 0x7fffffffffffffff, q25);
  ASSERT_EQUAL_128(0, 0x8000000000000000, q26);
  ASSERT_EQUAL_128(0, 0x0000000000000002, q27);

  TEARDOWN();
}


TEST(neon_uqshl_imm_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x0, 0x7f);
  __ Movi(v1.V2D(), 0x0, 0x80);
  __ Movi(v2.V2D(), 0x0, 0x01);
  __ Uqshl(b16, b0, 1);
  __ Uqshl(b17, b1, 1);
  __ Uqshl(b18, b2, 1);

  __ Movi(v0.V2D(), 0x0, 0x7fff);
  __ Movi(v1.V2D(), 0x0, 0x8000);
  __ Movi(v2.V2D(), 0x0, 0x0001);
  __ Uqshl(h19, h0, 1);
  __ Uqshl(h20, h1, 1);
  __ Uqshl(h21, h2, 1);

  __ Movi(v0.V2D(), 0x0, 0x7fffffff);
  __ Movi(v1.V2D(), 0x0, 0x80000000);
  __ Movi(v2.V2D(), 0x0, 0x00000001);
  __ Uqshl(s22, s0, 1);
  __ Uqshl(s23, s1, 1);
  __ Uqshl(s24, s2, 1);

  __ Movi(v0.V2D(), 0x0, 0x7fffffffffffffff);
  __ Movi(v1.V2D(), 0x0, 0x8000000000000000);
  __ Movi(v2.V2D(), 0x0, 0x0000000000000001);
  __ Uqshl(d25, d0, 1);
  __ Uqshl(d26, d1, 1);
  __ Uqshl(d27, d2, 1);

  END();

  RUN();

  ASSERT_EQUAL_128(0, 0xfe, q16);
  ASSERT_EQUAL_128(0, 0xff, q17);
  ASSERT_EQUAL_128(0, 0x02, q18);

  ASSERT_EQUAL_128(0, 0xfffe, q19);
  ASSERT_EQUAL_128(0, 0xffff, q20);
  ASSERT_EQUAL_128(0, 0x0002, q21);

  ASSERT_EQUAL_128(0, 0xfffffffe, q22);
  ASSERT_EQUAL_128(0, 0xffffffff, q23);
  ASSERT_EQUAL_128(0, 0x00000002, q24);

  ASSERT_EQUAL_128(0, 0xfffffffffffffffe, q25);
  ASSERT_EQUAL_128(0, 0xffffffffffffffff, q26);
  ASSERT_EQUAL_128(0, 0x0000000000000002, q27);

  TEARDOWN();
}


TEST(neon_sqshlu_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x0, 0x7f);
  __ Movi(v1.V2D(), 0x0, 0x80);
  __ Movi(v2.V2D(), 0x0, 0x01);
  __ Sqshlu(b16, b0, 2);
  __ Sqshlu(b17, b1, 2);
  __ Sqshlu(b18, b2, 2);

  __ Movi(v0.V2D(), 0x0, 0x7fff);
  __ Movi(v1.V2D(), 0x0, 0x8000);
  __ Movi(v2.V2D(), 0x0, 0x0001);
  __ Sqshlu(h19, h0, 2);
  __ Sqshlu(h20, h1, 2);
  __ Sqshlu(h21, h2, 2);

  __ Movi(v0.V2D(), 0x0, 0x7fffffff);
  __ Movi(v1.V2D(), 0x0, 0x80000000);
  __ Movi(v2.V2D(), 0x0, 0x00000001);
  __ Sqshlu(s22, s0, 2);
  __ Sqshlu(s23, s1, 2);
  __ Sqshlu(s24, s2, 2);

  __ Movi(v0.V2D(), 0x0, 0x7fffffffffffffff);
  __ Movi(v1.V2D(), 0x0, 0x8000000000000000);
  __ Movi(v2.V2D(), 0x0, 0x0000000000000001);
  __ Sqshlu(d25, d0, 2);
  __ Sqshlu(d26, d1, 2);
  __ Sqshlu(d27, d2, 2);

  END();

  RUN();

  ASSERT_EQUAL_128(0, 0xff, q16);
  ASSERT_EQUAL_128(0, 0x00, q17);
  ASSERT_EQUAL_128(0, 0x04, q18);

  ASSERT_EQUAL_128(0, 0xffff, q19);
  ASSERT_EQUAL_128(0, 0x0000, q20);
  ASSERT_EQUAL_128(0, 0x0004, q21);

  ASSERT_EQUAL_128(0, 0xffffffff, q22);
  ASSERT_EQUAL_128(0, 0x00000000, q23);
  ASSERT_EQUAL_128(0, 0x00000004, q24);

  ASSERT_EQUAL_128(0, 0xffffffffffffffff, q25);
  ASSERT_EQUAL_128(0, 0x0000000000000000, q26);
  ASSERT_EQUAL_128(0, 0x0000000000000004, q27);

  TEARDOWN();
}

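// SSHLL/USHLL sign-/zero-extend each element to double width and then shift
// it left by the immediate; SHLL shifts left by exactly the source element
// size. SXTL and UXTL (tested further below) are aliases of SSHLL/USHLL with
// a zero shift.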
TEST(neon_sshll) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);

  __ Sshll(v16.V8H(), v0.V8B(), 4);
  __ Sshll2(v17.V8H(), v0.V16B(), 4);

  __ Sshll(v18.V4S(), v1.V4H(), 8);
  __ Sshll2(v19.V4S(), v1.V8H(), 8);

  __ Sshll(v20.V2D(), v2.V2S(), 16);
  __ Sshll2(v21.V2D(), v2.V4S(), 16);

  END();

  RUN();

  ASSERT_EQUAL_128(0xf800f810fff00000, 0x001007f0f800f810, q16);
  ASSERT_EQUAL_128(0x07f000100000fff0, 0xf810f80007f00010, q17);
  ASSERT_EQUAL_128(0xffffff0000000000, 0x00000100007fff00, q18);
  ASSERT_EQUAL_128(0xff800000ff800100, 0xffffff0000000000, q19);
  ASSERT_EQUAL_128(0x0000000000000000, 0x00007fffffff0000, q20);
  ASSERT_EQUAL_128(0xffff800000000000, 0xffffffffffff0000, q21);
  TEARDOWN();
}

TEST(neon_shll) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);

  __ Shll(v16.V8H(), v0.V8B(), 8);
  __ Shll2(v17.V8H(), v0.V16B(), 8);

  __ Shll(v18.V4S(), v1.V4H(), 16);
  __ Shll2(v19.V4S(), v1.V8H(), 16);

  __ Shll(v20.V2D(), v2.V2S(), 32);
  __ Shll2(v21.V2D(), v2.V4S(), 32);

  END();

  RUN();

  ASSERT_EQUAL_128(0x80008100ff000000, 0x01007f0080008100, q16);
  ASSERT_EQUAL_128(0x7f0001000000ff00, 0x810080007f000100, q17);
  ASSERT_EQUAL_128(0xffff000000000000, 0x000100007fff0000, q18);
  ASSERT_EQUAL_128(0x8000000080010000, 0xffff000000000000, q19);
  ASSERT_EQUAL_128(0x0000000000000000, 0x7fffffff00000000, q20);
  ASSERT_EQUAL_128(0x8000000000000000, 0xffffffff00000000, q21);
  TEARDOWN();
}

TEST(neon_ushll) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);

  __ Ushll(v16.V8H(), v0.V8B(), 4);
  __ Ushll2(v17.V8H(), v0.V16B(), 4);

  __ Ushll(v18.V4S(), v1.V4H(), 8);
  __ Ushll2(v19.V4S(), v1.V8H(), 8);

  __ Ushll(v20.V2D(), v2.V2S(), 16);
  __ Ushll2(v21.V2D(), v2.V4S(), 16);

  END();

  RUN();

  ASSERT_EQUAL_128(0x080008100ff00000, 0x001007f008000810, q16);
  ASSERT_EQUAL_128(0x07f0001000000ff0, 0x0810080007f00010, q17);
  ASSERT_EQUAL_128(0x00ffff0000000000, 0x00000100007fff00, q18);
  ASSERT_EQUAL_128(0x0080000000800100, 0x00ffff0000000000, q19);
  ASSERT_EQUAL_128(0x0000000000000000, 0x00007fffffff0000, q20);
  ASSERT_EQUAL_128(0x0000800000000000, 0x0000ffffffff0000, q21);
  TEARDOWN();
}


TEST(neon_sxtl) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);

  __ Sxtl(v16.V8H(), v0.V8B());
  __ Sxtl2(v17.V8H(), v0.V16B());

  __ Sxtl(v18.V4S(), v1.V4H());
  __ Sxtl2(v19.V4S(), v1.V8H());

  __ Sxtl(v20.V2D(), v2.V2S());
  __ Sxtl2(v21.V2D(), v2.V4S());

  END();

  RUN();

  ASSERT_EQUAL_128(0xff80ff81ffff0000, 0x0001007fff80ff81, q16);
  ASSERT_EQUAL_128(0x007f00010000ffff, 0xff81ff80007f0001, q17);
  ASSERT_EQUAL_128(0xffffffff00000000, 0x0000000100007fff, q18);
  ASSERT_EQUAL_128(0xffff8000ffff8001, 0xffffffff00000000, q19);
  ASSERT_EQUAL_128(0x0000000000000000, 0x000000007fffffff, q20);
  ASSERT_EQUAL_128(0xffffffff80000000, 0xffffffffffffffff, q21);
  TEARDOWN();
}


TEST(neon_uxtl) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);

  __ Uxtl(v16.V8H(), v0.V8B());
  __ Uxtl2(v17.V8H(), v0.V16B());

  __ Uxtl(v18.V4S(), v1.V4H());
  __ Uxtl2(v19.V4S(), v1.V8H());

  __ Uxtl(v20.V2D(), v2.V2S());
  __ Uxtl2(v21.V2D(), v2.V4S());

  END();

  RUN();

  ASSERT_EQUAL_128(0x0080008100ff0000, 0x0001007f00800081, q16);
  ASSERT_EQUAL_128(0x007f0001000000ff, 0x00810080007f0001, q17);
  ASSERT_EQUAL_128(0x0000ffff00000000, 0x0000000100007fff, q18);
  ASSERT_EQUAL_128(0x0000800000008001, 0x0000ffff00000000, q19);
  ASSERT_EQUAL_128(0x0000000000000000, 0x000000007fffffff, q20);
  ASSERT_EQUAL_128(0x0000000080000000, 0x00000000ffffffff, q21);
  TEARDOWN();
}

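// SSRA/USRA shift each source element right (signed/unsigned) and accumulate
// the result into the destination; SRSRA/URSRA (tested below) round before
// accumulating.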
TEST(neon_ssra)24456 TEST(neon_ssra) {
24457 SETUP_WITH_FEATURES(CPUFeatures::kNEON);
24458
24459 START();
24460
24461 __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
24462 __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
24463 __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
24464 __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
24465 __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);
24466
24467 __ Mov(v16.V2D(), v0.V2D());
24468 __ Mov(v17.V2D(), v0.V2D());
24469 __ Mov(v18.V2D(), v1.V2D());
24470 __ Mov(v19.V2D(), v1.V2D());
24471 __ Mov(v20.V2D(), v2.V2D());
24472 __ Mov(v21.V2D(), v2.V2D());
24473 __ Mov(v22.V2D(), v3.V2D());
24474 __ Mov(v23.V2D(), v4.V2D());
24475 __ Mov(v24.V2D(), v3.V2D());
24476 __ Mov(v25.V2D(), v4.V2D());
24477
24478 __ Ssra(v16.V8B(), v0.V8B(), 4);
24479 __ Ssra(v17.V16B(), v0.V16B(), 4);
24480
24481 __ Ssra(v18.V4H(), v1.V4H(), 8);
24482 __ Ssra(v19.V8H(), v1.V8H(), 8);
24483
24484 __ Ssra(v20.V2S(), v2.V2S(), 16);
24485 __ Ssra(v21.V4S(), v2.V4S(), 16);
24486
24487 __ Ssra(v22.V2D(), v3.V2D(), 32);
24488 __ Ssra(v23.V2D(), v4.V2D(), 32);
24489
24490 __ Ssra(d24, d3, 48);
24491
24492 END();
24493
24494 RUN();
24495
24496 ASSERT_EQUAL_128(0x0000000000000000, 0x7879fe0001867879, q16);
24497 ASSERT_EQUAL_128(0x860100fe79788601, 0x7879fe0001867879, q17);
24498 ASSERT_EQUAL_128(0x0000000000000000, 0xfffe00000001807e, q18);
24499 ASSERT_EQUAL_128(0x7f807f81fffe0000, 0xfffe00000001807e, q19);
24500 ASSERT_EQUAL_128(0x0000000000000000, 0x0000000080007ffe, q20);
24501 ASSERT_EQUAL_128(0x7fff8000fffffffe, 0x0000000080007ffe, q21);
24502 ASSERT_EQUAL_128(0x7fffffff80000001, 0x800000007ffffffe, q22);
24503 ASSERT_EQUAL_128(0x7fffffff80000000, 0x0000000000000000, q23);
24504 ASSERT_EQUAL_128(0x0000000000000000, 0x8000000000007ffe, q24);
24505 TEARDOWN();
24506 }
24507
TEST(neon_srsra)24508 TEST(neon_srsra) {
24509 SETUP_WITH_FEATURES(CPUFeatures::kNEON);
24510
24511 START();
24512
24513 __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
24514 __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
24515 __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
24516 __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
24517 __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);
24518
24519 __ Mov(v16.V2D(), v0.V2D());
24520 __ Mov(v17.V2D(), v0.V2D());
24521 __ Mov(v18.V2D(), v1.V2D());
24522 __ Mov(v19.V2D(), v1.V2D());
24523 __ Mov(v20.V2D(), v2.V2D());
24524 __ Mov(v21.V2D(), v2.V2D());
24525 __ Mov(v22.V2D(), v3.V2D());
24526 __ Mov(v23.V2D(), v4.V2D());
24527 __ Mov(v24.V2D(), v3.V2D());
24528 __ Mov(v25.V2D(), v4.V2D());
24529
24530 __ Srsra(v16.V8B(), v0.V8B(), 4);
24531 __ Srsra(v17.V16B(), v0.V16B(), 4);
24532
24533 __ Srsra(v18.V4H(), v1.V4H(), 8);
24534 __ Srsra(v19.V8H(), v1.V8H(), 8);
24535
24536 __ Srsra(v20.V2S(), v2.V2S(), 16);
24537 __ Srsra(v21.V4S(), v2.V4S(), 16);
24538
24539 __ Srsra(v22.V2D(), v3.V2D(), 32);
24540 __ Srsra(v23.V2D(), v4.V2D(), 32);
24541
24542 __ Srsra(d24, d3, 48);
24543
24544 END();
24545
24546 RUN();
24547
24548 ASSERT_EQUAL_128(0x0000000000000000, 0x7879ff0001877879, q16);
24549 ASSERT_EQUAL_128(0x870100ff79788701, 0x7879ff0001877879, q17);
24550 ASSERT_EQUAL_128(0x0000000000000000, 0xffff00000001807f, q18);
24551 ASSERT_EQUAL_128(0x7f807f81ffff0000, 0xffff00000001807f, q19);
24552 ASSERT_EQUAL_128(0x0000000000000000, 0x0000000080007fff, q20);
24553 ASSERT_EQUAL_128(0x7fff8000ffffffff, 0x0000000080007fff, q21);
24554 ASSERT_EQUAL_128(0x7fffffff80000001, 0x800000007fffffff, q22);
24555 ASSERT_EQUAL_128(0x7fffffff80000000, 0x0000000000000000, q23);
24556 ASSERT_EQUAL_128(0x0000000000000000, 0x8000000000007fff, q24);
24557
24558 TEARDOWN();
24559 }
24560
TEST(neon_usra) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  __ Mov(v16.V2D(), v0.V2D());
  __ Mov(v17.V2D(), v0.V2D());
  __ Mov(v18.V2D(), v1.V2D());
  __ Mov(v19.V2D(), v1.V2D());
  __ Mov(v20.V2D(), v2.V2D());
  __ Mov(v21.V2D(), v2.V2D());
  __ Mov(v22.V2D(), v3.V2D());
  __ Mov(v23.V2D(), v4.V2D());
  __ Mov(v24.V2D(), v3.V2D());
  __ Mov(v25.V2D(), v4.V2D());

  __ Usra(v16.V8B(), v0.V8B(), 4);
  __ Usra(v17.V16B(), v0.V16B(), 4);

  __ Usra(v18.V4H(), v1.V4H(), 8);
  __ Usra(v19.V8H(), v1.V8H(), 8);

  __ Usra(v20.V2S(), v2.V2S(), 16);
  __ Usra(v21.V4S(), v2.V4S(), 16);

  __ Usra(v22.V2D(), v3.V2D(), 32);
  __ Usra(v23.V2D(), v4.V2D(), 32);

  __ Usra(d24, d3, 48);

  END();

  RUN();

  ASSERT_EQUAL_128(0x0000000000000000, 0x88890e0001868889, q16);
  ASSERT_EQUAL_128(0x8601000e89888601, 0x88890e0001868889, q17);
  ASSERT_EQUAL_128(0x0000000000000000, 0x00fe00000001807e, q18);
  ASSERT_EQUAL_128(0x8080808100fe0000, 0x00fe00000001807e, q19);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000080007ffe, q20);
  ASSERT_EQUAL_128(0x800080000000fffe, 0x0000000080007ffe, q21);
  ASSERT_EQUAL_128(0x8000000080000001, 0x800000007ffffffe, q22);
  ASSERT_EQUAL_128(0x8000000080000000, 0x0000000000000000, q23);
  ASSERT_EQUAL_128(0x0000000000000000, 0x8000000000007ffe, q24);

  TEARDOWN();
}

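// URSRA: unsigned rounding shift right by immediate and accumulate; the
// unsigned counterpart of SRSRA, adding 1 << (shift - 1) before the logical
// right shift.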
TEST(neon_ursra) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
  __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
  __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
  __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
  __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);

  __ Mov(v16.V2D(), v0.V2D());
  __ Mov(v17.V2D(), v0.V2D());
  __ Mov(v18.V2D(), v1.V2D());
  __ Mov(v19.V2D(), v1.V2D());
  __ Mov(v20.V2D(), v2.V2D());
  __ Mov(v21.V2D(), v2.V2D());
  __ Mov(v22.V2D(), v3.V2D());
  __ Mov(v23.V2D(), v4.V2D());
  __ Mov(v24.V2D(), v3.V2D());
  __ Mov(v25.V2D(), v4.V2D());

  __ Ursra(v16.V8B(), v0.V8B(), 4);
  __ Ursra(v17.V16B(), v0.V16B(), 4);

  __ Ursra(v18.V4H(), v1.V4H(), 8);
  __ Ursra(v19.V8H(), v1.V8H(), 8);

  __ Ursra(v20.V2S(), v2.V2S(), 16);
  __ Ursra(v21.V4S(), v2.V4S(), 16);

  __ Ursra(v22.V2D(), v3.V2D(), 32);
  __ Ursra(v23.V2D(), v4.V2D(), 32);

  __ Ursra(d24, d3, 48);

  END();

  RUN();

  ASSERT_EQUAL_128(0x0000000000000000, 0x88890f0001878889, q16);
  ASSERT_EQUAL_128(0x8701000f89888701, 0x88890f0001878889, q17);
  ASSERT_EQUAL_128(0x0000000000000000, 0x00ff00000001807f, q18);
  ASSERT_EQUAL_128(0x8080808100ff0000, 0x00ff00000001807f, q19);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000080007fff, q20);
  ASSERT_EQUAL_128(0x800080000000ffff, 0x0000000080007fff, q21);
  ASSERT_EQUAL_128(0x8000000080000001, 0x800000007fffffff, q22);
  ASSERT_EQUAL_128(0x8000000080000000, 0x0000000000000000, q23);
  ASSERT_EQUAL_128(0x0000000000000000, 0x8000000000007fff, q24);
  TEARDOWN();
}


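// UQSHL (register form): unsigned saturating shift left. The shift amount is
// the signed byte in the least-significant byte of each lane of the second
// operand, so a shift operand of 0xff requests a shift right by one. Results
// that overflow the lane saturate to the maximum unsigned value.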
TEST(neon_uqshl_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0xaaaaaaaaaaaaaaaa, 0xf0000000f000f0f0);
  __ Movi(v1.V2D(), 0x5555555555555555, 0x7fffffff7fff7f7f);
  __ Movi(v2.V2D(), 0xaaaaaaaaaaaaaaaa, 0x0000000000000001);
  __ Movi(v3.V2D(), 0xaaaaaaaaaaaaaaaa, 0xffffffffffffffff);

  __ Uqshl(b16, b0, b2);
  __ Uqshl(b17, b0, b3);
  __ Uqshl(b18, b1, b2);
  __ Uqshl(b19, b1, b3);
  __ Uqshl(h20, h0, h2);
  __ Uqshl(h21, h0, h3);
  __ Uqshl(h22, h1, h2);
  __ Uqshl(h23, h1, h3);
  __ Uqshl(s24, s0, s2);
  __ Uqshl(s25, s0, s3);
  __ Uqshl(s26, s1, s2);
  __ Uqshl(s27, s1, s3);
  __ Uqshl(d28, d0, d2);
  __ Uqshl(d29, d0, d3);
  __ Uqshl(d30, d1, d2);
  __ Uqshl(d31, d1, d3);

  END();

  RUN();

  ASSERT_EQUAL_128(0, 0xff, q16);
  ASSERT_EQUAL_128(0, 0x78, q17);
  ASSERT_EQUAL_128(0, 0xfe, q18);
  ASSERT_EQUAL_128(0, 0x3f, q19);
  ASSERT_EQUAL_128(0, 0xffff, q20);
  ASSERT_EQUAL_128(0, 0x7878, q21);
  ASSERT_EQUAL_128(0, 0xfefe, q22);
  ASSERT_EQUAL_128(0, 0x3fbf, q23);
  ASSERT_EQUAL_128(0, 0xffffffff, q24);
  ASSERT_EQUAL_128(0, 0x78007878, q25);
  ASSERT_EQUAL_128(0, 0xfffefefe, q26);
  ASSERT_EQUAL_128(0, 0x3fffbfbf, q27);
  ASSERT_EQUAL_128(0, 0xffffffffffffffff, q28);
  ASSERT_EQUAL_128(0, 0x7800000078007878, q29);
  ASSERT_EQUAL_128(0, 0xfffffffefffefefe, q30);
  ASSERT_EQUAL_128(0, 0x3fffffffbfffbfbf, q31);

  TEARDOWN();
}


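// SQSHL: signed saturating shift left by a signed, register-specified shift
// amount. Overflow saturates to the most positive or most negative value for
// the lane size (0x7f.. or 0x80..).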
TEST(neon_sqshl_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0xaaaaaaaaaaaaaaaa, 0xbfffffffbfffbfbf);
  __ Movi(v1.V2D(), 0x5555555555555555, 0x4000000040004040);
  __ Movi(v2.V2D(), 0xaaaaaaaaaaaaaaaa, 0x0000000000000001);
  __ Movi(v3.V2D(), 0xaaaaaaaaaaaaaaaa, 0xffffffffffffffff);

  __ Sqshl(b16, b0, b2);
  __ Sqshl(b17, b0, b3);
  __ Sqshl(b18, b1, b2);
  __ Sqshl(b19, b1, b3);
  __ Sqshl(h20, h0, h2);
  __ Sqshl(h21, h0, h3);
  __ Sqshl(h22, h1, h2);
  __ Sqshl(h23, h1, h3);
  __ Sqshl(s24, s0, s2);
  __ Sqshl(s25, s0, s3);
  __ Sqshl(s26, s1, s2);
  __ Sqshl(s27, s1, s3);
  __ Sqshl(d28, d0, d2);
  __ Sqshl(d29, d0, d3);
  __ Sqshl(d30, d1, d2);
  __ Sqshl(d31, d1, d3);

  END();

  RUN();

  ASSERT_EQUAL_128(0, 0x80, q16);
  ASSERT_EQUAL_128(0, 0xdf, q17);
  ASSERT_EQUAL_128(0, 0x7f, q18);
  ASSERT_EQUAL_128(0, 0x20, q19);
  ASSERT_EQUAL_128(0, 0x8000, q20);
  ASSERT_EQUAL_128(0, 0xdfdf, q21);
  ASSERT_EQUAL_128(0, 0x7fff, q22);
  ASSERT_EQUAL_128(0, 0x2020, q23);
  ASSERT_EQUAL_128(0, 0x80000000, q24);
  ASSERT_EQUAL_128(0, 0xdfffdfdf, q25);
  ASSERT_EQUAL_128(0, 0x7fffffff, q26);
  ASSERT_EQUAL_128(0, 0x20002020, q27);
  ASSERT_EQUAL_128(0, 0x8000000000000000, q28);
  ASSERT_EQUAL_128(0, 0xdfffffffdfffdfdf, q29);
  ASSERT_EQUAL_128(0, 0x7fffffffffffffff, q30);
  ASSERT_EQUAL_128(0, 0x2000000020002020, q31);

  TEARDOWN();
}


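// URSHL and SRSHL: rounding shifts left by a signed register shift amount,
// without saturation. A negative shift count shifts right with rounding; a
// rough scalar model of the negative-count unsigned case (sketch only, with
// the same caveat about intermediate overflow as above):
//
//   uint64_t Urshl(uint64_t src, int count) {  // Assumes count < 0.
//     int shift = -count;
//     return (src + (uint64_t{1} << (shift - 1))) >> shift;
//   }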
TEST(neon_urshl_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0xaaaaaaaaaaaaaaaa, 0xf0000000f000f0f0);
  __ Movi(v1.V2D(), 0x5555555555555555, 0x7fffffff7fff7f7f);
  __ Movi(v2.V2D(), 0xaaaaaaaaaaaaaaaa, 0x0000000000000001);
  __ Movi(v3.V2D(), 0xaaaaaaaaaaaaaaaa, 0xffffffffffffffff);

  __ Urshl(d28, d0, d2);
  __ Urshl(d29, d0, d3);
  __ Urshl(d30, d1, d2);
  __ Urshl(d31, d1, d3);

  END();

  RUN();

  ASSERT_EQUAL_128(0, 0xe0000001e001e1e0, q28);
  ASSERT_EQUAL_128(0, 0x7800000078007878, q29);
  ASSERT_EQUAL_128(0, 0xfffffffefffefefe, q30);
  ASSERT_EQUAL_128(0, 0x3fffffffbfffbfc0, q31);

  TEARDOWN();
}


TEST(neon_srshl_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0xaaaaaaaaaaaaaaaa, 0xbfffffffbfffbfbf);
  __ Movi(v1.V2D(), 0x5555555555555555, 0x4000000040004040);
  __ Movi(v2.V2D(), 0xaaaaaaaaaaaaaaaa, 0x0000000000000001);
  __ Movi(v3.V2D(), 0xaaaaaaaaaaaaaaaa, 0xffffffffffffffff);

  __ Srshl(d28, d0, d2);
  __ Srshl(d29, d0, d3);
  __ Srshl(d30, d1, d2);
  __ Srshl(d31, d1, d3);

  END();

  RUN();

  ASSERT_EQUAL_128(0, 0x7fffffff7fff7f7e, q28);
  ASSERT_EQUAL_128(0, 0xdfffffffdfffdfe0, q29);
  ASSERT_EQUAL_128(0, 0x8000000080008080, q30);
  ASSERT_EQUAL_128(0, 0x2000000020002020, q31);

  TEARDOWN();
}


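// UQRSHL and SQRSHL combine both behaviours above: a register-specified
// shift with rounding on right shifts and saturation on left shifts.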
TEST(neon_uqrshl_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0xaaaaaaaaaaaaaaaa, 0xf0000000f000f0f0);
  __ Movi(v1.V2D(), 0x5555555555555555, 0x7fffffff7fff7f7f);
  __ Movi(v2.V2D(), 0xaaaaaaaaaaaaaaaa, 0x0000000000000001);
  __ Movi(v3.V2D(), 0xaaaaaaaaaaaaaaaa, 0xffffffffffffffff);

  __ Uqrshl(b16, b0, b2);
  __ Uqrshl(b17, b0, b3);
  __ Uqrshl(b18, b1, b2);
  __ Uqrshl(b19, b1, b3);
  __ Uqrshl(h20, h0, h2);
  __ Uqrshl(h21, h0, h3);
  __ Uqrshl(h22, h1, h2);
  __ Uqrshl(h23, h1, h3);
  __ Uqrshl(s24, s0, s2);
  __ Uqrshl(s25, s0, s3);
  __ Uqrshl(s26, s1, s2);
  __ Uqrshl(s27, s1, s3);
  __ Uqrshl(d28, d0, d2);
  __ Uqrshl(d29, d0, d3);
  __ Uqrshl(d30, d1, d2);
  __ Uqrshl(d31, d1, d3);

  END();

  RUN();

  ASSERT_EQUAL_128(0, 0xff, q16);
  ASSERT_EQUAL_128(0, 0x78, q17);
  ASSERT_EQUAL_128(0, 0xfe, q18);
  ASSERT_EQUAL_128(0, 0x40, q19);
  ASSERT_EQUAL_128(0, 0xffff, q20);
  ASSERT_EQUAL_128(0, 0x7878, q21);
  ASSERT_EQUAL_128(0, 0xfefe, q22);
  ASSERT_EQUAL_128(0, 0x3fc0, q23);
  ASSERT_EQUAL_128(0, 0xffffffff, q24);
  ASSERT_EQUAL_128(0, 0x78007878, q25);
  ASSERT_EQUAL_128(0, 0xfffefefe, q26);
  ASSERT_EQUAL_128(0, 0x3fffbfc0, q27);
  ASSERT_EQUAL_128(0, 0xffffffffffffffff, q28);
  ASSERT_EQUAL_128(0, 0x7800000078007878, q29);
  ASSERT_EQUAL_128(0, 0xfffffffefffefefe, q30);
  ASSERT_EQUAL_128(0, 0x3fffffffbfffbfc0, q31);

  TEARDOWN();
}


TEST(neon_sqrshl_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0xaaaaaaaaaaaaaaaa, 0xbfffffffbfffbfbf);
  __ Movi(v1.V2D(), 0x5555555555555555, 0x4000000040004040);
  __ Movi(v2.V2D(), 0xaaaaaaaaaaaaaaaa, 0x0000000000000001);
  __ Movi(v3.V2D(), 0xaaaaaaaaaaaaaaaa, 0xffffffffffffffff);

  __ Sqrshl(b16, b0, b2);
  __ Sqrshl(b17, b0, b3);
  __ Sqrshl(b18, b1, b2);
  __ Sqrshl(b19, b1, b3);
  __ Sqrshl(h20, h0, h2);
  __ Sqrshl(h21, h0, h3);
  __ Sqrshl(h22, h1, h2);
  __ Sqrshl(h23, h1, h3);
  __ Sqrshl(s24, s0, s2);
  __ Sqrshl(s25, s0, s3);
  __ Sqrshl(s26, s1, s2);
  __ Sqrshl(s27, s1, s3);
  __ Sqrshl(d28, d0, d2);
  __ Sqrshl(d29, d0, d3);
  __ Sqrshl(d30, d1, d2);
  __ Sqrshl(d31, d1, d3);

  END();

  RUN();

  ASSERT_EQUAL_128(0, 0x80, q16);
  ASSERT_EQUAL_128(0, 0xe0, q17);
  ASSERT_EQUAL_128(0, 0x7f, q18);
  ASSERT_EQUAL_128(0, 0x20, q19);
  ASSERT_EQUAL_128(0, 0x8000, q20);
  ASSERT_EQUAL_128(0, 0xdfe0, q21);
  ASSERT_EQUAL_128(0, 0x7fff, q22);
  ASSERT_EQUAL_128(0, 0x2020, q23);
  ASSERT_EQUAL_128(0, 0x80000000, q24);
  ASSERT_EQUAL_128(0, 0xdfffdfe0, q25);
  ASSERT_EQUAL_128(0, 0x7fffffff, q26);
  ASSERT_EQUAL_128(0, 0x20002020, q27);
  ASSERT_EQUAL_128(0, 0x8000000000000000, q28);
  ASSERT_EQUAL_128(0, 0xdfffffffdfffdfe0, q29);
  ASSERT_EQUAL_128(0, 0x7fffffffffffffff, q30);
  ASSERT_EQUAL_128(0, 0x2000000020002020, q31);

  TEARDOWN();
}


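// UQADD and SQADD: saturating integer addition. Sums that do not fit in the
// lane clamp to the type bounds instead of wrapping; for bytes, for example,
// 0xf0 + 0xf0 gives 0xff under UQADD rather than the wrapped 0xe0.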
TEST(neon_uqadd_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0xaaaaaaaaaaaaaaaa, 0xf0000000f000f0f0);
  __ Movi(v1.V2D(), 0x5555555555555555, 0x7fffffff7fff7f7f);
  __ Movi(v2.V2D(), 0xaaaaaaaaaaaaaaaa, 0x1000000010001010);

  __ Uqadd(b16, b0, b0);
  __ Uqadd(b17, b1, b1);
  __ Uqadd(b18, b2, b2);
  __ Uqadd(h19, h0, h0);
  __ Uqadd(h20, h1, h1);
  __ Uqadd(h21, h2, h2);
  __ Uqadd(s22, s0, s0);
  __ Uqadd(s23, s1, s1);
  __ Uqadd(s24, s2, s2);
  __ Uqadd(d25, d0, d0);
  __ Uqadd(d26, d1, d1);
  __ Uqadd(d27, d2, d2);

  END();

  RUN();

  ASSERT_EQUAL_128(0, 0xff, q16);
  ASSERT_EQUAL_128(0, 0xfe, q17);
  ASSERT_EQUAL_128(0, 0x20, q18);
  ASSERT_EQUAL_128(0, 0xffff, q19);
  ASSERT_EQUAL_128(0, 0xfefe, q20);
  ASSERT_EQUAL_128(0, 0x2020, q21);
  ASSERT_EQUAL_128(0, 0xffffffff, q22);
  ASSERT_EQUAL_128(0, 0xfffefefe, q23);
  ASSERT_EQUAL_128(0, 0x20002020, q24);
  ASSERT_EQUAL_128(0, 0xffffffffffffffff, q25);
  ASSERT_EQUAL_128(0, 0xfffffffefffefefe, q26);
  ASSERT_EQUAL_128(0, 0x2000000020002020, q27);

  TEARDOWN();
}


TEST(neon_sqadd_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0xaaaaaaaaaaaaaaaa, 0x8000000180018181);
  __ Movi(v1.V2D(), 0x5555555555555555, 0x7fffffff7fff7f7f);
  __ Movi(v2.V2D(), 0xaaaaaaaaaaaaaaaa, 0x1000000010001010);

  __ Sqadd(b16, b0, b0);
  __ Sqadd(b17, b1, b1);
  __ Sqadd(b18, b2, b2);
  __ Sqadd(h19, h0, h0);
  __ Sqadd(h20, h1, h1);
  __ Sqadd(h21, h2, h2);
  __ Sqadd(s22, s0, s0);
  __ Sqadd(s23, s1, s1);
  __ Sqadd(s24, s2, s2);
  __ Sqadd(d25, d0, d0);
  __ Sqadd(d26, d1, d1);
  __ Sqadd(d27, d2, d2);

  END();

  RUN();

  ASSERT_EQUAL_128(0, 0x80, q16);
  ASSERT_EQUAL_128(0, 0x7f, q17);
  ASSERT_EQUAL_128(0, 0x20, q18);
  ASSERT_EQUAL_128(0, 0x8000, q19);
  ASSERT_EQUAL_128(0, 0x7fff, q20);
  ASSERT_EQUAL_128(0, 0x2020, q21);
  ASSERT_EQUAL_128(0, 0x80000000, q22);
  ASSERT_EQUAL_128(0, 0x7fffffff, q23);
  ASSERT_EQUAL_128(0, 0x20002020, q24);
  ASSERT_EQUAL_128(0, 0x8000000000000000, q25);
  ASSERT_EQUAL_128(0, 0x7fffffffffffffff, q26);
  ASSERT_EQUAL_128(0, 0x2000000020002020, q27);

  TEARDOWN();
}


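// UQSUB and SQSUB: saturating subtraction. UQSUB clamps negative differences
// to zero; SQSUB clamps to the most negative or most positive signed value.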
TEST(neon_uqsub_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0xaaaaaaaaaaaaaaaa, 0xf0000000f000f0f0);
  __ Movi(v1.V2D(), 0x5555555555555555, 0x7fffffff7fff7f7f);

  __ Uqsub(b16, b0, b0);
  __ Uqsub(b17, b0, b1);
  __ Uqsub(b18, b1, b0);
  __ Uqsub(h19, h0, h0);
  __ Uqsub(h20, h0, h1);
  __ Uqsub(h21, h1, h0);
  __ Uqsub(s22, s0, s0);
  __ Uqsub(s23, s0, s1);
  __ Uqsub(s24, s1, s0);
  __ Uqsub(d25, d0, d0);
  __ Uqsub(d26, d0, d1);
  __ Uqsub(d27, d1, d0);

  END();

  RUN();

  ASSERT_EQUAL_128(0, 0, q16);
  ASSERT_EQUAL_128(0, 0x71, q17);
  ASSERT_EQUAL_128(0, 0, q18);

  ASSERT_EQUAL_128(0, 0, q19);
  ASSERT_EQUAL_128(0, 0x7171, q20);
  ASSERT_EQUAL_128(0, 0, q21);

  ASSERT_EQUAL_128(0, 0, q22);
  ASSERT_EQUAL_128(0, 0x70017171, q23);
  ASSERT_EQUAL_128(0, 0, q24);

  ASSERT_EQUAL_128(0, 0, q25);
  ASSERT_EQUAL_128(0, 0x7000000170017171, q26);
  ASSERT_EQUAL_128(0, 0, q27);

  TEARDOWN();
}


TEST(neon_sqsub_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();

  __ Movi(v0.V2D(), 0xaaaaaaaaaaaaaaaa, 0xf0000000f000f0f0);
  __ Movi(v1.V2D(), 0x5555555555555555, 0x7eeeeeee7eee7e7e);

  __ Sqsub(b16, b0, b0);
  __ Sqsub(b17, b0, b1);
  __ Sqsub(b18, b1, b0);
  __ Sqsub(h19, h0, h0);
  __ Sqsub(h20, h0, h1);
  __ Sqsub(h21, h1, h0);
  __ Sqsub(s22, s0, s0);
  __ Sqsub(s23, s0, s1);
  __ Sqsub(s24, s1, s0);
  __ Sqsub(d25, d0, d0);
  __ Sqsub(d26, d0, d1);
  __ Sqsub(d27, d1, d0);

  END();

  RUN();

  ASSERT_EQUAL_128(0, 0, q16);
  ASSERT_EQUAL_128(0, 0x80, q17);
  ASSERT_EQUAL_128(0, 0x7f, q18);

  ASSERT_EQUAL_128(0, 0, q19);
  ASSERT_EQUAL_128(0, 0x8000, q20);
  ASSERT_EQUAL_128(0, 0x7fff, q21);

  ASSERT_EQUAL_128(0, 0, q22);
  ASSERT_EQUAL_128(0, 0x80000000, q23);
  ASSERT_EQUAL_128(0, 0x7fffffff, q24);

  ASSERT_EQUAL_128(0, 0, q25);
  ASSERT_EQUAL_128(0, 0x8000000000000000, q26);
  ASSERT_EQUAL_128(0, 0x7fffffffffffffff, q27);

  TEARDOWN();
}


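// FMLA and FMLS: fused floating-point multiply-accumulate, computing
// dst + (a * b) and dst - (a * b) respectively with a single rounding.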
TEST(neon_fmla_fmls) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);

  START();
  __ Movi(v0.V2D(), 0x3f80000040000000, 0x4100000000000000);
  __ Movi(v1.V2D(), 0x400000003f800000, 0x000000003f800000);
  __ Movi(v2.V2D(), 0x3f800000ffffffff, 0x7f800000ff800000);
  __ Mov(v16.V16B(), v0.V16B());
  __ Mov(v17.V16B(), v0.V16B());
  __ Mov(v18.V16B(), v0.V16B());
  __ Mov(v19.V16B(), v0.V16B());
  __ Mov(v20.V16B(), v0.V16B());
  __ Mov(v21.V16B(), v0.V16B());

  __ Fmla(v16.V2S(), v1.V2S(), v2.V2S());
  __ Fmla(v17.V4S(), v1.V4S(), v2.V4S());
  __ Fmla(v18.V2D(), v1.V2D(), v2.V2D());
  __ Fmls(v19.V2S(), v1.V2S(), v2.V2S());
  __ Fmls(v20.V4S(), v1.V4S(), v2.V4S());
  __ Fmls(v21.V2D(), v1.V2D(), v2.V2D());
  END();

  RUN();

  ASSERT_EQUAL_128(0x0000000000000000, 0x7fc00000ff800000, q16);
  ASSERT_EQUAL_128(0x40400000ffffffff, 0x7fc00000ff800000, q17);
  ASSERT_EQUAL_128(0x3f9800015f8003f7, 0x41000000000000fe, q18);
  ASSERT_EQUAL_128(0x0000000000000000, 0x7fc000007f800000, q19);
  ASSERT_EQUAL_128(0xbf800000ffffffff, 0x7fc000007f800000, q20);
  ASSERT_EQUAL_128(0xbf8000023f0007ee, 0x40fffffffffffe04, q21);

  TEARDOWN();
}


TEST(neon_fmla_h) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();
  __ Movi(v0.V2D(), 0x4000400040004000, 0x4000400040004000);
  __ Movi(v1.V2D(), 0x51a051a051a051a0, 0x51a051a051a051a0);
  __ Movi(v2.V2D(), 0x7c007c007c007c00, 0x7c007c007c007c00);
  __ Movi(v3.V2D(), 0xfc00fc00fc00fc00, 0xfc00fc00fc00fc00);
  __ Movi(v4.V2D(), 0x7e007e007e007e00, 0x7e007e007e007e00);
  __ Movi(v5.V2D(), 0x7c017c017c017c01, 0x7c017c017c017c01);
  __ Movi(v6.V2D(), 0x0000000000000000, 0x0000000000000000);
  __ Mov(v16.V2D(), v0.V2D());
  __ Mov(v17.V2D(), v0.V2D());
  __ Mov(v18.V2D(), v4.V2D());
  __ Mov(v19.V2D(), v5.V2D());
  __ Mov(v20.V2D(), v0.V2D());
  __ Mov(v21.V2D(), v0.V2D());
  __ Mov(v22.V2D(), v4.V2D());
  __ Mov(v23.V2D(), v5.V2D());

  __ Fmla(v16.V8H(), v0.V8H(), v1.V8H());
  __ Fmla(v17.V8H(), v2.V8H(), v3.V8H());
  __ Fmla(v18.V8H(), v2.V8H(), v6.V8H());
  __ Fmla(v19.V8H(), v3.V8H(), v6.V8H());
  __ Fmla(v20.V4H(), v0.V4H(), v1.V4H());
  __ Fmla(v21.V4H(), v2.V4H(), v3.V4H());
  __ Fmla(v22.V4H(), v2.V4H(), v6.V4H());
  __ Fmla(v23.V4H(), v3.V4H(), v6.V4H());
  END();

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
  RUN();

  ASSERT_EQUAL_128(0x55c055c055c055c0, 0x55c055c055c055c0, v16);
  ASSERT_EQUAL_128(0xfc00fc00fc00fc00, 0xfc00fc00fc00fc00, v17);
  ASSERT_EQUAL_128(0x7e007e007e007e00, 0x7e007e007e007e00, v18);
  ASSERT_EQUAL_128(0x7e017e017e017e01, 0x7e017e017e017e01, v19);
  ASSERT_EQUAL_128(0, 0x55c055c055c055c0, v20);
  ASSERT_EQUAL_128(0, 0xfc00fc00fc00fc00, v21);
  ASSERT_EQUAL_128(0, 0x7e007e007e007e00, v22);
  ASSERT_EQUAL_128(0, 0x7e017e017e017e01, v23);
#endif  // VIXL_INCLUDE_SIMULATOR_AARCH64

  TEARDOWN();
}


TEST(neon_fmls_h) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();
  __ Movi(v0.V2D(), 0x4000400040004000, 0x4000400040004000);
  __ Movi(v1.V2D(), 0x51a051a051a051a0, 0x51a051a051a051a0);
  __ Movi(v2.V2D(), 0x7c007c007c007c00, 0x7c007c007c007c00);
  __ Movi(v3.V2D(), 0xfc00fc00fc00fc00, 0xfc00fc00fc00fc00);
  __ Movi(v4.V2D(), 0x7e007e007e007e00, 0x7e007e007e007e00);
  __ Movi(v5.V2D(), 0x7c017c017c017c01, 0x7c017c017c017c01);
  __ Movi(v6.V2D(), 0x0000000000000000, 0x0000000000000000);
  __ Mov(v16.V2D(), v0.V2D());
  __ Mov(v17.V2D(), v0.V2D());
  __ Mov(v18.V2D(), v4.V2D());
  __ Mov(v19.V2D(), v5.V2D());
  __ Mov(v20.V2D(), v0.V2D());
  __ Mov(v21.V2D(), v0.V2D());
  __ Mov(v22.V2D(), v4.V2D());
  __ Mov(v23.V2D(), v5.V2D());

  __ Fmls(v16.V8H(), v0.V8H(), v1.V8H());
  __ Fmls(v17.V8H(), v2.V8H(), v3.V8H());
  __ Fmls(v18.V8H(), v2.V8H(), v6.V8H());
  __ Fmls(v19.V8H(), v3.V8H(), v6.V8H());
  __ Fmls(v20.V4H(), v0.V4H(), v1.V4H());
  __ Fmls(v21.V4H(), v2.V4H(), v3.V4H());
  __ Fmls(v22.V4H(), v2.V4H(), v6.V4H());
  __ Fmls(v23.V4H(), v3.V4H(), v6.V4H());
  END();

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
  RUN();

  ASSERT_EQUAL_128(0xd580d580d580d580, 0xd580d580d580d580, v16);
  ASSERT_EQUAL_128(0x7c007c007c007c00, 0x7c007c007c007c00, v17);
  ASSERT_EQUAL_128(0x7e007e007e007e00, 0x7e007e007e007e00, v18);
  ASSERT_EQUAL_128(0x7e017e017e017e01, 0x7e017e017e017e01, v19);
  ASSERT_EQUAL_128(0, 0xd580d580d580d580, v20);
  ASSERT_EQUAL_128(0, 0x7c007c007c007c00, v21);
  ASSERT_EQUAL_128(0, 0x7e007e007e007e00, v22);
  ASSERT_EQUAL_128(0, 0x7e017e017e017e01, v23);
#endif  // VIXL_INCLUDE_SIMULATOR_AARCH64

  TEARDOWN();
}


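// FMULX behaves like FMUL except for the special case of zero multiplied by
// infinity: instead of producing the default NaN it returns 2.0 with the
// XOR of the operand signs, which is what the tests below exercise.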
TEST(neon_fmulx_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);

  START();
  __ Fmov(s0, 2.0);
  __ Fmov(s1, 0.5);
  __ Fmov(s2, 0.0);
  __ Fmov(s3, -0.0);
  __ Fmov(s4, kFP32PositiveInfinity);
  __ Fmov(s5, kFP32NegativeInfinity);
  __ Fmulx(s16, s0, s1);
  __ Fmulx(s17, s2, s4);
  __ Fmulx(s18, s2, s5);
  __ Fmulx(s19, s3, s4);
  __ Fmulx(s20, s3, s5);

  __ Fmov(d21, 2.0);
  __ Fmov(d22, 0.5);
  __ Fmov(d23, 0.0);
  __ Fmov(d24, -0.0);
  __ Fmov(d25, kFP64PositiveInfinity);
  __ Fmov(d26, kFP64NegativeInfinity);
  __ Fmulx(d27, d21, d22);
  __ Fmulx(d28, d23, d25);
  __ Fmulx(d29, d23, d26);
  __ Fmulx(d30, d24, d25);
  __ Fmulx(d31, d24, d26);
  END();

  RUN();

  ASSERT_EQUAL_FP32(1.0, s16);
  ASSERT_EQUAL_FP32(2.0, s17);
  ASSERT_EQUAL_FP32(-2.0, s18);
  ASSERT_EQUAL_FP32(-2.0, s19);
  ASSERT_EQUAL_FP32(2.0, s20);
  ASSERT_EQUAL_FP64(1.0, d27);
  ASSERT_EQUAL_FP64(2.0, d28);
  ASSERT_EQUAL_FP64(-2.0, d29);
  ASSERT_EQUAL_FP64(-2.0, d30);
  ASSERT_EQUAL_FP64(2.0, d31);

  TEARDOWN();
}


TEST(neon_fmulx_h) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();
  __ Movi(v0.V2D(), 0x4000400040004000, 0x4000400040004000);
  __ Movi(v1.V2D(), 0x3800380038003800, 0x3800380038003800);
  __ Movi(v2.V2D(), 0x0000000000000000, 0x0000000000000000);
  __ Movi(v3.V2D(), 0x8000800080008000, 0x8000800080008000);
  __ Movi(v4.V2D(), 0x7c007c007c007c00, 0x7c007c007c007c00);
  __ Movi(v5.V2D(), 0xfc00fc00fc00fc00, 0xfc00fc00fc00fc00);
  __ Fmulx(v6.V8H(), v0.V8H(), v1.V8H());
  __ Fmulx(v7.V8H(), v2.V8H(), v4.V8H());
  __ Fmulx(v8.V8H(), v2.V8H(), v5.V8H());
  __ Fmulx(v9.V8H(), v3.V8H(), v4.V8H());
  __ Fmulx(v10.V8H(), v3.V8H(), v5.V8H());
  __ Fmulx(v11.V4H(), v0.V4H(), v1.V4H());
  __ Fmulx(v12.V4H(), v2.V4H(), v4.V4H());
  __ Fmulx(v13.V4H(), v2.V4H(), v5.V4H());
  __ Fmulx(v14.V4H(), v3.V4H(), v4.V4H());
  __ Fmulx(v15.V4H(), v3.V4H(), v5.V4H());
  END();

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
  RUN();
  ASSERT_EQUAL_128(0x3c003c003c003c00, 0x3c003c003c003c00, v6);
  ASSERT_EQUAL_128(0x4000400040004000, 0x4000400040004000, v7);
  ASSERT_EQUAL_128(0xc000c000c000c000, 0xc000c000c000c000, v8);
  ASSERT_EQUAL_128(0xc000c000c000c000, 0xc000c000c000c000, v9);
  ASSERT_EQUAL_128(0x4000400040004000, 0x4000400040004000, v10);
  ASSERT_EQUAL_128(0, 0x3c003c003c003c00, v11);
  ASSERT_EQUAL_128(0, 0x4000400040004000, v12);
  ASSERT_EQUAL_128(0, 0xc000c000c000c000, v13);
  ASSERT_EQUAL_128(0, 0xc000c000c000c000, v14);
  ASSERT_EQUAL_128(0, 0x4000400040004000, v15);
#endif  // VIXL_INCLUDE_SIMULATOR_AARCH64

  TEARDOWN();
}


TEST(neon_fmulx_h_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf,
                      CPUFeatures::kFPHalf);

  START();
  __ Fmov(h0, Float16(2.0));
  __ Fmov(h1, Float16(0.5));
  __ Fmov(h2, Float16(0.0));
  __ Fmov(h3, Float16(-0.0));
  __ Fmov(h4, kFP16PositiveInfinity);
  __ Fmov(h5, kFP16NegativeInfinity);
  __ Fmulx(h6, h0, h1);
  __ Fmulx(h7, h2, h4);
  __ Fmulx(h8, h2, h5);
  __ Fmulx(h9, h3, h4);
  __ Fmulx(h10, h3, h5);
  END();

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
  RUN();
  ASSERT_EQUAL_FP16(Float16(1.0), h6);
  ASSERT_EQUAL_FP16(Float16(2.0), h7);
  ASSERT_EQUAL_FP16(Float16(-2.0), h8);
  ASSERT_EQUAL_FP16(Float16(-2.0), h9);
  ASSERT_EQUAL_FP16(Float16(2.0), h10);
#endif  // VIXL_INCLUDE_SIMULATOR_AARCH64

  TEARDOWN();
}


// We currently disable tests for CRC32 instructions when running natively.
// Support for this family of instructions is optional, so native platforms
// may simply fail to execute the test.
// TODO: Run the test on native platforms where the CRC32 instructions are
// available.
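//
// CRC32B/H/W/X use the CRC-32 polynomial 0x04c11db7, and CRC32CB/H/W/X the
// CRC-32C (Castagnoli) polynomial 0x1edc6f41. A bitwise reference model for
// the byte variants, using the bit-reflected forms of the polynomials (a
// sketch for cross-checking the expected values below, not part of the test
// API):
//
//   uint32_t Crc32Byte(uint32_t acc, uint8_t data, uint32_t poly_reflected) {
//     // poly_reflected is 0xedb88320 (CRC32B) or 0x82f63b78 (CRC32CB).
//     acc ^= data;
//     for (int i = 0; i < 8; i++) {
//       acc = (acc >> 1) ^ ((acc & 1) ? poly_reflected : 0);
//     }
//     return acc;
//   }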
#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
TEST(crc32b) {
  SETUP_WITH_FEATURES(CPUFeatures::kCRC32);

  START();

  __ Mov(w0, 0);
  __ Mov(w1, 0);
  __ Crc32b(w10, w0, w1);

  __ Mov(w0, 0x1);
  __ Mov(w1, 0x138);
  __ Crc32b(w11, w0, w1);

  __ Mov(w0, 0x1);
  __ Mov(w1, 0x38);
  __ Crc32b(w12, w0, w1);

  __ Mov(w0, 0);
  __ Mov(w1, 128);
  __ Crc32b(w13, w0, w1);

  __ Mov(w0, UINT32_MAX);
  __ Mov(w1, 255);
  __ Crc32b(w14, w0, w1);

  __ Mov(w0, 0x00010001);
  __ Mov(w1, 0x10001000);
  __ Crc32b(w15, w0, w1);

  END();
  RUN();

  ASSERT_EQUAL_64(0x0, x10);
  ASSERT_EQUAL_64(0x5f058808, x11);
  ASSERT_EQUAL_64(0x5f058808, x12);
  ASSERT_EQUAL_64(0xedb88320, x13);
  ASSERT_EQUAL_64(0x00ffffff, x14);
  ASSERT_EQUAL_64(0x77073196, x15);

  TEARDOWN();
}


TEST(crc32h) {
  SETUP_WITH_FEATURES(CPUFeatures::kCRC32);

  START();

  __ Mov(w0, 0);
  __ Mov(w1, 0);
  __ Crc32h(w10, w0, w1);

  __ Mov(w0, 0x1);
  __ Mov(w1, 0x10038);
  __ Crc32h(w11, w0, w1);

  __ Mov(w0, 0x1);
  __ Mov(w1, 0x38);
  __ Crc32h(w12, w0, w1);

  __ Mov(w0, 0);
  __ Mov(w1, 128);
  __ Crc32h(w13, w0, w1);

  __ Mov(w0, UINT32_MAX);
  __ Mov(w1, 255);
  __ Crc32h(w14, w0, w1);

  __ Mov(w0, 0x00010001);
  __ Mov(w1, 0x10001000);
  __ Crc32h(w15, w0, w1);

  END();
  RUN();

  ASSERT_EQUAL_64(0x0, x10);
  ASSERT_EQUAL_64(0x0e848dba, x11);
  ASSERT_EQUAL_64(0x0e848dba, x12);
  ASSERT_EQUAL_64(0x3b83984b, x13);
  ASSERT_EQUAL_64(0x2d021072, x14);
  ASSERT_EQUAL_64(0x04ac2124, x15);

  TEARDOWN();
}


TEST(crc32w) {
  SETUP_WITH_FEATURES(CPUFeatures::kCRC32);

  START();

  __ Mov(w0, 0);
  __ Mov(w1, 0);
  __ Crc32w(w10, w0, w1);

  __ Mov(w0, 0x1);
  __ Mov(w1, 0x80000031);
  __ Crc32w(w11, w0, w1);

  __ Mov(w0, 0);
  __ Mov(w1, 128);
  __ Crc32w(w13, w0, w1);

  __ Mov(w0, UINT32_MAX);
  __ Mov(w1, 255);
  __ Crc32w(w14, w0, w1);

  __ Mov(w0, 0x00010001);
  __ Mov(w1, 0x10001000);
  __ Crc32w(w15, w0, w1);

  END();
  RUN();

  ASSERT_EQUAL_64(0x0, x10);
  ASSERT_EQUAL_64(0x1d937b81, x11);
  ASSERT_EQUAL_64(0xed59b63b, x13);
  ASSERT_EQUAL_64(0x00be2612, x14);
  ASSERT_EQUAL_64(0xa036e530, x15);

  TEARDOWN();
}


TEST(crc32x) {
  SETUP_WITH_FEATURES(CPUFeatures::kCRC32);

  START();

  __ Mov(w0, 0);
  __ Mov(x1, 0);
  __ Crc32x(w10, w0, x1);

  __ Mov(w0, 0x1);
  __ Mov(x1, UINT64_C(0x0000000800000031));
  __ Crc32x(w11, w0, x1);

  __ Mov(w0, 0);
  __ Mov(x1, 128);
  __ Crc32x(w13, w0, x1);

  __ Mov(w0, UINT32_MAX);
  __ Mov(x1, 255);
  __ Crc32x(w14, w0, x1);

  __ Mov(w0, 0x00010001);
  __ Mov(x1, UINT64_C(0x1000100000000000));
  __ Crc32x(w15, w0, x1);

  END();
  RUN();

  ASSERT_EQUAL_64(0x0, x10);
  ASSERT_EQUAL_64(0x40797b92, x11);
  ASSERT_EQUAL_64(0x533b85da, x13);
  ASSERT_EQUAL_64(0xbc962670, x14);
  ASSERT_EQUAL_64(0x0667602f, x15);

  TEARDOWN();
}


TEST(crc32cb) {
  SETUP_WITH_FEATURES(CPUFeatures::kCRC32);

  START();

  __ Mov(w0, 0);
  __ Mov(w1, 0);
  __ Crc32cb(w10, w0, w1);

  __ Mov(w0, 0x1);
  __ Mov(w1, 0x138);
  __ Crc32cb(w11, w0, w1);

  __ Mov(w0, 0x1);
  __ Mov(w1, 0x38);
  __ Crc32cb(w12, w0, w1);

  __ Mov(w0, 0);
  __ Mov(w1, 128);
  __ Crc32cb(w13, w0, w1);

  __ Mov(w0, UINT32_MAX);
  __ Mov(w1, 255);
  __ Crc32cb(w14, w0, w1);

  __ Mov(w0, 0x00010001);
  __ Mov(w1, 0x10001000);
  __ Crc32cb(w15, w0, w1);

  END();
  RUN();

  ASSERT_EQUAL_64(0x0, x10);
  ASSERT_EQUAL_64(0x4851927d, x11);
  ASSERT_EQUAL_64(0x4851927d, x12);
  ASSERT_EQUAL_64(0x82f63b78, x13);
  ASSERT_EQUAL_64(0x00ffffff, x14);
  ASSERT_EQUAL_64(0xf26b8203, x15);

  TEARDOWN();
}


TEST(crc32ch) {
  SETUP_WITH_FEATURES(CPUFeatures::kCRC32);

  START();

  __ Mov(w0, 0);
  __ Mov(w1, 0);
  __ Crc32ch(w10, w0, w1);

  __ Mov(w0, 0x1);
  __ Mov(w1, 0x10038);
  __ Crc32ch(w11, w0, w1);

  __ Mov(w0, 0x1);
  __ Mov(w1, 0x38);
  __ Crc32ch(w12, w0, w1);

  __ Mov(w0, 0);
  __ Mov(w1, 128);
  __ Crc32ch(w13, w0, w1);

  __ Mov(w0, UINT32_MAX);
  __ Mov(w1, 255);
  __ Crc32ch(w14, w0, w1);

  __ Mov(w0, 0x00010001);
  __ Mov(w1, 0x10001000);
  __ Crc32ch(w15, w0, w1);

  END();
  RUN();

  ASSERT_EQUAL_64(0x0, x10);
  ASSERT_EQUAL_64(0xcef8494c, x11);
  ASSERT_EQUAL_64(0xcef8494c, x12);
  ASSERT_EQUAL_64(0xfbc3faf9, x13);
  ASSERT_EQUAL_64(0xad7dacae, x14);
  ASSERT_EQUAL_64(0x03fc5f19, x15);

  TEARDOWN();
}


TEST(crc32cw) {
  SETUP_WITH_FEATURES(CPUFeatures::kCRC32);

  START();

  __ Mov(w0, 0);
  __ Mov(w1, 0);
  __ Crc32cw(w10, w0, w1);

  __ Mov(w0, 0x1);
  __ Mov(w1, 0x80000031);
  __ Crc32cw(w11, w0, w1);

  __ Mov(w0, 0);
  __ Mov(w1, 128);
  __ Crc32cw(w13, w0, w1);

  __ Mov(w0, UINT32_MAX);
  __ Mov(w1, 255);
  __ Crc32cw(w14, w0, w1);

  __ Mov(w0, 0x00010001);
  __ Mov(w1, 0x10001000);
  __ Crc32cw(w15, w0, w1);

  END();
  RUN();

  ASSERT_EQUAL_64(0x0, x10);
  ASSERT_EQUAL_64(0xbcb79ece, x11);
  ASSERT_EQUAL_64(0x52a0c93f, x13);
  ASSERT_EQUAL_64(0x9f9b5c7a, x14);
  ASSERT_EQUAL_64(0xae1b882a, x15);

  TEARDOWN();
}


TEST(crc32cx) {
  SETUP_WITH_FEATURES(CPUFeatures::kCRC32);

  START();

  __ Mov(w0, 0);
  __ Mov(x1, 0);
  __ Crc32cx(w10, w0, x1);

  __ Mov(w0, 0x1);
  __ Mov(x1, UINT64_C(0x0000000800000031));
  __ Crc32cx(w11, w0, x1);

  __ Mov(w0, 0);
  __ Mov(x1, 128);
  __ Crc32cx(w13, w0, x1);

  __ Mov(w0, UINT32_MAX);
  __ Mov(x1, 255);
  __ Crc32cx(w14, w0, x1);

  __ Mov(w0, 0x00010001);
  __ Mov(x1, UINT64_C(0x1000100000000000));
  __ Crc32cx(w15, w0, x1);

  END();
  RUN();

  ASSERT_EQUAL_64(0x0, x10);
  ASSERT_EQUAL_64(0x7f320fcb, x11);
  ASSERT_EQUAL_64(0x34019664, x13);
  ASSERT_EQUAL_64(0x6cc27dd0, x14);
  ASSERT_EQUAL_64(0xc6f0acdb, x15);

  TEARDOWN();
}
#endif  // VIXL_INCLUDE_SIMULATOR_AARCH64


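// FABD: floating-point absolute difference, computing abs(a - b) lane by
// lane (or on a single scalar value).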
TEST(neon_fabd_h) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();
  __ Movi(v0.V2D(), 0x4000400040004000, 0x4000400040004000);
  __ Movi(v1.V2D(), 0x3800380038003800, 0x3800380038003800);
  __ Movi(v2.V2D(), 0x0000000000000000, 0x0000000000000000);
  __ Movi(v3.V2D(), 0x8000800080008000, 0x8000800080008000);
  __ Movi(v4.V2D(), 0x7c007c007c007c00, 0x7c007c007c007c00);
  __ Movi(v5.V2D(), 0xfc00fc00fc00fc00, 0xfc00fc00fc00fc00);

  __ Fabd(v6.V8H(), v1.V8H(), v0.V8H());
  __ Fabd(v7.V8H(), v2.V8H(), v3.V8H());
  __ Fabd(v8.V8H(), v2.V8H(), v5.V8H());
  __ Fabd(v9.V8H(), v3.V8H(), v4.V8H());
  __ Fabd(v10.V8H(), v3.V8H(), v5.V8H());
  __ Fabd(v11.V4H(), v1.V4H(), v0.V4H());
  __ Fabd(v12.V4H(), v2.V4H(), v3.V4H());
  __ Fabd(v13.V4H(), v2.V4H(), v5.V4H());
  __ Fabd(v14.V4H(), v3.V4H(), v4.V4H());
  __ Fabd(v15.V4H(), v3.V4H(), v5.V4H());
  END();

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
  RUN();

  ASSERT_EQUAL_128(0x3e003e003e003e00, 0x3e003e003e003e00, v6);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, v7);
  ASSERT_EQUAL_128(0x7c007c007c007c00, 0x7c007c007c007c00, v8);
  ASSERT_EQUAL_128(0x7c007c007c007c00, 0x7c007c007c007c00, v9);
  ASSERT_EQUAL_128(0x7c007c007c007c00, 0x7c007c007c007c00, v10);
  ASSERT_EQUAL_128(0, 0x3e003e003e003e00, v11);
  ASSERT_EQUAL_128(0, 0x0000000000000000, v12);
  ASSERT_EQUAL_128(0, 0x7c007c007c007c00, v13);
  ASSERT_EQUAL_128(0, 0x7c007c007c007c00, v14);
  ASSERT_EQUAL_128(0, 0x7c007c007c007c00, v15);
#endif  // VIXL_INCLUDE_SIMULATOR_AARCH64

  TEARDOWN();
}


TEST(neon_fabd_h_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf,
                      CPUFeatures::kFPHalf);

  START();
  __ Fmov(h0, Float16(2.0));
  __ Fmov(h1, Float16(0.5));
  __ Fmov(h2, Float16(0.0));
  __ Fmov(h3, Float16(-0.0));
  __ Fmov(h4, kFP16PositiveInfinity);
  __ Fmov(h5, kFP16NegativeInfinity);
  __ Fabd(h16, h1, h0);
  __ Fabd(h17, h2, h3);
  __ Fabd(h18, h2, h5);
  __ Fabd(h19, h3, h4);
  __ Fabd(h20, h3, h5);
  END();

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
  RUN();
  ASSERT_EQUAL_FP16(Float16(1.5), h16);
  ASSERT_EQUAL_FP16(Float16(0.0), h17);
  ASSERT_EQUAL_FP16(kFP16PositiveInfinity, h18);
  ASSERT_EQUAL_FP16(kFP16PositiveInfinity, h19);
  ASSERT_EQUAL_FP16(kFP16PositiveInfinity, h20);
#endif  // VIXL_INCLUDE_SIMULATOR_AARCH64

  TEARDOWN();
}


TEST(neon_fabd_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);

  START();
  __ Fmov(s0, 2.0);
  __ Fmov(s1, 0.5);
  __ Fmov(s2, 0.0);
  __ Fmov(s3, -0.0);
  __ Fmov(s4, kFP32PositiveInfinity);
  __ Fmov(s5, kFP32NegativeInfinity);
  __ Fabd(s16, s1, s0);
  __ Fabd(s17, s2, s3);
  __ Fabd(s18, s2, s5);
  __ Fabd(s19, s3, s4);
  __ Fabd(s20, s3, s5);

  __ Fmov(d21, 2.0);
  __ Fmov(d22, 0.5);
  __ Fmov(d23, 0.0);
  __ Fmov(d24, -0.0);
  __ Fmov(d25, kFP64PositiveInfinity);
  __ Fmov(d26, kFP64NegativeInfinity);
  __ Fabd(d27, d21, d22);
  __ Fabd(d28, d23, d24);
  __ Fabd(d29, d23, d26);
  __ Fabd(d30, d24, d25);
  __ Fabd(d31, d24, d26);
  END();

  RUN();

  ASSERT_EQUAL_FP32(1.5, s16);
  ASSERT_EQUAL_FP32(0.0, s17);
  ASSERT_EQUAL_FP32(kFP32PositiveInfinity, s18);
  ASSERT_EQUAL_FP32(kFP32PositiveInfinity, s19);
  ASSERT_EQUAL_FP32(kFP32PositiveInfinity, s20);
  ASSERT_EQUAL_FP64(1.5, d27);
  ASSERT_EQUAL_FP64(0.0, d28);
  ASSERT_EQUAL_FP64(kFP64PositiveInfinity, d29);
  ASSERT_EQUAL_FP64(kFP64PositiveInfinity, d30);
  ASSERT_EQUAL_FP64(kFP64PositiveInfinity, d31);

  TEARDOWN();
}


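// FRECPS: the Newton-Raphson reciprocal step, computing 2.0 - (a * b) as a
// fused operation. Iterating x' = x * FRECPS(a, x) refines an estimate x of
// 1/a, e.g. one produced by FRECPE.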
TEST(neon_frecps_h) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();
  __ Movi(v0.V2D(), 0x4000400040004000, 0x4000400040004000);
  __ Movi(v1.V2D(), 0xbc00bc00bc00bc00, 0xbc00bc00bc00bc00);
  __ Movi(v2.V2D(), 0x51a051a051a051a0, 0x51a051a051a051a0);
  __ Movi(v3.V2D(), 0x7c007c007c007c00, 0x7c007c007c007c00);
  __ Movi(v4.V2D(), 0xfc00fc00fc00fc00, 0xfc00fc00fc00fc00);

  __ Frecps(v5.V8H(), v0.V8H(), v2.V8H());
  __ Frecps(v6.V8H(), v1.V8H(), v2.V8H());
  __ Frecps(v7.V8H(), v0.V8H(), v3.V8H());
  __ Frecps(v8.V8H(), v0.V8H(), v4.V8H());
  __ Frecps(v9.V4H(), v0.V4H(), v2.V4H());
  __ Frecps(v10.V4H(), v1.V4H(), v2.V4H());
  __ Frecps(v11.V4H(), v0.V4H(), v3.V4H());
  __ Frecps(v12.V4H(), v0.V4H(), v4.V4H());
  END();

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
  RUN();

  ASSERT_EQUAL_128(0xd580d580d580d580, 0xd580d580d580d580, v5);
  ASSERT_EQUAL_128(0x51e051e051e051e0, 0x51e051e051e051e0, v6);
  ASSERT_EQUAL_128(0xfc00fc00fc00fc00, 0xfc00fc00fc00fc00, v7);
  ASSERT_EQUAL_128(0x7c007c007c007c00, 0x7c007c007c007c00, v8);
  ASSERT_EQUAL_128(0, 0xd580d580d580d580, v9);
  ASSERT_EQUAL_128(0, 0x51e051e051e051e0, v10);
  ASSERT_EQUAL_128(0, 0xfc00fc00fc00fc00, v11);
  ASSERT_EQUAL_128(0, 0x7c007c007c007c00, v12);

#endif  // VIXL_INCLUDE_SIMULATOR_AARCH64

  TEARDOWN();
}


TEST(neon_frecps_h_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf,
                      CPUFeatures::kFPHalf);

  START();
  __ Fmov(h0, Float16(2.0));
  __ Fmov(h1, Float16(-1.0));
  __ Fmov(h2, Float16(45.0));
  __ Fmov(h3, kFP16PositiveInfinity);
  __ Fmov(h4, kFP16NegativeInfinity);

  __ Frecps(h5, h0, h2);
  __ Frecps(h6, h1, h2);
  __ Frecps(h7, h0, h3);
  __ Frecps(h8, h0, h4);
  END();

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
  RUN();

  ASSERT_EQUAL_FP16(Float16(-88.0), h5);
  ASSERT_EQUAL_FP16(Float16(47.0), h6);
  ASSERT_EQUAL_FP16(kFP16NegativeInfinity, h7);
  ASSERT_EQUAL_FP16(kFP16PositiveInfinity, h8);

#endif  // VIXL_INCLUDE_SIMULATOR_AARCH64

  TEARDOWN();
}


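// FRSQRTS: the reciprocal square root step, computing (3.0 - a * b) / 2.0 as
// a fused operation; used to refine FRSQRTE estimates of 1/sqrt(a).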
TEST(neon_frsqrts_h) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();
  __ Movi(v0.V2D(), 0x4000400040004000, 0x4000400040004000);
  __ Movi(v1.V2D(), 0xbc00bc00bc00bc00, 0xbc00bc00bc00bc00);
  __ Movi(v2.V2D(), 0x51a051a051a051a0, 0x51a051a051a051a0);
  __ Movi(v3.V2D(), 0x7c007c007c007c00, 0x7c007c007c007c00);
  __ Movi(v4.V2D(), 0xfc00fc00fc00fc00, 0xfc00fc00fc00fc00);

  __ Frsqrts(v5.V8H(), v0.V8H(), v2.V8H());
  __ Frsqrts(v6.V8H(), v1.V8H(), v2.V8H());
  __ Frsqrts(v7.V8H(), v0.V8H(), v3.V8H());
  __ Frsqrts(v8.V8H(), v0.V8H(), v4.V8H());
  __ Frsqrts(v9.V4H(), v0.V4H(), v2.V4H());
  __ Frsqrts(v10.V4H(), v1.V4H(), v2.V4H());
  __ Frsqrts(v11.V4H(), v0.V4H(), v3.V4H());
  __ Frsqrts(v12.V4H(), v0.V4H(), v4.V4H());
  END();

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
  RUN();

  ASSERT_EQUAL_128(0xd170d170d170d170, 0xd170d170d170d170, v5);
  ASSERT_EQUAL_128(0x4e004e004e004e00, 0x4e004e004e004e00, v6);
  ASSERT_EQUAL_128(0xfc00fc00fc00fc00, 0xfc00fc00fc00fc00, v7);
  ASSERT_EQUAL_128(0x7c007c007c007c00, 0x7c007c007c007c00, v8);
  ASSERT_EQUAL_128(0, 0xd170d170d170d170, v9);
  ASSERT_EQUAL_128(0, 0x4e004e004e004e00, v10);
  ASSERT_EQUAL_128(0, 0xfc00fc00fc00fc00, v11);
  ASSERT_EQUAL_128(0, 0x7c007c007c007c00, v12);

#endif  // VIXL_INCLUDE_SIMULATOR_AARCH64

  TEARDOWN();
}


TEST(neon_frsqrts_h_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf,
                      CPUFeatures::kFPHalf);

  START();
  __ Fmov(h0, Float16(2.0));
  __ Fmov(h1, Float16(-1.0));
  __ Fmov(h2, Float16(45.0));
  __ Fmov(h3, kFP16PositiveInfinity);
  __ Fmov(h4, kFP16NegativeInfinity);

  __ Frsqrts(h5, h0, h2);
  __ Frsqrts(h6, h1, h2);
  __ Frsqrts(h7, h0, h3);
  __ Frsqrts(h8, h0, h4);
  END();

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
  RUN();

  ASSERT_EQUAL_FP16(Float16(-43.5), h5);
  ASSERT_EQUAL_FP16(Float16(24.0), h6);
  ASSERT_EQUAL_FP16(kFP16NegativeInfinity, h7);
  ASSERT_EQUAL_FP16(kFP16PositiveInfinity, h8);

#endif  // VIXL_INCLUDE_SIMULATOR_AARCH64

  TEARDOWN();
}


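// FADDP: pairwise floating-point add. The vector form concatenates the two
// sources and sums adjacent lane pairs; the scalar form sums the two lanes
// of its source, so Faddp(s0, v0.V2S()) computes v0[0] + v0[1].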
TEST(neon_faddp_h) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();
  __ Movi(v0.V2D(), 0x3c0040003c004000, 0x3c0040003c004000);
  __ Movi(v1.V2D(), 0xfc007c00fc007c00, 0xfc007c00fc007c00);
  __ Movi(v2.V2D(), 0x0000800000008000, 0x0000800000008000);
  __ Movi(v3.V2D(), 0x7e007c017e007c01, 0x7e007c017e007c01);

  __ Faddp(v4.V8H(), v1.V8H(), v0.V8H());
  __ Faddp(v5.V8H(), v3.V8H(), v2.V8H());
  __ Faddp(v6.V4H(), v1.V4H(), v0.V4H());
  __ Faddp(v7.V4H(), v3.V4H(), v2.V4H());
  END();

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
  RUN();

  ASSERT_EQUAL_128(0x4200420042004200, 0x7e007e007e007e00, v4);
  ASSERT_EQUAL_128(0x0000000000000000, 0x7e017e017e017e01, v5);
  ASSERT_EQUAL_128(0, 0x420042007e007e00, v6);
  ASSERT_EQUAL_128(0, 0x000000007e017e01, v7);
#endif  // VIXL_INCLUDE_SIMULATOR_AARCH64

  TEARDOWN();
}


TEST(neon_faddp_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);

  START();
  __ Movi(d0, 0x3f80000040000000);
  __ Movi(d1, 0xff8000007f800000);
  __ Movi(d2, 0x0000000080000000);
  __ Faddp(s0, v0.V2S());
  __ Faddp(s1, v1.V2S());
  __ Faddp(s2, v2.V2S());

  __ Movi(v3.V2D(), 0xc000000000000000, 0x4000000000000000);
  __ Movi(v4.V2D(), 0xfff8000000000000, 0x7ff8000000000000);
  __ Movi(v5.V2D(), 0x0000000000000000, 0x8000000000000000);
  __ Faddp(d3, v3.V2D());
  __ Faddp(d4, v4.V2D());
  __ Faddp(d5, v5.V2D());
  END();

  RUN();

  ASSERT_EQUAL_FP32(3.0, s0);
  ASSERT_EQUAL_FP32(kFP32DefaultNaN, s1);
  ASSERT_EQUAL_FP32(0.0, s2);
  ASSERT_EQUAL_FP64(0.0, d3);
  ASSERT_EQUAL_FP64(kFP64DefaultNaN, d4);
  ASSERT_EQUAL_FP64(0.0, d5);

  TEARDOWN();
}


TEST(neon_faddp_h_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();
  __ Movi(s0, 0x3c004000);
  __ Movi(s1, 0xfc007c00);
  __ Movi(s2, 0x00008000);
  __ Faddp(h0, v0.V2H());
  __ Faddp(h1, v1.V2H());
  __ Faddp(h2, v2.V2H());
  END();

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
  RUN();

  ASSERT_EQUAL_FP16(Float16(3.0), h0);
  ASSERT_EQUAL_FP16(kFP16DefaultNaN, h1);
  ASSERT_EQUAL_FP16(Float16(0.0), h2);
#endif  // VIXL_INCLUDE_SIMULATOR_AARCH64

  TEARDOWN();
}


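// FMAXP and FMINP: pairwise maximum and minimum. As with FMAX/FMIN, a NaN in
// either lane of a pair propagates to the corresponding result lane.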
TEST(neon_fmaxp_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);

  START();
  __ Movi(d0, 0x3f80000040000000);
  __ Movi(d1, 0xff8000007f800000);
  __ Movi(d2, 0x7fc00000ff800000);
  __ Fmaxp(s0, v0.V2S());
  __ Fmaxp(s1, v1.V2S());
  __ Fmaxp(s2, v2.V2S());

  __ Movi(v3.V2D(), 0x3ff0000000000000, 0x4000000000000000);
  __ Movi(v4.V2D(), 0xfff0000000000000, 0x7ff0000000000000);
  __ Movi(v5.V2D(), 0x7ff0000000000000, 0x7ff8000000000000);
  __ Fmaxp(d3, v3.V2D());
  __ Fmaxp(d4, v4.V2D());
  __ Fmaxp(d5, v5.V2D());
  END();

  RUN();

  ASSERT_EQUAL_FP32(2.0, s0);
  ASSERT_EQUAL_FP32(kFP32PositiveInfinity, s1);
  ASSERT_EQUAL_FP32(kFP32DefaultNaN, s2);
  ASSERT_EQUAL_FP64(2.0, d3);
  ASSERT_EQUAL_FP64(kFP64PositiveInfinity, d4);
  ASSERT_EQUAL_FP64(kFP64DefaultNaN, d5);

  TEARDOWN();
}


TEST(neon_fmaxp_h_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();
  __ Movi(s0, 0x3c004000);
  __ Movi(s1, 0xfc007c00);
  __ Movi(s2, 0x7e00fc00);
  __ Fmaxp(h0, v0.V2H());
  __ Fmaxp(h1, v1.V2H());
  __ Fmaxp(h2, v2.V2H());
  END();

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
  RUN();

  ASSERT_EQUAL_FP16(Float16(2.0), h0);
  ASSERT_EQUAL_FP16(kFP16PositiveInfinity, h1);
  ASSERT_EQUAL_FP16(kFP16DefaultNaN, h2);
#endif  // VIXL_INCLUDE_SIMULATOR_AARCH64

  TEARDOWN();
}


TEST(neon_fmax_h) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();
  __ Movi(v0.V2D(), 0x3c003c003c003c00, 0x3c003c003c003c00);
  __ Movi(v1.V2D(), 0x4000400040004000, 0x4000400040004000);
  __ Movi(v2.V2D(), 0xfc00fc00fc00fc00, 0xfc00fc00fc00fc00);
  __ Movi(v3.V2D(), 0x7c007c007c007c00, 0x7c007c007c007c00);
  __ Movi(v4.V2D(), 0x7e007e007e007e00, 0x7e007e007e007e00);
  __ Movi(v5.V2D(), 0x7c017c017c017c01, 0x7c017c017c017c01);

  __ Fmax(v6.V8H(), v0.V8H(), v1.V8H());
  __ Fmax(v7.V8H(), v2.V8H(), v3.V8H());
  __ Fmax(v8.V8H(), v4.V8H(), v0.V8H());
  __ Fmax(v9.V8H(), v5.V8H(), v1.V8H());
  __ Fmax(v10.V4H(), v0.V4H(), v1.V4H());
  __ Fmax(v11.V4H(), v2.V4H(), v3.V4H());
  __ Fmax(v12.V4H(), v4.V4H(), v0.V4H());
  __ Fmax(v13.V4H(), v5.V4H(), v1.V4H());
  END();

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
  RUN();

  ASSERT_EQUAL_128(0x4000400040004000, 0x4000400040004000, v6);
  ASSERT_EQUAL_128(0x7c007c007c007c00, 0x7c007c007c007c00, v7);
  ASSERT_EQUAL_128(0x7e007e007e007e00, 0x7e007e007e007e00, v8);
  ASSERT_EQUAL_128(0x7e017e017e017e01, 0x7e017e017e017e01, v9);
  ASSERT_EQUAL_128(0, 0x4000400040004000, v10);
  ASSERT_EQUAL_128(0, 0x7c007c007c007c00, v11);
  ASSERT_EQUAL_128(0, 0x7e007e007e007e00, v12);
  ASSERT_EQUAL_128(0, 0x7e017e017e017e01, v13);
#endif  // VIXL_INCLUDE_SIMULATOR_AARCH64

  TEARDOWN();
}


TEST(neon_fmaxp_h) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();
  __ Movi(v0.V2D(), 0x3c0040003c004000, 0x3c0040003c004000);
  __ Movi(v1.V2D(), 0xfc007c00fc007c00, 0xfc007c00fc007c00);
  __ Movi(v2.V2D(), 0x7e003c007e003c00, 0x7e003c007e003c00);
  __ Movi(v3.V2D(), 0x7c0140007c014000, 0x7c0140007c014000);

  __ Fmaxp(v6.V8H(), v0.V8H(), v1.V8H());
  __ Fmaxp(v7.V8H(), v2.V8H(), v3.V8H());
  __ Fmaxp(v8.V4H(), v0.V4H(), v1.V4H());
  __ Fmaxp(v9.V4H(), v2.V4H(), v3.V4H());
  END();

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
  RUN();

  ASSERT_EQUAL_128(0x7c007c007c007c00, 0x4000400040004000, v6);
  ASSERT_EQUAL_128(0x7e017e017e017e01, 0x7e007e007e007e00, v7);
  ASSERT_EQUAL_128(0, 0x7c007c0040004000, v8);
  ASSERT_EQUAL_128(0, 0x7e017e017e007e00, v9);
#endif  // VIXL_INCLUDE_SIMULATOR_AARCH64

  TEARDOWN();
}


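// The FMAXNM/FMINNM family follows the IEEE 754-2008 maxNum/minNum rules:
// when exactly one operand is a quiet NaN it is treated as missing and the
// other operand is returned, unlike FMAX/FMIN which propagate the NaN. A
// signalling NaN operand is still propagated (in quieted form).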
TEST(neon_fmaxnm_h) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();
  __ Movi(v0.V2D(), 0x3c003c003c003c00, 0x3c003c003c003c00);
  __ Movi(v1.V2D(), 0x4000400040004000, 0x4000400040004000);
  __ Movi(v2.V2D(), 0xfc00fc00fc00fc00, 0xfc00fc00fc00fc00);
  __ Movi(v3.V2D(), 0x7c007c007c007c00, 0x7c007c007c007c00);
  __ Movi(v4.V2D(), 0x7e007e007e007e00, 0x7e007e007e007e00);
  __ Movi(v5.V2D(), 0x7c017c017c017c01, 0x7c017c017c017c01);

  __ Fmaxnm(v6.V8H(), v0.V8H(), v1.V8H());
  __ Fmaxnm(v7.V8H(), v2.V8H(), v3.V8H());
  __ Fmaxnm(v8.V8H(), v4.V8H(), v0.V8H());
  __ Fmaxnm(v9.V8H(), v5.V8H(), v1.V8H());
  __ Fmaxnm(v10.V4H(), v0.V4H(), v1.V4H());
  __ Fmaxnm(v11.V4H(), v2.V4H(), v3.V4H());
  __ Fmaxnm(v12.V4H(), v4.V4H(), v0.V4H());
  __ Fmaxnm(v13.V4H(), v5.V4H(), v1.V4H());
  END();

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
  RUN();

  ASSERT_EQUAL_128(0x4000400040004000, 0x4000400040004000, v6);
  ASSERT_EQUAL_128(0x7c007c007c007c00, 0x7c007c007c007c00, v7);
  ASSERT_EQUAL_128(0x3c003c003c003c00, 0x3c003c003c003c00, v8);
  ASSERT_EQUAL_128(0x7e017e017e017e01, 0x7e017e017e017e01, v9);
  ASSERT_EQUAL_128(0, 0x4000400040004000, v10);
  ASSERT_EQUAL_128(0, 0x7c007c007c007c00, v11);
  ASSERT_EQUAL_128(0, 0x3c003c003c003c00, v12);
  ASSERT_EQUAL_128(0, 0x7e017e017e017e01, v13);
#endif  // VIXL_INCLUDE_SIMULATOR_AARCH64

  TEARDOWN();
}


TEST(neon_fmaxnmp_h) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();
  __ Movi(v0.V2D(), 0x3c0040003c004000, 0x3c0040003c004000);
  __ Movi(v1.V2D(), 0xfc007c00fc007c00, 0xfc007c00fc007c00);
  __ Movi(v2.V2D(), 0x7e003c007e003c00, 0x7e003c007e003c00);
  __ Movi(v3.V2D(), 0x7c0140007c014000, 0x7c0140007c014000);

  __ Fmaxnmp(v6.V8H(), v0.V8H(), v1.V8H());
  __ Fmaxnmp(v7.V8H(), v2.V8H(), v3.V8H());
  __ Fmaxnmp(v8.V4H(), v0.V4H(), v1.V4H());
  __ Fmaxnmp(v9.V4H(), v2.V4H(), v3.V4H());
  END();

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
  RUN();

  ASSERT_EQUAL_128(0x7c007c007c007c00, 0x4000400040004000, v6);
  ASSERT_EQUAL_128(0x7e017e017e017e01, 0x3c003c003c003c00, v7);
  ASSERT_EQUAL_128(0, 0x7c007c0040004000, v8);
  ASSERT_EQUAL_128(0, 0x7e017e013c003c00, v9);
#endif  // VIXL_INCLUDE_SIMULATOR_AARCH64

  TEARDOWN();
}


TEST(neon_fmaxnmp_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);

  START();
  __ Movi(d0, 0x3f80000040000000);
  __ Movi(d1, 0xff8000007f800000);
  __ Movi(d2, 0x7fc00000ff800000);
  __ Fmaxnmp(s0, v0.V2S());
  __ Fmaxnmp(s1, v1.V2S());
  __ Fmaxnmp(s2, v2.V2S());

  __ Movi(v3.V2D(), 0x3ff0000000000000, 0x4000000000000000);
  __ Movi(v4.V2D(), 0xfff0000000000000, 0x7ff0000000000000);
  __ Movi(v5.V2D(), 0x7ff8000000000000, 0xfff0000000000000);
  __ Fmaxnmp(d3, v3.V2D());
  __ Fmaxnmp(d4, v4.V2D());
  __ Fmaxnmp(d5, v5.V2D());
  END();

  RUN();

  ASSERT_EQUAL_FP32(2.0, s0);
  ASSERT_EQUAL_FP32(kFP32PositiveInfinity, s1);
  ASSERT_EQUAL_FP32(kFP32NegativeInfinity, s2);
  ASSERT_EQUAL_FP64(2.0, d3);
  ASSERT_EQUAL_FP64(kFP64PositiveInfinity, d4);
  ASSERT_EQUAL_FP64(kFP64NegativeInfinity, d5);

  TEARDOWN();
}


TEST(neon_fmaxnmp_h_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();
  __ Movi(s0, 0x3c004000);
  __ Movi(s1, 0xfc007c00);
  __ Movi(s2, 0x7e00fc00);
  __ Fmaxnmp(h0, v0.V2H());
  __ Fmaxnmp(h1, v1.V2H());
  __ Fmaxnmp(h2, v2.V2H());
  END();

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
  RUN();

  ASSERT_EQUAL_FP16(Float16(2.0), h0);
  ASSERT_EQUAL_FP16(kFP16PositiveInfinity, h1);
  ASSERT_EQUAL_FP16(kFP16NegativeInfinity, h2);
#endif  // VIXL_INCLUDE_SIMULATOR_AARCH64

  TEARDOWN();
}


TEST(neon_fminp_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);

  START();
  __ Movi(d0, 0x3f80000040000000);
  __ Movi(d1, 0xff8000007f800000);
  __ Movi(d2, 0x7fc00000ff800000);
  __ Fminp(s0, v0.V2S());
  __ Fminp(s1, v1.V2S());
  __ Fminp(s2, v2.V2S());

  __ Movi(v3.V2D(), 0x3ff0000000000000, 0x4000000000000000);
  __ Movi(v4.V2D(), 0xfff0000000000000, 0x7ff0000000000000);
  __ Movi(v5.V2D(), 0x7ff0000000000000, 0x7ff8000000000000);
  __ Fminp(d3, v3.V2D());
  __ Fminp(d4, v4.V2D());
  __ Fminp(d5, v5.V2D());
  END();

  RUN();

  ASSERT_EQUAL_FP32(1.0, s0);
  ASSERT_EQUAL_FP32(kFP32NegativeInfinity, s1);
  ASSERT_EQUAL_FP32(kFP32DefaultNaN, s2);
  ASSERT_EQUAL_FP64(1.0, d3);
  ASSERT_EQUAL_FP64(kFP64NegativeInfinity, d4);
  ASSERT_EQUAL_FP64(kFP64DefaultNaN, d5);

  TEARDOWN();
}


TEST(neon_fminp_h_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();
  __ Movi(s0, 0x3c004000);
  __ Movi(s1, 0xfc007c00);
  __ Movi(s2, 0x7e00fc00);
  __ Fminp(h0, v0.V2H());
  __ Fminp(h1, v1.V2H());
  __ Fminp(h2, v2.V2H());
  END();

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
  RUN();

  ASSERT_EQUAL_FP16(Float16(1.0), h0);
  ASSERT_EQUAL_FP16(kFP16NegativeInfinity, h1);
  ASSERT_EQUAL_FP16(kFP16DefaultNaN, h2);
#endif  // VIXL_INCLUDE_SIMULATOR_AARCH64

  TEARDOWN();
}


TEST(neon_fmin_h) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();
  __ Movi(v0.V2D(), 0x3c003c003c003c00, 0x3c003c003c003c00);
  __ Movi(v1.V2D(), 0x4000400040004000, 0x4000400040004000);
  __ Movi(v2.V2D(), 0xfc00fc00fc00fc00, 0xfc00fc00fc00fc00);
  __ Movi(v3.V2D(), 0x7c007c007c007c00, 0x7c007c007c007c00);
  __ Movi(v4.V2D(), 0x7e007e007e007e00, 0x7e007e007e007e00);
  __ Movi(v5.V2D(), 0x7c017c017c017c01, 0x7c017c017c017c01);

  __ Fmin(v6.V8H(), v0.V8H(), v1.V8H());
  __ Fmin(v7.V8H(), v2.V8H(), v3.V8H());
  __ Fmin(v8.V8H(), v4.V8H(), v0.V8H());
  __ Fmin(v9.V8H(), v5.V8H(), v1.V8H());
  __ Fmin(v10.V4H(), v0.V4H(), v1.V4H());
  __ Fmin(v11.V4H(), v2.V4H(), v3.V4H());
  __ Fmin(v12.V4H(), v4.V4H(), v0.V4H());
  __ Fmin(v13.V4H(), v5.V4H(), v1.V4H());
  END();

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
  RUN();

  ASSERT_EQUAL_128(0x3c003c003c003c00, 0x3c003c003c003c00, v6);
  ASSERT_EQUAL_128(0xfc00fc00fc00fc00, 0xfc00fc00fc00fc00, v7);
  ASSERT_EQUAL_128(0x7e007e007e007e00, 0x7e007e007e007e00, v8);
  ASSERT_EQUAL_128(0x7e017e017e017e01, 0x7e017e017e017e01, v9);
  ASSERT_EQUAL_128(0, 0x3c003c003c003c00, v10);
  ASSERT_EQUAL_128(0, 0xfc00fc00fc00fc00, v11);
  ASSERT_EQUAL_128(0, 0x7e007e007e007e00, v12);
  ASSERT_EQUAL_128(0, 0x7e017e017e017e01, v13);
#endif  // VIXL_INCLUDE_SIMULATOR_AARCH64

  TEARDOWN();
}


TEST(neon_fminp_h) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();
  __ Movi(v0.V2D(), 0x3c0040003c004000, 0x3c0040003c004000);
  __ Movi(v1.V2D(), 0xfc007c00fc007c00, 0xfc007c00fc007c00);
  __ Movi(v2.V2D(), 0x7e003c007e003c00, 0x7e003c007e003c00);
  __ Movi(v3.V2D(), 0x7c0140007c014000, 0x7c0140007c014000);

  __ Fminp(v6.V8H(), v0.V8H(), v1.V8H());
  __ Fminp(v7.V8H(), v2.V8H(), v3.V8H());
  __ Fminp(v8.V4H(), v0.V4H(), v1.V4H());
  __ Fminp(v9.V4H(), v2.V4H(), v3.V4H());
  END();

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
  RUN();

  ASSERT_EQUAL_128(0xfc00fc00fc00fc00, 0x3c003c003c003c00, v6);
  ASSERT_EQUAL_128(0x7e017e017e017e01, 0x7e007e007e007e00, v7);
  ASSERT_EQUAL_128(0, 0xfc00fc003c003c00, v8);
  ASSERT_EQUAL_128(0, 0x7e017e017e007e00, v9);
#endif  // VIXL_INCLUDE_SIMULATOR_AARCH64

  TEARDOWN();
}


TEST(neon_fminnm_h)26433 TEST(neon_fminnm_h) {
26434 SETUP_WITH_FEATURES(CPUFeatures::kNEON,
26435 CPUFeatures::kFP,
26436 CPUFeatures::kNEONHalf);
26437
26438 START();
  __ Movi(v0.V2D(), 0x3c003c003c003c00, 0x3c003c003c003c00);
  __ Movi(v1.V2D(), 0x4000400040004000, 0x4000400040004000);
  __ Movi(v2.V2D(), 0xfc00fc00fc00fc00, 0xfc00fc00fc00fc00);
  __ Movi(v3.V2D(), 0x7c007c007c007c00, 0x7c007c007c007c00);
  __ Movi(v4.V2D(), 0x7e007e007e007e00, 0x7e007e007e007e00);
  __ Movi(v5.V2D(), 0x7c017c017c017c01, 0x7c017c017c017c01);

  __ Fminnm(v6.V8H(), v0.V8H(), v1.V8H());
  __ Fminnm(v7.V8H(), v2.V8H(), v3.V8H());
  __ Fminnm(v8.V8H(), v4.V8H(), v0.V8H());
  __ Fminnm(v9.V8H(), v5.V8H(), v1.V8H());
  __ Fminnm(v10.V4H(), v0.V4H(), v1.V4H());
  __ Fminnm(v11.V4H(), v2.V4H(), v3.V4H());
  __ Fminnm(v12.V4H(), v4.V4H(), v0.V4H());
  __ Fminnm(v13.V4H(), v5.V4H(), v1.V4H());
  END();

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
  RUN();

  ASSERT_EQUAL_128(0x3c003c003c003c00, 0x3c003c003c003c00, v6);
  ASSERT_EQUAL_128(0xfc00fc00fc00fc00, 0xfc00fc00fc00fc00, v7);
  ASSERT_EQUAL_128(0x3c003c003c003c00, 0x3c003c003c003c00, v8);
  ASSERT_EQUAL_128(0x7e017e017e017e01, 0x7e017e017e017e01, v9);
  ASSERT_EQUAL_128(0, 0x3c003c003c003c00, v10);
  ASSERT_EQUAL_128(0, 0xfc00fc00fc00fc00, v11);
  ASSERT_EQUAL_128(0, 0x3c003c003c003c00, v12);
  ASSERT_EQUAL_128(0, 0x7e017e017e017e01, v13);
#endif  // VIXL_INCLUDE_SIMULATOR_AARCH64

  TEARDOWN();
}


TEST(neon_fminnmp_h) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();
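  // Pairwise variant of Fminnm: quiet NaNs lose against numbers, while the
  // signalling NaN (0x7c01) is quieted to 0x7e01 and propagated.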
  __ Movi(v0.V2D(), 0x3c0040003c004000, 0x3c0040003c004000);
  __ Movi(v1.V2D(), 0xfc007c00fc007c00, 0xfc007c00fc007c00);
  __ Movi(v2.V2D(), 0x7e003c007e003c00, 0x7e003c007e003c00);
  __ Movi(v3.V2D(), 0x7c0140007c014000, 0x7c0140007c014000);

  __ Fminnmp(v6.V8H(), v0.V8H(), v1.V8H());
  __ Fminnmp(v7.V8H(), v2.V8H(), v3.V8H());
  __ Fminnmp(v8.V4H(), v0.V4H(), v1.V4H());
  __ Fminnmp(v9.V4H(), v2.V4H(), v3.V4H());
  END();

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
  RUN();

  ASSERT_EQUAL_128(0xfc00fc00fc00fc00, 0x3c003c003c003c00, v6);
  ASSERT_EQUAL_128(0x7e017e017e017e01, 0x3c003c003c003c00, v7);
  ASSERT_EQUAL_128(0, 0xfc00fc003c003c00, v8);
  ASSERT_EQUAL_128(0, 0x7e017e013c003c00, v9);
#endif  // VIXL_INCLUDE_SIMULATOR_AARCH64

  TEARDOWN();
}


TEST(neon_fminnmp_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON, CPUFeatures::kFP);

  START();
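  // Single-precision bit patterns: 0x3f800000 is 1.0, 0x7f800000 is
  // +infinity, 0xff800000 is -infinity and 0x7fc00000 is the default NaN.
  // The double-precision constants below encode the same values.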
  __ Movi(d0, 0x3f80000040000000);
  __ Movi(d1, 0xff8000007f800000);
  __ Movi(d2, 0x7fc00000ff800000);
  __ Fminnmp(s0, v0.V2S());
  __ Fminnmp(s1, v1.V2S());
  __ Fminnmp(s2, v2.V2S());

  __ Movi(v3.V2D(), 0x3ff0000000000000, 0x4000000000000000);
  __ Movi(v4.V2D(), 0xfff0000000000000, 0x7ff0000000000000);
  __ Movi(v5.V2D(), 0x7ff8000000000000, 0xfff0000000000000);
  __ Fminnmp(d3, v3.V2D());
  __ Fminnmp(d4, v4.V2D());
  __ Fminnmp(d5, v5.V2D());
  END();

  RUN();

  ASSERT_EQUAL_FP32(1.0, s0);
  ASSERT_EQUAL_FP32(kFP32NegativeInfinity, s1);
  ASSERT_EQUAL_FP32(kFP32NegativeInfinity, s2);
  ASSERT_EQUAL_FP64(1.0, d3);
  ASSERT_EQUAL_FP64(kFP64NegativeInfinity, d4);
  ASSERT_EQUAL_FP64(kFP64NegativeInfinity, d5);

  TEARDOWN();
}


TEST(neon_fminnmp_h_scalar) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON,
                      CPUFeatures::kFP,
                      CPUFeatures::kNEONHalf);

  START();
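  // As in the other scalar pairwise tests, each S register packs two FP16
  // values.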
  __ Movi(s0, 0x3c004000);
  __ Movi(s1, 0xfc007c00);
  __ Movi(s2, 0x7e00fc00);
  __ Fminnmp(h0, v0.V2H());
  __ Fminnmp(h1, v1.V2H());
  __ Fminnmp(h2, v2.V2H());
  END();

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
  RUN();

  ASSERT_EQUAL_FP16(Float16(1.0), h0);
  ASSERT_EQUAL_FP16(kFP16NegativeInfinity, h1);
  ASSERT_EQUAL_FP16(kFP16NegativeInfinity, h2);
#endif  // VIXL_INCLUDE_SIMULATOR_AARCH64

  TEARDOWN();
}


TEST(neon_tbl) {
  SETUP_WITH_FEATURES(CPUFeatures::kNEON);

  START();
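  // v30, v31, v0 and v1 provide the table bytes; v4 to v7 provide the
  // indices. An out-of-range index yields a zero byte for Tbl, but leaves the
  // corresponding destination byte unchanged for Tbx.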
  __ Movi(v30.V2D(), 0xbf561e188b1280e9, 0xbd542b8cbd24e8e8);
  __ Movi(v31.V2D(), 0xb5e9883d2c88a46d, 0x12276d5b614c915e);
  __ Movi(v0.V2D(), 0xc45b7782bc5ecd72, 0x5dd4fe5a4bc6bf5e);
  __ Movi(v1.V2D(), 0x1e3254094bd1746a, 0xf099ecf50e861c80);

  __ Movi(v4.V2D(), 0xf80c030100031f16, 0x00070504031201ff);
  __ Movi(v5.V2D(), 0x1f01001afc14202a, 0x2a081e1b0c02020c);
  __ Movi(v6.V2D(), 0x353f1a13022a2360, 0x2c464a00203a0a33);
  __ Movi(v7.V2D(), 0x64801a1c054cf30d, 0x793a2c052e213739);

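  // Seed the Tbl destinations with arbitrary data; every destination byte
  // should be overwritten by the lookup.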
  __ Movi(v8.V2D(), 0xb7f60ad7d7d88f13, 0x13eefc240496e842);
  __ Movi(v9.V2D(), 0x1be199c7c69b47ec, 0x8e4b9919f6eed443);
  __ Movi(v10.V2D(), 0x9bd2e1654c69e48f, 0x2143d089e426c6d2);
  __ Movi(v11.V2D(), 0xc31dbdc4a0393065, 0x1ecc2077caaf64d8);
  __ Movi(v12.V2D(), 0x29b24463967bc6eb, 0xdaf59970df01c93b);
  __ Movi(v13.V2D(), 0x3e20a4a4cb6813f4, 0x20a5832713dae669);
  __ Movi(v14.V2D(), 0xc5ff9a94041b1fdf, 0x2f46cde38cba2682);
  __ Movi(v15.V2D(), 0xd8cc5b0e61f387e6, 0xe69d6d314971e8fd);

  __ Tbl(v8.V16B(), v1.V16B(), v4.V16B());
  __ Tbl(v9.V16B(), v0.V16B(), v1.V16B(), v5.V16B());
  __ Tbl(v10.V16B(), v31.V16B(), v0.V16B(), v1.V16B(), v6.V16B());
  __ Tbl(v11.V16B(), v30.V16B(), v31.V16B(), v0.V16B(), v1.V16B(), v7.V16B());
  __ Tbl(v12.V8B(), v1.V16B(), v4.V8B());
  __ Tbl(v13.V8B(), v0.V16B(), v1.V16B(), v5.V8B());
  __ Tbl(v14.V8B(), v31.V16B(), v0.V16B(), v1.V16B(), v6.V8B());
  __ Tbl(v15.V8B(), v30.V16B(), v31.V16B(), v0.V16B(), v1.V16B(), v7.V8B());

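  // Seed the Tbx destinations with the same data; bytes selected by an
  // out-of-range index keep these values.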
  __ Movi(v16.V2D(), 0xb7f60ad7d7d88f13, 0x13eefc240496e842);
  __ Movi(v17.V2D(), 0x1be199c7c69b47ec, 0x8e4b9919f6eed443);
  __ Movi(v18.V2D(), 0x9bd2e1654c69e48f, 0x2143d089e426c6d2);
  __ Movi(v19.V2D(), 0xc31dbdc4a0393065, 0x1ecc2077caaf64d8);
  __ Movi(v20.V2D(), 0x29b24463967bc6eb, 0xdaf59970df01c93b);
  __ Movi(v21.V2D(), 0x3e20a4a4cb6813f4, 0x20a5832713dae669);
  __ Movi(v22.V2D(), 0xc5ff9a94041b1fdf, 0x2f46cde38cba2682);
  __ Movi(v23.V2D(), 0xd8cc5b0e61f387e6, 0xe69d6d314971e8fd);

  __ Tbx(v16.V16B(), v1.V16B(), v4.V16B());
  __ Tbx(v17.V16B(), v0.V16B(), v1.V16B(), v5.V16B());
  __ Tbx(v18.V16B(), v31.V16B(), v0.V16B(), v1.V16B(), v6.V16B());
  __ Tbx(v19.V16B(), v30.V16B(), v31.V16B(), v0.V16B(), v1.V16B(), v7.V16B());
  __ Tbx(v20.V8B(), v1.V16B(), v4.V8B());
  __ Tbx(v21.V8B(), v0.V16B(), v1.V16B(), v5.V8B());
  __ Tbx(v22.V8B(), v31.V16B(), v0.V16B(), v1.V16B(), v6.V8B());
  __ Tbx(v23.V8B(), v30.V16B(), v31.V16B(), v0.V16B(), v1.V16B(), v7.V8B());
  END();

  RUN();

  ASSERT_EQUAL_128(0x00090e1c800e0000, 0x80f0ecf50e001c00, v8);
  ASSERT_EQUAL_128(0x1ebf5ed100f50000, 0x0072324b82c6c682, v9);
  ASSERT_EQUAL_128(0x00005e4b4cd10e00, 0x0900005e80008800, v10);
  ASSERT_EQUAL_128(0x0000883d2b00001e, 0x00d1822b5bbff074, v11);
  ASSERT_EQUAL_128(0x0000000000000000, 0x80f0ecf50e001c00, v12);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0072324b82c6c682, v13);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0900005e80008800, v14);
  ASSERT_EQUAL_128(0x0000000000000000, 0x00d1822b5bbff074, v15);

  ASSERT_EQUAL_128(0xb7090e1c800e8f13, 0x80f0ecf50e961c42, v16);
  ASSERT_EQUAL_128(0x1ebf5ed1c6f547ec, 0x8e72324b82c6c682, v17);
  ASSERT_EQUAL_128(0x9bd25e4b4cd10e8f, 0x0943d05e802688d2, v18);
  ASSERT_EQUAL_128(0xc31d883d2b39301e, 0x1ed1822b5bbff074, v19);
  ASSERT_EQUAL_128(0x0000000000000000, 0x80f0ecf50e011c3b, v20);
  ASSERT_EQUAL_128(0x0000000000000000, 0x2072324b82c6c682, v21);
  ASSERT_EQUAL_128(0x0000000000000000, 0x0946cd5e80ba8882, v22);
  ASSERT_EQUAL_128(0x0000000000000000, 0xe6d1822b5bbff074, v23);

  TEARDOWN();
}


TEST(regress_cmp_shift_imm) {
  SETUP();

  START();

  __ Mov(x0, 0x3d720c8d);
  __ Cmp(x0, Operand(0x3d720c8d));

  END();
  RUN();

  ASSERT_EQUAL_NZCV(ZCFlag);

  TEARDOWN();
}


TEST(compute_address) {
  SETUP();

  START();
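  // ComputeAddress materialises the address described by a MemOperand into a
  // register without performing the memory access.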
  int64_t base_address = INT64_C(0x123000000abc);
  int64_t reg_offset = INT64_C(0x1087654321);
  Register base = x0;
  Register offset = x1;

  __ Mov(base, base_address);
  __ Mov(offset, reg_offset);

  __ ComputeAddress(x2, MemOperand(base, 0));
  __ ComputeAddress(x3, MemOperand(base, 8));
  __ ComputeAddress(x4, MemOperand(base, -100));

  __ ComputeAddress(x5, MemOperand(base, offset));
  __ ComputeAddress(x6, MemOperand(base, offset, LSL, 2));
  __ ComputeAddress(x7, MemOperand(base, offset, LSL, 4));
  __ ComputeAddress(x8, MemOperand(base, offset, LSL, 8));

  __ ComputeAddress(x9, MemOperand(base, offset, SXTW));
  __ ComputeAddress(x10, MemOperand(base, offset, UXTW, 1));
  __ ComputeAddress(x11, MemOperand(base, offset, SXTW, 2));
  __ ComputeAddress(x12, MemOperand(base, offset, UXTW, 3));

  END();

  RUN();

  ASSERT_EQUAL_64(base_address, base);

  ASSERT_EQUAL_64(INT64_C(0x123000000abc), x2);
  ASSERT_EQUAL_64(INT64_C(0x123000000ac4), x3);
  ASSERT_EQUAL_64(INT64_C(0x123000000a58), x4);

  ASSERT_EQUAL_64(INT64_C(0x124087654ddd), x5);
  ASSERT_EQUAL_64(INT64_C(0x12721d951740), x6);
  ASSERT_EQUAL_64(INT64_C(0x133876543ccc), x7);
  ASSERT_EQUAL_64(INT64_C(0x22b765432bbc), x8);

  ASSERT_EQUAL_64(INT64_C(0x122f87654ddd), x9);
  ASSERT_EQUAL_64(INT64_C(0x12310eca90fe), x10);
  ASSERT_EQUAL_64(INT64_C(0x122e1d951740), x11);
  ASSERT_EQUAL_64(INT64_C(0x12343b2a23c4), x12);

  TEARDOWN();
}


TEST(far_branch_backward) {
  // Test that the MacroAssembler correctly resolves backward branches to
  // labels that are outside the immediate range of branch instructions. Take
  // into account that backward branches can reach one instruction further
  // than forward branches.
  const int overflow_size =
      kInstructionSize +
      std::max(Instruction::GetImmBranchForwardRange(TestBranchType),
               std::max(Instruction::GetImmBranchForwardRange(CompareBranchType),
                        Instruction::GetImmBranchForwardRange(CondBranchType)));

  SETUP();
  START();

  Label done, fail;
  Label test_tbz, test_cbz, test_bcond;
  Label success_tbz, success_cbz, success_bcond;

  __ Mov(x0, 0);
  __ Mov(x1, 1);
  __ Mov(x10, 0);

  __ B(&test_tbz);
  __ Bind(&success_tbz);
  __ Orr(x0, x0, 1 << 0);
  __ B(&test_cbz);
  __ Bind(&success_cbz);
  __ Orr(x0, x0, 1 << 1);
  __ B(&test_bcond);
  __ Bind(&success_bcond);
  __ Orr(x0, x0, 1 << 2);

  __ B(&done);

  // Generate enough code to overflow the immediate range of the three types of
  // branches below.
  for (unsigned i = 0; i < overflow_size / kInstructionSize; ++i) {
    if (i % 100 == 0) {
      // If we do land in this code, we do not want to execute so many nops
      // before reaching the end of the test (especially if tracing is
      // activated).
      __ B(&fail);
    } else {
      __ Nop();
    }
  }
  __ B(&fail);

  __ Bind(&test_tbz);
  __ Tbz(x10, 7, &success_tbz);
  __ Bind(&test_cbz);
  __ Cbz(x10, &success_cbz);
  __ Bind(&test_bcond);
  __ Cmp(x10, 0);
  __ B(eq, &success_bcond);

  // For each out-of-range branch instruction, at least two instructions
  // should have been generated.
  VIXL_CHECK(masm.GetSizeOfCodeGeneratedSince(&test_tbz) >=
             7 * kInstructionSize);

  __ Bind(&fail);
  __ Mov(x1, 0);
  __ Bind(&done);

  END();
  RUN();

  ASSERT_EQUAL_64(0x7, x0);
  ASSERT_EQUAL_64(0x1, x1);

  TEARDOWN();
}


TEST(single_veneer) {
  SETUP();
  START();

  const int max_range = Instruction::GetImmBranchForwardRange(TestBranchType);

  Label success, fail, done;

  __ Mov(x0, 0);
  __ Mov(x1, 1);
  __ Mov(x10, 0);

  __ Tbz(x10, 7, &success);

  // Generate enough code to overflow the immediate range of the `tbz`.
  for (unsigned i = 0; i < max_range / kInstructionSize + 1; ++i) {
    if (i % 100 == 0) {
      // If we do land in this code, we do not want to execute so many nops
      // before reaching the end of the test (especially if tracing is
      // activated).
      __ B(&fail);
    } else {
      __ Nop();
    }
  }
  __ B(&fail);

  __ Bind(&success);
  __ Mov(x0, 1);

  __ B(&done);
  __ Bind(&fail);
  __ Mov(x1, 0);
  __ Bind(&done);

  END();
  RUN();

  ASSERT_EQUAL_64(1, x0);
  ASSERT_EQUAL_64(1, x1);

  TEARDOWN();
}


TEST(simple_veneers) {
  // Test that the MacroAssembler correctly emits veneers for forward branches
  // to labels that are outside the immediate range of branch instructions.
  const int max_range =
      std::max(Instruction::GetImmBranchForwardRange(TestBranchType),
               std::max(Instruction::GetImmBranchForwardRange(CompareBranchType),
                        Instruction::GetImmBranchForwardRange(CondBranchType)));

  SETUP();
  START();

  Label done, fail;
  Label test_tbz, test_cbz, test_bcond;
  Label success_tbz, success_cbz, success_bcond;

  __ Mov(x0, 0);
  __ Mov(x1, 1);
  __ Mov(x10, 0);

  __ Bind(&test_tbz);
  __ Tbz(x10, 7, &success_tbz);
  __ Bind(&test_cbz);
  __ Cbz(x10, &success_cbz);
  __ Bind(&test_bcond);
  __ Cmp(x10, 0);
  __ B(eq, &success_bcond);

  // Generate enough code to overflow the immediate range of the three types of
  // branches below.
  for (unsigned i = 0; i < max_range / kInstructionSize + 1; ++i) {
    if (i % 100 == 0) {
      // If we do land in this code, we do not want to execute so many nops
      // before reaching the end of the test (especially if tracing is
      // activated).
      __ B(&fail);
    } else {
      __ Nop();
    }
  }
  __ B(&fail);

  __ Bind(&success_tbz);
  __ Orr(x0, x0, 1 << 0);
  __ B(&test_cbz);
  __ Bind(&success_cbz);
  __ Orr(x0, x0, 1 << 1);
  __ B(&test_bcond);
  __ Bind(&success_bcond);
  __ Orr(x0, x0, 1 << 2);

  __ B(&done);
  __ Bind(&fail);
  __ Mov(x1, 0);
  __ Bind(&done);

  END();
  RUN();

  ASSERT_EQUAL_64(0x7, x0);
  ASSERT_EQUAL_64(0x1, x1);

  TEARDOWN();
}


TEST(veneers_stress) {
  SETUP();
  START();

  // This is a code generation test stressing the emission of veneers. The code
  // generated is not executed.

  Label target;
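  // Each iteration emits four branches. Emitting roughly 1.25 times the
  // conditional-branch range guarantees that veneer pools must be generated.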
  const unsigned max_range =
      Instruction::GetImmBranchForwardRange(CondBranchType);
  const unsigned iterations =
      (max_range + max_range / 4) / (4 * kInstructionSize);
  for (unsigned i = 0; i < iterations; i++) {
    __ B(&target);
    __ B(eq, &target);
    __ Cbz(x0, &target);
    __ Tbz(x0, 0, &target);
  }
  __ Bind(&target);

  END();
  TEARDOWN();
}


TEST(veneers_two_out_of_range) {
  SETUP();
  START();

  // This is a code generation test. The code generated is not executed.
  // Ensure that the MacroAssembler takes unresolved branches into account when
  // deciding when to emit a veneer pool. We generate two branches that go out
  // of range at the same offset. When the MacroAssembler decides to emit the
  // veneer pool, the emission of the first veneer should not cause the other
  // branch to go out of range.

  int range_cbz = Instruction::GetImmBranchForwardRange(CompareBranchType);
  int range_tbz = Instruction::GetImmBranchForwardRange(TestBranchType);
  int max_target = static_cast<int>(masm.GetCursorOffset()) + range_cbz;

  Label done;

  // We use different labels to prevent the MacroAssembler from sharing
  // veneers.
  Label target_cbz, target_tbz;

  __ Cbz(x0, &target_cbz);
  while (masm.GetCursorOffset() < max_target - range_tbz) {
    __ Nop();
  }
  __ Tbz(x0, 0, &target_tbz);
  while (masm.GetCursorOffset() < max_target) {
    __ Nop();
  }

  // This additional nop makes the branches go out of range.
  __ Nop();

  __ Bind(&target_cbz);
  __ Bind(&target_tbz);

  END();
  TEARDOWN();
}


TEST(veneers_hanging) {
  SETUP();
  START();

  // This is a code generation test. The code generated is not executed.
  // Ensure that the MacroAssembler takes unresolved branches into account when
  // deciding when to emit a veneer pool. This is similar to the
  // 'veneers_two_out_of_range' test. We try to trigger the following
  // situation:
  //   b.eq label
  //   b.eq label
  //   ...
  //   nop
  //   ...
  //   cbz x0, label
  //   cbz x0, label
  //   ...
  //   tbz x0, 0, label
  //   nop
  //   ...
  //   nop    <- From here the `b.eq` and `cbz` instructions run out of range,
  //             so a veneer pool is required.
  //   veneer
  //   veneer
  //   veneer <- The `tbz` runs out of range somewhere in the middle of the
  //   veneer    veneer pool.
  //   veneer

  const int range_bcond = Instruction::GetImmBranchForwardRange(CondBranchType);
  const int range_cbz =
      Instruction::GetImmBranchForwardRange(CompareBranchType);
  const int range_tbz = Instruction::GetImmBranchForwardRange(TestBranchType);
  const int max_target = static_cast<int>(masm.GetCursorOffset()) + range_bcond;

  Label done;
  const int n_bcond = 100;
  const int n_cbz = 100;
  const int n_tbz = 1;
  const int kNTotalBranches = n_bcond + n_cbz + n_tbz;

  // We use different labels to prevent the MacroAssembler from sharing
  // veneers.
  Label labels[kNTotalBranches];
  for (int i = 0; i < kNTotalBranches; i++) {
    new (&labels[i]) Label();
  }

  for (int i = 0; i < n_bcond; i++) {
    __ B(eq, &labels[i]);
  }

  while (masm.GetCursorOffset() < max_target - range_cbz) {
    __ Nop();
  }

  for (int i = 0; i < n_cbz; i++) {
    __ Cbz(x0, &labels[n_bcond + i]);
  }

  // Ensure the 'tbz' will go out of range after some of the previously
  // generated branches.
  int margin = (n_bcond / 2) * kInstructionSize;
  while (masm.GetCursorOffset() < max_target - range_tbz + margin) {
    __ Nop();
  }

  __ Tbz(x0, 0, &labels[n_bcond + n_cbz]);

  while (masm.GetCursorOffset() < max_target) {
    __ Nop();
  }

  // This additional nop makes the 'b.eq' and 'cbz' instructions go out of
  // range and forces the emission of a veneer pool. The 'tbz' is not yet out
  // of range, but will go out of range while veneers are emitted for the
  // other branches.
  // The MacroAssembler should ensure that veneers are correctly emitted for
  // all the branches, including the 'tbz'. Checks will fail if the target of
  // a branch is out of range.
  __ Nop();

  for (int i = 0; i < kNTotalBranches; i++) {
    __ Bind(&labels[i]);
  }

  END();
  TEARDOWN();
}


TEST(collision_literal_veneer_pools) {
  SETUP_WITH_FEATURES(CPUFeatures::kFP);
  START();

  // This is a code generation test. The code generated is not executed.

  // Make sure the literal pool is empty.
  masm.EmitLiteralPool(LiteralPool::kBranchRequired);
  ASSERT_LITERAL_POOL_SIZE(0);

  // We chose the offsets below to (try to) trigger the following situation:
  // buffer offset
  //              0: tbz x0, 0, target_tbz ----------------------------------.
  //              4: nop                                                     |
  //                 ...                                                     |
  //                 nop                                                     |
  //    literal gen: ldr s0, [pc + ...]  ; load from `pool start + 0`        |
  //                 ldr s0, [pc + ...]  ; load from `pool start + 4`        |
  //                 ...                                                     |
  //                 ldr s0, [pc + ...]                                      |
  //     pool start: floating-point literal (0.1)                           |
  //                 floating-point literal (1.1)                           |
  //                 ...                                                    |
  //                 floating-point literal (<n>.1)    <-----tbz-max-range--'
  //                 floating-point literal (<n+1>.1)
  //                 ...

  const int range_tbz = Instruction::GetImmBranchForwardRange(TestBranchType);
  const int max_target = static_cast<int>(masm.GetCursorOffset()) + range_tbz;

  const size_t target_literal_pool_size = 100 * kInstructionSize;
  const int offset_start_literal_gen =
      target_literal_pool_size + target_literal_pool_size / 2;

  Label target_tbz;

  __ Tbz(x0, 0, &target_tbz);
  VIXL_CHECK(masm.GetNumberOfPotentialVeneers() == 1);
  while (masm.GetCursorOffset() < max_target - offset_start_literal_gen) {
    __ Nop();
  }
  VIXL_CHECK(masm.GetNumberOfPotentialVeneers() == 1);

  for (int i = 0; i < 100; i++) {
    // Use a different value to force one literal pool entry per iteration.
    __ Ldr(s0, i + 0.1);
  }
  VIXL_CHECK(masm.GetLiteralPoolSize() >= target_literal_pool_size);

  // Force emission of a literal pool.
  masm.EmitLiteralPool(LiteralPool::kBranchRequired);
  ASSERT_LITERAL_POOL_SIZE(0);

  // The branch should not have gone out of range during the emission of the
  // literal pool.
  __ Bind(&target_tbz);

  VIXL_CHECK(masm.GetNumberOfPotentialVeneers() == 0);

  END();
  TEARDOWN();
}


TEST(ldr_literal_explicit) {
  SETUP();

  START();
  Literal<int64_t> automatically_placed_literal(1, masm.GetLiteralPool());
  Literal<int64_t> manually_placed_literal(2);
  {
    ExactAssemblyScope scope(&masm, kInstructionSize + sizeof(int64_t));
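    // The scope reserves space for exactly one branch instruction plus the
    // eight-byte literal placed between the branch and its target.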
    Label over_literal;
    __ b(&over_literal);
    __ place(&manually_placed_literal);
    __ bind(&over_literal);
  }
  __ Ldr(x1, &manually_placed_literal);
  __ Ldr(x2, &automatically_placed_literal);
  __ Add(x0, x1, x2);
  END();

  RUN();

  ASSERT_EQUAL_64(3, x0);

  TEARDOWN();
}


TEST(ldr_literal_automatically_placed) {
  SETUP_WITH_FEATURES(CPUFeatures::kFP);

  START();

  // We start with an empty literal pool.
  ASSERT_LITERAL_POOL_SIZE(0);

  // Create a literal that should be placed by the literal pool.
  Literal<int64_t> explicit_literal(2, masm.GetLiteralPool());
  // It should not appear in the literal pool until its first use.
  ASSERT_LITERAL_POOL_SIZE(0);

  // Check that using standard literals does not break the use of explicitly
  // created literals.
  __ Ldr(d1, 1.1);
  ASSERT_LITERAL_POOL_SIZE(8);
  masm.EmitLiteralPool(LiteralPool::kBranchRequired);
  ASSERT_LITERAL_POOL_SIZE(0);

  __ Ldr(x2, &explicit_literal);
  ASSERT_LITERAL_POOL_SIZE(8);
  masm.EmitLiteralPool(LiteralPool::kBranchRequired);
  ASSERT_LITERAL_POOL_SIZE(0);

  __ Ldr(d3, 3.3);
  ASSERT_LITERAL_POOL_SIZE(8);
  masm.EmitLiteralPool(LiteralPool::kBranchRequired);
  ASSERT_LITERAL_POOL_SIZE(0);

  // Re-use our explicitly created literal. It has already been placed, so it
  // should not impact the literal pool.
  __ Ldr(x4, &explicit_literal);
  ASSERT_LITERAL_POOL_SIZE(0);

  END();

  RUN();

  ASSERT_EQUAL_FP64(1.1, d1);
  ASSERT_EQUAL_64(2, x2);
  ASSERT_EQUAL_FP64(3.3, d3);
  ASSERT_EQUAL_64(2, x4);

  TEARDOWN();
}


TEST(literal_update_overwrite) {
  SETUP();

  START();

  ASSERT_LITERAL_POOL_SIZE(0);
  LiteralPool* literal_pool = masm.GetLiteralPool();

  Literal<int32_t> lit_32_update_before_pool(0xbad, literal_pool);
  Literal<int32_t> lit_32_update_after_pool(0xbad, literal_pool);
  Literal<int64_t> lit_64_update_before_pool(0xbad, literal_pool);
  Literal<int64_t> lit_64_update_after_pool(0xbad, literal_pool);

  ASSERT_LITERAL_POOL_SIZE(0);

  lit_32_update_before_pool.UpdateValue(32);
  lit_64_update_before_pool.UpdateValue(64);

  __ Ldr(w1, &lit_32_update_before_pool);
  __ Ldr(x2, &lit_64_update_before_pool);
  __ Ldr(w3, &lit_32_update_after_pool);
  __ Ldr(x4, &lit_64_update_after_pool);

  masm.EmitLiteralPool(LiteralPool::kBranchRequired);

  VIXL_ASSERT(lit_32_update_after_pool.IsPlaced());
  VIXL_ASSERT(lit_64_update_after_pool.IsPlaced());
  lit_32_update_after_pool.UpdateValue(128, &masm);
  lit_64_update_after_pool.UpdateValue(256, &masm);

  END();

  RUN();

  ASSERT_EQUAL_64(32, x1);
  ASSERT_EQUAL_64(64, x2);
  ASSERT_EQUAL_64(128, x3);
  ASSERT_EQUAL_64(256, x4);

  TEARDOWN();
}


TEST(literal_deletion_policies) {
  SETUP();

  START();

  // We cannot check exactly when the deletion of the literals occurs, but we
  // can check that usage of the deletion policies is not broken.

  ASSERT_LITERAL_POOL_SIZE(0);
  LiteralPool* literal_pool = masm.GetLiteralPool();

  Literal<int32_t> lit_manual(0xbad, literal_pool);
  Literal<int32_t>* lit_deleted_on_placement =
      new Literal<int32_t>(0xbad,
                           literal_pool,
                           RawLiteral::kDeletedOnPlacementByPool);
  Literal<int32_t>* lit_deleted_on_pool_destruction =
      new Literal<int32_t>(0xbad,
                           literal_pool,
                           RawLiteral::kDeletedOnPoolDestruction);

  ASSERT_LITERAL_POOL_SIZE(0);

  lit_manual.UpdateValue(32);
  lit_deleted_on_placement->UpdateValue(64);

  __ Ldr(w1, &lit_manual);
  __ Ldr(w2, lit_deleted_on_placement);
  __ Ldr(w3, lit_deleted_on_pool_destruction);

  masm.EmitLiteralPool(LiteralPool::kBranchRequired);

  VIXL_ASSERT(lit_manual.IsPlaced());
  VIXL_ASSERT(lit_deleted_on_pool_destruction->IsPlaced());
  lit_deleted_on_pool_destruction->UpdateValue(128, &masm);

  END();

  RUN();

  ASSERT_EQUAL_64(32, x1);
  ASSERT_EQUAL_64(64, x2);
  ASSERT_EQUAL_64(128, x3);

  TEARDOWN();
}


TEST(generic_operand) {
  SETUP_WITH_FEATURES(CPUFeatures::kFP);

  int32_t data_32_array[5] = {0xbadbeef,
                              0x11111111,
                              0xbadbeef,
                              0x33333333,
                              0xbadbeef};
  int64_t data_64_array[5] = {INT64_C(0xbadbadbadbeef),
                              INT64_C(0x1111111111111111),
                              INT64_C(0xbadbadbadbeef),
                              INT64_C(0x3333333333333333),
                              INT64_C(0xbadbadbadbeef)};
  size_t size_32 = sizeof(data_32_array[0]);
  size_t size_64 = sizeof(data_64_array[0]);

  START();

  intptr_t data_32_address = reinterpret_cast<intptr_t>(&data_32_array[0]);
  intptr_t data_64_address = reinterpret_cast<intptr_t>(&data_64_array[0]);
  Register data_32 = x27;
  Register data_64 = x28;
  __ Mov(data_32, data_32_address);
  __ Mov(data_64, data_64_address);

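  // A GenericOperand wraps either a register or a memory location; Move
  // selects the appropriate load, store or register move for each
  // source/destination combination.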
  __ Move(GenericOperand(w0),
          GenericOperand(MemOperand(data_32, 1 * size_32), size_32));
  __ Move(GenericOperand(s0),
          GenericOperand(MemOperand(data_32, 3 * size_32), size_32));
  __ Move(GenericOperand(x10),
          GenericOperand(MemOperand(data_64, 1 * size_64), size_64));
  __ Move(GenericOperand(d10),
          GenericOperand(MemOperand(data_64, 3 * size_64), size_64));

  __ Move(GenericOperand(w1), GenericOperand(w0));
  __ Move(GenericOperand(s1), GenericOperand(s0));
  __ Move(GenericOperand(x11), GenericOperand(x10));
  __ Move(GenericOperand(d11), GenericOperand(d10));

  __ Move(GenericOperand(MemOperand(data_32, 0 * size_32), size_32),
          GenericOperand(w1));
  __ Move(GenericOperand(MemOperand(data_32, 2 * size_32), size_32),
          GenericOperand(s1));
  __ Move(GenericOperand(MemOperand(data_64, 0 * size_64), size_64),
          GenericOperand(x11));
  __ Move(GenericOperand(MemOperand(data_64, 2 * size_64), size_64),
          GenericOperand(d11));

  __ Move(GenericOperand(MemOperand(data_32, 4 * size_32), size_32),
          GenericOperand(MemOperand(data_32, 0 * size_32), size_32));
  __ Move(GenericOperand(MemOperand(data_64, 4 * size_64), size_64),
          GenericOperand(MemOperand(data_64, 0 * size_64), size_64));
  END();

  RUN();

  ASSERT_EQUAL_64(data_32_address, data_32);
  ASSERT_EQUAL_64(data_64_address, data_64);

  ASSERT_EQUAL_32(0x11111111, w0);
  ASSERT_EQUAL_32(0x33333333, core.sreg_bits(0));
  ASSERT_EQUAL_64(INT64_C(0x1111111111111111), x10);
  ASSERT_EQUAL_64(INT64_C(0x3333333333333333), core.dreg_bits(10));

  ASSERT_EQUAL_32(0x11111111, w1);
  ASSERT_EQUAL_32(0x33333333, core.sreg_bits(1));
  ASSERT_EQUAL_64(INT64_C(0x1111111111111111), x11);
  ASSERT_EQUAL_64(INT64_C(0x3333333333333333), core.dreg_bits(11));

  VIXL_CHECK(data_32_array[0] == 0x11111111);
  VIXL_CHECK(data_32_array[1] == 0x11111111);
  VIXL_CHECK(data_32_array[2] == 0x33333333);
  VIXL_CHECK(data_32_array[3] == 0x33333333);
  VIXL_CHECK(data_32_array[4] == 0x11111111);

  VIXL_CHECK(data_64_array[0] == INT64_C(0x1111111111111111));
  VIXL_CHECK(data_64_array[1] == INT64_C(0x1111111111111111));
  VIXL_CHECK(data_64_array[2] == INT64_C(0x3333333333333333));
  VIXL_CHECK(data_64_array[3] == INT64_C(0x3333333333333333));
  VIXL_CHECK(data_64_array[4] == INT64_C(0x1111111111111111));

  TEARDOWN();
}


// Test feature detection of calls to runtime functions.

// C++11 should be sufficient to provide simulated runtime calls, except for a
// GCC bug before 4.9.1.
#if defined(VIXL_INCLUDE_SIMULATOR_AARCH64) && (__cplusplus >= 201103L) && \
    (defined(__clang__) || GCC_VERSION_OR_NEWER(4, 9, 1)) &&               \
    !defined(VIXL_HAS_SIMULATED_RUNTIME_CALL_SUPPORT)
#error \
    "C++11 should be sufficient to provide support for simulated runtime calls."
#endif  // #if defined(VIXL_INCLUDE_SIMULATOR_AARCH64) && ...

#if (__cplusplus >= 201103L) && \
    !defined(VIXL_HAS_MACROASSEMBLER_RUNTIME_CALL_SUPPORT)
#error \
    "C++11 should be sufficient to provide support for `MacroAssembler::CallRuntime()`."
#endif  // #if (__cplusplus >= 201103L) && ...

#ifdef VIXL_HAS_MACROASSEMBLER_RUNTIME_CALL_SUPPORT
int32_t runtime_call_add_one(int32_t a) { return a + 1; }

double runtime_call_add_doubles(double a, double b, double c) {
  return a + b + c;
}

int64_t runtime_call_one_argument_on_stack(int64_t arg1 __attribute__((unused)),
                                           int64_t arg2 __attribute__((unused)),
                                           int64_t arg3 __attribute__((unused)),
                                           int64_t arg4 __attribute__((unused)),
                                           int64_t arg5 __attribute__((unused)),
                                           int64_t arg6 __attribute__((unused)),
                                           int64_t arg7 __attribute__((unused)),
                                           int64_t arg8 __attribute__((unused)),
                                           int64_t arg9) {
  return arg9;
}

double runtime_call_two_arguments_on_stack(int64_t arg1 __attribute__((unused)),
                                           int64_t arg2 __attribute__((unused)),
                                           int64_t arg3 __attribute__((unused)),
                                           int64_t arg4 __attribute__((unused)),
                                           int64_t arg5 __attribute__((unused)),
                                           int64_t arg6 __attribute__((unused)),
                                           int64_t arg7 __attribute__((unused)),
                                           int64_t arg8 __attribute__((unused)),
                                           double arg9,
                                           double arg10) {
  return arg9 - arg10;
}

void runtime_call_store_at_address(int64_t* address) { *address = 0xf00d; }

enum RuntimeCallTestEnum { Enum0 };

RuntimeCallTestEnum runtime_call_enum(RuntimeCallTestEnum e) { return e; }

enum class RuntimeCallTestEnumClass { Enum0 };

RuntimeCallTestEnumClass runtime_call_enum_class(RuntimeCallTestEnumClass e) {
  return e;
}

int8_t test_int8_t(int8_t x) { return x; }
uint8_t test_uint8_t(uint8_t x) { return x; }
int16_t test_int16_t(int16_t x) { return x; }
uint16_t test_uint16_t(uint16_t x) { return x; }

TEST(runtime_calls) {
  SETUP_WITH_FEATURES(CPUFeatures::kFP);

#ifndef VIXL_HAS_SIMULATED_RUNTIME_CALL_SUPPORT
  if (masm.GenerateSimulatorCode()) {
    // This configuration is unsupported and a `VIXL_UNREACHABLE()` would fire
    // while trying to generate `CallRuntime`. This configuration should only
    // be reachable with C++11 and a (buggy) version of GCC pre-4.9.1.
    TEARDOWN();
    return;
  }
#endif

  START();

  // Test `CallRuntime`.

  __ Mov(w0, 0);
  __ CallRuntime(runtime_call_add_one);
  __ Mov(w20, w0);

  __ Fmov(d0, 0.0);
  __ Fmov(d1, 1.5);
  __ Fmov(d2, 2.5);
  __ CallRuntime(runtime_call_add_doubles);
  __ Fmov(d20, d0);

  __ Mov(x0, 0x123);
  __ Push(x0, x0);
  __ CallRuntime(runtime_call_one_argument_on_stack);
  __ Mov(x21, x0);
  __ Pop(x0, x1);

  __ Fmov(d0, 314.0);
  __ Fmov(d1, 4.0);
  __ Push(d1, d0);
  __ CallRuntime(runtime_call_two_arguments_on_stack);
  __ Fmov(d21, d0);
  __ Pop(d1, d0);

  // Test that the template mechanisms don't break with enums.
  __ Mov(w0, 0);
  __ CallRuntime(runtime_call_enum);
  __ Mov(w0, 0);
  __ CallRuntime(runtime_call_enum_class);

  // Test `TailCallRuntime`.

  Label function, after_function;
  __ B(&after_function);
  __ Bind(&function);
  __ Mov(x22, 0);
  __ Mov(w0, 123);
  __ TailCallRuntime(runtime_call_add_one);
  // Control should not fall through.
  __ Mov(x22, 0xbad);
  __ Ret();
  __ Bind(&after_function);

  // Call our dummy function, taking care to preserve the link register.
  __ Push(ip0, lr);
  __ Bl(&function);
  __ Pop(lr, ip0);
  // Save the result.
  __ Mov(w23, w0);

  __ Mov(x24, 0);
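  // x24 counts mismatches when narrow integer types are passed to and
  // returned from runtime calls; it must still be zero at the end of the
  // test.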
  int test_values[] = {static_cast<int8_t>(-1),
                       static_cast<uint8_t>(-1),
                       static_cast<int16_t>(-1),
                       static_cast<uint16_t>(-1),
                       -256,
                       -1,
                       0,
                       1,
                       256};
  for (size_t i = 0; i < sizeof(test_values) / sizeof(test_values[0]); ++i) {
    Label pass_int8, pass_uint8, pass_int16, pass_uint16;
    int x = test_values[i];
    __ Mov(w0, static_cast<int8_t>(x));
    __ CallRuntime(test_int8_t);
    __ Cmp(w0, static_cast<int8_t>(x));
    __ Cinc(x24, x24, ne);
    __ Mov(w0, static_cast<uint8_t>(x));
    __ CallRuntime(test_uint8_t);
    __ Cmp(w0, static_cast<uint8_t>(x));
    __ Cinc(x24, x24, ne);
    __ Mov(w0, static_cast<int16_t>(x));
    __ CallRuntime(test_int16_t);
    __ Cmp(w0, static_cast<int16_t>(x));
    __ Cinc(x24, x24, ne);
    __ Mov(w0, static_cast<uint16_t>(x));
    __ CallRuntime(test_uint16_t);
    __ Cmp(w0, static_cast<uint16_t>(x));
    __ Cinc(x24, x24, ne);
  }

  int64_t value = 0xbadbeef;
  __ Mov(x0, reinterpret_cast<uint64_t>(&value));
  __ CallRuntime(runtime_call_store_at_address);

  END();

#if defined(VIXL_HAS_SIMULATED_RUNTIME_CALL_SUPPORT) || \
    !defined(VIXL_INCLUDE_SIMULATOR_AARCH64)
  RUN();

  ASSERT_EQUAL_32(1, w20);
  ASSERT_EQUAL_FP64(4.0, d20);
  ASSERT_EQUAL_64(0x123, x21);
  ASSERT_EQUAL_FP64(310.0, d21);
  VIXL_CHECK(value == 0xf00d);
  ASSERT_EQUAL_64(0, x22);
  ASSERT_EQUAL_32(124, w23);
  ASSERT_EQUAL_64(0, x24);
#endif  // #if defined(VIXL_HAS_SIMULATED_RUNTIME_CALL_SUPPORT) || ...

  TEARDOWN();
}
#endif  // #ifdef VIXL_HAS_MACROASSEMBLER_RUNTIME_CALL_SUPPORT


TEST(optimised_mov_register) {
  SETUP();

  START();
  Label start;
  __ Bind(&start);
  __ Mov(x0, x0);
  VIXL_CHECK(masm.GetSizeOfCodeGeneratedSince(&start) == 0);
  __ Mov(w0, w0, kDiscardForSameWReg);
  VIXL_CHECK(masm.GetSizeOfCodeGeneratedSince(&start) == 0);
  __ Mov(w0, w0);
  VIXL_CHECK(masm.GetSizeOfCodeGeneratedSince(&start) == kInstructionSize);

  END();

  RUN();

  TEARDOWN();
}


TEST(nop) {
  MacroAssembler masm;

  Label start;
  __ Bind(&start);
  __ Nop();
  // `MacroAssembler::Nop` must generate at least one nop.
  VIXL_CHECK(masm.GetSizeOfCodeGeneratedSince(&start) >= kInstructionSize);

  masm.FinalizeCode();
}

TEST(scratch_scope_basic_v) {
  MacroAssembler masm;

  {
    UseScratchRegisterScope temps(&masm);
    VRegister temp = temps.AcquireVRegisterOfSize(kQRegSize);
    VIXL_CHECK(temp.Aliases(v31));
  }
  {
    UseScratchRegisterScope temps(&masm);
    VRegister temp = temps.AcquireVRegisterOfSize(kDRegSize);
    VIXL_CHECK(temp.Aliases(v31));
  }
  {
    UseScratchRegisterScope temps(&masm);
    VRegister temp = temps.AcquireVRegisterOfSize(kSRegSize);
    VIXL_CHECK(temp.Aliases(v31));
  }
}

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
// Test the pseudo-instructions that control CPUFeatures dynamically in the
// Simulator. These are used by the test infrastructure itself, but in a
// fairly limited way.

static void RunHelperWithFeatureCombinations(
    void (*helper)(const CPUFeatures& base, const CPUFeatures& f)) {
  // Iterate, testing the first n features in this list.
  CPUFeatures::Feature features[] = {
      // Put kNone first, so that the first iteration uses an empty feature
      // set.
      CPUFeatures::kNone,
      // The remaining features used are arbitrary.
      CPUFeatures::kIDRegisterEmulation,
      CPUFeatures::kDCPoP,
      CPUFeatures::kPAuth,
      CPUFeatures::kFcma,
      CPUFeatures::kAES,
      CPUFeatures::kNEON,
      CPUFeatures::kCRC32,
      CPUFeatures::kFP,
      CPUFeatures::kPmull1Q,
      CPUFeatures::kSM4,
      CPUFeatures::kSM3,
      CPUFeatures::kDotProduct,
  };
  VIXL_ASSERT(CPUFeatures(CPUFeatures::kNone) == CPUFeatures::None());
  // The features are not necessarily encoded in kInstructionSize-sized slots,
  // so the MacroAssembler must pad the list to align the following
  // instruction. Ensure that we have enough features in the list to cover all
  // interesting alignment cases, even if the highest common factor of
  // kInstructionSize and an encoded feature is one.
  VIXL_STATIC_ASSERT(ARRAY_SIZE(features) > kInstructionSize);

  CPUFeatures base = CPUFeatures::None();
  for (size_t i = 0; i < ARRAY_SIZE(features); i++) {
    base.Combine(features[i]);
    CPUFeatures f = CPUFeatures::None();
    for (size_t j = 0; j < ARRAY_SIZE(features); j++) {
      f.Combine(features[j]);
      helper(base, f);
    }
  }
}

static void SetSimulatorCPUFeaturesHelper(const CPUFeatures& base,
                                          const CPUFeatures& f) {
  SETUP_WITH_FEATURES(base);
  START();

  __ SetSimulatorCPUFeatures(f);

  END();
  RUN_WITHOUT_SEEN_FEATURE_CHECK();
  VIXL_CHECK(*(simulator.GetCPUFeatures()) == f);
  TEARDOWN();
}

TEST(configure_cpu_features_set) {
  RunHelperWithFeatureCombinations(SetSimulatorCPUFeaturesHelper);
}

static void EnableSimulatorCPUFeaturesHelper(const CPUFeatures& base,
                                             const CPUFeatures& f) {
  SETUP_WITH_FEATURES(base);
  START();

  __ EnableSimulatorCPUFeatures(f);

  END();
  RUN_WITHOUT_SEEN_FEATURE_CHECK();
  VIXL_CHECK(*(simulator.GetCPUFeatures()) == base.With(f));
  TEARDOWN();
}

TEST(configure_cpu_features_enable) {
  RunHelperWithFeatureCombinations(EnableSimulatorCPUFeaturesHelper);
}

static void DisableSimulatorCPUFeaturesHelper(const CPUFeatures& base,
                                              const CPUFeatures& f) {
  SETUP_WITH_FEATURES(base);
  START();

  __ DisableSimulatorCPUFeatures(f);

  END();
  RUN_WITHOUT_SEEN_FEATURE_CHECK();
  VIXL_CHECK(*(simulator.GetCPUFeatures()) == base.Without(f));
  TEARDOWN();
}

TEST(configure_cpu_features_disable) {
  RunHelperWithFeatureCombinations(DisableSimulatorCPUFeaturesHelper);
}

static void SaveRestoreSimulatorCPUFeaturesHelper(const CPUFeatures& base,
                                                  const CPUFeatures& f) {
  SETUP_WITH_FEATURES(base);
  START();

  {
    __ SaveSimulatorCPUFeatures();
    __ SetSimulatorCPUFeatures(f);
    {
      __ SaveSimulatorCPUFeatures();
      __ SetSimulatorCPUFeatures(CPUFeatures::All());
      __ RestoreSimulatorCPUFeatures();
    }
    __ RestoreSimulatorCPUFeatures();
  }

  END();
  RUN_WITHOUT_SEEN_FEATURE_CHECK();
  VIXL_CHECK(*(simulator.GetCPUFeatures()) == base);
  TEARDOWN();
}

TEST(configure_cpu_features_save_restore) {
  RunHelperWithFeatureCombinations(SaveRestoreSimulatorCPUFeaturesHelper);
}

static void SimulationCPUFeaturesScopeHelper(const CPUFeatures& base,
                                             const CPUFeatures& f) {
  SETUP_WITH_FEATURES(base);
  START();

  {
    SimulationCPUFeaturesScope scope_a(&masm, f);
    {
      SimulationCPUFeaturesScope scope_b(&masm, CPUFeatures::All());
      {
        SimulationCPUFeaturesScope scope_c(&masm, CPUFeatures::None());
        // The scope arguments should combine with 'Enable', so we should be
        // able to use any CPUFeatures here.
        __ Fadd(v0.V4S(), v1.V4S(), v2.V4S());  // Requires {FP, NEON}.
      }
    }
  }

  END();
  RUN_WITHOUT_SEEN_FEATURE_CHECK();
  VIXL_CHECK(*(simulator.GetCPUFeatures()) == base);
  TEARDOWN();
}

TEST(configure_cpu_features_scope) {
  RunHelperWithFeatureCombinations(SimulationCPUFeaturesScopeHelper);
}

#endif  // VIXL_INCLUDE_SIMULATOR_AARCH64

}  // namespace aarch64
}  // namespace vixl