// Copyright 2014, VIXL authors
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
//   * Redistributions of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//   * Redistributions in binary form must reproduce the above copyright
//     notice, this list of conditions and the following disclaimer in the
//     documentation and/or other materials provided with the distribution.
//   * Neither the name of ARM Limited nor the names of its contributors may be
//     used to endorse or promote products derived from this software without
//     specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#include "test-utils-aarch64.h"

#include <cmath>
#include <queue>

#include "test-runner.h"

#include "../test/aarch64/test-simulator-inputs-aarch64.h"
#include "aarch64/cpu-aarch64.h"
#include "aarch64/disasm-aarch64.h"
#include "aarch64/macro-assembler-aarch64.h"
#include "aarch64/simulator-aarch64.h"

#define __ masm->

namespace vixl {
namespace aarch64 {


// This value is a signalling NaN as FP64, and also as FP32 or FP16 (taking the
// least-significant bits).
const double kFP64SignallingNaN = RawbitsToDouble(UINT64_C(0x7ff000007f807c01));
const float kFP32SignallingNaN = RawbitsToFloat(0x7f807c01);
const Float16 kFP16SignallingNaN = RawbitsToFloat16(0x7c01);

// A similar value, but as a quiet NaN.
const double kFP64QuietNaN = RawbitsToDouble(UINT64_C(0x7ff800007fc07e01));
const float kFP32QuietNaN = RawbitsToFloat(0x7fc07e01);
const Float16 kFP16QuietNaN = RawbitsToFloat16(0x7e01);

bool Equal32(uint32_t expected, const RegisterDump*, uint32_t result) {
  if (result != expected) {
    printf("Expected 0x%08" PRIx32 "\t Found 0x%08" PRIx32 "\n",
           expected,
           result);
  }

  return expected == result;
}


bool Equal64(uint64_t reference,
             const RegisterDump*,
             uint64_t result,
             ExpectedResult option) {
  switch (option) {
    case kExpectEqual:
      if (result != reference) {
        printf("Expected 0x%016" PRIx64 "\t Found 0x%016" PRIx64 "\n",
               reference,
               result);
      }
      break;
    case kExpectNotEqual:
      if (result == reference) {
        printf("Expected a result not equal to 0x%016" PRIx64 "\n", reference);
      }
      break;
  }

  return reference == result;
}


bool Equal64(std::vector<uint64_t> reference_list,
             const RegisterDump*,
             uint64_t result,
             ExpectedResult option) {
  switch (option) {
    case kExpectEqual:
      for (uint64_t reference : reference_list) {
        if (result == reference) return true;
      }
      printf("Expected a result in (\n");
      break;
    case kExpectNotEqual: {
      // If the result matches any entry in the list, fall through so that the
      // list is printed alongside the unexpected result; otherwise the check
      // passes.
      bool found = false;
      for (uint64_t reference : reference_list) {
        if (result == reference) {
          printf("Expected a result not in (\n");
          found = true;
          break;
        }
      }
      if (!found) return true;
      break;
    }
  }
  for (uint64_t reference : reference_list) {
    printf(" 0x%016" PRIx64 ",\n", reference);
  }
  printf(")\t Found 0x%016" PRIx64 "\n", result);
  return false;
}

bool Equal128(QRegisterValue expected,
              const RegisterDump*,
              QRegisterValue result) {
  if (!expected.Equals(result)) {
    printf("Expected 0x%016" PRIx64 "%016" PRIx64
           "\t "
           "Found 0x%016" PRIx64 "%016" PRIx64 "\n",
           expected.GetLane<uint64_t>(1),
           expected.GetLane<uint64_t>(0),
           result.GetLane<uint64_t>(1),
           result.GetLane<uint64_t>(0));
  }

  return expected.Equals(result);
}

bool EqualFP16(Float16 expected, const RegisterDump*, Float16 result) {
  uint16_t e_rawbits = Float16ToRawbits(expected);
  uint16_t r_rawbits = Float16ToRawbits(result);
  if (e_rawbits == r_rawbits) {
    return true;
  } else {
    if (IsNaN(expected) || IsZero(expected)) {
      printf("Expected 0x%04" PRIx16 "\t Found 0x%04" PRIx16 "\n",
             e_rawbits,
             r_rawbits);
    } else {
      printf("Expected %.6f (16 bit): (0x%04" PRIx16
             ")\t "
             "Found %.6f (0x%04" PRIx16 ")\n",
             FPToFloat(expected, kIgnoreDefaultNaN),
             e_rawbits,
             FPToFloat(result, kIgnoreDefaultNaN),
             r_rawbits);
    }
    return false;
  }
}

bool EqualFP32(float expected, const RegisterDump*, float result) {
  if (FloatToRawbits(expected) == FloatToRawbits(result)) {
    return true;
  } else {
    if (IsNaN(expected) || (expected == 0.0)) {
      printf("Expected 0x%08" PRIx32 "\t Found 0x%08" PRIx32 "\n",
             FloatToRawbits(expected),
             FloatToRawbits(result));
    } else {
      printf("Expected %.9f (0x%08" PRIx32
             ")\t "
             "Found %.9f (0x%08" PRIx32 ")\n",
             expected,
             FloatToRawbits(expected),
             result,
             FloatToRawbits(result));
    }
    return false;
  }
}

bool EqualFP64(double expected, const RegisterDump*, double result) {
  if (DoubleToRawbits(expected) == DoubleToRawbits(result)) {
    return true;
  }

  if (IsNaN(expected) || (expected == 0.0)) {
    printf("Expected 0x%016" PRIx64 "\t Found 0x%016" PRIx64 "\n",
           DoubleToRawbits(expected),
           DoubleToRawbits(result));
  } else {
    printf("Expected %.17f (0x%016" PRIx64
           ")\t "
           "Found %.17f (0x%016" PRIx64 ")\n",
           expected,
           DoubleToRawbits(expected),
           result,
           DoubleToRawbits(result));
  }
  return false;
}

bool Equal32(uint32_t expected, const RegisterDump* core, const Register& reg) {
  VIXL_ASSERT(reg.Is32Bits());
  // Retrieve the corresponding X register so we can check that the upper part
  // was properly cleared.
  int64_t result_x = core->xreg(reg.GetCode());
  if ((result_x & 0xffffffff00000000) != 0) {
    printf("Expected 0x%08" PRIx32 "\t Found 0x%016" PRIx64 "\n",
           expected,
           result_x);
    return false;
  }
  uint32_t result_w = core->wreg(reg.GetCode());
  return Equal32(expected, core, result_w);
}

bool Equal64(uint64_t reference,
             const RegisterDump* core,
             const Register& reg,
             ExpectedResult option) {
  VIXL_ASSERT(reg.Is64Bits());
  uint64_t result = core->xreg(reg.GetCode());
  return Equal64(reference, core, result, option);
}


bool Equal64(std::vector<uint64_t> reference_list,
             const RegisterDump* core,
             const Register& reg,
             ExpectedResult option) {
  VIXL_ASSERT(reg.Is64Bits());
  uint64_t result = core->xreg(reg.GetCode());
  return Equal64(reference_list, core, result, option);
}


bool NotEqual64(uint64_t reference,
                const RegisterDump* core,
                const Register& reg) {
  VIXL_ASSERT(reg.Is64Bits());
  uint64_t result = core->xreg(reg.GetCode());
  return NotEqual64(reference, core, result);
}

bool Equal128(uint64_t expected_h,
              uint64_t expected_l,
              const RegisterDump* core,
              const VRegister& vreg) {
  VIXL_ASSERT(vreg.Is128Bits());
  QRegisterValue expected;
  expected.SetLane(0, expected_l);
  expected.SetLane(1, expected_h);
  QRegisterValue result = core->qreg(vreg.GetCode());
  return Equal128(expected, core, result);
}

bool EqualFP16(Float16 expected,
               const RegisterDump* core,
               const VRegister& fpreg) {
  VIXL_ASSERT(fpreg.Is16Bits());
  // Retrieve the corresponding D register so we can check that the upper part
  // was properly cleared.
  uint64_t result_64 = core->dreg_bits(fpreg.GetCode());
  if ((result_64 & 0xffffffffffff0000) != 0) {
    printf("Expected 0x%04" PRIx16 " (%f)\t Found 0x%016" PRIx64 "\n",
           Float16ToRawbits(expected),
           FPToFloat(expected, kIgnoreDefaultNaN),
           result_64);
    return false;
  }
  return EqualFP16(expected, core, core->hreg(fpreg.GetCode()));
}

bool EqualFP32(float expected,
               const RegisterDump* core,
               const VRegister& fpreg) {
  VIXL_ASSERT(fpreg.Is32Bits());
  // Retrieve the corresponding D register so we can check that the upper part
  // was properly cleared.
  uint64_t result_64 = core->dreg_bits(fpreg.GetCode());
  if ((result_64 & 0xffffffff00000000) != 0) {
    printf("Expected 0x%08" PRIx32 " (%f)\t Found 0x%016" PRIx64 "\n",
           FloatToRawbits(expected),
           expected,
           result_64);
    return false;
  }

  return EqualFP32(expected, core, core->sreg(fpreg.GetCode()));
}

bool EqualFP64(double expected,
               const RegisterDump* core,
               const VRegister& fpreg) {
  VIXL_ASSERT(fpreg.Is64Bits());
  return EqualFP64(expected, core, core->dreg(fpreg.GetCode()));
}


bool Equal64(const Register& reg0,
             const RegisterDump* core,
             const Register& reg1,
             ExpectedResult option) {
  VIXL_ASSERT(reg0.Is64Bits() && reg1.Is64Bits());
  int64_t reference = core->xreg(reg0.GetCode());
  int64_t result = core->xreg(reg1.GetCode());
  return Equal64(reference, core, result, option);
}


bool NotEqual64(const Register& reg0,
                const RegisterDump* core,
                const Register& reg1) {
  VIXL_ASSERT(reg0.Is64Bits() && reg1.Is64Bits());
  int64_t expected = core->xreg(reg0.GetCode());
  int64_t result = core->xreg(reg1.GetCode());
  return NotEqual64(expected, core, result);
}


bool Equal64(uint64_t expected,
             const RegisterDump* core,
             const VRegister& vreg) {
  VIXL_ASSERT(vreg.Is64Bits());
  uint64_t result = core->dreg_bits(vreg.GetCode());
  return Equal64(expected, core, result);
}

static char FlagN(uint32_t flags) { return (flags & NFlag) ? 'N' : 'n'; }


static char FlagZ(uint32_t flags) { return (flags & ZFlag) ? 'Z' : 'z'; }


static char FlagC(uint32_t flags) { return (flags & CFlag) ? 'C' : 'c'; }


static char FlagV(uint32_t flags) { return (flags & VFlag) ? 'V' : 'v'; }

bool EqualNzcv(uint32_t expected, uint32_t result) {
  VIXL_ASSERT((expected & ~NZCVFlag) == 0);
  VIXL_ASSERT((result & ~NZCVFlag) == 0);
  if (result != expected) {
    printf("Expected: %c%c%c%c\t Found: %c%c%c%c\n",
           FlagN(expected),
           FlagZ(expected),
           FlagC(expected),
           FlagV(expected),
           FlagN(result),
           FlagZ(result),
           FlagC(result),
           FlagV(result));
    return false;
  }

  return true;
}

bool EqualRegisters(const RegisterDump* a, const RegisterDump* b) {
  for (unsigned i = 0; i < kNumberOfRegisters; i++) {
    if (a->xreg(i) != b->xreg(i)) {
      printf("x%d\t Expected 0x%016" PRIx64 "\t Found 0x%016" PRIx64 "\n",
             i,
             a->xreg(i),
             b->xreg(i));
      return false;
    }
  }

  for (unsigned i = 0; i < kNumberOfVRegisters; i++) {
    uint64_t a_bits = a->dreg_bits(i);
    uint64_t b_bits = b->dreg_bits(i);
    if (a_bits != b_bits) {
      printf("d%d\t Expected 0x%016" PRIx64 "\t Found 0x%016" PRIx64 "\n",
             i,
             a_bits,
             b_bits);
      return false;
    }
  }

  return true;
}

bool EqualSVELane(uint64_t expected,
                  const RegisterDump* core,
                  const ZRegister& reg,
                  int lane) {
  unsigned lane_size = reg.GetLaneSizeInBits();
  // For convenience in the tests, we allow negative values to be passed into
  // `expected`, but truncate them to an appropriately-sized unsigned value for
  // the check. For example, in `EqualSVELane(-1, core, z0.VnB())`, the expected
  // value is truncated from 0xffffffffffffffff to 0xff before the comparison.
  VIXL_ASSERT(IsUintN(lane_size, expected) ||
              IsIntN(lane_size, RawbitsToInt64(expected)));
  expected &= GetUintMask(lane_size);

  uint64_t result = core->zreg_lane(reg.GetCode(), lane_size, lane);
  if (expected != result) {
    unsigned lane_size_in_hex_chars = lane_size / 4;
    std::string reg_name = reg.GetArchitecturalName();
    printf("%s[%d]\t Expected 0x%0*" PRIx64 "\t Found 0x%0*" PRIx64 "\n",
           reg_name.c_str(),
           lane,
           lane_size_in_hex_chars,
           expected,
           lane_size_in_hex_chars,
           result);
    return false;
  }
  return true;
}

bool EqualSVELane(uint64_t expected,
                  const RegisterDump* core,
                  const PRegister& reg,
                  int lane) {
  VIXL_ASSERT(reg.HasLaneSize());
  VIXL_ASSERT((reg.GetLaneSizeInBits() % kZRegBitsPerPRegBit) == 0);
  unsigned p_bits_per_lane = reg.GetLaneSizeInBits() / kZRegBitsPerPRegBit;
  VIXL_ASSERT(IsUintN(p_bits_per_lane, expected));
  expected &= GetUintMask(p_bits_per_lane);

  uint64_t result = core->preg_lane(reg.GetCode(), p_bits_per_lane, lane);
  if (expected != result) {
    unsigned lane_size_in_hex_chars = (p_bits_per_lane + 3) / 4;
    std::string reg_name = reg.GetArchitecturalName();
    printf("%s[%d]\t Expected 0x%0*" PRIx64 "\t Found 0x%0*" PRIx64 "\n",
           reg_name.c_str(),
           lane,
           lane_size_in_hex_chars,
           expected,
           lane_size_in_hex_chars,
           result);
    return false;
  }
  return true;
}

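// A 64-bit chunk of the buffers compared by EqualMemory, used when printing a
// side-by-side diff of the expected and actual memory contents.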
struct EqualMemoryChunk {
  typedef uint64_t RawChunk;

  uintptr_t address;
  RawChunk expected;
  RawChunk result;

  bool IsEqual() const { return expected == result; }
};

bool EqualMemory(const void* expected,
                 const void* result,
                 size_t size_in_bytes,
                 size_t zero_offset) {
  if (memcmp(expected, result, size_in_bytes) == 0) return true;

  // Read 64-bit chunks, and print them side-by-side if they don't match.

  // Remember the last few chunks, even if they matched, so we can print some
  // context. We don't want to print the whole buffer, because it could be huge.
  static const size_t kContextLines = 1;
  std::queue<EqualMemoryChunk> context;
  static const size_t kChunkSize = sizeof(EqualMemoryChunk::RawChunk);

  // This assumption keeps the logic simple, and is acceptable for our tests.
  VIXL_ASSERT((size_in_bytes % kChunkSize) == 0);

  const char* expected_it = reinterpret_cast<const char*>(expected);
  const char* result_it = reinterpret_cast<const char*>(result);

  // This is the first error, so print a header row.
  printf(" Address (of result) Expected Result\n");

  // Always print some context at the start of the buffer.
  uintptr_t print_context_to =
      reinterpret_cast<uintptr_t>(result) + (kContextLines + 1) * kChunkSize;
  for (size_t i = 0; i < size_in_bytes; i += kChunkSize) {
    EqualMemoryChunk chunk;
    chunk.address = reinterpret_cast<uintptr_t>(result_it);
    memcpy(&chunk.expected, expected_it, kChunkSize);
    memcpy(&chunk.result, result_it, kChunkSize);

    while (context.size() > kContextLines) context.pop();
    context.push(chunk);

    // Print context after an error, and at the end of the buffer.
    if (!chunk.IsEqual() || ((i + kChunkSize) >= size_in_bytes)) {
      if (chunk.address > print_context_to) {
        // We aren't currently printing context, so separate this context from
        // the previous block.
        printf("...\n");
      }
      print_context_to = chunk.address + (kContextLines + 1) * kChunkSize;
    }

    // Print context (including the current line).
    while (!context.empty() && (context.front().address < print_context_to)) {
      uintptr_t address = context.front().address;
      uint64_t offset = address - reinterpret_cast<uintptr_t>(result);
      bool is_negative = (offset < zero_offset);
      printf("0x%016" PRIxPTR " (result %c %5" PRIu64 "): 0x%016" PRIx64
             " 0x%016" PRIx64 "\n",
             address,
             (is_negative ? '-' : '+'),
             (is_negative ? (zero_offset - offset) : (offset - zero_offset)),
             context.front().expected,
             context.front().result);
      context.pop();
    }

    expected_it += kChunkSize;
    result_it += kChunkSize;
  }

  return false;
}
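// Populate the given (possibly NULL) w[], x[] and r[] arrays with up to
// `reg_count` registers drawn from `allowed`, and return the corresponding
// RegList. The r[] entries are created with `reg_size` bits.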
RegList PopulateRegisterArray(Register* w,
                              Register* x,
                              Register* r,
                              int reg_size,
                              int reg_count,
                              RegList allowed) {
  RegList list = 0;
  int i = 0;
  for (unsigned n = 0; (n < kNumberOfRegisters) && (i < reg_count); n++) {
    if (((UINT64_C(1) << n) & allowed) != 0) {
      // Only assign allowed registers.
      if (r) {
        r[i] = Register(n, reg_size);
      }
      if (x) {
        x[i] = Register(n, kXRegSize);
      }
      if (w) {
        w[i] = Register(n, kWRegSize);
      }
      list |= (UINT64_C(1) << n);
      i++;
    }
  }
  // Check that we got enough registers.
  VIXL_ASSERT(CountSetBits(list, kNumberOfRegisters) == reg_count);

  return list;
}

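// As PopulateRegisterArray, but for V registers: s[] and d[] receive S- and
// D-sized views, while the v[] entries are created with `reg_size` bits.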
RegList PopulateVRegisterArray(VRegister* s,
                               VRegister* d,
                               VRegister* v,
                               int reg_size,
                               int reg_count,
                               RegList allowed) {
  RegList list = 0;
  int i = 0;
  for (unsigned n = 0; (n < kNumberOfVRegisters) && (i < reg_count); n++) {
    if (((UINT64_C(1) << n) & allowed) != 0) {
      // Only assign allowed registers.
      if (v) {
        v[i] = VRegister(n, reg_size);
      }
      if (d) {
        d[i] = VRegister(n, kDRegSize);
      }
      if (s) {
        s[i] = VRegister(n, kSRegSize);
      }
      list |= (UINT64_C(1) << n);
      i++;
    }
  }
  // Check that we got enough registers.
  VIXL_ASSERT(CountSetBits(list, kNumberOfVRegisters) == reg_count);

  return list;
}

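// Overwrite every X register in `reg_list` (except xzr and sp) with `value`.
// The literal is materialised once and then copied into the other registers.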
void Clobber(MacroAssembler* masm, RegList reg_list, uint64_t const value) {
  Register first = NoReg;
  for (unsigned i = 0; i < kNumberOfRegisters; i++) {
    if (reg_list & (UINT64_C(1) << i)) {
      Register xn(i, kXRegSize);
      // We should never write into sp here.
      VIXL_ASSERT(!xn.Is(sp));
      if (!xn.IsZero()) {
        if (!first.IsValid()) {
          // This is the first register we've hit, so construct the literal.
          __ Mov(xn, value);
          first = xn;
        } else {
          // We've already loaded the literal, so re-use the value already
          // loaded into the first register we hit.
          __ Mov(xn, first);
        }
      }
    }
  }
}

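// As Clobber, but for D registers: overwrite every D register in `reg_list`
// with `value`, materialising the literal once and copying it to the rest.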
void ClobberFP(MacroAssembler* masm, RegList reg_list, double const value) {
  VRegister first = NoVReg;
  for (unsigned i = 0; i < kNumberOfVRegisters; i++) {
    if (reg_list & (UINT64_C(1) << i)) {
      VRegister dn(i, kDRegSize);
      if (!first.IsValid()) {
        // This is the first register we've hit, so construct the literal.
        __ Fmov(dn, value);
        first = dn;
      } else {
        // We've already loaded the literal, so re-use the value already loaded
        // into the first register we hit.
        __ Fmov(dn, first);
      }
    }
  }
}


void Clobber(MacroAssembler* masm, CPURegList reg_list) {
  if (reg_list.GetType() == CPURegister::kRegister) {
    // This will always clobber X registers.
    Clobber(masm, reg_list.GetList());
  } else if (reg_list.GetType() == CPURegister::kVRegister) {
    // This will always clobber D registers.
    ClobberFP(masm, reg_list.GetList());
  } else {
    VIXL_UNIMPLEMENTED();
  }
}

// TODO: Once registers have sufficiently compatible interfaces, merge the two
// DumpRegisters templates.
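
// This variant dumps SVE registers (Z or P), using SVE stores so that each
// register is written at its architectural maximum size.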
template <typename T>
static void DumpRegisters(MacroAssembler* masm,
                          Register dump_base,
                          int offset) {
  UseScratchRegisterScope temps(masm);
  Register dump = temps.AcquireX();
  __ Add(dump, dump_base, offset);
  for (unsigned i = 0; i <= T::GetMaxCode(); i++) {
    T reg(i);
    __ Str(reg, SVEMemOperand(dump));
    __ Add(dump, dump, reg.GetMaxSizeInBytes());
  }
}

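// This variant dumps fixed-size core or NEON registers, each written as
// `reg_size_in_bytes` bytes.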
template <typename T>
static void DumpRegisters(MacroAssembler* masm,
                          Register dump_base,
                          int offset,
                          int reg_size_in_bytes) {
  UseScratchRegisterScope temps(masm);
  Register dump = temps.AcquireX();
  __ Add(dump, dump_base, offset);
  for (unsigned i = 0; i <= T::GetMaxCode(); i++) {
    T reg(i, reg_size_in_bytes * kBitsPerByte);
    __ Str(reg, MemOperand(dump));
    __ Add(dump, dump, reg_size_in_bytes);
  }
}

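// Emit code to store the complete architectural state (core, NEON, SVE when
// available, and the NZCV flags) into the dump_ structure, taking care to also
// dump the scratch registers that the dump code itself uses.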
void RegisterDump::Dump(MacroAssembler* masm) {
  VIXL_ASSERT(__ StackPointer().Is(sp));

  dump_cpu_features_ = *masm->GetCPUFeatures();

  // We need some scratch registers, but we also need to dump them, so we have
  // to control exactly which registers are used, and dump them separately.
  CPURegList scratch_registers(x0, x1, x2, x3);

  UseScratchRegisterScope temps(masm);
  temps.ExcludeAll();
  __ PushCPURegList(scratch_registers);
  temps.Include(scratch_registers);

  Register dump_base = temps.AcquireX();
  Register tmp = temps.AcquireX();

  // Offsets into the dump_ structure.
  const int x_offset = offsetof(dump_t, x_);
  const int w_offset = offsetof(dump_t, w_);
  const int d_offset = offsetof(dump_t, d_);
  const int s_offset = offsetof(dump_t, s_);
  const int h_offset = offsetof(dump_t, h_);
  const int q_offset = offsetof(dump_t, q_);
  const int z_offset = offsetof(dump_t, z_);
  const int p_offset = offsetof(dump_t, p_);
  const int sp_offset = offsetof(dump_t, sp_);
  const int wsp_offset = offsetof(dump_t, wsp_);
  const int flags_offset = offsetof(dump_t, flags_);
  const int vl_offset = offsetof(dump_t, vl_);

  // Load the address where we will dump the state.
  __ Mov(dump_base, reinterpret_cast<uintptr_t>(&dump_));

  // Dump the stack pointer (sp and wsp).
  // The stack pointer cannot be stored directly; it needs to be moved into
  // another register first. Also, we pushed four X registers, so we need to
  // compensate here.
  __ Add(tmp, sp, 4 * kXRegSizeInBytes);
  __ Str(tmp, MemOperand(dump_base, sp_offset));
  __ Add(tmp.W(), wsp, 4 * kXRegSizeInBytes);
  __ Str(tmp.W(), MemOperand(dump_base, wsp_offset));

  // Dump core registers.
  DumpRegisters<Register>(masm, dump_base, x_offset, kXRegSizeInBytes);
  DumpRegisters<Register>(masm, dump_base, w_offset, kWRegSizeInBytes);

  // Dump NEON and FP registers.
  DumpRegisters<VRegister>(masm, dump_base, q_offset, kQRegSizeInBytes);
  DumpRegisters<VRegister>(masm, dump_base, d_offset, kDRegSizeInBytes);
  DumpRegisters<VRegister>(masm, dump_base, s_offset, kSRegSizeInBytes);
  DumpRegisters<VRegister>(masm, dump_base, h_offset, kHRegSizeInBytes);

  // Dump SVE registers.
  if (CPUHas(CPUFeatures::kSVE)) {
    DumpRegisters<ZRegister>(masm, dump_base, z_offset);
    DumpRegisters<PRegister>(masm, dump_base, p_offset);

    // Record the vector length.
    __ Rdvl(tmp, kBitsPerByte);
    __ Str(tmp, MemOperand(dump_base, vl_offset));
  }

  // Dump the flags.
  __ Mrs(tmp, NZCV);
  __ Str(tmp, MemOperand(dump_base, flags_offset));

  // To dump the values we used as scratch registers, we need a new scratch
  // register. We can use any of the already dumped registers since we can
  // easily restore them.
  Register dump2_base = x10;
  VIXL_ASSERT(!scratch_registers.IncludesAliasOf(dump2_base));

  VIXL_ASSERT(scratch_registers.IncludesAliasOf(dump_base));

  // Ensure that we don't try to use the scratch registers again.
  temps.ExcludeAll();

  // Don't lose the dump_ address.
  __ Mov(dump2_base, dump_base);

  __ PopCPURegList(scratch_registers);

  while (!scratch_registers.IsEmpty()) {
    CPURegister reg = scratch_registers.PopLowestIndex();
    Register x = reg.X();
    Register w = reg.W();
    unsigned code = reg.GetCode();
    __ Str(x, MemOperand(dump2_base, x_offset + (code * kXRegSizeInBytes)));
    __ Str(w, MemOperand(dump2_base, w_offset + (code * kWRegSizeInBytes)));
  }

  // Finally, restore dump2_base.
  __ Ldr(dump2_base,
         MemOperand(dump2_base,
                    x_offset + (dump2_base.GetCode() * kXRegSizeInBytes)));

  completed_ = true;
}

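// Return the raw bit pattern of the signalling NaN constant for the given FP
// register size (H, S or D).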
uint64_t GetSignallingNan(int size_in_bits) {
  switch (size_in_bits) {
    case kHRegSize:
      return Float16ToRawbits(kFP16SignallingNaN);
    case kSRegSize:
      return FloatToRawbits(kFP32SignallingNaN);
    case kDRegSize:
      return DoubleToRawbits(kFP64SignallingNaN);
    default:
      VIXL_UNIMPLEMENTED();
      return 0;
  }
}

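// Return true if the test can run: the simulator can run anything VIXL can
// assemble; otherwise the host CPU must provide all `required` features. When
// features are missing, a "SKIPPED" message is printed unless
// `queried_can_run` indicates that the caller has already been told.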
bool CanRun(const CPUFeatures& required, bool* queried_can_run) {
  bool log_if_missing = true;
  if (queried_can_run != NULL) {
    log_if_missing = !*queried_can_run;
    *queried_can_run = true;
  }

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
  // The Simulator can run any test that VIXL can assemble.
  USE(required);
  USE(log_if_missing);
  return true;
#else
  CPUFeatures cpu = CPUFeatures::InferFromOS();
  // If InferFromOS fails, assume that basic features are present.
  if (cpu.HasNoFeatures()) cpu = CPUFeatures::AArch64LegacyBaseline();
  VIXL_ASSERT(cpu.Has(kInfrastructureCPUFeatures));

  if (cpu.Has(required)) return true;

  if (log_if_missing) {
    CPUFeatures missing = required.Without(cpu);
    // Note: This message needs to match REGEXP_MISSING_FEATURES from
    // tools/threaded_test.py.
    std::cout << "SKIPPED: Missing features: { " << missing << " }\n";
    std::cout << "This test requires the following features to run its "
                 "generated code on this CPU: "
              << required << "\n";
  }
  return false;
#endif
}

// Note that this function assumes that p0, p1, p2 and p3 are set to all true
// in b-, h-, s- and d-lane sizes respectively, and that p4 and p5 are
// clobbered as temporary predicates.
template <typename T, size_t N>
void SetFpData(MacroAssembler* masm,
               int esize,
               const T (&values)[N],
               uint64_t lcg_mult) {
  uint64_t a = 0;
  uint64_t b = lcg_mult;
  // Used to populate the assigned element slots of each register, based on the
  // floating point type.
  __ Pfalse(p5.VnB());
  switch (esize) {
    case kHRegSize:
      a = Float16ToRawbits(Float16(1.5));
      // Pick a convenient number within the range of normal half-precision
      // floating point values.
      b = Float16ToRawbits(Float16(lcg_mult % 1024));
      // Step 1: Set fp16 numbers in the undefined registers.
      //      p4< 15:0>: 0b0101010101010101
      // z{code}<127:0>: 0xHHHHHHHHHHHHHHHH
      __ Zip1(p4.VnB(), p0.VnB(), p5.VnB());
      break;
    case kSRegSize:
      a = FloatToRawbits(1.5);
      b = FloatToRawbits(lcg_mult);
      // Step 2: Set fp32 numbers in the registers, on top of the fp16 values
      // already initialised.
      //      p4< 15:0>: 0b0000000100000001
      // z{code}<127:0>: 0xHHHHSSSSHHHHSSSS
      __ Zip1(p4.VnS(), p2.VnS(), p5.VnS());
      break;
    case kDRegSize:
      a = DoubleToRawbits(1.5);
      b = DoubleToRawbits(lcg_mult);
      // Step 3: Set fp64 numbers in the registers, on top of the fp16 and fp32
      // values already initialised.
      //      p4< 15:0>: 0b0000000000000001
      // z{code}<127:0>: 0xHHHHSSSSDDDDDDDD
      __ Zip1(p4.VnD(), p3.VnD(), p5.VnD());
      break;
    default:
      VIXL_UNIMPLEMENTED();
      break;
  }

  __ Dup(z30.WithLaneSize(esize), a);
  __ Dup(z31.WithLaneSize(esize), b);

  for (unsigned j = 0; j <= (kZRegMaxSize / (N * esize)); j++) {
    // As floating point operations on random values have a tendency to
    // converge on special-case numbers like NaNs, use normal floating point
    // values as the seed instead.
    InsrHelper(masm, z0.WithLaneSize(esize), values);
  }

  __ Fmla(z0.WithLaneSize(esize),
          p4.Merging(),
          z30.WithLaneSize(esize),
          z0.WithLaneSize(esize),
          z31.WithLaneSize(esize),
          FastNaNPropagation);

  for (unsigned i = 1; i < kNumberOfZRegisters - 1; i++) {
    __ Fmla(ZRegister(i).WithLaneSize(esize),
            p4.Merging(),
            z30.WithLaneSize(esize),
            ZRegister(i - 1).WithLaneSize(esize),
            z31.WithLaneSize(esize),
            FastNaNPropagation);
  }

  __ Fmul(z31.WithLaneSize(esize),
          p4.Merging(),
          z31.WithLaneSize(esize),
          z30.WithLaneSize(esize),
          FastNaNPropagation);
  __ Fadd(z31.WithLaneSize(esize), p4.Merging(), z31.WithLaneSize(esize), 1);
}


// Set z0 - z31 to some normal floating point data.
void InitialiseRegisterFp(MacroAssembler* masm, uint64_t lcg_mult) {
  // Initialise each Z register to a mixture of fp16/32/64 values with the
  // following pattern:
  // z0.h[0-1] = fp16, z0.s[1] = fp32, z0.d[1] = fp64, repeated throughout the
  // register.
  //
  // For example:
  // z{code}<2047:1920>: 0x{< fp64 >< fp32 ><fp16><fp16>}
  // ...
  // z{code}< 127:   0>: 0x{< fp64 >< fp32 ><fp16><fp16>}
  //
  // To produce the desired mixture, each part of the initialisation must be
  // called in the following order.
  SetFpData(masm, kHRegSize, kInputFloat16Basic, lcg_mult);
  SetFpData(masm, kSRegSize, kInputFloatBasic, lcg_mult);
  SetFpData(masm, kDRegSize, kInputDoubleBasic, lcg_mult);
}

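// Initialise x0 - x30, z0 - z31 and p0 - p15 with reproducible pseudo-random
// data, derived from a linear congruential generator (kIntInputSet) or from a
// set of normal floating point values (kFpInputSet). sp and a few status
// registers are left undefined.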
void SetInitialMachineState(MacroAssembler* masm, InputSet input_set) {
  USE(input_set);
  uint64_t lcg_mult = 6364136223846793005;

  // Set x0 - x30 to pseudo-random data.
  __ Mov(x29, 1);  // LCG increment.
  __ Mov(x30, lcg_mult);
  __ Mov(x0, 42);  // LCG seed.

  __ Cmn(x0, 0);  // Clear NZCV flags for later.

  __ Madd(x0, x0, x30, x29);  // First pseudo-random number.

  // Registers 1 - 29.
  for (unsigned i = 1; i < 30; i++) {
    __ Madd(XRegister(i), XRegister(i - 1), x30, x29);
  }
  __ Mul(x30, x29, x30);
  __ Add(x30, x30, 1);


  // Set first four predicate registers to true for increasing lane sizes.
  __ Ptrue(p0.VnB());
  __ Ptrue(p1.VnH());
  __ Ptrue(p2.VnS());
  __ Ptrue(p3.VnD());

  // Set z0 - z31 to pseudo-random data.
  if (input_set == kIntInputSet) {
    __ Dup(z30.VnD(), 1);
    __ Dup(z31.VnD(), lcg_mult);
    __ Index(z0.VnB(), -16, 13);  // LCG seeds.

    __ Mla(z0.VnD(), p0.Merging(), z30.VnD(), z0.VnD(), z31.VnD());
    for (unsigned i = 1; i < kNumberOfZRegisters - 1; i++) {
      __ Mla(ZRegister(i).VnD(),
             p0.Merging(),
             z30.VnD(),
             ZRegister(i - 1).VnD(),
             z31.VnD());
    }
    __ Mul(z31.VnD(), p0.Merging(), z31.VnD(), z30.VnD());
    __ Add(z31.VnD(), z31.VnD(), 1);

  } else {
    VIXL_ASSERT(input_set == kFpInputSet);
    InitialiseRegisterFp(masm, lcg_mult);
  }

  // Set remaining predicate registers based on earlier pseudo-random data.
  for (unsigned i = 4; i < kNumberOfPRegisters; i++) {
    __ Cmpge(PRegister(i).VnB(), p0.Zeroing(), ZRegister(i).VnB(), 0);
  }
  for (unsigned i = 4; i < kNumberOfPRegisters; i += 2) {
    __ Zip1(p0.VnB(), PRegister(i).VnB(), PRegister(i + 1).VnB());
    __ Zip2(PRegister(i + 1).VnB(), PRegister(i).VnB(), PRegister(i + 1).VnB());
    __ Mov(PRegister(i), p0);
  }
  __ Ptrue(p0.VnB());

  // At this point, only sp and a few status registers are undefined. These
  // must be ignored when computing the state hash.
}

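// Compute a CRC32-based hash of the machine state (x0 - x30, the NZCV flags,
// and all Z and P register bits) and store the 32-bit result at `dst`.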
void ComputeMachineStateHash(MacroAssembler* masm, uint32_t* dst) {
  // Use explicit registers, to avoid hash order varying if
  // UseScratchRegisterScope changes.
  UseScratchRegisterScope temps(masm);
  temps.ExcludeAll();
  Register t0 = w0;
  Register t1 = x1;

  // Compute hash of x0 - x30.
  __ Push(t0.X(), t1);
  __ Crc32x(t0, wzr, t0.X());
  for (unsigned i = 0; i < kNumberOfRegisters; i++) {
    if (i == xzr.GetCode()) continue;   // Skip sp.
    if (t0.Is(WRegister(i))) continue;  // Skip t0, as it's already hashed.
    __ Crc32x(t0, t0, XRegister(i));
  }

  // Hash the status flags.
  __ Mrs(t1, NZCV);
  __ Crc32x(t0, t0, t1);

  // Acquire another temp, as integer registers have been hashed already.
  __ Push(x30, xzr);
  Register t2 = x30;

  // Compute hash of all bits in z0 - z31. This implies different hashes are
  // produced for machines of different vector length.
  for (unsigned i = 0; i < kNumberOfZRegisters; i++) {
    __ Rdvl(t2, 1);
    __ Lsr(t2, t2, 4);
    Label vl_loop;
    __ Bind(&vl_loop);
    __ Umov(t1, VRegister(i).V2D(), 0);
    __ Crc32x(t0, t0, t1);
    __ Umov(t1, VRegister(i).V2D(), 1);
    __ Crc32x(t0, t0, t1);
    __ Ext(ZRegister(i).VnB(), ZRegister(i).VnB(), ZRegister(i).VnB(), 16);
    __ Sub(t2, t2, 1);
    __ Cbnz(t2, &vl_loop);
  }

  // Hash predicate registers. For simplicity, this writes the predicate
  // registers to a zero-initialised area of stack of the maximum size required
  // for P registers. It then computes a hash of that entire stack area.
  unsigned p_stack_space = kNumberOfPRegisters * kPRegMaxSizeInBytes;

  // Zero claimed stack area.
  for (unsigned i = 0; i < p_stack_space; i += kXRegSizeInBytes * 2) {
    __ Push(xzr, xzr);
  }

  // Store all P registers to the stack.
  __ Mov(t1, sp);
  for (unsigned i = 0; i < kNumberOfPRegisters; i++) {
    __ Str(PRegister(i), SVEMemOperand(t1));
    __ Add(t1, t1, kPRegMaxSizeInBytes);
  }

  // Hash the entire stack area.
  for (unsigned i = 0; i < p_stack_space; i += kXRegSizeInBytes * 2) {
    __ Pop(t1, t2);
    __ Crc32x(t0, t0, t1);
    __ Crc32x(t0, t0, t2);
  }

  __ Mov(t1, reinterpret_cast<uint64_t>(dst));
  __ Str(t0, MemOperand(t1));

  __ Pop(xzr, x30);
  __ Pop(t1, t0.X());
}

}  // namespace aarch64
}  // namespace vixl