// Copyright 2014, VIXL authors
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
//   * Redistributions of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//   * Redistributions in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//   * Neither the name of ARM Limited nor the names of its contributors may be
//     used to endorse or promote products derived from this software without
//     specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#include <cmath>
#include <queue>

#include "test-runner.h"
#include "test-utils-aarch64.h"

#include "../test/aarch64/test-simulator-inputs-aarch64.h"
#include "aarch64/cpu-aarch64.h"
#include "aarch64/disasm-aarch64.h"
#include "aarch64/macro-assembler-aarch64.h"
#include "aarch64/simulator-aarch64.h"

#define __ masm->

namespace vixl {
namespace aarch64 {


// This value is a signalling NaN as FP64, and also as FP32 or FP16 (taking the
// least-significant bits).
const double kFP64SignallingNaN = RawbitsToDouble(UINT64_C(0x7ff000007f807c01));
const float kFP32SignallingNaN = RawbitsToFloat(0x7f807c01);
const Float16 kFP16SignallingNaN = RawbitsToFloat16(0x7c01);

// A similar value, but as a quiet NaN.
const double kFP64QuietNaN = RawbitsToDouble(UINT64_C(0x7ff800007fc07e01));
const float kFP32QuietNaN = RawbitsToFloat(0x7fc07e01);
const Float16 kFP16QuietNaN = RawbitsToFloat16(0x7e01);
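// In both sets, the wider constants embed the narrower ones in their
// least-significant bits: 0x7ff000007f807c01 holds 0x7f807c01 (the FP32
// pattern) in bits <31:0> and 0x7c01 (the FP16 pattern) in bits <15:0>. Each
// value keeps an all-ones exponent at every width, with the top fraction bit
// clear for the signalling forms and set for the quiet forms.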


bool Equal32(uint32_t expected, const RegisterDump*, uint32_t result) {
  if (result != expected) {
    printf("Expected 0x%08" PRIx32 "\t Found 0x%08" PRIx32 "\n",
           expected,
           result);
  }

  return expected == result;
}


bool Equal64(uint64_t reference,
             const RegisterDump*,
             uint64_t result,
             ExpectedResult option) {
  switch (option) {
    case kExpectEqual:
      if (result != reference) {
        printf("Expected 0x%016" PRIx64 "\t Found 0x%016" PRIx64 "\n",
               reference,
               result);
      }
      break;
    case kExpectNotEqual:
      if (result == reference) {
        printf("Expected a result not equal to 0x%016" PRIx64 "\n", reference);
      }
      break;
  }

  return reference == result;
}


bool Equal128(QRegisterValue expected,
              const RegisterDump*,
              QRegisterValue result) {
  if (!expected.Equals(result)) {
    printf("Expected 0x%016" PRIx64 "%016" PRIx64
           "\t "
           "Found 0x%016" PRIx64 "%016" PRIx64 "\n",
           expected.GetLane<uint64_t>(1),
           expected.GetLane<uint64_t>(0),
           result.GetLane<uint64_t>(1),
           result.GetLane<uint64_t>(0));
  }

  return expected.Equals(result);
}


bool EqualFP16(Float16 expected, const RegisterDump*, Float16 result) {
  uint16_t e_rawbits = Float16ToRawbits(expected);
  uint16_t r_rawbits = Float16ToRawbits(result);
  if (e_rawbits == r_rawbits) {
    return true;
  } else {
    if (IsNaN(expected) || IsZero(expected)) {
      printf("Expected 0x%04" PRIx16 "\t Found 0x%04" PRIx16 "\n",
             e_rawbits,
             r_rawbits);
    } else {
      printf("Expected %.6f (16 bit): (0x%04" PRIx16
             ")\t "
             "Found %.6f (0x%04" PRIx16 ")\n",
             FPToFloat(expected, kIgnoreDefaultNaN),
             e_rawbits,
             FPToFloat(result, kIgnoreDefaultNaN),
             r_rawbits);
    }
    return false;
  }
}


bool EqualFP32(float expected, const RegisterDump*, float result) {
  if (FloatToRawbits(expected) == FloatToRawbits(result)) {
    return true;
  } else {
    if (IsNaN(expected) || (expected == 0.0)) {
      printf("Expected 0x%08" PRIx32 "\t Found 0x%08" PRIx32 "\n",
             FloatToRawbits(expected),
             FloatToRawbits(result));
    } else {
      printf("Expected %.9f (0x%08" PRIx32
             ")\t "
             "Found %.9f (0x%08" PRIx32 ")\n",
             expected,
             FloatToRawbits(expected),
             result,
             FloatToRawbits(result));
    }
    return false;
  }
}


bool EqualFP64(double expected, const RegisterDump*, double result) {
  if (DoubleToRawbits(expected) == DoubleToRawbits(result)) {
    return true;
  }

  if (IsNaN(expected) || (expected == 0.0)) {
    printf("Expected 0x%016" PRIx64 "\t Found 0x%016" PRIx64 "\n",
           DoubleToRawbits(expected),
           DoubleToRawbits(result));
  } else {
    printf("Expected %.17f (0x%016" PRIx64
           ")\t "
           "Found %.17f (0x%016" PRIx64 ")\n",
           expected,
           DoubleToRawbits(expected),
           result,
           DoubleToRawbits(result));
  }
  return false;
}


bool Equal32(uint32_t expected, const RegisterDump* core, const Register& reg) {
  VIXL_ASSERT(reg.Is32Bits());
  // Retrieve the corresponding X register so we can check that the upper part
  // was properly cleared.
  int64_t result_x = core->xreg(reg.GetCode());
  if ((result_x & 0xffffffff00000000) != 0) {
    printf("Expected 0x%08" PRIx32 "\t Found 0x%016" PRIx64 "\n",
           expected,
           result_x);
    return false;
  }
  uint32_t result_w = core->wreg(reg.GetCode());
  return Equal32(expected, core, result_w);
}


bool Equal64(uint64_t reference,
             const RegisterDump* core,
             const Register& reg,
             ExpectedResult option) {
  VIXL_ASSERT(reg.Is64Bits());
  uint64_t result = core->xreg(reg.GetCode());
  return Equal64(reference, core, result, option);
}


bool NotEqual64(uint64_t reference,
                const RegisterDump* core,
                const Register& reg) {
  VIXL_ASSERT(reg.Is64Bits());
  uint64_t result = core->xreg(reg.GetCode());
  return NotEqual64(reference, core, result);
}


bool Equal128(uint64_t expected_h,
              uint64_t expected_l,
              const RegisterDump* core,
              const VRegister& vreg) {
  VIXL_ASSERT(vreg.Is128Bits());
  QRegisterValue expected;
  expected.SetLane(0, expected_l);
  expected.SetLane(1, expected_h);
  QRegisterValue result = core->qreg(vreg.GetCode());
  return Equal128(expected, core, result);
}


bool EqualFP16(Float16 expected,
               const RegisterDump* core,
               const VRegister& fpreg) {
  VIXL_ASSERT(fpreg.Is16Bits());
  // Retrieve the corresponding D register so we can check that the upper part
  // was properly cleared.
  uint64_t result_64 = core->dreg_bits(fpreg.GetCode());
  if ((result_64 & 0xffffffffffff0000) != 0) {
    printf("Expected 0x%04" PRIx16 " (%f)\t Found 0x%016" PRIx64 "\n",
           Float16ToRawbits(expected),
           FPToFloat(expected, kIgnoreDefaultNaN),
           result_64);
    return false;
  }
  return EqualFP16(expected, core, core->hreg(fpreg.GetCode()));
}


bool EqualFP32(float expected,
               const RegisterDump* core,
               const VRegister& fpreg) {
  VIXL_ASSERT(fpreg.Is32Bits());
  // Retrieve the corresponding D register so we can check that the upper part
  // was properly cleared.
  uint64_t result_64 = core->dreg_bits(fpreg.GetCode());
  if ((result_64 & 0xffffffff00000000) != 0) {
    printf("Expected 0x%08" PRIx32 " (%f)\t Found 0x%016" PRIx64 "\n",
           FloatToRawbits(expected),
           expected,
           result_64);
    return false;
  }

  return EqualFP32(expected, core, core->sreg(fpreg.GetCode()));
}


bool EqualFP64(double expected,
               const RegisterDump* core,
               const VRegister& fpreg) {
  VIXL_ASSERT(fpreg.Is64Bits());
  return EqualFP64(expected, core, core->dreg(fpreg.GetCode()));
}


bool Equal64(const Register& reg0,
             const RegisterDump* core,
             const Register& reg1,
             ExpectedResult option) {
  VIXL_ASSERT(reg0.Is64Bits() && reg1.Is64Bits());
  int64_t reference = core->xreg(reg0.GetCode());
  int64_t result = core->xreg(reg1.GetCode());
  return Equal64(reference, core, result, option);
}


bool NotEqual64(const Register& reg0,
                const RegisterDump* core,
                const Register& reg1) {
  VIXL_ASSERT(reg0.Is64Bits() && reg1.Is64Bits());
  int64_t expected = core->xreg(reg0.GetCode());
  int64_t result = core->xreg(reg1.GetCode());
  return NotEqual64(expected, core, result);
}


bool Equal64(uint64_t expected,
             const RegisterDump* core,
             const VRegister& vreg) {
  VIXL_ASSERT(vreg.Is64Bits());
  uint64_t result = core->dreg_bits(vreg.GetCode());
  return Equal64(expected, core, result);
}


static char FlagN(uint32_t flags) { return (flags & NFlag) ? 'N' : 'n'; }


static char FlagZ(uint32_t flags) { return (flags & ZFlag) ? 'Z' : 'z'; }


static char FlagC(uint32_t flags) { return (flags & CFlag) ? 'C' : 'c'; }


static char FlagV(uint32_t flags) { return (flags & VFlag) ? 'V' : 'v'; }


bool EqualNzcv(uint32_t expected, uint32_t result) {
  VIXL_ASSERT((expected & ~NZCVFlag) == 0);
  VIXL_ASSERT((result & ~NZCVFlag) == 0);
  if (result != expected) {
    printf("Expected: %c%c%c%c\t Found: %c%c%c%c\n",
           FlagN(expected),
           FlagZ(expected),
           FlagC(expected),
           FlagV(expected),
           FlagN(result),
           FlagZ(result),
           FlagC(result),
           FlagV(result));
    return false;
  }

  return true;
}


bool EqualRegisters(const RegisterDump* a, const RegisterDump* b) {
  for (unsigned i = 0; i < kNumberOfRegisters; i++) {
    if (a->xreg(i) != b->xreg(i)) {
      printf("x%d\t Expected 0x%016" PRIx64 "\t Found 0x%016" PRIx64 "\n",
             i,
             a->xreg(i),
             b->xreg(i));
      return false;
    }
  }

  for (unsigned i = 0; i < kNumberOfVRegisters; i++) {
    uint64_t a_bits = a->dreg_bits(i);
    uint64_t b_bits = b->dreg_bits(i);
    if (a_bits != b_bits) {
      printf("d%d\t Expected 0x%016" PRIx64 "\t Found 0x%016" PRIx64 "\n",
             i,
             a_bits,
             b_bits);
      return false;
    }
  }

  return true;
}

bool EqualSVELane(uint64_t expected,
                  const RegisterDump* core,
                  const ZRegister& reg,
                  int lane) {
  unsigned lane_size = reg.GetLaneSizeInBits();
  // For convenience in the tests, we allow negative values to be passed into
  // `expected`, but truncate them to an appropriately-sized unsigned value for
  // the check. For example, in `EqualSVELane(-1, core, z0.VnB())`, the expected
  // value is truncated from 0xffffffffffffffff to 0xff before the comparison.
  VIXL_ASSERT(IsUintN(lane_size, expected) ||
              IsIntN(lane_size, RawbitsToInt64(expected)));
  expected &= GetUintMask(lane_size);

  uint64_t result = core->zreg_lane(reg.GetCode(), lane_size, lane);
  if (expected != result) {
    unsigned lane_size_in_hex_chars = lane_size / 4;
    std::string reg_name = reg.GetArchitecturalName();
    printf("%s[%d]\t Expected 0x%0*" PRIx64 "\t Found 0x%0*" PRIx64 "\n",
           reg_name.c_str(),
           lane,
           lane_size_in_hex_chars,
           expected,
           lane_size_in_hex_chars,
           result);
    return false;
  }
  return true;
}

bool EqualSVELane(uint64_t expected,
                  const RegisterDump* core,
                  const PRegister& reg,
                  int lane) {
  VIXL_ASSERT(reg.HasLaneSize());
  VIXL_ASSERT((reg.GetLaneSizeInBits() % kZRegBitsPerPRegBit) == 0);
  unsigned p_bits_per_lane = reg.GetLaneSizeInBits() / kZRegBitsPerPRegBit;
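  // An SVE predicate register holds one bit per byte of a Z register, so a
  // lane of `n` bits in the Z register corresponds to
  // `n / kZRegBitsPerPRegBit` predicate bits here.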
  VIXL_ASSERT(IsUintN(p_bits_per_lane, expected));
  expected &= GetUintMask(p_bits_per_lane);

  uint64_t result = core->preg_lane(reg.GetCode(), p_bits_per_lane, lane);
  if (expected != result) {
    unsigned lane_size_in_hex_chars = (p_bits_per_lane + 3) / 4;
    std::string reg_name = reg.GetArchitecturalName();
    printf("%s[%d]\t Expected 0x%0*" PRIx64 "\t Found 0x%0*" PRIx64 "\n",
           reg_name.c_str(),
           lane,
           lane_size_in_hex_chars,
           expected,
           lane_size_in_hex_chars,
           result);
    return false;
  }
  return true;
}

struct EqualMemoryChunk {
  typedef uint64_t RawChunk;

  uintptr_t address;
  RawChunk expected;
  RawChunk result;

  bool IsEqual() const { return expected == result; }
};

bool EqualMemory(const void* expected,
                 const void* result,
                 size_t size_in_bytes,
                 size_t zero_offset) {
  if (memcmp(expected, result, size_in_bytes) == 0) return true;

  // Read 64-bit chunks, and print them side-by-side if they don't match.

  // Remember the last few chunks, even if they matched, so we can print some
  // context. We don't want to print the whole buffer, because it could be huge.
  static const size_t kContextLines = 1;
  std::queue<EqualMemoryChunk> context;
  static const size_t kChunkSize = sizeof(EqualMemoryChunk::RawChunk);

  // This assumption keeps the logic simple, and is acceptable for our tests.
  VIXL_ASSERT((size_in_bytes % kChunkSize) == 0);

  const char* expected_it = reinterpret_cast<const char*>(expected);
  const char* result_it = reinterpret_cast<const char*>(result);

  // This is the first error, so print a header row.
  printf("  Address (of result)                  Expected           Result\n");

  // Always print some context at the start of the buffer.
  uintptr_t print_context_to =
      reinterpret_cast<uintptr_t>(result) + (kContextLines + 1) * kChunkSize;
  for (size_t i = 0; i < size_in_bytes; i += kChunkSize) {
    EqualMemoryChunk chunk;
    chunk.address = reinterpret_cast<uintptr_t>(result_it);
    memcpy(&chunk.expected, expected_it, kChunkSize);
    memcpy(&chunk.result, result_it, kChunkSize);

    while (context.size() > kContextLines) context.pop();
    context.push(chunk);

    // Print context after an error, and at the end of the buffer.
    if (!chunk.IsEqual() || ((i + kChunkSize) >= size_in_bytes)) {
      if (chunk.address > print_context_to) {
        // We aren't currently printing context, so separate this context from
        // the previous block.
        printf("...\n");
      }
      print_context_to = chunk.address + (kContextLines + 1) * kChunkSize;
    }

    // Print context (including the current line).
    while (!context.empty() && (context.front().address < print_context_to)) {
      uintptr_t address = context.front().address;
      uint64_t offset = address - reinterpret_cast<uintptr_t>(result);
      bool is_negative = (offset < zero_offset);
      printf("0x%016" PRIxPTR " (result %c %5" PRIu64 "): 0x%016" PRIx64
             " 0x%016" PRIx64 "\n",
             address,
             (is_negative ? '-' : '+'),
             (is_negative ? (zero_offset - offset) : (offset - zero_offset)),
             context.front().expected,
             context.front().result);
      context.pop();
    }

    expected_it += kChunkSize;
    result_it += kChunkSize;
  }

  return false;
}
RegList PopulateRegisterArray(Register* w,
                              Register* x,
                              Register* r,
                              int reg_size,
                              int reg_count,
                              RegList allowed) {
  RegList list = 0;
  int i = 0;
  for (unsigned n = 0; (n < kNumberOfRegisters) && (i < reg_count); n++) {
    if (((UINT64_C(1) << n) & allowed) != 0) {
      // Only assign allowed registers.
      if (r) {
        r[i] = Register(n, reg_size);
      }
      if (x) {
        x[i] = Register(n, kXRegSize);
      }
      if (w) {
        w[i] = Register(n, kWRegSize);
      }
      list |= (UINT64_C(1) << n);
      i++;
    }
  }
  // Check that we got enough registers.
  VIXL_ASSERT(CountSetBits(list, kNumberOfRegisters) == reg_count);

  return list;
}


RegList PopulateVRegisterArray(VRegister* s,
                               VRegister* d,
                               VRegister* v,
                               int reg_size,
                               int reg_count,
                               RegList allowed) {
  RegList list = 0;
  int i = 0;
  for (unsigned n = 0; (n < kNumberOfVRegisters) && (i < reg_count); n++) {
    if (((UINT64_C(1) << n) & allowed) != 0) {
      // Only assign allowed registers.
      if (v) {
        v[i] = VRegister(n, reg_size);
      }
      if (d) {
        d[i] = VRegister(n, kDRegSize);
      }
      if (s) {
        s[i] = VRegister(n, kSRegSize);
      }
      list |= (UINT64_C(1) << n);
      i++;
    }
  }
  // Check that we got enough registers.
  VIXL_ASSERT(CountSetBits(list, kNumberOfVRegisters) == reg_count);

  return list;
}


void Clobber(MacroAssembler* masm, RegList reg_list, uint64_t const value) {
  Register first = NoReg;
  for (unsigned i = 0; i < kNumberOfRegisters; i++) {
    if (reg_list & (UINT64_C(1) << i)) {
      Register xn(i, kXRegSize);
      // We should never write into sp here.
      VIXL_ASSERT(!xn.Is(sp));
      if (!xn.IsZero()) {
        if (!first.IsValid()) {
          // This is the first register we've hit, so construct the literal.
          __ Mov(xn, value);
          first = xn;
        } else {
          // We've already loaded the literal, so re-use the value already
          // loaded into the first register we hit.
          __ Mov(xn, first);
        }
      }
    }
  }
}


void ClobberFP(MacroAssembler* masm, RegList reg_list, double const value) {
  VRegister first = NoVReg;
  for (unsigned i = 0; i < kNumberOfVRegisters; i++) {
    if (reg_list & (UINT64_C(1) << i)) {
      VRegister dn(i, kDRegSize);
      if (!first.IsValid()) {
        // This is the first register we've hit, so construct the literal.
        __ Fmov(dn, value);
        first = dn;
      } else {
        // We've already loaded the literal, so re-use the value already loaded
        // into the first register we hit.
        __ Fmov(dn, first);
      }
    }
  }
}


void Clobber(MacroAssembler* masm, CPURegList reg_list) {
  if (reg_list.GetType() == CPURegister::kRegister) {
    // This will always clobber X registers.
    Clobber(masm, reg_list.GetList());
  } else if (reg_list.GetType() == CPURegister::kVRegister) {
    // This will always clobber D registers.
    ClobberFP(masm, reg_list.GetList());
  } else {
    VIXL_UNIMPLEMENTED();
  }
}

// TODO: Once registers have sufficiently compatible interfaces, merge the two
// DumpRegisters templates.
template <typename T>
static void DumpRegisters(MacroAssembler* masm,
                          Register dump_base,
                          int offset) {
  UseScratchRegisterScope temps(masm);
  Register dump = temps.AcquireX();
  __ Add(dump, dump_base, offset);
  for (unsigned i = 0; i <= T::GetMaxCode(); i++) {
    T reg(i);
    __ Str(reg, SVEMemOperand(dump));
    __ Add(dump, dump, reg.GetMaxSizeInBytes());
  }
}

template <typename T>
static void DumpRegisters(MacroAssembler* masm,
                          Register dump_base,
                          int offset,
                          int reg_size_in_bytes) {
  UseScratchRegisterScope temps(masm);
  Register dump = temps.AcquireX();
  __ Add(dump, dump_base, offset);
  for (unsigned i = 0; i <= T::GetMaxCode(); i++) {
    T reg(i, reg_size_in_bytes * kBitsPerByte);
    __ Str(reg, MemOperand(dump));
    __ Add(dump, dump, reg_size_in_bytes);
  }
}

void RegisterDump::Dump(MacroAssembler* masm) {
  VIXL_ASSERT(__ StackPointer().Is(sp));

  dump_cpu_features_ = *masm->GetCPUFeatures();

  // We need some scratch registers, but we also need to dump them, so we have
  // to control exactly which registers are used, and dump them separately.
  CPURegList scratch_registers(x0, x1, x2, x3);

  UseScratchRegisterScope temps(masm);
  temps.ExcludeAll();
  __ PushCPURegList(scratch_registers);
  temps.Include(scratch_registers);

  Register dump_base = temps.AcquireX();
  Register tmp = temps.AcquireX();

  // Offsets into the dump_ structure.
  const int x_offset = offsetof(dump_t, x_);
  const int w_offset = offsetof(dump_t, w_);
  const int d_offset = offsetof(dump_t, d_);
  const int s_offset = offsetof(dump_t, s_);
  const int h_offset = offsetof(dump_t, h_);
  const int q_offset = offsetof(dump_t, q_);
  const int z_offset = offsetof(dump_t, z_);
  const int p_offset = offsetof(dump_t, p_);
  const int sp_offset = offsetof(dump_t, sp_);
  const int wsp_offset = offsetof(dump_t, wsp_);
  const int flags_offset = offsetof(dump_t, flags_);
  const int vl_offset = offsetof(dump_t, vl_);

  // Load the address where we will dump the state.
  __ Mov(dump_base, reinterpret_cast<uintptr_t>(&dump_));

  // Dump the stack pointer (sp and wsp).
  // The stack pointer cannot be stored directly; it needs to be moved into
  // another register first. Also, we pushed four X registers, so we need to
  // compensate here.
  __ Add(tmp, sp, 4 * kXRegSizeInBytes);
  __ Str(tmp, MemOperand(dump_base, sp_offset));
  __ Add(tmp.W(), wsp, 4 * kXRegSizeInBytes);
  __ Str(tmp.W(), MemOperand(dump_base, wsp_offset));

  // Dump core registers.
  DumpRegisters<Register>(masm, dump_base, x_offset, kXRegSizeInBytes);
  DumpRegisters<Register>(masm, dump_base, w_offset, kWRegSizeInBytes);

  // Dump NEON and FP registers.
  DumpRegisters<VRegister>(masm, dump_base, q_offset, kQRegSizeInBytes);
  DumpRegisters<VRegister>(masm, dump_base, d_offset, kDRegSizeInBytes);
  DumpRegisters<VRegister>(masm, dump_base, s_offset, kSRegSizeInBytes);
  DumpRegisters<VRegister>(masm, dump_base, h_offset, kHRegSizeInBytes);

  // Dump SVE registers.
  if (CPUHas(CPUFeatures::kSVE)) {
    DumpRegisters<ZRegister>(masm, dump_base, z_offset);
    DumpRegisters<PRegister>(masm, dump_base, p_offset);

    // Record the vector length.
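    // Rdvl multiplies the current vector length in bytes by its immediate, so
    // passing kBitsPerByte stores the vector length in bits.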
    __ Rdvl(tmp, kBitsPerByte);
    __ Str(tmp, MemOperand(dump_base, vl_offset));
  }

  // Dump the flags.
  __ Mrs(tmp, NZCV);
  __ Str(tmp, MemOperand(dump_base, flags_offset));

  // To dump the values we used as scratch registers, we need a new scratch
  // register. We can use any of the already dumped registers since we can
  // easily restore them.
  Register dump2_base = x10;
  VIXL_ASSERT(!scratch_registers.IncludesAliasOf(dump2_base));

  VIXL_ASSERT(scratch_registers.IncludesAliasOf(dump_base));

  // Ensure that we don't try to use the scratch registers again.
  temps.ExcludeAll();

  // Don't lose the dump_ address.
  __ Mov(dump2_base, dump_base);

  __ PopCPURegList(scratch_registers);

  while (!scratch_registers.IsEmpty()) {
    CPURegister reg = scratch_registers.PopLowestIndex();
    Register x = reg.X();
    Register w = reg.W();
    unsigned code = reg.GetCode();
    __ Str(x, MemOperand(dump2_base, x_offset + (code * kXRegSizeInBytes)));
    __ Str(w, MemOperand(dump2_base, w_offset + (code * kWRegSizeInBytes)));
  }

  // Finally, restore dump2_base.
  __ Ldr(dump2_base,
         MemOperand(dump2_base,
                    x_offset + (dump2_base.GetCode() * kXRegSizeInBytes)));

  completed_ = true;
}

uint64_t GetSignallingNan(int size_in_bits) {
  switch (size_in_bits) {
    case kHRegSize:
      return Float16ToRawbits(kFP16SignallingNaN);
    case kSRegSize:
      return FloatToRawbits(kFP32SignallingNaN);
    case kDRegSize:
      return DoubleToRawbits(kFP64SignallingNaN);
    default:
      VIXL_UNIMPLEMENTED();
      return 0;
  }
}

bool CanRun(const CPUFeatures& required, bool* queried_can_run) {
  bool log_if_missing = true;
  if (queried_can_run != NULL) {
    log_if_missing = !*queried_can_run;
    *queried_can_run = true;
  }

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
  // The Simulator can run any test that VIXL can assemble.
  USE(required);
  USE(log_if_missing);
  return true;
#else
  CPUFeatures cpu = CPUFeatures::InferFromOS();
  // If InferFromOS fails, assume that basic features are present.
  if (cpu.HasNoFeatures()) cpu = CPUFeatures::AArch64LegacyBaseline();
  VIXL_ASSERT(cpu.Has(kInfrastructureCPUFeatures));

  if (cpu.Has(required)) return true;

  if (log_if_missing) {
    CPUFeatures missing = required.Without(cpu);
    // Note: This message needs to match REGEXP_MISSING_FEATURES from
    // tools/threaded_test.py.
    std::cout << "SKIPPED: Missing features: { " << missing << " }\n";
    std::cout << "This test requires the following features to run its "
                 "generated code on this CPU: "
              << required << "\n";
  }
  return false;
#endif
}

// Note that this function assumes p0, p1, p2 and p3 are set to all true in b-,
// h-, s- and d-lane sizes respectively, and that p4 and p5 are clobbered as
// temporary predicates.
template <typename T, size_t N>
void SetFpData(MacroAssembler* masm,
               int esize,
               const T (&values)[N],
               uint64_t lcg_mult) {
  uint64_t a = 0;
  uint64_t b = lcg_mult;
  // Used to populate the assigned element slots of the registers, based on the
  // floating point type.
  __ Pfalse(p5.VnB());
  switch (esize) {
    case kHRegSize:
      a = Float16ToRawbits(Float16(1.5));
      // Pick a convenient number within the largest normal half-precision
      // floating point value.
      b = Float16ToRawbits(Float16(lcg_mult % 1024));
      // Step 1: Set fp16 numbers in the undefined registers.
      //      p4< 15:0>: 0b0101010101010101
      // z{code}<127:0>: 0xHHHHHHHHHHHHHHHH
      __ Zip1(p4.VnB(), p0.VnB(), p5.VnB());
      break;
    case kSRegSize:
      a = FloatToRawbits(1.5);
      b = FloatToRawbits(lcg_mult);
      // Step 2: Set fp32 numbers in the registers, on top of the fp16 values
      // already initialised.
      //      p4< 15:0>: 0b0000000100000001
      // z{code}<127:0>: 0xHHHHSSSSHHHHSSSS
      __ Zip1(p4.VnS(), p2.VnS(), p5.VnS());
      break;
    case kDRegSize:
      a = DoubleToRawbits(1.5);
      b = DoubleToRawbits(lcg_mult);
      // Step 3: Set fp64 numbers in the registers, on top of the fp16 and fp32
      // values already initialised.
      //      p4< 15:0>: 0b0000000000000001
      // z{code}<127:0>: 0xHHHHSSSSDDDDDDDD
      __ Zip1(p4.VnD(), p3.VnD(), p5.VnD());
      break;
    default:
      VIXL_UNIMPLEMENTED();
      break;
  }

  __ Dup(z30.WithLaneSize(esize), a);
  __ Dup(z31.WithLaneSize(esize), b);

  for (unsigned j = 0; j <= (kZRegMaxSize / (N * esize)); j++) {
    // As floating point operations on random values have a tendency to
    // converge on special-case numbers like NaNs, use normal floating point
    // values as the seed instead.
    InsrHelper(masm, z0.WithLaneSize(esize), values);
  }

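  // Fmla(zd, pg, za, zn, zm) computes zd = za + (zn * zm) for the lanes
  // selected by p4, so each of z0 - z30 below becomes a + (previous value * b):
  // an LCG-style recurrence over normal floating point values.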
  __ Fmla(z0.WithLaneSize(esize),
          p4.Merging(),
          z30.WithLaneSize(esize),
          z0.WithLaneSize(esize),
          z31.WithLaneSize(esize),
          FastNaNPropagation);

  for (unsigned i = 1; i < kNumberOfZRegisters - 1; i++) {
    __ Fmla(ZRegister(i).WithLaneSize(esize),
            p4.Merging(),
            z30.WithLaneSize(esize),
            ZRegister(i - 1).WithLaneSize(esize),
            z31.WithLaneSize(esize),
            FastNaNPropagation);
  }

  __ Fmul(z31.WithLaneSize(esize),
          p4.Merging(),
          z31.WithLaneSize(esize),
          z30.WithLaneSize(esize),
          FastNaNPropagation);
  __ Fadd(z31.WithLaneSize(esize), p4.Merging(), z31.WithLaneSize(esize), 1);
}

// Set z0 - z31 to some normal floating point data.
void InitialiseRegisterFp(MacroAssembler* masm, uint64_t lcg_mult) {
  // Initialise each Z register to a mixture of fp16/32/64 values following the
  // pattern:
  // z0.h[0-1] = fp16, z0.s[1] = fp32, z0.d[1] = fp64, repeated throughout the
  // register.
  //
  // For example:
  // z{code}<2047:1920>: 0x{<      fp64      ><  fp32  ><fp16><fp16>}
  // ...
  // z{code}< 127:   0>: 0x{<      fp64      ><  fp32  ><fp16><fp16>}
  //
  // To build this mixture, the initialisation steps must be called in the
  // following order.
  SetFpData(masm, kHRegSize, kInputFloat16Basic, lcg_mult);
  SetFpData(masm, kSRegSize, kInputFloatBasic, lcg_mult);
  SetFpData(masm, kDRegSize, kInputDoubleBasic, lcg_mult);
}

void SetInitialMachineState(MacroAssembler* masm, InputSet input_set) {
  USE(input_set);
  uint64_t lcg_mult = 6364136223846793005;
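  // The registers below are filled with x[n+1] = (x[n] * lcg_mult) + 1; the
  // multiplier is a commonly used 64-bit LCG constant.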

  // Set x0 - x30 to pseudo-random data.
  __ Mov(x29, 1);  // LCG increment.
  __ Mov(x30, lcg_mult);
  __ Mov(x0, 42);  // LCG seed.

  __ Cmn(x0, 0);  // Clear NZCV flags for later.

  __ Madd(x0, x0, x30, x29);  // First pseudo-random number.

  // Registers 1 - 29.
  for (unsigned i = 1; i < 30; i++) {
    __ Madd(XRegister(i), XRegister(i - 1), x30, x29);
  }
  __ Mul(x30, x29, x30);
  __ Add(x30, x30, 1);


  // Set first four predicate registers to true for increasing lane sizes.
  __ Ptrue(p0.VnB());
  __ Ptrue(p1.VnH());
  __ Ptrue(p2.VnS());
  __ Ptrue(p3.VnD());

  // Set z0 - z31 to pseudo-random data.
  if (input_set == kIntInputSet) {
    __ Dup(z30.VnD(), 1);
    __ Dup(z31.VnD(), lcg_mult);
    __ Index(z0.VnB(), -16, 13);  // LCG seeds.

    __ Mla(z0.VnD(), p0.Merging(), z30.VnD(), z0.VnD(), z31.VnD());
    for (unsigned i = 1; i < kNumberOfZRegisters - 1; i++) {
      __ Mla(ZRegister(i).VnD(),
             p0.Merging(),
             z30.VnD(),
             ZRegister(i - 1).VnD(),
             z31.VnD());
    }
    __ Mul(z31.VnD(), p0.Merging(), z31.VnD(), z30.VnD());
    __ Add(z31.VnD(), z31.VnD(), 1);

  } else {
    VIXL_ASSERT(input_set == kFpInputSet);
    InitialiseRegisterFp(masm, lcg_mult);
  }

  // Set remaining predicate registers based on earlier pseudo-random data.
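  // Cmpge sets each byte-wide predicate element where the corresponding byte
  // of the Z register is non-negative, so the sign bits of the pseudo-random
  // data above provide the predicate patterns.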
  for (unsigned i = 4; i < kNumberOfPRegisters; i++) {
    __ Cmpge(PRegister(i).VnB(), p0.Zeroing(), ZRegister(i).VnB(), 0);
  }
  for (unsigned i = 4; i < kNumberOfPRegisters; i += 2) {
    __ Zip1(p0.VnB(), PRegister(i).VnB(), PRegister(i + 1).VnB());
    __ Zip2(PRegister(i + 1).VnB(), PRegister(i).VnB(), PRegister(i + 1).VnB());
    __ Mov(PRegister(i), p0);
  }
  __ Ptrue(p0.VnB());

  // At this point, only sp and a few status registers are undefined. These
  // must be ignored when computing the state hash.
}

void ComputeMachineStateHash(MacroAssembler* masm, uint32_t* dst) {
  // Use explicit registers, to avoid hash order varying if
  // UseScratchRegisterScope changes.
  UseScratchRegisterScope temps(masm);
  temps.ExcludeAll();
  Register t0 = w0;
  Register t1 = x1;

  // Compute hash of x0 - x30.
  __ Push(t0.X(), t1);
  __ Crc32x(t0, wzr, t0.X());
  for (unsigned i = 0; i < kNumberOfRegisters; i++) {
    if (i == xzr.GetCode()) continue;   // Skip sp.
    if (t0.Is(WRegister(i))) continue;  // Skip t0, as it's already hashed.
    __ Crc32x(t0, t0, XRegister(i));
  }

  // Hash the status flags.
  __ Mrs(t1, NZCV);
  __ Crc32x(t0, t0, t1);

  // Acquire another temp, as integer registers have been hashed already.
  __ Push(x30, xzr);
  Register t2 = x30;

  // Compute hash of all bits in z0 - z31. This implies different hashes are
  // produced for machines of different vector length.
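  // For each Z register, Rdvl(t2, 1) yields the vector length in bytes, so
  // shifting right by 4 gives the number of 128-bit chunks. Each iteration
  // hashes the low 128 bits and then rotates the register down by 16 bytes
  // with Ext, until every chunk has been visited.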
  for (unsigned i = 0; i < kNumberOfZRegisters; i++) {
    __ Rdvl(t2, 1);
    __ Lsr(t2, t2, 4);
    Label vl_loop;
    __ Bind(&vl_loop);
    __ Umov(t1, VRegister(i).V2D(), 0);
    __ Crc32x(t0, t0, t1);
    __ Umov(t1, VRegister(i).V2D(), 1);
    __ Crc32x(t0, t0, t1);
    __ Ext(ZRegister(i).VnB(), ZRegister(i).VnB(), ZRegister(i).VnB(), 16);
    __ Sub(t2, t2, 1);
    __ Cbnz(t2, &vl_loop);
  }

  // Hash predicate registers. For simplicity, this writes the predicate
  // registers to a zero-initialised area of stack of the maximum size required
  // for P registers. It then computes a hash of that entire stack area.
  unsigned p_stack_space = kNumberOfPRegisters * kPRegMaxSizeInBytes;

  // Zero claimed stack area.
  for (unsigned i = 0; i < p_stack_space; i += kXRegSizeInBytes * 2) {
    __ Push(xzr, xzr);
  }

  // Store all P registers to the stack.
  __ Mov(t1, sp);
  for (unsigned i = 0; i < kNumberOfPRegisters; i++) {
    __ Str(PRegister(i), SVEMemOperand(t1));
    __ Add(t1, t1, kPRegMaxSizeInBytes);
  }

  // Hash the entire stack area.
  for (unsigned i = 0; i < p_stack_space; i += kXRegSizeInBytes * 2) {
    __ Pop(t1, t2);
    __ Crc32x(t0, t0, t1);
    __ Crc32x(t0, t0, t2);
  }

  __ Mov(t1, reinterpret_cast<uint64_t>(dst));
  __ Str(t0, MemOperand(t1));

  __ Pop(xzr, x30);
  __ Pop(t1, t0.X());
}

}  // namespace aarch64
}  // namespace vixl