1 // Copyright 2014, VIXL authors
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are met:
6 //
7 //   * Redistributions of source code must retain the above copyright notice,
8 //     this list of conditions and the following disclaimer.
9 //   * Redistributions in binary form must reproduce the above copyright notice,
10 //     this list of conditions and the following disclaimer in the documentation
11 //     and/or other materials provided with the distribution.
12 //   * Neither the name of ARM Limited nor the names of its contributors may be
13 //     used to endorse or promote products derived from this software without
14 //     specific prior written permission.
15 //
16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17 // ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 // WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
20 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 
27 #include "test-utils-aarch64.h"
28 
29 #include <cmath>
30 #include <queue>
31 
32 #include "test-runner.h"
33 
34 #include "../test/aarch64/test-simulator-inputs-aarch64.h"
35 #include "aarch64/cpu-aarch64.h"
36 #include "aarch64/disasm-aarch64.h"
37 #include "aarch64/macro-assembler-aarch64.h"
38 #include "aarch64/simulator-aarch64.h"
39 
40 #define __ masm->
41 
42 namespace vixl {
43 namespace aarch64 {
44 
45 
46 // This value is a signalling NaN as FP64, and also as FP32 or FP16 (taking the
47 // least-significant bits).
48 const double kFP64SignallingNaN = RawbitsToDouble(UINT64_C(0x7ff000007f807c01));
49 const float kFP32SignallingNaN = RawbitsToFloat(0x7f807c01);
50 const Float16 kFP16SignallingNaN = RawbitsToFloat16(0x7c01);
51 
52 // A similar value, but as a quiet NaN.
53 const double kFP64QuietNaN = RawbitsToDouble(UINT64_C(0x7ff800007fc07e01));
54 const float kFP32QuietNaN = RawbitsToFloat(0x7fc07e01);
55 const Float16 kFP16QuietNaN = RawbitsToFloat16(0x7e01);
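// Note: these encodings are signalling because the exponent is all ones and
// the most-significant fraction bit (the "quiet" bit) is clear while the
// payload is non-zero; the quiet variants simply set that bit.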
56 
57 
58 bool Equal32(uint32_t expected, const RegisterDump*, uint32_t result) {
59   if (result != expected) {
60     printf("Expected 0x%08" PRIx32 "\t Found 0x%08" PRIx32 "\n",
61            expected,
62            result);
63   }
64 
65   return expected == result;
66 }
67 
68 
69 bool Equal64(uint64_t reference,
70              const RegisterDump*,
71              uint64_t result,
72              ExpectedResult option) {
73   switch (option) {
74     case kExpectEqual:
75       if (result != reference) {
76         printf("Expected 0x%016" PRIx64 "\t Found 0x%016" PRIx64 "\n",
77                reference,
78                result);
79       }
80       break;
81     case kExpectNotEqual:
82       if (result == reference) {
83         printf("Expected a result not equal to 0x%016" PRIx64 "\n", reference);
84       }
85       break;
86   }
87 
88   return reference == result;
89 }
90 
91 
92 bool Equal64(std::vector<uint64_t> reference_list,
93              const RegisterDump*,
94              uint64_t result,
95              ExpectedResult option) {
96   switch (option) {
97     case kExpectEqual:
98       for (uint64_t reference : reference_list) {
99         if (result == reference) return true;
100       }
101       printf("Expected a result in (\n");
102       break;
103     case kExpectNotEqual:
104       for (uint64_t reference : reference_list) {
105         if (result == reference) {
106           printf("Expected a result not in (\n");
107           break;
108         }
109       }
110       return true;
111   }
112   for (uint64_t reference : reference_list) {
113     printf("  0x%016" PRIx64 ",\n", reference);
114   }
115   printf(")\t Found 0x%016" PRIx64 "\n", result);
116   return false;
117 }
118 
119 
120 bool Equal128(QRegisterValue expected,
121               const RegisterDump*,
122               QRegisterValue result) {
123   if (!expected.Equals(result)) {
124     printf("Expected 0x%016" PRIx64 "%016" PRIx64
125            "\t "
126            "Found 0x%016" PRIx64 "%016" PRIx64 "\n",
127            expected.GetLane<uint64_t>(1),
128            expected.GetLane<uint64_t>(0),
129            result.GetLane<uint64_t>(1),
130            result.GetLane<uint64_t>(0));
131   }
132 
133   return expected.Equals(result);
134 }
135 
136 
137 bool EqualFP16(Float16 expected, const RegisterDump*, Float16 result) {
138   uint16_t e_rawbits = Float16ToRawbits(expected);
139   uint16_t r_rawbits = Float16ToRawbits(result);
140   if (e_rawbits == r_rawbits) {
141     return true;
142   } else {
143     if (IsNaN(expected) || IsZero(expected)) {
144       printf("Expected 0x%04" PRIx16 "\t Found 0x%04" PRIx16 "\n",
145              e_rawbits,
146              r_rawbits);
147     } else {
148       printf("Expected %.6f (16 bit): (0x%04" PRIx16
149              ")\t "
150              "Found %.6f (0x%04" PRIx16 ")\n",
151              FPToFloat(expected, kIgnoreDefaultNaN),
152              e_rawbits,
153              FPToFloat(result, kIgnoreDefaultNaN),
154              r_rawbits);
155     }
156     return false;
157   }
158 }
159 
160 
161 bool EqualFP32(float expected, const RegisterDump*, float result) {
162   if (FloatToRawbits(expected) == FloatToRawbits(result)) {
163     return true;
164   } else {
165     if (IsNaN(expected) || (expected == 0.0)) {
166       printf("Expected 0x%08" PRIx32 "\t Found 0x%08" PRIx32 "\n",
167              FloatToRawbits(expected),
168              FloatToRawbits(result));
169     } else {
170       printf("Expected %.9f (0x%08" PRIx32
171              ")\t "
172              "Found %.9f (0x%08" PRIx32 ")\n",
173              expected,
174              FloatToRawbits(expected),
175              result,
176              FloatToRawbits(result));
177     }
178     return false;
179   }
180 }
181 
182 
183 bool EqualFP64(double expected, const RegisterDump*, double result) {
184   if (DoubleToRawbits(expected) == DoubleToRawbits(result)) {
185     return true;
186   }
187 
188   if (IsNaN(expected) || (expected == 0.0)) {
189     printf("Expected 0x%016" PRIx64 "\t Found 0x%016" PRIx64 "\n",
190            DoubleToRawbits(expected),
191            DoubleToRawbits(result));
192   } else {
193     printf("Expected %.17f (0x%016" PRIx64
194            ")\t "
195            "Found %.17f (0x%016" PRIx64 ")\n",
196            expected,
197            DoubleToRawbits(expected),
198            result,
199            DoubleToRawbits(result));
200   }
201   return false;
202 }
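// Note that EqualFP16/EqualFP32/EqualFP64 above compare raw bit patterns, so
// unlike a C++ `==` they distinguish +0.0 from -0.0 and treat two NaNs as
// equal only when their sign and payload bits match exactly.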
203 
204 
205 bool Equal32(uint32_t expected, const RegisterDump* core, const Register& reg) {
206   VIXL_ASSERT(reg.Is32Bits());
207   // Retrieve the corresponding X register so we can check that the upper part
208   // was properly cleared.
209   int64_t result_x = core->xreg(reg.GetCode());
210   if ((result_x & 0xffffffff00000000) != 0) {
211     printf("Expected 0x%08" PRIx32 "\t Found 0x%016" PRIx64 "\n",
212            expected,
213            result_x);
214     return false;
215   }
216   uint32_t result_w = core->wreg(reg.GetCode());
217   return Equal32(expected, core, result_w);
218 }
219 
220 
221 bool Equal64(uint64_t reference,
222              const RegisterDump* core,
223              const Register& reg,
224              ExpectedResult option) {
225   VIXL_ASSERT(reg.Is64Bits());
226   uint64_t result = core->xreg(reg.GetCode());
227   return Equal64(reference, core, result, option);
228 }
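// A hypothetical use from a test, after generated code has filled in `core`
// via RegisterDump::Dump() (the real tests typically wrap these helpers in
// assertion macros):
//   RegisterDump core;
//   ...                                   // Run generated code, then Dump().
//   VIXL_CHECK(Equal32(0x12345678, &core, w0));
//   VIXL_CHECK(Equal64(0x0123456789abcdef, &core, x1));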
229 
230 
231 bool Equal64(std::vector<uint64_t> reference_list,
232              const RegisterDump* core,
233              const Register& reg,
234              ExpectedResult option) {
235   VIXL_ASSERT(reg.Is64Bits());
236   uint64_t result = core->xreg(reg.GetCode());
237   return Equal64(reference_list, core, result, option);
238 }
239 
240 
241 bool NotEqual64(uint64_t reference,
242                 const RegisterDump* core,
243                 const Register& reg) {
244   VIXL_ASSERT(reg.Is64Bits());
245   uint64_t result = core->xreg(reg.GetCode());
246   return NotEqual64(reference, core, result);
247 }
248 
249 
250 bool Equal128(uint64_t expected_h,
251               uint64_t expected_l,
252               const RegisterDump* core,
253               const VRegister& vreg) {
254   VIXL_ASSERT(vreg.Is128Bits());
255   QRegisterValue expected;
256   expected.SetLane(0, expected_l);
257   expected.SetLane(1, expected_h);
258   QRegisterValue result = core->qreg(vreg.GetCode());
259   return Equal128(expected, core, result);
260 }
261 
262 
263 bool EqualFP16(Float16 expected,
264                const RegisterDump* core,
265                const VRegister& fpreg) {
266   VIXL_ASSERT(fpreg.Is16Bits());
267   // Retrieve the corresponding D register so we can check that the upper part
268   // was properly cleared.
269   uint64_t result_64 = core->dreg_bits(fpreg.GetCode());
270   if ((result_64 & 0xffffffffffff0000) != 0) {
271     printf("Expected 0x%04" PRIx16 " (%f)\t Found 0x%016" PRIx64 "\n",
272            Float16ToRawbits(expected),
273            FPToFloat(expected, kIgnoreDefaultNaN),
274            result_64);
275     return false;
276   }
277   return EqualFP16(expected, core, core->hreg(fpreg.GetCode()));
278 }
279 
280 
281 bool EqualFP32(float expected,
282                const RegisterDump* core,
283                const VRegister& fpreg) {
284   VIXL_ASSERT(fpreg.Is32Bits());
285   // Retrieve the corresponding D register so we can check that the upper part
286   // was properly cleared.
287   uint64_t result_64 = core->dreg_bits(fpreg.GetCode());
288   if ((result_64 & 0xffffffff00000000) != 0) {
289     printf("Expected 0x%08" PRIx32 " (%f)\t Found 0x%016" PRIx64 "\n",
290            FloatToRawbits(expected),
291            expected,
292            result_64);
293     return false;
294   }
295 
296   return EqualFP32(expected, core, core->sreg(fpreg.GetCode()));
297 }
298 
299 
300 bool EqualFP64(double expected,
301                const RegisterDump* core,
302                const VRegister& fpreg) {
303   VIXL_ASSERT(fpreg.Is64Bits());
304   return EqualFP64(expected, core, core->dreg(fpreg.GetCode()));
305 }
306 
307 
308 bool Equal64(const Register& reg0,
309              const RegisterDump* core,
310              const Register& reg1,
311              ExpectedResult option) {
312   VIXL_ASSERT(reg0.Is64Bits() && reg1.Is64Bits());
313   int64_t reference = core->xreg(reg0.GetCode());
314   int64_t result = core->xreg(reg1.GetCode());
315   return Equal64(reference, core, result, option);
316 }
317 
318 
319 bool NotEqual64(const Register& reg0,
320                 const RegisterDump* core,
321                 const Register& reg1) {
322   VIXL_ASSERT(reg0.Is64Bits() && reg1.Is64Bits());
323   int64_t expected = core->xreg(reg0.GetCode());
324   int64_t result = core->xreg(reg1.GetCode());
325   return NotEqual64(expected, core, result);
326 }
327 
328 
329 bool Equal64(uint64_t expected,
330              const RegisterDump* core,
331              const VRegister& vreg) {
332   VIXL_ASSERT(vreg.Is64Bits());
333   uint64_t result = core->dreg_bits(vreg.GetCode());
334   return Equal64(expected, core, result);
335 }
336 
337 
338 static char FlagN(uint32_t flags) { return (flags & NFlag) ? 'N' : 'n'; }
339 
340 
341 static char FlagZ(uint32_t flags) { return (flags & ZFlag) ? 'Z' : 'z'; }
342 
343 
344 static char FlagC(uint32_t flags) { return (flags & CFlag) ? 'C' : 'c'; }
345 
346 
347 static char FlagV(uint32_t flags) { return (flags & VFlag) ? 'V' : 'v'; }
348 
349 
350 bool EqualNzcv(uint32_t expected, uint32_t result) {
351   VIXL_ASSERT((expected & ~NZCVFlag) == 0);
352   VIXL_ASSERT((result & ~NZCVFlag) == 0);
353   if (result != expected) {
354     printf("Expected: %c%c%c%c\t Found: %c%c%c%c\n",
355            FlagN(expected),
356            FlagZ(expected),
357            FlagC(expected),
358            FlagV(expected),
359            FlagN(result),
360            FlagZ(result),
361            FlagC(result),
362            FlagV(result));
363     return false;
364   }
365 
366   return true;
367 }
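// In the message above, a set flag prints as an upper-case letter and a clear
// flag as lower-case, so "NzCv" means N and C are set while Z and V are clear.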
368 
369 
370 bool EqualRegisters(const RegisterDump* a, const RegisterDump* b) {
371   for (unsigned i = 0; i < kNumberOfRegisters; i++) {
372     if (a->xreg(i) != b->xreg(i)) {
373       printf("x%d\t Expected 0x%016" PRIx64 "\t Found 0x%016" PRIx64 "\n",
374              i,
375              a->xreg(i),
376              b->xreg(i));
377       return false;
378     }
379   }
380 
381   for (unsigned i = 0; i < kNumberOfVRegisters; i++) {
382     uint64_t a_bits = a->dreg_bits(i);
383     uint64_t b_bits = b->dreg_bits(i);
384     if (a_bits != b_bits) {
385       printf("d%d\t Expected 0x%016" PRIx64 "\t Found 0x%016" PRIx64 "\n",
386              i,
387              a_bits,
388              b_bits);
389       return false;
390     }
391   }
392 
393   return true;
394 }
395 
396 bool EqualSVELane(uint64_t expected,
397                   const RegisterDump* core,
398                   const ZRegister& reg,
399                   int lane) {
400   unsigned lane_size = reg.GetLaneSizeInBits();
401   // For convenience in the tests, we allow negative values to be passed into
402   // `expected`, but truncate them to an appropriately-sized unsigned value for
403   // the check. For example, in `EqualSVELane(-1, core, z0.VnB())`, the expected
404   // value is truncated from 0xffffffffffffffff to 0xff before the comparison.
405   VIXL_ASSERT(IsUintN(lane_size, expected) ||
406               IsIntN(lane_size, RawbitsToInt64(expected)));
407   expected &= GetUintMask(lane_size);
408 
409   uint64_t result = core->zreg_lane(reg.GetCode(), lane_size, lane);
410   if (expected != result) {
411     unsigned lane_size_in_hex_chars = lane_size / 4;
412     std::string reg_name = reg.GetArchitecturalName();
413     printf("%s[%d]\t Expected 0x%0*" PRIx64 "\t Found 0x%0*" PRIx64 "\n",
414            reg_name.c_str(),
415            lane,
416            lane_size_in_hex_chars,
417            expected,
418            lane_size_in_hex_chars,
419            result);
420     return false;
421   }
422   return true;
423 }
424 
425 bool EqualSVELane(uint64_t expected,
426                   const RegisterDump* core,
427                   const PRegister& reg,
428                   int lane) {
429   VIXL_ASSERT(reg.HasLaneSize());
430   VIXL_ASSERT((reg.GetLaneSizeInBits() % kZRegBitsPerPRegBit) == 0);
431   unsigned p_bits_per_lane = reg.GetLaneSizeInBits() / kZRegBitsPerPRegBit;
432   VIXL_ASSERT(IsUintN(p_bits_per_lane, expected));
433   expected &= GetUintMask(p_bits_per_lane);
434 
435   uint64_t result = core->preg_lane(reg.GetCode(), p_bits_per_lane, lane);
436   if (expected != result) {
437     unsigned lane_size_in_hex_chars = (p_bits_per_lane + 3) / 4;
438     std::string reg_name = reg.GetArchitecturalName();
439     printf("%s[%d]\t Expected 0x%0*" PRIx64 "\t Found 0x%0*" PRIx64 "\n",
440            reg_name.c_str(),
441            lane,
442            lane_size_in_hex_chars,
443            expected,
444            lane_size_in_hex_chars,
445            result);
446     return false;
447   }
448   return true;
449 }
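// In SVE, each predicate bit governs one byte of the corresponding Z register,
// so a P-register "lane" here holds GetLaneSizeInBits() / kZRegBitsPerPRegBit
// bits (for example, a single bit per byte-sized lane).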
450 
451 struct EqualMemoryChunk {
452   typedef uint64_t RawChunk;
453 
454   uintptr_t address;
455   RawChunk expected;
456   RawChunk result;
457 
458   bool IsEqual() const { return expected == result; }
459 };
460 
461 bool EqualMemory(const void* expected,
462                  const void* result,
463                  size_t size_in_bytes,
464                  size_t zero_offset) {
465   if (memcmp(expected, result, size_in_bytes) == 0) return true;
466 
467   // Read 64-bit chunks, and print them side-by-side if they don't match.
468 
469   // Remember the last few chunks, even if they matched, so we can print some
470   // context. We don't want to print the whole buffer, because it could be huge.
471   static const size_t kContextLines = 1;
472   std::queue<EqualMemoryChunk> context;
473   static const size_t kChunkSize = sizeof(EqualMemoryChunk::RawChunk);
474 
475   // This assumption keeps the logic simple, and is acceptable for our tests.
476   VIXL_ASSERT((size_in_bytes % kChunkSize) == 0);
477 
478   const char* expected_it = reinterpret_cast<const char*>(expected);
479   const char* result_it = reinterpret_cast<const char*>(result);
480 
481   // This is the first error, so print a header row.
482   printf("  Address (of result)                  Expected           Result\n");
483 
484   // Always print some context at the start of the buffer.
485   uintptr_t print_context_to =
486       reinterpret_cast<uintptr_t>(result) + (kContextLines + 1) * kChunkSize;
487   for (size_t i = 0; i < size_in_bytes; i += kChunkSize) {
488     EqualMemoryChunk chunk;
489     chunk.address = reinterpret_cast<uintptr_t>(result_it);
490     memcpy(&chunk.expected, expected_it, kChunkSize);
491     memcpy(&chunk.result, result_it, kChunkSize);
492 
493     while (context.size() > kContextLines) context.pop();
494     context.push(chunk);
495 
496     // Print context after an error, and at the end of the buffer.
497     if (!chunk.IsEqual() || ((i + kChunkSize) >= size_in_bytes)) {
498       if (chunk.address > print_context_to) {
499         // We aren't currently printing context, so separate this context from
500         // the previous block.
501         printf("...\n");
502       }
503       print_context_to = chunk.address + (kContextLines + 1) * kChunkSize;
504     }
505 
506     // Print context (including the current line).
507     while (!context.empty() && (context.front().address < print_context_to)) {
508       uintptr_t address = context.front().address;
509       uint64_t offset = address - reinterpret_cast<uintptr_t>(result);
510       bool is_negative = (offset < zero_offset);
511       printf("0x%016" PRIxPTR " (result %c %5" PRIu64 "): 0x%016" PRIx64
512              " 0x%016" PRIx64 "\n",
513              address,
514              (is_negative ? '-' : '+'),
515              (is_negative ? (zero_offset - offset) : (offset - zero_offset)),
516              context.front().expected,
517              context.front().result);
518       context.pop();
519     }
520 
521     expected_it += kChunkSize;
522     result_it += kChunkSize;
523   }
524 
525   return false;
526 }
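// Note: `zero_offset` only affects the diagnostic output above; the printed
// "result +/- n" column is measured relative to `result + zero_offset`, which
// is convenient when the interesting data does not start at the beginning of
// the compared buffers.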
527 RegList PopulateRegisterArray(Register* w,
528                               Register* x,
529                               Register* r,
530                               int reg_size,
531                               int reg_count,
532                               RegList allowed) {
533   RegList list = 0;
534   int i = 0;
535   for (unsigned n = 0; (n < kNumberOfRegisters) && (i < reg_count); n++) {
536     if (((UINT64_C(1) << n) & allowed) != 0) {
537       // Only assign allowed registers.
538       if (r) {
539         r[i] = Register(n, reg_size);
540       }
541       if (x) {
542         x[i] = Register(n, kXRegSize);
543       }
544       if (w) {
545         w[i] = Register(n, kWRegSize);
546       }
547       list |= (UINT64_C(1) << n);
548       i++;
549     }
550   }
551   // Check that we got enough registers.
552   VIXL_ASSERT(CountSetBits(list, kNumberOfRegisters) == reg_count);
553 
554   return list;
555 }
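// A hypothetical use: pick four allowed registers (x1-x4 via the 0x1e mask)
// and receive matching W and X views of each; the returned RegList records
// which register codes were actually taken:
//   Register w[4], x[4];
//   RegList used = PopulateRegisterArray(w, x, NULL, 0, 4, 0x1e);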
556 
557 
558 RegList PopulateVRegisterArray(VRegister* s,
559                                VRegister* d,
560                                VRegister* v,
561                                int reg_size,
562                                int reg_count,
563                                RegList allowed) {
564   RegList list = 0;
565   int i = 0;
566   for (unsigned n = 0; (n < kNumberOfVRegisters) && (i < reg_count); n++) {
567     if (((UINT64_C(1) << n) & allowed) != 0) {
568       // Only assign allowed registers.
569       if (v) {
570         v[i] = VRegister(n, reg_size);
571       }
572       if (d) {
573         d[i] = VRegister(n, kDRegSize);
574       }
575       if (s) {
576         s[i] = VRegister(n, kSRegSize);
577       }
578       list |= (UINT64_C(1) << n);
579       i++;
580     }
581   }
582   // Check that we got enough registers.
583   VIXL_ASSERT(CountSetBits(list, kNumberOfVRegisters) == reg_count);
584 
585   return list;
586 }
587 
588 
589 void Clobber(MacroAssembler* masm, RegList reg_list, uint64_t const value) {
590   Register first = NoReg;
591   for (unsigned i = 0; i < kNumberOfRegisters; i++) {
592     if (reg_list & (UINT64_C(1) << i)) {
593       Register xn(i, kXRegSize);
594       // We should never write into sp here.
595       VIXL_ASSERT(!xn.Is(sp));
596       if (!xn.IsZero()) {
597         if (!first.IsValid()) {
598           // This is the first register we've hit, so construct the literal.
599           __ Mov(xn, value);
600           first = xn;
601         } else {
602           // We've already loaded the literal, so re-use the value already
603           // loaded into the first register we hit.
604           __ Mov(xn, first);
605         }
606       }
607     }
608   }
609 }
610 
611 
612 void ClobberFP(MacroAssembler* masm, RegList reg_list, double const value) {
613   VRegister first = NoVReg;
614   for (unsigned i = 0; i < kNumberOfVRegisters; i++) {
615     if (reg_list & (UINT64_C(1) << i)) {
616       VRegister dn(i, kDRegSize);
617       if (!first.IsValid()) {
618         // This is the first register we've hit, so construct the literal.
619         __ Fmov(dn, value);
620         first = dn;
621       } else {
622         // We've already loaded the literal, so re-use the value already loaded
623         // into the first register we hit.
624         __ Fmov(dn, first);
625       }
626     }
627   }
628 }
629 
630 
631 void Clobber(MacroAssembler* masm, CPURegList reg_list) {
632   if (reg_list.GetType() == CPURegister::kRegister) {
633     // This will always clobber X registers.
634     Clobber(masm, reg_list.GetList());
635   } else if (reg_list.GetType() == CPURegister::kVRegister) {
636     // This will always clobber D registers.
637     ClobberFP(masm, reg_list.GetList());
638   } else {
639     VIXL_UNIMPLEMENTED();
640   }
641 }
642 
643 // TODO: Once registers have sufficiently compatible interfaces, merge the two
644 // DumpRegisters templates.
645 template <typename T>
646 static void DumpRegisters(MacroAssembler* masm,
647                           Register dump_base,
648                           int offset) {
649   UseScratchRegisterScope temps(masm);
650   Register dump = temps.AcquireX();
651   __ Add(dump, dump_base, offset);
652   for (unsigned i = 0; i <= T::GetMaxCode(); i++) {
653     T reg(i);
654     __ Str(reg, SVEMemOperand(dump));
655     __ Add(dump, dump, reg.GetMaxSizeInBytes());
656   }
657 }
658 
659 template <typename T>
660 static void DumpRegisters(MacroAssembler* masm,
661                           Register dump_base,
662                           int offset,
663                           int reg_size_in_bytes) {
664   UseScratchRegisterScope temps(masm);
665   Register dump = temps.AcquireX();
666   __ Add(dump, dump_base, offset);
667   for (unsigned i = 0; i <= T::GetMaxCode(); i++) {
668     T reg(i, reg_size_in_bytes * kBitsPerByte);
669     __ Str(reg, MemOperand(dump));
670     __ Add(dump, dump, reg_size_in_bytes);
671   }
672 }
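// The two DumpRegisters templates differ in how they size each store: the
// first (used for SVE Z and P registers) stores via an SVEMemOperand and
// advances by the register's maximum size in bytes, while the second stores a
// fixed reg_size_in_bytes chunk per register through a plain MemOperand.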
673 
674 void RegisterDump::Dump(MacroAssembler* masm) {
675   VIXL_ASSERT(__ StackPointer().Is(sp));
676 
677   dump_cpu_features_ = *masm->GetCPUFeatures();
678 
679   // We need some scratch registers, but we also need to dump them, so we have
680   // to control exactly which registers are used, and dump them separately.
681   CPURegList scratch_registers(x0, x1, x2, x3);
682 
683   UseScratchRegisterScope temps(masm);
684   temps.ExcludeAll();
685   __ PushCPURegList(scratch_registers);
686   temps.Include(scratch_registers);
687 
688   Register dump_base = temps.AcquireX();
689   Register tmp = temps.AcquireX();
690 
691   // Offsets into the dump_ structure.
692   const int x_offset = offsetof(dump_t, x_);
693   const int w_offset = offsetof(dump_t, w_);
694   const int d_offset = offsetof(dump_t, d_);
695   const int s_offset = offsetof(dump_t, s_);
696   const int h_offset = offsetof(dump_t, h_);
697   const int q_offset = offsetof(dump_t, q_);
698   const int z_offset = offsetof(dump_t, z_);
699   const int p_offset = offsetof(dump_t, p_);
700   const int sp_offset = offsetof(dump_t, sp_);
701   const int wsp_offset = offsetof(dump_t, wsp_);
702   const int flags_offset = offsetof(dump_t, flags_);
703   const int vl_offset = offsetof(dump_t, vl_);
704 
705   // Load the address where we will dump the state.
706   __ Mov(dump_base, reinterpret_cast<uintptr_t>(&dump_));
707 
708   // Dump the stack pointer (sp and wsp).
709   // The stack pointer cannot be stored directly; it needs to be moved into
710   // another register first. Also, we pushed four X registers, so we need to
711   // compensate here.
712   __ Add(tmp, sp, 4 * kXRegSizeInBytes);
713   __ Str(tmp, MemOperand(dump_base, sp_offset));
714   __ Add(tmp.W(), wsp, 4 * kXRegSizeInBytes);
715   __ Str(tmp.W(), MemOperand(dump_base, wsp_offset));
716 
717   // Dump core registers.
718   DumpRegisters<Register>(masm, dump_base, x_offset, kXRegSizeInBytes);
719   DumpRegisters<Register>(masm, dump_base, w_offset, kWRegSizeInBytes);
720 
721   // Dump NEON and FP registers.
722   DumpRegisters<VRegister>(masm, dump_base, q_offset, kQRegSizeInBytes);
723   DumpRegisters<VRegister>(masm, dump_base, d_offset, kDRegSizeInBytes);
724   DumpRegisters<VRegister>(masm, dump_base, s_offset, kSRegSizeInBytes);
725   DumpRegisters<VRegister>(masm, dump_base, h_offset, kHRegSizeInBytes);
726 
727   // Dump SVE registers.
728   if (CPUHas(CPUFeatures::kSVE)) {
729     DumpRegisters<ZRegister>(masm, dump_base, z_offset);
730     DumpRegisters<PRegister>(masm, dump_base, p_offset);
731 
732     // Record the vector length.
733     __ Rdvl(tmp, kBitsPerByte);
734     __ Str(tmp, MemOperand(dump_base, vl_offset));
735   }
736 
737   // Dump the flags.
738   __ Mrs(tmp, NZCV);
739   __ Str(tmp, MemOperand(dump_base, flags_offset));
740 
741   // To dump the values we used as scratch registers, we need a new scratch
742   // register. We can use any of the already dumped registers since we can
743   // easily restore them.
744   Register dump2_base = x10;
745   VIXL_ASSERT(!scratch_registers.IncludesAliasOf(dump2_base));
746 
747   VIXL_ASSERT(scratch_registers.IncludesAliasOf(dump_base));
748 
749   // Ensure that we don't try to use the scratch registers again.
750   temps.ExcludeAll();
751 
752   // Don't lose the dump_ address.
753   __ Mov(dump2_base, dump_base);
754 
755   __ PopCPURegList(scratch_registers);
756 
757   while (!scratch_registers.IsEmpty()) {
758     CPURegister reg = scratch_registers.PopLowestIndex();
759     Register x = reg.X();
760     Register w = reg.W();
761     unsigned code = reg.GetCode();
762     __ Str(x, MemOperand(dump2_base, x_offset + (code * kXRegSizeInBytes)));
763     __ Str(w, MemOperand(dump2_base, w_offset + (code * kWRegSizeInBytes)));
764   }
765 
766   // Finally, restore dump2_base.
767   __ Ldr(dump2_base,
768          MemOperand(dump2_base,
769                     x_offset + (dump2_base.GetCode() * kXRegSizeInBytes)));
770 
771   completed_ = true;
772 }
773 
774 uint64_t GetSignallingNan(int size_in_bits) {
775   switch (size_in_bits) {
776     case kHRegSize:
777       return Float16ToRawbits(kFP16SignallingNaN);
778     case kSRegSize:
779       return FloatToRawbits(kFP32SignallingNaN);
780     case kDRegSize:
781       return DoubleToRawbits(kFP64SignallingNaN);
782     default:
783       VIXL_UNIMPLEMENTED();
784       return 0;
785   }
786 }
787 
788 bool CanRun(const CPUFeatures& required, bool* queried_can_run) {
789   bool log_if_missing = true;
790   if (queried_can_run != NULL) {
791     log_if_missing = !*queried_can_run;
792     *queried_can_run = true;
793   }
794 
795 #ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
796   // The Simulator can run any test that VIXL can assemble.
797   USE(required);
798   USE(log_if_missing);
799   return true;
800 #else
801   CPUFeatures cpu = CPUFeatures::InferFromOS();
802   // If InferFromOS fails, assume that basic features are present.
803   if (cpu.HasNoFeatures()) cpu = CPUFeatures::AArch64LegacyBaseline();
804   VIXL_ASSERT(cpu.Has(kInfrastructureCPUFeatures));
805 
806   if (cpu.Has(required)) return true;
807 
808   if (log_if_missing) {
809     CPUFeatures missing = required.Without(cpu);
810     // Note: This message needs to match REGEXP_MISSING_FEATURES from
811     // tools/threaded_test.py.
812     std::cout << "SKIPPED: Missing features: { " << missing << " }\n";
813     std::cout << "This test requires the following features to run its "
814                  "generated code on this CPU: "
815               << required << "\n";
816   }
817   return false;
818 #endif
819 }
820 
821 // Note that this function assumes p0, p1, p2 and p3 are set to all true in
822 // b-, h-, s- and d-lane sizes respectively, and that p4 and p5 are clobbered
823 // as temporary predicates.
824 template <typename T, size_t N>
825 void SetFpData(MacroAssembler* masm,
826                int esize,
827                const T (&values)[N],
828                uint64_t lcg_mult) {
829   uint64_t a = 0;
830   uint64_t b = lcg_mult;
831   // Used to populate the assigned element slots of each register, based on
832   // the floating-point type being initialised.
833   __ Pfalse(p5.VnB());
834   switch (esize) {
835     case kHRegSize:
836       a = Float16ToRawbits(Float16(1.5));
837       // Pick a convenient number that fits within the largest normal
838       // half-precision floating-point value.
839       b = Float16ToRawbits(Float16(lcg_mult % 1024));
840       // Step 1: Set fp16 numbers to the undefined registers.
841       //      p4< 15:0>: 0b0101010101010101
842       // z{code}<127:0>: 0xHHHHHHHHHHHHHHHH
843       __ Zip1(p4.VnB(), p0.VnB(), p5.VnB());
844       break;
845     case kSRegSize:
846       a = FloatToRawbits(1.5);
847       b = FloatToRawbits(lcg_mult);
848       // Step 2: Set fp32 numbers on top of the fp16 lanes from step 1.
849       //      p4< 15:0>: 0b0000000100000001
850       // z{code}<127:0>: 0xHHHHSSSSHHHHSSSS
851       __ Zip1(p4.VnS(), p2.VnS(), p5.VnS());
852       break;
853     case kDRegSize:
854       a = DoubleToRawbits(1.5);
855       b = DoubleToRawbits(lcg_mult);
856       // Step 3: Set fp64 numbers on top of the fp16 and fp32 lanes from
857       // steps 1 and 2.
858       //      p4< 15:0>: 0b0000000000000001
859       // z{code}<127:0>: 0xHHHHSSSSDDDDDDDD
860       __ Zip1(p4.VnD(), p3.VnD(), p5.VnD());
861       break;
862     default:
863       VIXL_UNIMPLEMENTED();
864       break;
865   }
866 
867   __ Dup(z30.WithLaneSize(esize), a);
868   __ Dup(z31.WithLaneSize(esize), b);
869 
870   for (unsigned j = 0; j <= (kZRegMaxSize / (N * esize)); j++) {
871     // As floating-point operations on random values tend to converge on
872     // special-case numbers like NaNs, use normal floating-point values as
873     // the seed instead.
874     InsrHelper(masm, z0.WithLaneSize(esize), values);
875   }
876 
877   __ Fmla(z0.WithLaneSize(esize),
878           p4.Merging(),
879           z30.WithLaneSize(esize),
880           z0.WithLaneSize(esize),
881           z31.WithLaneSize(esize),
882           FastNaNPropagation);
883 
884   for (unsigned i = 1; i < kNumberOfZRegisters - 1; i++) {
885     __ Fmla(ZRegister(i).WithLaneSize(esize),
886             p4.Merging(),
887             z30.WithLaneSize(esize),
888             ZRegister(i - 1).WithLaneSize(esize),
889             z31.WithLaneSize(esize),
890             FastNaNPropagation);
891   }
892 
893   __ Fmul(z31.WithLaneSize(esize),
894           p4.Merging(),
895           z31.WithLaneSize(esize),
896           z30.WithLaneSize(esize),
897           FastNaNPropagation);
898   __ Fadd(z31.WithLaneSize(esize), p4.Merging(), z31.WithLaneSize(esize), 1);
899 }
900 
901 // Set z0 - z31 to some normal floating point data.
902 void InitialiseRegisterFp(MacroAssembler* masm, uint64_t lcg_mult) {
903   // Initialise each Z register to a mixture of fp16/32/64 values with the
904   // following pattern:
905   // z0.h[0-1] = fp16, z0.s[1] = fp32, z0.d[1] = fp64 repeatedly throughout the
906   // register.
907   //
908   // For example:
909   // z{code}<2047:1920>: 0x{<      fp64      ><  fp32  ><fp16><fp16>}
910   // ...
911   // z{code}< 127:   0>: 0x{<      fp64      ><  fp32  ><fp16><fp16>}
912   //
913   // To produce this mixture, the three initialisation passes must be called
914   // in the following order.
915   SetFpData(masm, kHRegSize, kInputFloat16Basic, lcg_mult);
916   SetFpData(masm, kSRegSize, kInputFloatBasic, lcg_mult);
917   SetFpData(masm, kDRegSize, kInputDoubleBasic, lcg_mult);
918 }
919 
920 void SetInitialMachineState(MacroAssembler* masm, InputSet input_set) {
921   USE(input_set);
922   uint64_t lcg_mult = 6364136223846793005;
923 
924   // Set x0 - x30 to pseudo-random data.
925   __ Mov(x29, 1);  // LCG increment.
926   __ Mov(x30, lcg_mult);
927   __ Mov(x0, 42);  // LCG seed.
928 
929   __ Cmn(x0, 0);  // Clear NZCV flags for later.
930 
931   __ Madd(x0, x0, x30, x29);  // First pseudo-random number.
932 
933   // Registers 1 - 29.
934   for (unsigned i = 1; i < 30; i++) {
935     __ Madd(XRegister(i), XRegister(i - 1), x30, x29);
936   }
937   __ Mul(x30, x29, x30);
938   __ Add(x30, x30, 1);
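  // The code above seeds x0 with 42 and applies the LCG step
  // x[i] = x[i-1] * lcg_mult + 1 to x1 - x29 in turn, so every general-purpose
  // register ends up holding a distinct pseudo-random value; x30, which held
  // the multiplier, is finally advanced to the next element of the sequence
  // with the Mul and Add pair.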
939 
940 
941   // Set first four predicate registers to true for increasing lane sizes.
942   __ Ptrue(p0.VnB());
943   __ Ptrue(p1.VnH());
944   __ Ptrue(p2.VnS());
945   __ Ptrue(p3.VnD());
946 
947   // Set z0 - z31 to pseudo-random data.
948   if (input_set == kIntInputSet) {
949     __ Dup(z30.VnD(), 1);
950     __ Dup(z31.VnD(), lcg_mult);
951     __ Index(z0.VnB(), -16, 13);  // LCG seeds.
952 
953     __ Mla(z0.VnD(), p0.Merging(), z30.VnD(), z0.VnD(), z31.VnD());
954     for (unsigned i = 1; i < kNumberOfZRegisters - 1; i++) {
955       __ Mla(ZRegister(i).VnD(),
956              p0.Merging(),
957              z30.VnD(),
958              ZRegister(i - 1).VnD(),
959              z31.VnD());
960     }
961     __ Mul(z31.VnD(), p0.Merging(), z31.VnD(), z30.VnD());
962     __ Add(z31.VnD(), z31.VnD(), 1);
963 
964   } else {
965     VIXL_ASSERT(input_set == kFpInputSet);
966     InitialiseRegisterFp(masm, lcg_mult);
967   }
968 
969   // Set remaining predicate registers based on earlier pseudo-random data.
970   for (unsigned i = 4; i < kNumberOfPRegisters; i++) {
971     __ Cmpge(PRegister(i).VnB(), p0.Zeroing(), ZRegister(i).VnB(), 0);
972   }
973   for (unsigned i = 4; i < kNumberOfPRegisters; i += 2) {
974     __ Zip1(p0.VnB(), PRegister(i).VnB(), PRegister(i + 1).VnB());
975     __ Zip2(PRegister(i + 1).VnB(), PRegister(i).VnB(), PRegister(i + 1).VnB());
976     __ Mov(PRegister(i), p0);
977   }
978   __ Ptrue(p0.VnB());
979 
980   // At this point, only sp and a few status registers are undefined. These
981   // must be ignored when computing the state hash.
982 }
983 
984 void ComputeMachineStateHash(MacroAssembler* masm, uint32_t* dst) {
985   // Use explicit registers, to avoid hash order varying if
986   // UseScratchRegisterScope changes.
987   UseScratchRegisterScope temps(masm);
988   temps.ExcludeAll();
989   Register t0 = w0;
990   Register t1 = x1;
991 
992   // Compute hash of x0 - x30.
993   __ Push(t0.X(), t1);
994   __ Crc32x(t0, wzr, t0.X());
995   for (unsigned i = 0; i < kNumberOfRegisters; i++) {
996     if (i == xzr.GetCode()) continue;   // Skip sp.
997     if (t0.Is(WRegister(i))) continue;  // Skip t0, as it's already hashed.
998     __ Crc32x(t0, t0, XRegister(i));
999   }
1000 
1001   // Hash the status flags.
1002   __ Mrs(t1, NZCV);
1003   __ Crc32x(t0, t0, t1);
1004 
1005   // Acquire another temp, as integer registers have been hashed already.
1006   __ Push(x30, xzr);
1007   Register t2 = x30;
1008 
1009   // Compute hash of all bits in z0 - z31. This implies different hashes are
1010   // produced for machines of different vector length.
1011   for (unsigned i = 0; i < kNumberOfZRegisters; i++) {
1012     __ Rdvl(t2, 1);
1013     __ Lsr(t2, t2, 4);
1014     Label vl_loop;
1015     __ Bind(&vl_loop);
1016     __ Umov(t1, VRegister(i).V2D(), 0);
1017     __ Crc32x(t0, t0, t1);
1018     __ Umov(t1, VRegister(i).V2D(), 1);
1019     __ Crc32x(t0, t0, t1);
1020     __ Ext(ZRegister(i).VnB(), ZRegister(i).VnB(), ZRegister(i).VnB(), 16);
1021     __ Sub(t2, t2, 1);
1022     __ Cbnz(t2, &vl_loop);
1023   }
1024 
1025   // Hash predicate registers. For simplicity, this writes the predicate
1026   // registers to a zero-initialised area of stack of the maximum size required
1027   // for P registers. It then computes a hash of that entire stack area.
1028   unsigned p_stack_space = kNumberOfPRegisters * kPRegMaxSizeInBytes;
1029 
1030   // Zero claimed stack area.
1031   for (unsigned i = 0; i < p_stack_space; i += kXRegSizeInBytes * 2) {
1032     __ Push(xzr, xzr);
1033   }
1034 
1035   // Store all P registers to the stack.
1036   __ Mov(t1, sp);
1037   for (unsigned i = 0; i < kNumberOfPRegisters; i++) {
1038     __ Str(PRegister(i), SVEMemOperand(t1));
1039     __ Add(t1, t1, kPRegMaxSizeInBytes);
1040   }
1041 
1042   // Hash the entire stack area.
1043   for (unsigned i = 0; i < p_stack_space; i += kXRegSizeInBytes * 2) {
1044     __ Pop(t1, t2);
1045     __ Crc32x(t0, t0, t1);
1046     __ Crc32x(t0, t0, t2);
1047   }
1048 
1049   __ Mov(t1, reinterpret_cast<uint64_t>(dst));
1050   __ Str(t0, MemOperand(t1));
1051 
1052   __ Pop(xzr, x30);
1053   __ Pop(t1, t0.X());
1054 }
1055 
1056 }  // namespace aarch64
1057 }  // namespace vixl
1058