// Copyright 2014, VIXL authors
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
//   * Redistributions of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//   * Redistributions in binary form must reproduce the above copyright
//     notice, this list of conditions and the following disclaimer in the
//     documentation and/or other materials provided with the distribution.
//   * Neither the name of ARM Limited nor the names of its contributors may
//     be used to endorse or promote products derived from this software
//     without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.

#include <cstdio>
#include <cstring>
#include <iostream>
#include <queue>

#include "test-runner.h"
#include "test-utils-aarch64.h"

#include "../test/aarch64/test-simulator-inputs-aarch64.h"
#include "aarch64/cpu-aarch64.h"
#include "aarch64/disasm-aarch64.h"
#include "aarch64/macro-assembler-aarch64.h"
#include "aarch64/simulator-aarch64.h"

#define __ masm->

namespace vixl {
namespace aarch64 {


// This value is a signalling NaN as FP64, and also as FP32 or FP16 (taking the
// least-significant bits).
const double kFP64SignallingNaN = RawbitsToDouble(UINT64_C(0x7ff000007f807c01));
const float kFP32SignallingNaN = RawbitsToFloat(0x7f807c01);
const Float16 kFP16SignallingNaN = RawbitsToFloat16(0x7c01);
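// For reference, an illustrative breakdown of that bit pattern at each width
// (all-ones exponent, non-zero mantissa, top mantissa bit clear, hence
// signalling):
//   FP64: 0x7ff000007f807c01  - exponent 0x7ff, mantissa bit 51 clear
//   FP32: 0x7f807c01          - exponent 0xff,  mantissa bit 22 clear
//   FP16: 0x7c01              - exponent 0x1f,  mantissa bit 9  clear
// Setting the top mantissa bit at each width gives the quiet NaNs below.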
// A similar value, but as a quiet NaN.
const double kFP64QuietNaN = RawbitsToDouble(UINT64_C(0x7ff800007fc07e01));
const float kFP32QuietNaN = RawbitsToFloat(0x7fc07e01);
const Float16 kFP16QuietNaN = RawbitsToFloat16(0x7e01);


bool Equal32(uint32_t expected, const RegisterDump*, uint32_t result) {
  if (result != expected) {
    printf("Expected 0x%08" PRIx32 "\t Found 0x%08" PRIx32 "\n",
           expected, result);
  }
  return expected == result;
}


bool Equal64(uint64_t reference,
             const RegisterDump*,
             uint64_t result,
             ExpectedResult option) {
  switch (option) {
    case kExpectEqual:
      if (result != reference) {
        printf("Expected 0x%016" PRIx64 "\t Found 0x%016" PRIx64 "\n",
               reference, result);
      }
      break;
    case kExpectNotEqual:
      if (result == reference) {
        printf("Expected a result not equal to 0x%016" PRIx64 "\n", reference);
      }
      break;
  }
  return reference == result;
}


bool Equal128(QRegisterValue expected,
              const RegisterDump*,
              QRegisterValue result) {
  if (!expected.Equals(result)) {
    printf("Expected 0x%016" PRIx64 "%016" PRIx64 "\t "
           "Found 0x%016" PRIx64 "%016" PRIx64 "\n",
           expected.GetLane(1), expected.GetLane(0),
           result.GetLane(1), result.GetLane(0));
  }
  return expected.Equals(result);
}


bool EqualFP16(Float16 expected, const RegisterDump*, Float16 result) {
  uint16_t e_rawbits = Float16ToRawbits(expected);
  uint16_t r_rawbits = Float16ToRawbits(result);
  if (e_rawbits == r_rawbits) {
    return true;
  } else {
    if (IsNaN(expected) || IsZero(expected)) {
      printf("Expected 0x%04" PRIx16 "\t Found 0x%04" PRIx16 "\n",
             e_rawbits, r_rawbits);
    } else {
      printf("Expected %.6f (16 bit): (0x%04" PRIx16 ")\t "
             "Found %.6f (0x%04" PRIx16 ")\n",
             FPToFloat(expected, kIgnoreDefaultNaN), e_rawbits,
             FPToFloat(result, kIgnoreDefaultNaN), r_rawbits);
    }
    return false;
  }
}


bool EqualFP32(float expected, const RegisterDump*, float result) {
  if (FloatToRawbits(expected) == FloatToRawbits(result)) {
    return true;
  } else {
    if (IsNaN(expected) || (expected == 0.0)) {
      printf("Expected 0x%08" PRIx32 "\t Found 0x%08" PRIx32 "\n",
             FloatToRawbits(expected), FloatToRawbits(result));
    } else {
      printf("Expected %.9f (0x%08" PRIx32 ")\t "
             "Found %.9f (0x%08" PRIx32 ")\n",
             expected, FloatToRawbits(expected),
             result, FloatToRawbits(result));
    }
    return false;
  }
}


bool EqualFP64(double expected, const RegisterDump*, double result) {
  if (DoubleToRawbits(expected) == DoubleToRawbits(result)) {
    return true;
  }

  if (IsNaN(expected) || (expected == 0.0)) {
    printf("Expected 0x%016" PRIx64 "\t Found 0x%016" PRIx64 "\n",
           DoubleToRawbits(expected), DoubleToRawbits(result));
  } else {
    printf("Expected %.17f (0x%016" PRIx64 ")\t "
           "Found %.17f (0x%016" PRIx64 ")\n",
           expected, DoubleToRawbits(expected),
           result, DoubleToRawbits(result));
  }
  return false;
}
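// The variants below take a register (as recorded by RegisterDump) rather
// than a raw value. For W registers they also verify AArch64's zero-extension
// rule: for example, after `Mov(w0, 0xdeadbeef)` the dump is expected to hold
// x0 == 0x00000000deadbeef, and any stray bits in the upper word are reported
// as a failure. (Illustrative note; the example instruction is not part of
// this file.)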
bool Equal32(uint32_t expected, const RegisterDump* core, const Register& reg) {
  VIXL_ASSERT(reg.Is32Bits());
  // Retrieve the corresponding X register so we can check that the upper part
  // was properly cleared.
  int64_t result_x = core->xreg(reg.GetCode());
  if ((result_x & 0xffffffff00000000) != 0) {
    printf("Expected 0x%08" PRIx32 "\t Found 0x%016" PRIx64 "\n",
           expected, result_x);
    return false;
  }
  uint32_t result_w = core->wreg(reg.GetCode());
  return Equal32(expected, core, result_w);
}


bool Equal64(uint64_t reference,
             const RegisterDump* core,
             const Register& reg,
             ExpectedResult option) {
  VIXL_ASSERT(reg.Is64Bits());
  uint64_t result = core->xreg(reg.GetCode());
  return Equal64(reference, core, result, option);
}


bool NotEqual64(uint64_t reference,
                const RegisterDump* core,
                const Register& reg) {
  VIXL_ASSERT(reg.Is64Bits());
  uint64_t result = core->xreg(reg.GetCode());
  return NotEqual64(reference, core, result);
}


bool Equal128(uint64_t expected_h,
              uint64_t expected_l,
              const RegisterDump* core,
              const VRegister& vreg) {
  VIXL_ASSERT(vreg.Is128Bits());
  QRegisterValue expected;
  expected.SetLane(0, expected_l);
  expected.SetLane(1, expected_h);
  QRegisterValue result = core->qreg(vreg.GetCode());
  return Equal128(expected, core, result);
}


bool EqualFP16(Float16 expected,
               const RegisterDump* core,
               const VRegister& fpreg) {
  VIXL_ASSERT(fpreg.Is16Bits());
  // Retrieve the corresponding D register so we can check that the upper part
  // was properly cleared.
  uint64_t result_64 = core->dreg_bits(fpreg.GetCode());
  if ((result_64 & 0xffffffffffff0000) != 0) {
    printf("Expected 0x%04" PRIx16 " (%f)\t Found 0x%016" PRIx64 "\n",
           Float16ToRawbits(expected),
           FPToFloat(expected, kIgnoreDefaultNaN),
           result_64);
    return false;
  }
  return EqualFP16(expected, core, core->hreg(fpreg.GetCode()));
}


bool EqualFP32(float expected,
               const RegisterDump* core,
               const VRegister& fpreg) {
  VIXL_ASSERT(fpreg.Is32Bits());
  // Retrieve the corresponding D register so we can check that the upper part
  // was properly cleared.
  uint64_t result_64 = core->dreg_bits(fpreg.GetCode());
  if ((result_64 & 0xffffffff00000000) != 0) {
    printf("Expected 0x%08" PRIx32 " (%f)\t Found 0x%016" PRIx64 "\n",
           FloatToRawbits(expected), expected, result_64);
    return false;
  }
  return EqualFP32(expected, core, core->sreg(fpreg.GetCode()));
}


bool EqualFP64(double expected,
               const RegisterDump* core,
               const VRegister& fpreg) {
  VIXL_ASSERT(fpreg.Is64Bits());
  return EqualFP64(expected, core, core->dreg(fpreg.GetCode()));
}


bool Equal64(const Register& reg0,
             const RegisterDump* core,
             const Register& reg1,
             ExpectedResult option) {
  VIXL_ASSERT(reg0.Is64Bits() && reg1.Is64Bits());
  int64_t reference = core->xreg(reg0.GetCode());
  int64_t result = core->xreg(reg1.GetCode());
  return Equal64(reference, core, result, option);
}


bool NotEqual64(const Register& reg0,
                const RegisterDump* core,
                const Register& reg1) {
  VIXL_ASSERT(reg0.Is64Bits() && reg1.Is64Bits());
  int64_t expected = core->xreg(reg0.GetCode());
  int64_t result = core->xreg(reg1.GetCode());
  return NotEqual64(expected, core, result);
}


bool Equal64(uint64_t expected,
             const RegisterDump* core,
             const VRegister& vreg) {
  VIXL_ASSERT(vreg.Is64Bits());
  uint64_t result = core->dreg_bits(vreg.GetCode());
  return Equal64(expected, core, result);
}


static char FlagN(uint32_t flags) { return (flags & NFlag) ? 'N' : 'n'; }

static char FlagZ(uint32_t flags) { return (flags & ZFlag) ? 'Z' : 'z'; }

static char FlagC(uint32_t flags) { return (flags & CFlag) ? 'C' : 'c'; }

static char FlagV(uint32_t flags) { return (flags & VFlag) ? 'V' : 'v'; }
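// Each flag is reported as a single character: upper case when the flag is
// set, lower case when it is clear. For example, an `expected` value of
// (NFlag | CFlag) is printed by EqualNzcv below as "NzCv". (Illustrative
// note.)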
bool EqualNzcv(uint32_t expected, uint32_t result) {
  VIXL_ASSERT((expected & ~NZCVFlag) == 0);
  VIXL_ASSERT((result & ~NZCVFlag) == 0);
  if (result != expected) {
    printf("Expected: %c%c%c%c\t Found: %c%c%c%c\n",
           FlagN(expected), FlagZ(expected), FlagC(expected), FlagV(expected),
           FlagN(result), FlagZ(result), FlagC(result), FlagV(result));
    return false;
  }
  return true;
}


bool EqualRegisters(const RegisterDump* a, const RegisterDump* b) {
  for (unsigned i = 0; i < kNumberOfRegisters; i++) {
    if (a->xreg(i) != b->xreg(i)) {
      printf("x%d\t Expected 0x%016" PRIx64 "\t Found 0x%016" PRIx64 "\n",
             i, a->xreg(i), b->xreg(i));
      return false;
    }
  }

  for (unsigned i = 0; i < kNumberOfVRegisters; i++) {
    uint64_t a_bits = a->dreg_bits(i);
    uint64_t b_bits = b->dreg_bits(i);
    if (a_bits != b_bits) {
      printf("d%d\t Expected 0x%016" PRIx64 "\t Found 0x%016" PRIx64 "\n",
             i, a_bits, b_bits);
      return false;
    }
  }

  return true;
}


bool EqualSVELane(uint64_t expected,
                  const RegisterDump* core,
                  const ZRegister& reg,
                  int lane) {
  unsigned lane_size = reg.GetLaneSizeInBits();
  // For convenience in the tests, we allow negative values to be passed into
  // `expected`, but truncate them to an appropriately-sized unsigned value for
  // the check. For example, in `EqualSVELane(-1, core, z0.VnB())`, the expected
  // value is truncated from 0xffffffffffffffff to 0xff before the comparison.
  VIXL_ASSERT(IsUintN(lane_size, expected) ||
              IsIntN(lane_size, RawbitsToInt64(expected)));
  expected &= GetUintMask(lane_size);

  uint64_t result = core->zreg_lane(reg.GetCode(), lane_size, lane);
  if (expected != result) {
    unsigned lane_size_in_hex_chars = lane_size / 4;
    std::string reg_name = reg.GetArchitecturalName();
    printf("%s[%d]\t Expected 0x%0*" PRIx64 "\t Found 0x%0*" PRIx64 "\n",
           reg_name.c_str(), lane,
           lane_size_in_hex_chars, expected,
           lane_size_in_hex_chars, result);
    return false;
  }
  return true;
}


bool EqualSVELane(uint64_t expected,
                  const RegisterDump* core,
                  const PRegister& reg,
                  int lane) {
  VIXL_ASSERT(reg.HasLaneSize());
  VIXL_ASSERT((reg.GetLaneSizeInBits() % kZRegBitsPerPRegBit) == 0);
  unsigned p_bits_per_lane = reg.GetLaneSizeInBits() / kZRegBitsPerPRegBit;
  VIXL_ASSERT(IsUintN(p_bits_per_lane, expected));
  expected &= GetUintMask(p_bits_per_lane);

  uint64_t result = core->preg_lane(reg.GetCode(), p_bits_per_lane, lane);
  if (expected != result) {
    unsigned lane_size_in_hex_chars = (p_bits_per_lane + 3) / 4;
    std::string reg_name = reg.GetArchitecturalName();
    printf("%s[%d]\t Expected 0x%0*" PRIx64 "\t Found 0x%0*" PRIx64 "\n",
           reg_name.c_str(), lane,
           lane_size_in_hex_chars, expected,
           lane_size_in_hex_chars, result);
    return false;
  }
  return true;
}


struct EqualMemoryChunk {
  typedef uint64_t RawChunk;

  uintptr_t address;
  RawChunk expected;
  RawChunk result;

  bool IsEqual() const { return expected == result; }
};
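// EqualMemory (below) compares the two buffers in 64-bit chunks. On a
// mismatch it prints the differing chunks side by side, keeps one matching
// chunk of context before and after, and elides unrelated matching regions
// with "...". A failure report therefore looks roughly like this
// (illustrative output only, addresses invented):
//
//   0x0000aaaacafe0010 (result +    16): 0x0000000000000003 0x0000000000000003
//   0x0000aaaacafe0018 (result +    24): 0x0000000000000004 0x00000000000000ff
//   0x0000aaaacafe0020 (result +    32): 0x0000000000000005 0x0000000000000005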
bool EqualMemory(const void* expected,
                 const void* result,
                 size_t size_in_bytes,
                 size_t zero_offset) {
  if (memcmp(expected, result, size_in_bytes) == 0) return true;

  // Read 64-bit chunks, and print them side-by-side if they don't match.

  // Remember the last few chunks, even if they matched, so we can print some
  // context. We don't want to print the whole buffer, because it could be
  // huge.
  static const size_t kContextLines = 1;
  std::queue<EqualMemoryChunk> context;
  static const size_t kChunkSize = sizeof(EqualMemoryChunk::RawChunk);

  // This assumption keeps the logic simple, and is acceptable for our tests.
  VIXL_ASSERT((size_in_bytes % kChunkSize) == 0);

  const char* expected_it = reinterpret_cast<const char*>(expected);
  const char* result_it = reinterpret_cast<const char*>(result);

  // This is the first error, so print a header row.
  printf(" Address (of result) Expected Result\n");

  // Always print some context at the start of the buffer.
  uintptr_t print_context_to =
      reinterpret_cast<uintptr_t>(result) + (kContextLines + 1) * kChunkSize;
  for (size_t i = 0; i < size_in_bytes; i += kChunkSize) {
    EqualMemoryChunk chunk;
    chunk.address = reinterpret_cast<uintptr_t>(result_it);
    memcpy(&chunk.expected, expected_it, kChunkSize);
    memcpy(&chunk.result, result_it, kChunkSize);

    while (context.size() > kContextLines) context.pop();
    context.push(chunk);

    // Print context after an error, and at the end of the buffer.
    if (!chunk.IsEqual() || ((i + kChunkSize) >= size_in_bytes)) {
      if (chunk.address > print_context_to) {
        // We aren't currently printing context, so separate this context from
        // the previous block.
        printf("...\n");
      }
      print_context_to = chunk.address + (kContextLines + 1) * kChunkSize;
    }

    // Print context (including the current line).
    while (!context.empty() &&
           (context.front().address < print_context_to)) {
      uintptr_t address = context.front().address;
      uint64_t offset = address - reinterpret_cast<uintptr_t>(result);
      bool is_negative = (offset < zero_offset);
      printf("0x%016" PRIxPTR " (result %c %5" PRIu64 "): 0x%016" PRIx64
             " 0x%016" PRIx64 "\n",
             address,
             (is_negative ? '-' : '+'),
             (is_negative ? (zero_offset - offset) : (offset - zero_offset)),
             context.front().expected,
             context.front().result);
      context.pop();
    }

    expected_it += kChunkSize;
    result_it += kChunkSize;
  }

  return false;
}


RegList PopulateRegisterArray(Register* w,
                              Register* x,
                              Register* r,
                              int reg_size,
                              int reg_count,
                              RegList allowed) {
  RegList list = 0;
  int i = 0;
  for (unsigned n = 0; (n < kNumberOfRegisters) && (i < reg_count); n++) {
    if (((UINT64_C(1) << n) & allowed) != 0) {
      // Only assign allowed registers.
      if (r) {
        r[i] = Register(n, reg_size);
      }
      if (x) {
        x[i] = Register(n, kXRegSize);
      }
      if (w) {
        w[i] = Register(n, kWRegSize);
      }
      list |= (UINT64_C(1) << n);
      i++;
    }
  }
  // Check that we got enough registers.
  VIXL_ASSERT(CountSetBits(list, kNumberOfRegisters) == reg_count);
  return list;
}


RegList PopulateVRegisterArray(VRegister* s,
                               VRegister* d,
                               VRegister* v,
                               int reg_size,
                               int reg_count,
                               RegList allowed) {
  RegList list = 0;
  int i = 0;
  for (unsigned n = 0; (n < kNumberOfVRegisters) && (i < reg_count); n++) {
    if (((UINT64_C(1) << n) & allowed) != 0) {
      // Only assign allowed registers.
      if (v) {
        v[i] = VRegister(n, reg_size);
      }
      if (d) {
        d[i] = VRegister(n, kDRegSize);
      }
      if (s) {
        s[i] = VRegister(n, kSRegSize);
      }
      list |= (UINT64_C(1) << n);
      i++;
    }
  }
  // Check that we got enough registers.
  VIXL_ASSERT(CountSetBits(list, kNumberOfVRegisters) == reg_count);
  return list;
}
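// For example (illustrative only): calling
//   PopulateRegisterArray(NULL, x, NULL, kXRegSize, 2, 0x18)
// assigns x[0] = x3 and x[1] = x4 (the two lowest-numbered allowed registers)
// and returns the RegList 0x18.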
void Clobber(MacroAssembler* masm, RegList reg_list, uint64_t const value) {
  Register first = NoReg;
  for (unsigned i = 0; i < kNumberOfRegisters; i++) {
    if (reg_list & (UINT64_C(1) << i)) {
      Register xn(i, kXRegSize);
      // We should never write into sp here.
      VIXL_ASSERT(!xn.Is(sp));
      if (!xn.IsZero()) {
        if (!first.IsValid()) {
          // This is the first register we've hit, so construct the literal.
          __ Mov(xn, value);
          first = xn;
        } else {
          // We've already loaded the literal, so re-use the value already
          // loaded into the first register we hit.
          __ Mov(xn, first);
        }
      }
    }
  }
}


void ClobberFP(MacroAssembler* masm, RegList reg_list, double const value) {
  VRegister first = NoVReg;
  for (unsigned i = 0; i < kNumberOfVRegisters; i++) {
    if (reg_list & (UINT64_C(1) << i)) {
      VRegister dn(i, kDRegSize);
      if (!first.IsValid()) {
        // This is the first register we've hit, so construct the literal.
        __ Fmov(dn, value);
        first = dn;
      } else {
        // We've already loaded the literal, so re-use the value already loaded
        // into the first register we hit.
        __ Fmov(dn, first);
      }
    }
  }
}


void Clobber(MacroAssembler* masm, CPURegList reg_list) {
  if (reg_list.GetType() == CPURegister::kRegister) {
    // This will always clobber X registers.
    Clobber(masm, reg_list.GetList());
  } else if (reg_list.GetType() == CPURegister::kVRegister) {
    // This will always clobber D registers.
    ClobberFP(masm, reg_list.GetList());
  } else {
    VIXL_UNIMPLEMENTED();
  }
}


// TODO: Once registers have sufficiently compatible interfaces, merge the two
// DumpRegisters templates.
template <typename T>
static void DumpRegisters(MacroAssembler* masm,
                          Register dump_base,
                          int offset) {
  UseScratchRegisterScope temps(masm);
  Register dump = temps.AcquireX();
  __ Add(dump, dump_base, offset);
  for (unsigned i = 0; i <= T::GetMaxCode(); i++) {
    T reg(i);
    __ Str(reg, SVEMemOperand(dump));
    __ Add(dump, dump, reg.GetMaxSizeInBytes());
  }
}


template <typename T>
static void DumpRegisters(MacroAssembler* masm,
                          Register dump_base,
                          int offset,
                          int reg_size_in_bytes) {
  UseScratchRegisterScope temps(masm);
  Register dump = temps.AcquireX();
  __ Add(dump, dump_base, offset);
  for (unsigned i = 0; i <= T::GetMaxCode(); i++) {
    T reg(i, reg_size_in_bytes * kBitsPerByte);
    __ Str(reg, MemOperand(dump));
    __ Add(dump, dump, reg_size_in_bytes);
  }
}
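// The first overload above is used for scalable (SVE) register types, which
// are stored via SVEMemOperand and advance the cursor by their maximum
// architectural size; the second is used for fixed-size views. For example
// (illustrative calls, mirroring RegisterDump::Dump below):
//   DumpRegisters<ZRegister>(masm, dump_base, z_offset);
//   DumpRegisters<VRegister>(masm, dump_base, d_offset, kDRegSizeInBytes);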
void RegisterDump::Dump(MacroAssembler* masm) {
  VIXL_ASSERT(__ StackPointer().Is(sp));

  dump_cpu_features_ = *masm->GetCPUFeatures();

  // We need some scratch registers, but we also need to dump them, so we have
  // to control exactly which registers are used, and dump them separately.
  CPURegList scratch_registers(x0, x1, x2, x3);

  UseScratchRegisterScope temps(masm);
  temps.ExcludeAll();
  __ PushCPURegList(scratch_registers);
  temps.Include(scratch_registers);

  Register dump_base = temps.AcquireX();
  Register tmp = temps.AcquireX();

  // Offsets into the dump_ structure.
  const int x_offset = offsetof(dump_t, x_);
  const int w_offset = offsetof(dump_t, w_);
  const int d_offset = offsetof(dump_t, d_);
  const int s_offset = offsetof(dump_t, s_);
  const int h_offset = offsetof(dump_t, h_);
  const int q_offset = offsetof(dump_t, q_);
  const int z_offset = offsetof(dump_t, z_);
  const int p_offset = offsetof(dump_t, p_);
  const int sp_offset = offsetof(dump_t, sp_);
  const int wsp_offset = offsetof(dump_t, wsp_);
  const int flags_offset = offsetof(dump_t, flags_);
  const int vl_offset = offsetof(dump_t, vl_);

  // Load the address where we will dump the state.
  __ Mov(dump_base, reinterpret_cast<uintptr_t>(&dump_));

  // Dump the stack pointer (sp and wsp).
  // The stack pointer cannot be stored directly; it needs to be moved into
  // another register first. Also, we pushed four X registers, so we need to
  // compensate here.
  __ Add(tmp, sp, 4 * kXRegSizeInBytes);
  __ Str(tmp, MemOperand(dump_base, sp_offset));
  __ Add(tmp.W(), wsp, 4 * kXRegSizeInBytes);
  __ Str(tmp.W(), MemOperand(dump_base, wsp_offset));

  // Dump core registers.
  DumpRegisters<Register>(masm, dump_base, x_offset, kXRegSizeInBytes);
  DumpRegisters<Register>(masm, dump_base, w_offset, kWRegSizeInBytes);

  // Dump NEON and FP registers.
  DumpRegisters<VRegister>(masm, dump_base, q_offset, kQRegSizeInBytes);
  DumpRegisters<VRegister>(masm, dump_base, d_offset, kDRegSizeInBytes);
  DumpRegisters<VRegister>(masm, dump_base, s_offset, kSRegSizeInBytes);
  DumpRegisters<VRegister>(masm, dump_base, h_offset, kHRegSizeInBytes);

  // Dump SVE registers.
  if (CPUHas(CPUFeatures::kSVE)) {
    DumpRegisters<ZRegister>(masm, dump_base, z_offset);
    DumpRegisters<PRegister>(masm, dump_base, p_offset);

    // Record the vector length.
    __ Rdvl(tmp, kBitsPerByte);
    __ Str(tmp, MemOperand(dump_base, vl_offset));
  }

  // Dump the flags.
  __ Mrs(tmp, NZCV);
  __ Str(tmp, MemOperand(dump_base, flags_offset));

  // To dump the values we used as scratch registers, we need a new scratch
  // register. We can use any of the already dumped registers since we can
  // easily restore them.
  Register dump2_base = x10;
  VIXL_ASSERT(!scratch_registers.IncludesAliasOf(dump2_base));

  VIXL_ASSERT(scratch_registers.IncludesAliasOf(dump_base));

  // Ensure that we don't try to use the scratch registers again.
  temps.ExcludeAll();

  // Don't lose the dump_ address.
  __ Mov(dump2_base, dump_base);

  __ PopCPURegList(scratch_registers);
  while (!scratch_registers.IsEmpty()) {
    CPURegister reg = scratch_registers.PopLowestIndex();
    Register x = reg.X();
    Register w = reg.W();
    unsigned code = reg.GetCode();
    __ Str(x, MemOperand(dump2_base, x_offset + (code * kXRegSizeInBytes)));
    __ Str(w, MemOperand(dump2_base, w_offset + (code * kWRegSizeInBytes)));
  }

  // Finally, restore dump2_base.
  __ Ldr(dump2_base,
         MemOperand(dump2_base,
                    x_offset + (dump2_base.GetCode() * kXRegSizeInBytes)));

  completed_ = true;
}


uint64_t GetSignallingNan(int size_in_bits) {
  switch (size_in_bits) {
    case kHRegSize:
      return Float16ToRawbits(kFP16SignallingNaN);
    case kSRegSize:
      return FloatToRawbits(kFP32SignallingNaN);
    case kDRegSize:
      return DoubleToRawbits(kFP64SignallingNaN);
    default:
      VIXL_UNIMPLEMENTED();
      return 0;
  }
}


bool CanRun(const CPUFeatures& required, bool* queried_can_run) {
  bool log_if_missing = true;
  if (queried_can_run != NULL) {
    log_if_missing = !*queried_can_run;
    *queried_can_run = true;
  }

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
  // The Simulator can run any test that VIXL can assemble.
  USE(required);
  USE(log_if_missing);
  return true;
#else
  CPUFeatures cpu = CPUFeatures::InferFromOS();
  // If InferFromOS fails, assume that basic features are present.
  if (cpu.HasNoFeatures()) cpu = CPUFeatures::AArch64LegacyBaseline();
  VIXL_ASSERT(cpu.Has(kInfrastructureCPUFeatures));

  if (cpu.Has(required)) return true;

  if (log_if_missing) {
    CPUFeatures missing = required.Without(cpu);
    // Note: This message needs to match REGEXP_MISSING_FEATURES from
    // tools/threaded_test.py.
    std::cout << "SKIPPED: Missing features: { " << missing << " }\n";
    std::cout << "This test requires the following features to run its "
                 "generated code on this CPU: "
              << required << "\n";
  }

  return false;
#endif
}
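// Typical usage of CanRun (an illustrative sketch, not code from this file):
// callers that probe several feature combinations pass the address of a
// shared flag, so the "SKIPPED" message is only logged for the first query:
//
//   bool queried_can_run = false;
//   for (/* each feature combination */) {
//     if (!CanRun(required_features, &queried_can_run)) continue;
//     // ... run the test for this combination ...
//   }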
// Note that the function assumes p0, p1, p2 and p3 are set to all true in b-,
// h-, s- and d-lane sizes respectively, and that p4 and p5 are clobbered as
// temporary predicates.
template <typename T, size_t N>
void SetFpData(MacroAssembler* masm,
               int esize,
               const T (&values)[N],
               uint64_t lcg_mult) {
  uint64_t a = 0;
  uint64_t b = lcg_mult;
  // Used to populate the assigned element slots of each register, depending on
  // the floating-point type being initialised.
  __ Pfalse(p5.VnB());
  switch (esize) {
    case kHRegSize:
      a = Float16ToRawbits(Float16(1.5));
      // Pick a convenient number within the normal half-precision range.
      b = Float16ToRawbits(Float16(lcg_mult % 1024));

      // Step 1: Set fp16 numbers to the undefined registers.
      //      p4< 15:0>: 0b0101010101010101
      // z{code}<127:0>: 0xHHHHHHHHHHHHHHHH
      __ Zip1(p4.VnB(), p0.VnB(), p5.VnB());
      break;
    case kSRegSize:
      a = FloatToRawbits(1.5);
      b = FloatToRawbits(lcg_mult);

      // Step 2: Set fp32 numbers in the registers, on top of the fp16
      // initialisation.
      //      p4< 15:0>: 0b0000000100000001
      // z{code}<127:0>: 0xHHHHSSSSHHHHSSSS
      __ Zip1(p4.VnS(), p2.VnS(), p5.VnS());
      break;
    case kDRegSize:
      a = DoubleToRawbits(1.5);
      b = DoubleToRawbits(lcg_mult);

      // Step 3: Set fp64 numbers in the registers, on top of both the fp16 and
      // fp32 initialisation.
      //      p4< 15:0>: 0b0000000000000001
      // z{code}<127:0>: 0xHHHHSSSSDDDDDDDD
      __ Zip1(p4.VnD(), p3.VnD(), p5.VnD());
      break;
    default:
      VIXL_UNIMPLEMENTED();
      break;
  }

  __ Dup(z30.WithLaneSize(esize), a);
  __ Dup(z31.WithLaneSize(esize), b);

  for (unsigned j = 0; j <= (kZRegMaxSize / (N * esize)); j++) {
    // As floating point operations on random values have a tendency to
    // converge on special-case numbers like NaNs, use normal floating point
    // values as the seed instead.
    InsrHelper(masm, z0.WithLaneSize(esize), values);
  }

  __ Fmla(z0.WithLaneSize(esize),
          p4.Merging(),
          z30.WithLaneSize(esize),
          z0.WithLaneSize(esize),
          z31.WithLaneSize(esize),
          FastNaNPropagation);

  for (unsigned i = 1; i < kNumberOfZRegisters - 1; i++) {
    __ Fmla(ZRegister(i).WithLaneSize(esize),
            p4.Merging(),
            z30.WithLaneSize(esize),
            ZRegister(i - 1).WithLaneSize(esize),
            z31.WithLaneSize(esize),
            FastNaNPropagation);
  }

  __ Fmul(z31.WithLaneSize(esize),
          p4.Merging(),
          z31.WithLaneSize(esize),
          z30.WithLaneSize(esize),
          FastNaNPropagation);
  __ Fadd(z31.WithLaneSize(esize), p4.Merging(), z31.WithLaneSize(esize), 1);
}


// Set z0 - z31 to some normal floating point data.
void InitialiseRegisterFp(MacroAssembler* masm, uint64_t lcg_mult) {
  // Initialise each Z register to a mixture of fp16/32/64 values with the
  // following pattern:
  // z0.h[0-1] = fp16, z0.s[1] = fp32, z0.d[1] = fp64, repeated throughout the
  // register.
  //
  // For example:
  // z{code}<2047:1920>: 0x{<      fp64      ><  fp32  ><f16><f16>}
  // ...
  // z{code}< 127:   0>: 0x{<      fp64      ><  fp32  ><f16><f16>}
  //
  // To produce this mixture, the three initialisation steps must be called in
  // the following order.
  SetFpData(masm, kHRegSize, kInputFloat16Basic, lcg_mult);
  SetFpData(masm, kSRegSize, kInputFloatBasic, lcg_mult);
  SetFpData(masm, kDRegSize, kInputDoubleBasic, lcg_mult);
}
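// The pseudo-random state below is produced with a linear congruential
// generator (LCG): each value is derived from the previous one as
//   x_next = x_prev * lcg_mult + 1
// computed with Madd for X registers and Mla for Z registers. The multiplier
// 6364136223846793005 is Knuth's MMIX LCG constant. (Explanatory note only.)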
void SetInitialMachineState(MacroAssembler* masm, InputSet input_set) {
  USE(input_set);
  uint64_t lcg_mult = 6364136223846793005;

  // Set x0 - x30 to pseudo-random data.
  __ Mov(x29, 1);  // LCG increment.
  __ Mov(x30, lcg_mult);
  __ Mov(x0, 42);  // LCG seed.
  __ Cmn(x0, 0);   // Clear NZCV flags for later.

  __ Madd(x0, x0, x30, x29);  // First pseudo-random number.

  // Registers 1 - 29.
  for (unsigned i = 1; i < 30; i++) {
    __ Madd(XRegister(i), XRegister(i - 1), x30, x29);
  }
  __ Mul(x30, x29, x30);
  __ Add(x30, x30, 1);

  // Set first four predicate registers to true for increasing lane sizes.
  __ Ptrue(p0.VnB());
  __ Ptrue(p1.VnH());
  __ Ptrue(p2.VnS());
  __ Ptrue(p3.VnD());

  // Set z0 - z31 to pseudo-random data.
  if (input_set == kIntInputSet) {
    __ Dup(z30.VnD(), 1);
    __ Dup(z31.VnD(), lcg_mult);
    __ Index(z0.VnB(), -16, 13);  // LCG seeds.

    __ Mla(z0.VnD(), p0.Merging(), z30.VnD(), z0.VnD(), z31.VnD());
    for (unsigned i = 1; i < kNumberOfZRegisters - 1; i++) {
      __ Mla(ZRegister(i).VnD(),
             p0.Merging(),
             z30.VnD(),
             ZRegister(i - 1).VnD(),
             z31.VnD());
    }
    __ Mul(z31.VnD(), p0.Merging(), z31.VnD(), z30.VnD());
    __ Add(z31.VnD(), z31.VnD(), 1);
  } else {
    VIXL_ASSERT(input_set == kFpInputSet);
    InitialiseRegisterFp(masm, lcg_mult);
  }

  // Set remaining predicate registers based on earlier pseudo-random data.
  for (unsigned i = 4; i < kNumberOfPRegisters; i++) {
    __ Cmpge(PRegister(i).VnB(), p0.Zeroing(), ZRegister(i).VnB(), 0);
  }
  for (unsigned i = 4; i < kNumberOfPRegisters; i += 2) {
    __ Zip1(p0.VnB(), PRegister(i).VnB(), PRegister(i + 1).VnB());
    __ Zip2(PRegister(i + 1).VnB(), PRegister(i).VnB(), PRegister(i + 1).VnB());
    __ Mov(PRegister(i), p0);
  }
  __ Ptrue(p0.VnB());

  // At this point, only sp and a few status registers are undefined. These
  // must be ignored when computing the state hash.
}


void ComputeMachineStateHash(MacroAssembler* masm, uint32_t* dst) {
  // Use explicit registers, to avoid hash order varying if
  // UseScratchRegisterScope changes.
  UseScratchRegisterScope temps(masm);
  temps.ExcludeAll();

  Register t0 = w0;
  Register t1 = x1;

  // Compute hash of x0 - x30.
  __ Push(t0.X(), t1);
  __ Crc32x(t0, wzr, t0.X());
  for (unsigned i = 0; i < kNumberOfRegisters; i++) {
    if (i == xzr.GetCode()) continue;   // Skip sp.
    if (t0.Is(WRegister(i))) continue;  // Skip t0, as it's already hashed.
    __ Crc32x(t0, t0, XRegister(i));
  }

  // Hash the status flags.
  __ Mrs(t1, NZCV);
  __ Crc32x(t0, t0, t1);

  // Acquire another temp, as integer registers have been hashed already.
  __ Push(x30, xzr);
  Register t2 = x30;

  // Compute hash of all bits in z0 - z31. This implies different hashes are
  // produced for machines of different vector length.
  for (unsigned i = 0; i < kNumberOfZRegisters; i++) {
    __ Rdvl(t2, 1);
    __ Lsr(t2, t2, 4);

    Label vl_loop;
    __ Bind(&vl_loop);

    __ Umov(t1, VRegister(i).V2D(), 0);
    __ Crc32x(t0, t0, t1);
    __ Umov(t1, VRegister(i).V2D(), 1);
    __ Crc32x(t0, t0, t1);
    __ Ext(ZRegister(i).VnB(), ZRegister(i).VnB(), ZRegister(i).VnB(), 16);

    __ Sub(t2, t2, 1);
    __ Cbnz(t2, &vl_loop);
  }

  // Hash predicate registers. For simplicity, this writes the predicate
  // registers to a zero-initialised area of stack of the maximum size required
  // for P registers. It then computes a hash of that entire stack area.
  unsigned p_stack_space = kNumberOfPRegisters * kPRegMaxSizeInBytes;

  // Zero claimed stack area.
  for (unsigned i = 0; i < p_stack_space; i += kXRegSizeInBytes * 2) {
    __ Push(xzr, xzr);
  }

  // Store all P registers to the stack.
  __ Mov(t1, sp);
  for (unsigned i = 0; i < kNumberOfPRegisters; i++) {
    __ Str(PRegister(i), SVEMemOperand(t1));
    __ Add(t1, t1, kPRegMaxSizeInBytes);
  }

  // Hash the entire stack area.
  for (unsigned i = 0; i < p_stack_space; i += kXRegSizeInBytes * 2) {
    __ Pop(t1, t2);
    __ Crc32x(t0, t0, t1);
    __ Crc32x(t0, t0, t2);
  }

  __ Mov(t1, reinterpret_cast<uintptr_t>(dst));
  __ Str(t0, MemOperand(t1));

  __ Pop(xzr, x30);
  __ Pop(t1, t0.X());
}

}  // namespace aarch64
}  // namespace vixl
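// A rough usage sketch (illustrative only; the exact macros and test flow live
// in the aarch64 test files, not here): a test emits code with a
// MacroAssembler, captures the machine state with RegisterDump from inside the
// generated code, runs that code, and then checks the dump with the Equal*
// helpers above. Assuming the RegisterDump accessor flags_nzcv() declared in
// test-utils-aarch64.h:
//
//   RegisterDump core;
//   // Inside the generated code:
//   //   __ Mov(x0, 42);
//   //   core.Dump(&masm);
//   // After running the generated code:
//   VIXL_CHECK(Equal64(42, &core, x0));
//   VIXL_CHECK(EqualNzcv(expected_flags, core.flags_nzcv()));
//
// Here `expected_flags` is a placeholder for whichever NZCV combination the
// test expects.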