// Copyright 2015, VIXL authors // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are met: // // * Redistributions of source code must retain the above copyright notice, // this list of conditions and the following disclaimer. // * Redistributions in binary form must reproduce the above copyright notice, // this list of conditions and the following disclaimer in the documentation // and/or other materials provided with the distribution. // * Neither the name of ARM Limited nor the names of its contributors may be // used to endorse or promote products derived from this software without // specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND // ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED // WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #ifdef VIXL_INCLUDE_SIMULATOR_AARCH64 #include #include "simulator-aarch64.h" namespace vixl { namespace aarch64 { template <> double Simulator::FPDefaultNaN() { return kFP64DefaultNaN; } template <> float Simulator::FPDefaultNaN() { return kFP32DefaultNaN; } // See FPRound for a description of this function. static inline double FPRoundToDouble(int64_t sign, int64_t exponent, uint64_t mantissa, FPRounding round_mode) { int64_t bits = FPRound(sign, exponent, mantissa, round_mode); return RawbitsToDouble(bits); } // See FPRound for a description of this function. static inline float FPRoundToFloat(int64_t sign, int64_t exponent, uint64_t mantissa, FPRounding round_mode) { int32_t bits = FPRound(sign, exponent, mantissa, round_mode); return RawbitsToFloat(bits); } // See FPRound for a description of this function. static inline float16 FPRoundToFloat16(int64_t sign, int64_t exponent, uint64_t mantissa, FPRounding round_mode) { return FPRound(sign, exponent, mantissa, round_mode); } double Simulator::FixedToDouble(int64_t src, int fbits, FPRounding round) { if (src >= 0) { return UFixedToDouble(src, fbits, round); } else if (src == INT64_MIN) { return -UFixedToDouble(src, fbits, round); } else { return -UFixedToDouble(-src, fbits, round); } } double Simulator::UFixedToDouble(uint64_t src, int fbits, FPRounding round) { // An input of 0 is a special case because the result is effectively // subnormal: The exponent is encoded as 0 and there is no implicit 1 bit. if (src == 0) { return 0.0; } // Calculate the exponent. The highest significant bit will have the value // 2^exponent. const int highest_significant_bit = 63 - CountLeadingZeros(src); const int64_t exponent = highest_significant_bit - fbits; return FPRoundToDouble(0, exponent, src, round); } float Simulator::FixedToFloat(int64_t src, int fbits, FPRounding round) { if (src >= 0) { return UFixedToFloat(src, fbits, round); } else if (src == INT64_MIN) { return -UFixedToFloat(src, fbits, round); } else { return -UFixedToFloat(-src, fbits, round); } } float Simulator::UFixedToFloat(uint64_t src, int fbits, FPRounding round) { // An input of 0 is a special case because the result is effectively // subnormal: The exponent is encoded as 0 and there is no implicit 1 bit. if (src == 0) { return 0.0f; } // Calculate the exponent. The highest significant bit will have the value // 2^exponent. const int highest_significant_bit = 63 - CountLeadingZeros(src); const int32_t exponent = highest_significant_bit - fbits; return FPRoundToFloat(0, exponent, src, round); } double Simulator::FPToDouble(float value) { switch (std::fpclassify(value)) { case FP_NAN: { if (IsSignallingNaN(value)) { FPProcessException(); } if (ReadDN()) return kFP64DefaultNaN; // Convert NaNs as the processor would: // - The sign is propagated. // - The payload (mantissa) is transferred entirely, except that the top // bit is forced to '1', making the result a quiet NaN. The unused // (low-order) payload bits are set to 0. uint32_t raw = FloatToRawbits(value); uint64_t sign = raw >> 31; uint64_t exponent = (1 << 11) - 1; uint64_t payload = ExtractUnsignedBitfield64(21, 0, raw); payload <<= (52 - 23); // The unused low-order bits should be 0. payload |= (UINT64_C(1) << 51); // Force a quiet NaN. return RawbitsToDouble((sign << 63) | (exponent << 52) | payload); } case FP_ZERO: case FP_NORMAL: case FP_SUBNORMAL: case FP_INFINITE: { // All other inputs are preserved in a standard cast, because every value // representable using an IEEE-754 float is also representable using an // IEEE-754 double. return static_cast(value); } } VIXL_UNREACHABLE(); return static_cast(value); } float Simulator::FPToFloat(float16 value) { uint32_t sign = value >> 15; uint32_t exponent = ExtractUnsignedBitfield32(kFloat16MantissaBits + kFloat16ExponentBits - 1, kFloat16MantissaBits, value); uint32_t mantissa = ExtractUnsignedBitfield32(kFloat16MantissaBits - 1, 0, value); switch (Float16Classify(value)) { case FP_ZERO: return (sign == 0) ? 0.0f : -0.0f; case FP_INFINITE: return (sign == 0) ? kFP32PositiveInfinity : kFP32NegativeInfinity; case FP_SUBNORMAL: { // Calculate shift required to put mantissa into the most-significant bits // of the destination mantissa. int shift = CountLeadingZeros(mantissa << (32 - 10)); // Shift mantissa and discard implicit '1'. mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits) + shift + 1; mantissa &= (1 << kFloatMantissaBits) - 1; // Adjust the exponent for the shift applied, and rebias. exponent = exponent - shift + (-15 + 127); break; } case FP_NAN: if (IsSignallingNaN(value)) { FPProcessException(); } if (ReadDN()) return kFP32DefaultNaN; // Convert NaNs as the processor would: // - The sign is propagated. // - The payload (mantissa) is transferred entirely, except that the top // bit is forced to '1', making the result a quiet NaN. The unused // (low-order) payload bits are set to 0. exponent = (1 << kFloatExponentBits) - 1; // Increase bits in mantissa, making low-order bits 0. mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits); mantissa |= 1 << 22; // Force a quiet NaN. break; case FP_NORMAL: // Increase bits in mantissa, making low-order bits 0. mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits); // Change exponent bias. exponent += (-15 + 127); break; default: VIXL_UNREACHABLE(); } return RawbitsToFloat((sign << 31) | (exponent << kFloatMantissaBits) | mantissa); } float16 Simulator::FPToFloat16(float value, FPRounding round_mode) { // Only the FPTieEven rounding mode is implemented. VIXL_ASSERT(round_mode == FPTieEven); USE(round_mode); uint32_t raw = FloatToRawbits(value); int32_t sign = raw >> 31; int32_t exponent = ExtractUnsignedBitfield32(30, 23, raw) - 127; uint32_t mantissa = ExtractUnsignedBitfield32(22, 0, raw); switch (std::fpclassify(value)) { case FP_NAN: { if (IsSignallingNaN(value)) { FPProcessException(); } if (ReadDN()) return kFP16DefaultNaN; // Convert NaNs as the processor would: // - The sign is propagated. // - The payload (mantissa) is transferred as much as possible, except // that the top bit is forced to '1', making the result a quiet NaN. float16 result = (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity; result |= mantissa >> (kFloatMantissaBits - kFloat16MantissaBits); result |= (1 << 9); // Force a quiet NaN; return result; } case FP_ZERO: return (sign == 0) ? 0 : 0x8000; case FP_INFINITE: return (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity; case FP_NORMAL: case FP_SUBNORMAL: { // Convert float-to-half as the processor would, assuming that FPCR.FZ // (flush-to-zero) is not set. // Add the implicit '1' bit to the mantissa. mantissa += (1 << 23); return FPRoundToFloat16(sign, exponent, mantissa, round_mode); } } VIXL_UNREACHABLE(); return 0; } float16 Simulator::FPToFloat16(double value, FPRounding round_mode) { // Only the FPTieEven rounding mode is implemented. VIXL_ASSERT(round_mode == FPTieEven); USE(round_mode); uint64_t raw = DoubleToRawbits(value); int32_t sign = raw >> 63; int64_t exponent = ExtractUnsignedBitfield64(62, 52, raw) - 1023; uint64_t mantissa = ExtractUnsignedBitfield64(51, 0, raw); switch (std::fpclassify(value)) { case FP_NAN: { if (IsSignallingNaN(value)) { FPProcessException(); } if (ReadDN()) return kFP16DefaultNaN; // Convert NaNs as the processor would: // - The sign is propagated. // - The payload (mantissa) is transferred as much as possible, except // that the top bit is forced to '1', making the result a quiet NaN. float16 result = (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity; result |= mantissa >> (kDoubleMantissaBits - kFloat16MantissaBits); result |= (1 << 9); // Force a quiet NaN; return result; } case FP_ZERO: return (sign == 0) ? 0 : 0x8000; case FP_INFINITE: return (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity; case FP_NORMAL: case FP_SUBNORMAL: { // Convert double-to-half as the processor would, assuming that FPCR.FZ // (flush-to-zero) is not set. // Add the implicit '1' bit to the mantissa. mantissa += (UINT64_C(1) << 52); return FPRoundToFloat16(sign, exponent, mantissa, round_mode); } } VIXL_UNREACHABLE(); return 0; } float Simulator::FPToFloat(double value, FPRounding round_mode) { // Only the FPTieEven rounding mode is implemented. VIXL_ASSERT((round_mode == FPTieEven) || (round_mode == FPRoundOdd)); USE(round_mode); switch (std::fpclassify(value)) { case FP_NAN: { if (IsSignallingNaN(value)) { FPProcessException(); } if (ReadDN()) return kFP32DefaultNaN; // Convert NaNs as the processor would: // - The sign is propagated. // - The payload (mantissa) is transferred as much as possible, except // that the top bit is forced to '1', making the result a quiet NaN. uint64_t raw = DoubleToRawbits(value); uint32_t sign = raw >> 63; uint32_t exponent = (1 << 8) - 1; uint32_t payload = static_cast(ExtractUnsignedBitfield64(50, 52 - 23, raw)); payload |= (1 << 22); // Force a quiet NaN. return RawbitsToFloat((sign << 31) | (exponent << 23) | payload); } case FP_ZERO: case FP_INFINITE: { // In a C++ cast, any value representable in the target type will be // unchanged. This is always the case for +/-0.0 and infinities. return static_cast(value); } case FP_NORMAL: case FP_SUBNORMAL: { // Convert double-to-float as the processor would, assuming that FPCR.FZ // (flush-to-zero) is not set. uint64_t raw = DoubleToRawbits(value); // Extract the IEEE-754 double components. uint32_t sign = raw >> 63; // Extract the exponent and remove the IEEE-754 encoding bias. int32_t exponent = static_cast(ExtractUnsignedBitfield64(62, 52, raw)) - 1023; // Extract the mantissa and add the implicit '1' bit. uint64_t mantissa = ExtractUnsignedBitfield64(51, 0, raw); if (std::fpclassify(value) == FP_NORMAL) { mantissa |= (UINT64_C(1) << 52); } return FPRoundToFloat(sign, exponent, mantissa, round_mode); } } VIXL_UNREACHABLE(); return value; } void Simulator::ld1(VectorFormat vform, LogicVRegister dst, uint64_t addr) { dst.ClearForWrite(vform); for (int i = 0; i < LaneCountFromFormat(vform); i++) { dst.ReadUintFromMem(vform, i, addr); addr += LaneSizeInBytesFromFormat(vform); } } void Simulator::ld1(VectorFormat vform, LogicVRegister dst, int index, uint64_t addr) { dst.ReadUintFromMem(vform, index, addr); } void Simulator::ld1r(VectorFormat vform, LogicVRegister dst, uint64_t addr) { dst.ClearForWrite(vform); for (int i = 0; i < LaneCountFromFormat(vform); i++) { dst.ReadUintFromMem(vform, i, addr); } } void Simulator::ld2(VectorFormat vform, LogicVRegister dst1, LogicVRegister dst2, uint64_t addr1) { dst1.ClearForWrite(vform); dst2.ClearForWrite(vform); int esize = LaneSizeInBytesFromFormat(vform); uint64_t addr2 = addr1 + esize; for (int i = 0; i < LaneCountFromFormat(vform); i++) { dst1.ReadUintFromMem(vform, i, addr1); dst2.ReadUintFromMem(vform, i, addr2); addr1 += 2 * esize; addr2 += 2 * esize; } } void Simulator::ld2(VectorFormat vform, LogicVRegister dst1, LogicVRegister dst2, int index, uint64_t addr1) { dst1.ClearForWrite(vform); dst2.ClearForWrite(vform); uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform); dst1.ReadUintFromMem(vform, index, addr1); dst2.ReadUintFromMem(vform, index, addr2); } void Simulator::ld2r(VectorFormat vform, LogicVRegister dst1, LogicVRegister dst2, uint64_t addr) { dst1.ClearForWrite(vform); dst2.ClearForWrite(vform); uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform); for (int i = 0; i < LaneCountFromFormat(vform); i++) { dst1.ReadUintFromMem(vform, i, addr); dst2.ReadUintFromMem(vform, i, addr2); } } void Simulator::ld3(VectorFormat vform, LogicVRegister dst1, LogicVRegister dst2, LogicVRegister dst3, uint64_t addr1) { dst1.ClearForWrite(vform); dst2.ClearForWrite(vform); dst3.ClearForWrite(vform); int esize = LaneSizeInBytesFromFormat(vform); uint64_t addr2 = addr1 + esize; uint64_t addr3 = addr2 + esize; for (int i = 0; i < LaneCountFromFormat(vform); i++) { dst1.ReadUintFromMem(vform, i, addr1); dst2.ReadUintFromMem(vform, i, addr2); dst3.ReadUintFromMem(vform, i, addr3); addr1 += 3 * esize; addr2 += 3 * esize; addr3 += 3 * esize; } } void Simulator::ld3(VectorFormat vform, LogicVRegister dst1, LogicVRegister dst2, LogicVRegister dst3, int index, uint64_t addr1) { dst1.ClearForWrite(vform); dst2.ClearForWrite(vform); dst3.ClearForWrite(vform); uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform); uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform); dst1.ReadUintFromMem(vform, index, addr1); dst2.ReadUintFromMem(vform, index, addr2); dst3.ReadUintFromMem(vform, index, addr3); } void Simulator::ld3r(VectorFormat vform, LogicVRegister dst1, LogicVRegister dst2, LogicVRegister dst3, uint64_t addr) { dst1.ClearForWrite(vform); dst2.ClearForWrite(vform); dst3.ClearForWrite(vform); uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform); uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform); for (int i = 0; i < LaneCountFromFormat(vform); i++) { dst1.ReadUintFromMem(vform, i, addr); dst2.ReadUintFromMem(vform, i, addr2); dst3.ReadUintFromMem(vform, i, addr3); } } void Simulator::ld4(VectorFormat vform, LogicVRegister dst1, LogicVRegister dst2, LogicVRegister dst3, LogicVRegister dst4, uint64_t addr1) { dst1.ClearForWrite(vform); dst2.ClearForWrite(vform); dst3.ClearForWrite(vform); dst4.ClearForWrite(vform); int esize = LaneSizeInBytesFromFormat(vform); uint64_t addr2 = addr1 + esize; uint64_t addr3 = addr2 + esize; uint64_t addr4 = addr3 + esize; for (int i = 0; i < LaneCountFromFormat(vform); i++) { dst1.ReadUintFromMem(vform, i, addr1); dst2.ReadUintFromMem(vform, i, addr2); dst3.ReadUintFromMem(vform, i, addr3); dst4.ReadUintFromMem(vform, i, addr4); addr1 += 4 * esize; addr2 += 4 * esize; addr3 += 4 * esize; addr4 += 4 * esize; } } void Simulator::ld4(VectorFormat vform, LogicVRegister dst1, LogicVRegister dst2, LogicVRegister dst3, LogicVRegister dst4, int index, uint64_t addr1) { dst1.ClearForWrite(vform); dst2.ClearForWrite(vform); dst3.ClearForWrite(vform); dst4.ClearForWrite(vform); uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform); uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform); uint64_t addr4 = addr3 + LaneSizeInBytesFromFormat(vform); dst1.ReadUintFromMem(vform, index, addr1); dst2.ReadUintFromMem(vform, index, addr2); dst3.ReadUintFromMem(vform, index, addr3); dst4.ReadUintFromMem(vform, index, addr4); } void Simulator::ld4r(VectorFormat vform, LogicVRegister dst1, LogicVRegister dst2, LogicVRegister dst3, LogicVRegister dst4, uint64_t addr) { dst1.ClearForWrite(vform); dst2.ClearForWrite(vform); dst3.ClearForWrite(vform); dst4.ClearForWrite(vform); uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform); uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform); uint64_t addr4 = addr3 + LaneSizeInBytesFromFormat(vform); for (int i = 0; i < LaneCountFromFormat(vform); i++) { dst1.ReadUintFromMem(vform, i, addr); dst2.ReadUintFromMem(vform, i, addr2); dst3.ReadUintFromMem(vform, i, addr3); dst4.ReadUintFromMem(vform, i, addr4); } } void Simulator::st1(VectorFormat vform, LogicVRegister src, uint64_t addr) { for (int i = 0; i < LaneCountFromFormat(vform); i++) { src.WriteUintToMem(vform, i, addr); addr += LaneSizeInBytesFromFormat(vform); } } void Simulator::st1(VectorFormat vform, LogicVRegister src, int index, uint64_t addr) { src.WriteUintToMem(vform, index, addr); } void Simulator::st2(VectorFormat vform, LogicVRegister dst, LogicVRegister dst2, uint64_t addr) { int esize = LaneSizeInBytesFromFormat(vform); uint64_t addr2 = addr + esize; for (int i = 0; i < LaneCountFromFormat(vform); i++) { dst.WriteUintToMem(vform, i, addr); dst2.WriteUintToMem(vform, i, addr2); addr += 2 * esize; addr2 += 2 * esize; } } void Simulator::st2(VectorFormat vform, LogicVRegister dst, LogicVRegister dst2, int index, uint64_t addr) { int esize = LaneSizeInBytesFromFormat(vform); dst.WriteUintToMem(vform, index, addr); dst2.WriteUintToMem(vform, index, addr + 1 * esize); } void Simulator::st3(VectorFormat vform, LogicVRegister dst, LogicVRegister dst2, LogicVRegister dst3, uint64_t addr) { int esize = LaneSizeInBytesFromFormat(vform); uint64_t addr2 = addr + esize; uint64_t addr3 = addr2 + esize; for (int i = 0; i < LaneCountFromFormat(vform); i++) { dst.WriteUintToMem(vform, i, addr); dst2.WriteUintToMem(vform, i, addr2); dst3.WriteUintToMem(vform, i, addr3); addr += 3 * esize; addr2 += 3 * esize; addr3 += 3 * esize; } } void Simulator::st3(VectorFormat vform, LogicVRegister dst, LogicVRegister dst2, LogicVRegister dst3, int index, uint64_t addr) { int esize = LaneSizeInBytesFromFormat(vform); dst.WriteUintToMem(vform, index, addr); dst2.WriteUintToMem(vform, index, addr + 1 * esize); dst3.WriteUintToMem(vform, index, addr + 2 * esize); } void Simulator::st4(VectorFormat vform, LogicVRegister dst, LogicVRegister dst2, LogicVRegister dst3, LogicVRegister dst4, uint64_t addr) { int esize = LaneSizeInBytesFromFormat(vform); uint64_t addr2 = addr + esize; uint64_t addr3 = addr2 + esize; uint64_t addr4 = addr3 + esize; for (int i = 0; i < LaneCountFromFormat(vform); i++) { dst.WriteUintToMem(vform, i, addr); dst2.WriteUintToMem(vform, i, addr2); dst3.WriteUintToMem(vform, i, addr3); dst4.WriteUintToMem(vform, i, addr4); addr += 4 * esize; addr2 += 4 * esize; addr3 += 4 * esize; addr4 += 4 * esize; } } void Simulator::st4(VectorFormat vform, LogicVRegister dst, LogicVRegister dst2, LogicVRegister dst3, LogicVRegister dst4, int index, uint64_t addr) { int esize = LaneSizeInBytesFromFormat(vform); dst.WriteUintToMem(vform, index, addr); dst2.WriteUintToMem(vform, index, addr + 1 * esize); dst3.WriteUintToMem(vform, index, addr + 2 * esize); dst4.WriteUintToMem(vform, index, addr + 3 * esize); } LogicVRegister Simulator::cmp(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2, Condition cond) { dst.ClearForWrite(vform); for (int i = 0; i < LaneCountFromFormat(vform); i++) { int64_t sa = src1.Int(vform, i); int64_t sb = src2.Int(vform, i); uint64_t ua = src1.Uint(vform, i); uint64_t ub = src2.Uint(vform, i); bool result = false; switch (cond) { case eq: result = (ua == ub); break; case ge: result = (sa >= sb); break; case gt: result = (sa > sb); break; case hi: result = (ua > ub); break; case hs: result = (ua >= ub); break; case lt: result = (sa < sb); break; case le: result = (sa <= sb); break; default: VIXL_UNREACHABLE(); break; } dst.SetUint(vform, i, result ? MaxUintFromFormat(vform) : 0); } return dst; } LogicVRegister Simulator::cmp(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, int imm, Condition cond) { SimVRegister temp; LogicVRegister imm_reg = dup_immediate(vform, temp, imm); return cmp(vform, dst, src1, imm_reg, cond); } LogicVRegister Simulator::cmptst(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2) { dst.ClearForWrite(vform); for (int i = 0; i < LaneCountFromFormat(vform); i++) { uint64_t ua = src1.Uint(vform, i); uint64_t ub = src2.Uint(vform, i); dst.SetUint(vform, i, ((ua & ub) != 0) ? MaxUintFromFormat(vform) : 0); } return dst; } LogicVRegister Simulator::add(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2) { int lane_size = LaneSizeInBitsFromFormat(vform); dst.ClearForWrite(vform); for (int i = 0; i < LaneCountFromFormat(vform); i++) { // Test for unsigned saturation. uint64_t ua = src1.UintLeftJustified(vform, i); uint64_t ub = src2.UintLeftJustified(vform, i); uint64_t ur = ua + ub; if (ur < ua) { dst.SetUnsignedSat(i, true); } // Test for signed saturation. bool pos_a = (ua >> 63) == 0; bool pos_b = (ub >> 63) == 0; bool pos_r = (ur >> 63) == 0; // If the signs of the operands are the same, but different from the result, // there was an overflow. if ((pos_a == pos_b) && (pos_a != pos_r)) { dst.SetSignedSat(i, pos_a); } dst.SetInt(vform, i, ur >> (64 - lane_size)); } return dst; } LogicVRegister Simulator::addp(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2) { SimVRegister temp1, temp2; uzp1(vform, temp1, src1, src2); uzp2(vform, temp2, src1, src2); add(vform, dst, temp1, temp2); return dst; } LogicVRegister Simulator::mla(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2) { SimVRegister temp; mul(vform, temp, src1, src2); add(vform, dst, dst, temp); return dst; } LogicVRegister Simulator::mls(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2) { SimVRegister temp; mul(vform, temp, src1, src2); sub(vform, dst, dst, temp); return dst; } LogicVRegister Simulator::mul(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2) { dst.ClearForWrite(vform); for (int i = 0; i < LaneCountFromFormat(vform); i++) { dst.SetUint(vform, i, src1.Uint(vform, i) * src2.Uint(vform, i)); } return dst; } LogicVRegister Simulator::mul(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2, int index) { SimVRegister temp; VectorFormat indexform = VectorFormatFillQ(vform); return mul(vform, dst, src1, dup_element(indexform, temp, src2, index)); } LogicVRegister Simulator::mla(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2, int index) { SimVRegister temp; VectorFormat indexform = VectorFormatFillQ(vform); return mla(vform, dst, src1, dup_element(indexform, temp, src2, index)); } LogicVRegister Simulator::mls(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2, int index) { SimVRegister temp; VectorFormat indexform = VectorFormatFillQ(vform); return mls(vform, dst, src1, dup_element(indexform, temp, src2, index)); } LogicVRegister Simulator::smull(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2, int index) { SimVRegister temp; VectorFormat indexform = VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); return smull(vform, dst, src1, dup_element(indexform, temp, src2, index)); } LogicVRegister Simulator::smull2(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2, int index) { SimVRegister temp; VectorFormat indexform = VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); return smull2(vform, dst, src1, dup_element(indexform, temp, src2, index)); } LogicVRegister Simulator::umull(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2, int index) { SimVRegister temp; VectorFormat indexform = VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); return umull(vform, dst, src1, dup_element(indexform, temp, src2, index)); } LogicVRegister Simulator::umull2(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2, int index) { SimVRegister temp; VectorFormat indexform = VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); return umull2(vform, dst, src1, dup_element(indexform, temp, src2, index)); } LogicVRegister Simulator::smlal(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2, int index) { SimVRegister temp; VectorFormat indexform = VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); return smlal(vform, dst, src1, dup_element(indexform, temp, src2, index)); } LogicVRegister Simulator::smlal2(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2, int index) { SimVRegister temp; VectorFormat indexform = VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); return smlal2(vform, dst, src1, dup_element(indexform, temp, src2, index)); } LogicVRegister Simulator::umlal(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2, int index) { SimVRegister temp; VectorFormat indexform = VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); return umlal(vform, dst, src1, dup_element(indexform, temp, src2, index)); } LogicVRegister Simulator::umlal2(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2, int index) { SimVRegister temp; VectorFormat indexform = VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); return umlal2(vform, dst, src1, dup_element(indexform, temp, src2, index)); } LogicVRegister Simulator::smlsl(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2, int index) { SimVRegister temp; VectorFormat indexform = VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); return smlsl(vform, dst, src1, dup_element(indexform, temp, src2, index)); } LogicVRegister Simulator::smlsl2(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2, int index) { SimVRegister temp; VectorFormat indexform = VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); return smlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index)); } LogicVRegister Simulator::umlsl(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2, int index) { SimVRegister temp; VectorFormat indexform = VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); return umlsl(vform, dst, src1, dup_element(indexform, temp, src2, index)); } LogicVRegister Simulator::umlsl2(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2, int index) { SimVRegister temp; VectorFormat indexform = VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); return umlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index)); } LogicVRegister Simulator::sqdmull(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2, int index) { SimVRegister temp; VectorFormat indexform = VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); return sqdmull(vform, dst, src1, dup_element(indexform, temp, src2, index)); } LogicVRegister Simulator::sqdmull2(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2, int index) { SimVRegister temp; VectorFormat indexform = VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); return sqdmull2(vform, dst, src1, dup_element(indexform, temp, src2, index)); } LogicVRegister Simulator::sqdmlal(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2, int index) { SimVRegister temp; VectorFormat indexform = VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); return sqdmlal(vform, dst, src1, dup_element(indexform, temp, src2, index)); } LogicVRegister Simulator::sqdmlal2(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2, int index) { SimVRegister temp; VectorFormat indexform = VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); return sqdmlal2(vform, dst, src1, dup_element(indexform, temp, src2, index)); } LogicVRegister Simulator::sqdmlsl(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2, int index) { SimVRegister temp; VectorFormat indexform = VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); return sqdmlsl(vform, dst, src1, dup_element(indexform, temp, src2, index)); } LogicVRegister Simulator::sqdmlsl2(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2, int index) { SimVRegister temp; VectorFormat indexform = VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); return sqdmlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index)); } LogicVRegister Simulator::sqdmulh(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2, int index) { SimVRegister temp; VectorFormat indexform = VectorFormatFillQ(vform); return sqdmulh(vform, dst, src1, dup_element(indexform, temp, src2, index)); } LogicVRegister Simulator::sqrdmulh(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2, int index) { SimVRegister temp; VectorFormat indexform = VectorFormatFillQ(vform); return sqrdmulh(vform, dst, src1, dup_element(indexform, temp, src2, index)); } uint16_t Simulator::PolynomialMult(uint8_t op1, uint8_t op2) const { uint16_t result = 0; uint16_t extended_op2 = op2; for (int i = 0; i < 8; ++i) { if ((op1 >> i) & 1) { result = result ^ (extended_op2 << i); } } return result; } LogicVRegister Simulator::pmul(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2) { dst.ClearForWrite(vform); for (int i = 0; i < LaneCountFromFormat(vform); i++) { dst.SetUint(vform, i, PolynomialMult(src1.Uint(vform, i), src2.Uint(vform, i))); } return dst; } LogicVRegister Simulator::pmull(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2) { VectorFormat vform_src = VectorFormatHalfWidth(vform); dst.ClearForWrite(vform); for (int i = 0; i < LaneCountFromFormat(vform); i++) { dst.SetUint(vform, i, PolynomialMult(src1.Uint(vform_src, i), src2.Uint(vform_src, i))); } return dst; } LogicVRegister Simulator::pmull2(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2) { VectorFormat vform_src = VectorFormatHalfWidthDoubleLanes(vform); dst.ClearForWrite(vform); int lane_count = LaneCountFromFormat(vform); for (int i = 0; i < lane_count; i++) { dst.SetUint(vform, i, PolynomialMult(src1.Uint(vform_src, lane_count + i), src2.Uint(vform_src, lane_count + i))); } return dst; } LogicVRegister Simulator::sub(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2) { int lane_size = LaneSizeInBitsFromFormat(vform); dst.ClearForWrite(vform); for (int i = 0; i < LaneCountFromFormat(vform); i++) { // Test for unsigned saturation. uint64_t ua = src1.UintLeftJustified(vform, i); uint64_t ub = src2.UintLeftJustified(vform, i); uint64_t ur = ua - ub; if (ub > ua) { dst.SetUnsignedSat(i, false); } // Test for signed saturation. bool pos_a = (ua >> 63) == 0; bool pos_b = (ub >> 63) == 0; bool pos_r = (ur >> 63) == 0; // If the signs of the operands are different, and the sign of the first // operand doesn't match the result, there was an overflow. if ((pos_a != pos_b) && (pos_a != pos_r)) { dst.SetSignedSat(i, pos_a); } dst.SetInt(vform, i, ur >> (64 - lane_size)); } return dst; } LogicVRegister Simulator::and_(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2) { dst.ClearForWrite(vform); for (int i = 0; i < LaneCountFromFormat(vform); i++) { dst.SetUint(vform, i, src1.Uint(vform, i) & src2.Uint(vform, i)); } return dst; } LogicVRegister Simulator::orr(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2) { dst.ClearForWrite(vform); for (int i = 0; i < LaneCountFromFormat(vform); i++) { dst.SetUint(vform, i, src1.Uint(vform, i) | src2.Uint(vform, i)); } return dst; } LogicVRegister Simulator::orn(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2) { dst.ClearForWrite(vform); for (int i = 0; i < LaneCountFromFormat(vform); i++) { dst.SetUint(vform, i, src1.Uint(vform, i) | ~src2.Uint(vform, i)); } return dst; } LogicVRegister Simulator::eor(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2) { dst.ClearForWrite(vform); for (int i = 0; i < LaneCountFromFormat(vform); i++) { dst.SetUint(vform, i, src1.Uint(vform, i) ^ src2.Uint(vform, i)); } return dst; } LogicVRegister Simulator::bic(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2) { dst.ClearForWrite(vform); for (int i = 0; i < LaneCountFromFormat(vform); i++) { dst.SetUint(vform, i, src1.Uint(vform, i) & ~src2.Uint(vform, i)); } return dst; } LogicVRegister Simulator::bic(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src, uint64_t imm) { uint64_t result[16]; int laneCount = LaneCountFromFormat(vform); for (int i = 0; i < laneCount; ++i) { result[i] = src.Uint(vform, i) & ~imm; } dst.ClearForWrite(vform); for (int i = 0; i < laneCount; ++i) { dst.SetUint(vform, i, result[i]); } return dst; } LogicVRegister Simulator::bif(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2) { dst.ClearForWrite(vform); for (int i = 0; i < LaneCountFromFormat(vform); i++) { uint64_t operand1 = dst.Uint(vform, i); uint64_t operand2 = ~src2.Uint(vform, i); uint64_t operand3 = src1.Uint(vform, i); uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2); dst.SetUint(vform, i, result); } return dst; } LogicVRegister Simulator::bit(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2) { dst.ClearForWrite(vform); for (int i = 0; i < LaneCountFromFormat(vform); i++) { uint64_t operand1 = dst.Uint(vform, i); uint64_t operand2 = src2.Uint(vform, i); uint64_t operand3 = src1.Uint(vform, i); uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2); dst.SetUint(vform, i, result); } return dst; } LogicVRegister Simulator::bsl(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2) { dst.ClearForWrite(vform); for (int i = 0; i < LaneCountFromFormat(vform); i++) { uint64_t operand1 = src2.Uint(vform, i); uint64_t operand2 = dst.Uint(vform, i); uint64_t operand3 = src1.Uint(vform, i); uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2); dst.SetUint(vform, i, result); } return dst; } LogicVRegister Simulator::sminmax(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2, bool max) { dst.ClearForWrite(vform); for (int i = 0; i < LaneCountFromFormat(vform); i++) { int64_t src1_val = src1.Int(vform, i); int64_t src2_val = src2.Int(vform, i); int64_t dst_val; if (max) { dst_val = (src1_val > src2_val) ? src1_val : src2_val; } else { dst_val = (src1_val < src2_val) ? src1_val : src2_val; } dst.SetInt(vform, i, dst_val); } return dst; } LogicVRegister Simulator::smax(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2) { return sminmax(vform, dst, src1, src2, true); } LogicVRegister Simulator::smin(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2) { return sminmax(vform, dst, src1, src2, false); } LogicVRegister Simulator::sminmaxp(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2, bool max) { int lanes = LaneCountFromFormat(vform); int64_t result[kMaxLanesPerVector]; const LogicVRegister* src = &src1; for (int j = 0; j < 2; j++) { for (int i = 0; i < lanes; i += 2) { int64_t first_val = src->Int(vform, i); int64_t second_val = src->Int(vform, i + 1); int64_t dst_val; if (max) { dst_val = (first_val > second_val) ? first_val : second_val; } else { dst_val = (first_val < second_val) ? first_val : second_val; } VIXL_ASSERT(((i >> 1) + (j * lanes / 2)) < kMaxLanesPerVector); result[(i >> 1) + (j * lanes / 2)] = dst_val; } src = &src2; } dst.SetIntArray(vform, result); return dst; } LogicVRegister Simulator::smaxp(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2) { return sminmaxp(vform, dst, src1, src2, true); } LogicVRegister Simulator::sminp(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2) { return sminmaxp(vform, dst, src1, src2, false); } LogicVRegister Simulator::addp(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src) { VIXL_ASSERT(vform == kFormatD); uint64_t dst_val = src.Uint(kFormat2D, 0) + src.Uint(kFormat2D, 1); dst.ClearForWrite(vform); dst.SetUint(vform, 0, dst_val); return dst; } LogicVRegister Simulator::addv(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src) { VectorFormat vform_dst = ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform)); int64_t dst_val = 0; for (int i = 0; i < LaneCountFromFormat(vform); i++) { dst_val += src.Int(vform, i); } dst.ClearForWrite(vform_dst); dst.SetInt(vform_dst, 0, dst_val); return dst; } LogicVRegister Simulator::saddlv(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src) { VectorFormat vform_dst = ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform) * 2); int64_t dst_val = 0; for (int i = 0; i < LaneCountFromFormat(vform); i++) { dst_val += src.Int(vform, i); } dst.ClearForWrite(vform_dst); dst.SetInt(vform_dst, 0, dst_val); return dst; } LogicVRegister Simulator::uaddlv(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src) { VectorFormat vform_dst = ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform) * 2); uint64_t dst_val = 0; for (int i = 0; i < LaneCountFromFormat(vform); i++) { dst_val += src.Uint(vform, i); } dst.ClearForWrite(vform_dst); dst.SetUint(vform_dst, 0, dst_val); return dst; } LogicVRegister Simulator::sminmaxv(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src, bool max) { int64_t dst_val = max ? INT64_MIN : INT64_MAX; for (int i = 0; i < LaneCountFromFormat(vform); i++) { int64_t src_val = src.Int(vform, i); if (max) { dst_val = (src_val > dst_val) ? src_val : dst_val; } else { dst_val = (src_val < dst_val) ? src_val : dst_val; } } dst.ClearForWrite(ScalarFormatFromFormat(vform)); dst.SetInt(vform, 0, dst_val); return dst; } LogicVRegister Simulator::smaxv(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src) { sminmaxv(vform, dst, src, true); return dst; } LogicVRegister Simulator::sminv(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src) { sminmaxv(vform, dst, src, false); return dst; } LogicVRegister Simulator::uminmax(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2, bool max) { dst.ClearForWrite(vform); for (int i = 0; i < LaneCountFromFormat(vform); i++) { uint64_t src1_val = src1.Uint(vform, i); uint64_t src2_val = src2.Uint(vform, i); uint64_t dst_val; if (max) { dst_val = (src1_val > src2_val) ? src1_val : src2_val; } else { dst_val = (src1_val < src2_val) ? src1_val : src2_val; } dst.SetUint(vform, i, dst_val); } return dst; } LogicVRegister Simulator::umax(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2) { return uminmax(vform, dst, src1, src2, true); } LogicVRegister Simulator::umin(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2) { return uminmax(vform, dst, src1, src2, false); } LogicVRegister Simulator::uminmaxp(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2, bool max) { int lanes = LaneCountFromFormat(vform); uint64_t result[kMaxLanesPerVector]; const LogicVRegister* src = &src1; for (int j = 0; j < 2; j++) { for (int i = 0; i < LaneCountFromFormat(vform); i += 2) { uint64_t first_val = src->Uint(vform, i); uint64_t second_val = src->Uint(vform, i + 1); uint64_t dst_val; if (max) { dst_val = (first_val > second_val) ? first_val : second_val; } else { dst_val = (first_val < second_val) ? first_val : second_val; } VIXL_ASSERT(((i >> 1) + (j * lanes / 2)) < kMaxLanesPerVector); result[(i >> 1) + (j * lanes / 2)] = dst_val; } src = &src2; } dst.SetUintArray(vform, result); return dst; } LogicVRegister Simulator::umaxp(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2) { return uminmaxp(vform, dst, src1, src2, true); } LogicVRegister Simulator::uminp(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2) { return uminmaxp(vform, dst, src1, src2, false); } LogicVRegister Simulator::uminmaxv(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src, bool max) { uint64_t dst_val = max ? 0 : UINT64_MAX; for (int i = 0; i < LaneCountFromFormat(vform); i++) { uint64_t src_val = src.Uint(vform, i); if (max) { dst_val = (src_val > dst_val) ? src_val : dst_val; } else { dst_val = (src_val < dst_val) ? src_val : dst_val; } } dst.ClearForWrite(ScalarFormatFromFormat(vform)); dst.SetUint(vform, 0, dst_val); return dst; } LogicVRegister Simulator::umaxv(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src) { uminmaxv(vform, dst, src, true); return dst; } LogicVRegister Simulator::uminv(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src) { uminmaxv(vform, dst, src, false); return dst; } LogicVRegister Simulator::shl(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src, int shift) { VIXL_ASSERT(shift >= 0); SimVRegister temp; LogicVRegister shiftreg = dup_immediate(vform, temp, shift); return ushl(vform, dst, src, shiftreg); } LogicVRegister Simulator::sshll(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src, int shift) { VIXL_ASSERT(shift >= 0); SimVRegister temp1, temp2; LogicVRegister shiftreg = dup_immediate(vform, temp1, shift); LogicVRegister extendedreg = sxtl(vform, temp2, src); return sshl(vform, dst, extendedreg, shiftreg); } LogicVRegister Simulator::sshll2(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src, int shift) { VIXL_ASSERT(shift >= 0); SimVRegister temp1, temp2; LogicVRegister shiftreg = dup_immediate(vform, temp1, shift); LogicVRegister extendedreg = sxtl2(vform, temp2, src); return sshl(vform, dst, extendedreg, shiftreg); } LogicVRegister Simulator::shll(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src) { int shift = LaneSizeInBitsFromFormat(vform) / 2; return sshll(vform, dst, src, shift); } LogicVRegister Simulator::shll2(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src) { int shift = LaneSizeInBitsFromFormat(vform) / 2; return sshll2(vform, dst, src, shift); } LogicVRegister Simulator::ushll(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src, int shift) { VIXL_ASSERT(shift >= 0); SimVRegister temp1, temp2; LogicVRegister shiftreg = dup_immediate(vform, temp1, shift); LogicVRegister extendedreg = uxtl(vform, temp2, src); return ushl(vform, dst, extendedreg, shiftreg); } LogicVRegister Simulator::ushll2(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src, int shift) { VIXL_ASSERT(shift >= 0); SimVRegister temp1, temp2; LogicVRegister shiftreg = dup_immediate(vform, temp1, shift); LogicVRegister extendedreg = uxtl2(vform, temp2, src); return ushl(vform, dst, extendedreg, shiftreg); } LogicVRegister Simulator::sli(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src, int shift) { dst.ClearForWrite(vform); int laneCount = LaneCountFromFormat(vform); for (int i = 0; i < laneCount; i++) { uint64_t src_lane = src.Uint(vform, i); uint64_t dst_lane = dst.Uint(vform, i); uint64_t shifted = src_lane << shift; uint64_t mask = MaxUintFromFormat(vform) << shift; dst.SetUint(vform, i, (dst_lane & ~mask) | shifted); } return dst; } LogicVRegister Simulator::sqshl(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src, int shift) { VIXL_ASSERT(shift >= 0); SimVRegister temp; LogicVRegister shiftreg = dup_immediate(vform, temp, shift); return sshl(vform, dst, src, shiftreg).SignedSaturate(vform); } LogicVRegister Simulator::uqshl(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src, int shift) { VIXL_ASSERT(shift >= 0); SimVRegister temp; LogicVRegister shiftreg = dup_immediate(vform, temp, shift); return ushl(vform, dst, src, shiftreg).UnsignedSaturate(vform); } LogicVRegister Simulator::sqshlu(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src, int shift) { VIXL_ASSERT(shift >= 0); SimVRegister temp; LogicVRegister shiftreg = dup_immediate(vform, temp, shift); return sshl(vform, dst, src, shiftreg).UnsignedSaturate(vform); } LogicVRegister Simulator::sri(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src, int shift) { dst.ClearForWrite(vform); int laneCount = LaneCountFromFormat(vform); VIXL_ASSERT((shift > 0) && (shift <= static_cast(LaneSizeInBitsFromFormat(vform)))); for (int i = 0; i < laneCount; i++) { uint64_t src_lane = src.Uint(vform, i); uint64_t dst_lane = dst.Uint(vform, i); uint64_t shifted; uint64_t mask; if (shift == 64) { shifted = 0; mask = 0; } else { shifted = src_lane >> shift; mask = MaxUintFromFormat(vform) >> shift; } dst.SetUint(vform, i, (dst_lane & ~mask) | shifted); } return dst; } LogicVRegister Simulator::ushr(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src, int shift) { VIXL_ASSERT(shift >= 0); SimVRegister temp; LogicVRegister shiftreg = dup_immediate(vform, temp, -shift); return ushl(vform, dst, src, shiftreg); } LogicVRegister Simulator::sshr(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src, int shift) { VIXL_ASSERT(shift >= 0); SimVRegister temp; LogicVRegister shiftreg = dup_immediate(vform, temp, -shift); return sshl(vform, dst, src, shiftreg); } LogicVRegister Simulator::ssra(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src, int shift) { SimVRegister temp; LogicVRegister shifted_reg = sshr(vform, temp, src, shift); return add(vform, dst, dst, shifted_reg); } LogicVRegister Simulator::usra(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src, int shift) { SimVRegister temp; LogicVRegister shifted_reg = ushr(vform, temp, src, shift); return add(vform, dst, dst, shifted_reg); } LogicVRegister Simulator::srsra(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src, int shift) { SimVRegister temp; LogicVRegister shifted_reg = sshr(vform, temp, src, shift).Round(vform); return add(vform, dst, dst, shifted_reg); } LogicVRegister Simulator::ursra(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src, int shift) { SimVRegister temp; LogicVRegister shifted_reg = ushr(vform, temp, src, shift).Round(vform); return add(vform, dst, dst, shifted_reg); } LogicVRegister Simulator::cls(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src) { uint64_t result[16]; int laneSizeInBits = LaneSizeInBitsFromFormat(vform); int laneCount = LaneCountFromFormat(vform); for (int i = 0; i < laneCount; i++) { result[i] = CountLeadingSignBits(src.Int(vform, i), laneSizeInBits); } dst.ClearForWrite(vform); for (int i = 0; i < laneCount; ++i) { dst.SetUint(vform, i, result[i]); } return dst; } LogicVRegister Simulator::clz(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src) { uint64_t result[16]; int laneSizeInBits = LaneSizeInBitsFromFormat(vform); int laneCount = LaneCountFromFormat(vform); for (int i = 0; i < laneCount; i++) { result[i] = CountLeadingZeros(src.Uint(vform, i), laneSizeInBits); } dst.ClearForWrite(vform); for (int i = 0; i < laneCount; ++i) { dst.SetUint(vform, i, result[i]); } return dst; } LogicVRegister Simulator::cnt(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src) { uint64_t result[16]; int laneSizeInBits = LaneSizeInBitsFromFormat(vform); int laneCount = LaneCountFromFormat(vform); for (int i = 0; i < laneCount; i++) { uint64_t value = src.Uint(vform, i); result[i] = 0; for (int j = 0; j < laneSizeInBits; j++) { result[i] += (value & 1); value >>= 1; } } dst.ClearForWrite(vform); for (int i = 0; i < laneCount; ++i) { dst.SetUint(vform, i, result[i]); } return dst; } LogicVRegister Simulator::sshl(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2) { dst.ClearForWrite(vform); for (int i = 0; i < LaneCountFromFormat(vform); i++) { int8_t shift_val = src2.Int(vform, i); int64_t lj_src_val = src1.IntLeftJustified(vform, i); // Set signed saturation state. if ((shift_val > CountLeadingSignBits(lj_src_val)) && (lj_src_val != 0)) { dst.SetSignedSat(i, lj_src_val >= 0); } // Set unsigned saturation state. if (lj_src_val < 0) { dst.SetUnsignedSat(i, false); } else if ((shift_val > CountLeadingZeros(lj_src_val)) && (lj_src_val != 0)) { dst.SetUnsignedSat(i, true); } int64_t src_val = src1.Int(vform, i); bool src_is_negative = src_val < 0; if (shift_val > 63) { dst.SetInt(vform, i, 0); } else if (shift_val < -63) { dst.SetRounding(i, src_is_negative); dst.SetInt(vform, i, src_is_negative ? -1 : 0); } else { // Use unsigned types for shifts, as behaviour is undefined for signed // lhs. uint64_t usrc_val = static_cast(src_val); if (shift_val < 0) { // Convert to right shift. shift_val = -shift_val; // Set rounding state by testing most-significant bit shifted out. // Rounding only needed on right shifts. if (((usrc_val >> (shift_val - 1)) & 1) == 1) { dst.SetRounding(i, true); } usrc_val >>= shift_val; if (src_is_negative) { // Simulate sign-extension. usrc_val |= (~UINT64_C(0) << (64 - shift_val)); } } else { usrc_val <<= shift_val; } dst.SetUint(vform, i, usrc_val); } } return dst; } LogicVRegister Simulator::ushl(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2) { dst.ClearForWrite(vform); for (int i = 0; i < LaneCountFromFormat(vform); i++) { int8_t shift_val = src2.Int(vform, i); uint64_t lj_src_val = src1.UintLeftJustified(vform, i); // Set saturation state. if ((shift_val > CountLeadingZeros(lj_src_val)) && (lj_src_val != 0)) { dst.SetUnsignedSat(i, true); } uint64_t src_val = src1.Uint(vform, i); if ((shift_val > 63) || (shift_val < -64)) { dst.SetUint(vform, i, 0); } else { if (shift_val < 0) { // Set rounding state. Rounding only needed on right shifts. if (((src_val >> (-shift_val - 1)) & 1) == 1) { dst.SetRounding(i, true); } if (shift_val == -64) { src_val = 0; } else { src_val >>= -shift_val; } } else { src_val <<= shift_val; } dst.SetUint(vform, i, src_val); } } return dst; } LogicVRegister Simulator::neg(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src) { dst.ClearForWrite(vform); for (int i = 0; i < LaneCountFromFormat(vform); i++) { // Test for signed saturation. int64_t sa = src.Int(vform, i); if (sa == MinIntFromFormat(vform)) { dst.SetSignedSat(i, true); } dst.SetInt(vform, i, (sa == INT64_MIN) ? sa : -sa); } return dst; } LogicVRegister Simulator::suqadd(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src) { dst.ClearForWrite(vform); for (int i = 0; i < LaneCountFromFormat(vform); i++) { int64_t sa = dst.IntLeftJustified(vform, i); uint64_t ub = src.UintLeftJustified(vform, i); uint64_t ur = sa + ub; int64_t sr; memcpy(&sr, &ur, sizeof(sr)); if (sr < sa) { // Test for signed positive saturation. dst.SetInt(vform, i, MaxIntFromFormat(vform)); } else { dst.SetUint(vform, i, dst.Int(vform, i) + src.Uint(vform, i)); } } return dst; } LogicVRegister Simulator::usqadd(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src) { dst.ClearForWrite(vform); for (int i = 0; i < LaneCountFromFormat(vform); i++) { uint64_t ua = dst.UintLeftJustified(vform, i); int64_t sb = src.IntLeftJustified(vform, i); uint64_t ur = ua + sb; if ((sb > 0) && (ur <= ua)) { dst.SetUint(vform, i, MaxUintFromFormat(vform)); // Positive saturation. } else if ((sb < 0) && (ur >= ua)) { dst.SetUint(vform, i, 0); // Negative saturation. } else { dst.SetUint(vform, i, dst.Uint(vform, i) + src.Int(vform, i)); } } return dst; } LogicVRegister Simulator::abs(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src) { dst.ClearForWrite(vform); for (int i = 0; i < LaneCountFromFormat(vform); i++) { // Test for signed saturation. int64_t sa = src.Int(vform, i); if (sa == MinIntFromFormat(vform)) { dst.SetSignedSat(i, true); } if (sa < 0) { dst.SetInt(vform, i, (sa == INT64_MIN) ? sa : -sa); } else { dst.SetInt(vform, i, sa); } } return dst; } LogicVRegister Simulator::extractnarrow(VectorFormat dstform, LogicVRegister dst, bool dstIsSigned, const LogicVRegister& src, bool srcIsSigned) { bool upperhalf = false; VectorFormat srcform = kFormatUndefined; int64_t ssrc[8]; uint64_t usrc[8]; switch (dstform) { case kFormat8B: upperhalf = false; srcform = kFormat8H; break; case kFormat16B: upperhalf = true; srcform = kFormat8H; break; case kFormat4H: upperhalf = false; srcform = kFormat4S; break; case kFormat8H: upperhalf = true; srcform = kFormat4S; break; case kFormat2S: upperhalf = false; srcform = kFormat2D; break; case kFormat4S: upperhalf = true; srcform = kFormat2D; break; case kFormatB: upperhalf = false; srcform = kFormatH; break; case kFormatH: upperhalf = false; srcform = kFormatS; break; case kFormatS: upperhalf = false; srcform = kFormatD; break; default: VIXL_UNIMPLEMENTED(); } for (int i = 0; i < LaneCountFromFormat(srcform); i++) { ssrc[i] = src.Int(srcform, i); usrc[i] = src.Uint(srcform, i); } int offset; if (upperhalf) { offset = LaneCountFromFormat(dstform) / 2; } else { offset = 0; dst.ClearForWrite(dstform); } for (int i = 0; i < LaneCountFromFormat(srcform); i++) { // Test for signed saturation if (ssrc[i] > MaxIntFromFormat(dstform)) { dst.SetSignedSat(offset + i, true); } else if (ssrc[i] < MinIntFromFormat(dstform)) { dst.SetSignedSat(offset + i, false); } // Test for unsigned saturation if (srcIsSigned) { if (ssrc[i] > static_cast(MaxUintFromFormat(dstform))) { dst.SetUnsignedSat(offset + i, true); } else if (ssrc[i] < 0) { dst.SetUnsignedSat(offset + i, false); } } else { if (usrc[i] > MaxUintFromFormat(dstform)) { dst.SetUnsignedSat(offset + i, true); } } int64_t result; if (srcIsSigned) { result = ssrc[i] & MaxUintFromFormat(dstform); } else { result = usrc[i] & MaxUintFromFormat(dstform); } if (dstIsSigned) { dst.SetInt(dstform, offset + i, result); } else { dst.SetUint(dstform, offset + i, result); } } return dst; } LogicVRegister Simulator::xtn(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src) { return extractnarrow(vform, dst, true, src, true); } LogicVRegister Simulator::sqxtn(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src) { return extractnarrow(vform, dst, true, src, true).SignedSaturate(vform); } LogicVRegister Simulator::sqxtun(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src) { return extractnarrow(vform, dst, false, src, true).UnsignedSaturate(vform); } LogicVRegister Simulator::uqxtn(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src) { return extractnarrow(vform, dst, false, src, false).UnsignedSaturate(vform); } LogicVRegister Simulator::absdiff(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2, bool issigned) { dst.ClearForWrite(vform); for (int i = 0; i < LaneCountFromFormat(vform); i++) { if (issigned) { int64_t sr = src1.Int(vform, i) - src2.Int(vform, i); sr = sr > 0 ? sr : -sr; dst.SetInt(vform, i, sr); } else { int64_t sr = src1.Uint(vform, i) - src2.Uint(vform, i); sr = sr > 0 ? sr : -sr; dst.SetUint(vform, i, sr); } } return dst; } LogicVRegister Simulator::saba(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2) { SimVRegister temp; dst.ClearForWrite(vform); absdiff(vform, temp, src1, src2, true); add(vform, dst, dst, temp); return dst; } LogicVRegister Simulator::uaba(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2) { SimVRegister temp; dst.ClearForWrite(vform); absdiff(vform, temp, src1, src2, false); add(vform, dst, dst, temp); return dst; } LogicVRegister Simulator::not_(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src) { dst.ClearForWrite(vform); for (int i = 0; i < LaneCountFromFormat(vform); i++) { dst.SetUint(vform, i, ~src.Uint(vform, i)); } return dst; } LogicVRegister Simulator::rbit(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src) { uint64_t result[16]; int laneCount = LaneCountFromFormat(vform); int laneSizeInBits = LaneSizeInBitsFromFormat(vform); uint64_t reversed_value; uint64_t value; for (int i = 0; i < laneCount; i++) { value = src.Uint(vform, i); reversed_value = 0; for (int j = 0; j < laneSizeInBits; j++) { reversed_value = (reversed_value << 1) | (value & 1); value >>= 1; } result[i] = reversed_value; } dst.ClearForWrite(vform); for (int i = 0; i < laneCount; ++i) { dst.SetUint(vform, i, result[i]); } return dst; } LogicVRegister Simulator::rev(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src, int revSize) { uint64_t result[16]; int laneCount = LaneCountFromFormat(vform); int laneSize = LaneSizeInBytesFromFormat(vform); int lanesPerLoop = revSize / laneSize; for (int i = 0; i < laneCount; i += lanesPerLoop) { for (int j = 0; j < lanesPerLoop; j++) { result[i + lanesPerLoop - 1 - j] = src.Uint(vform, i + j); } } dst.ClearForWrite(vform); for (int i = 0; i < laneCount; ++i) { dst.SetUint(vform, i, result[i]); } return dst; } LogicVRegister Simulator::rev16(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src) { return rev(vform, dst, src, 2); } LogicVRegister Simulator::rev32(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src) { return rev(vform, dst, src, 4); } LogicVRegister Simulator::rev64(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src) { return rev(vform, dst, src, 8); } LogicVRegister Simulator::addlp(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src, bool is_signed, bool do_accumulate) { VectorFormat vformsrc = VectorFormatHalfWidthDoubleLanes(vform); VIXL_ASSERT(LaneSizeInBitsFromFormat(vformsrc) <= 32); VIXL_ASSERT(LaneCountFromFormat(vform) <= 8); uint64_t result[8]; int lane_count = LaneCountFromFormat(vform); for (int i = 0; i < lane_count; i++) { if (is_signed) { result[i] = static_cast(src.Int(vformsrc, 2 * i) + src.Int(vformsrc, 2 * i + 1)); } else { result[i] = src.Uint(vformsrc, 2 * i) + src.Uint(vformsrc, 2 * i + 1); } } dst.ClearForWrite(vform); for (int i = 0; i < lane_count; ++i) { if (do_accumulate) { result[i] += dst.Uint(vform, i); } dst.SetUint(vform, i, result[i]); } return dst; } LogicVRegister Simulator::saddlp(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src) { return addlp(vform, dst, src, true, false); } LogicVRegister Simulator::uaddlp(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src) { return addlp(vform, dst, src, false, false); } LogicVRegister Simulator::sadalp(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src) { return addlp(vform, dst, src, true, true); } LogicVRegister Simulator::uadalp(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src) { return addlp(vform, dst, src, false, true); } LogicVRegister Simulator::ext(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2, int index) { uint8_t result[16]; int laneCount = LaneCountFromFormat(vform); for (int i = 0; i < laneCount - index; ++i) { result[i] = src1.Uint(vform, i + index); } for (int i = 0; i < index; ++i) { result[laneCount - index + i] = src2.Uint(vform, i); } dst.ClearForWrite(vform); for (int i = 0; i < laneCount; ++i) { dst.SetUint(vform, i, result[i]); } return dst; } LogicVRegister Simulator::dup_element(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src, int src_index) { int laneCount = LaneCountFromFormat(vform); uint64_t value = src.Uint(vform, src_index); dst.ClearForWrite(vform); for (int i = 0; i < laneCount; ++i) { dst.SetUint(vform, i, value); } return dst; } LogicVRegister Simulator::dup_immediate(VectorFormat vform, LogicVRegister dst, uint64_t imm) { int laneCount = LaneCountFromFormat(vform); uint64_t value = imm & MaxUintFromFormat(vform); dst.ClearForWrite(vform); for (int i = 0; i < laneCount; ++i) { dst.SetUint(vform, i, value); } return dst; } LogicVRegister Simulator::ins_element(VectorFormat vform, LogicVRegister dst, int dst_index, const LogicVRegister& src, int src_index) { dst.SetUint(vform, dst_index, src.Uint(vform, src_index)); return dst; } LogicVRegister Simulator::ins_immediate(VectorFormat vform, LogicVRegister dst, int dst_index, uint64_t imm) { uint64_t value = imm & MaxUintFromFormat(vform); dst.SetUint(vform, dst_index, value); return dst; } LogicVRegister Simulator::movi(VectorFormat vform, LogicVRegister dst, uint64_t imm) { int laneCount = LaneCountFromFormat(vform); dst.ClearForWrite(vform); for (int i = 0; i < laneCount; ++i) { dst.SetUint(vform, i, imm); } return dst; } LogicVRegister Simulator::mvni(VectorFormat vform, LogicVRegister dst, uint64_t imm) { int laneCount = LaneCountFromFormat(vform); dst.ClearForWrite(vform); for (int i = 0; i < laneCount; ++i) { dst.SetUint(vform, i, ~imm); } return dst; } LogicVRegister Simulator::orr(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src, uint64_t imm) { uint64_t result[16]; int laneCount = LaneCountFromFormat(vform); for (int i = 0; i < laneCount; ++i) { result[i] = src.Uint(vform, i) | imm; } dst.ClearForWrite(vform); for (int i = 0; i < laneCount; ++i) { dst.SetUint(vform, i, result[i]); } return dst; } LogicVRegister Simulator::uxtl(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src) { VectorFormat vform_half = VectorFormatHalfWidth(vform); dst.ClearForWrite(vform); for (int i = 0; i < LaneCountFromFormat(vform); i++) { dst.SetUint(vform, i, src.Uint(vform_half, i)); } return dst; } LogicVRegister Simulator::sxtl(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src) { VectorFormat vform_half = VectorFormatHalfWidth(vform); dst.ClearForWrite(vform); for (int i = 0; i < LaneCountFromFormat(vform); i++) { dst.SetInt(vform, i, src.Int(vform_half, i)); } return dst; } LogicVRegister Simulator::uxtl2(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src) { VectorFormat vform_half = VectorFormatHalfWidth(vform); int lane_count = LaneCountFromFormat(vform); dst.ClearForWrite(vform); for (int i = 0; i < lane_count; i++) { dst.SetUint(vform, i, src.Uint(vform_half, lane_count + i)); } return dst; } LogicVRegister Simulator::sxtl2(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src) { VectorFormat vform_half = VectorFormatHalfWidth(vform); int lane_count = LaneCountFromFormat(vform); dst.ClearForWrite(vform); for (int i = 0; i < lane_count; i++) { dst.SetInt(vform, i, src.Int(vform_half, lane_count + i)); } return dst; } LogicVRegister Simulator::shrn(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src, int shift) { SimVRegister temp; VectorFormat vform_src = VectorFormatDoubleWidth(vform); VectorFormat vform_dst = vform; LogicVRegister shifted_src = ushr(vform_src, temp, src, shift); return extractnarrow(vform_dst, dst, false, shifted_src, false); } LogicVRegister Simulator::shrn2(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src, int shift) { SimVRegister temp; VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)); VectorFormat vformdst = vform; LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift); return extractnarrow(vformdst, dst, false, shifted_src, false); } LogicVRegister Simulator::rshrn(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src, int shift) { SimVRegister temp; VectorFormat vformsrc = VectorFormatDoubleWidth(vform); VectorFormat vformdst = vform; LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift).Round(vformsrc); return extractnarrow(vformdst, dst, false, shifted_src, false); } LogicVRegister Simulator::rshrn2(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src, int shift) { SimVRegister temp; VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)); VectorFormat vformdst = vform; LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift).Round(vformsrc); return extractnarrow(vformdst, dst, false, shifted_src, false); } LogicVRegister Simulator::Table(VectorFormat vform, LogicVRegister dst, const LogicVRegister& ind, bool zero_out_of_bounds, const LogicVRegister* tab1, const LogicVRegister* tab2, const LogicVRegister* tab3, const LogicVRegister* tab4) { VIXL_ASSERT(tab1 != NULL); const LogicVRegister* tab[4] = {tab1, tab2, tab3, tab4}; uint64_t result[kMaxLanesPerVector]; for (int i = 0; i < LaneCountFromFormat(vform); i++) { result[i] = zero_out_of_bounds ? 0 : dst.Uint(kFormat16B, i); } for (int i = 0; i < LaneCountFromFormat(vform); i++) { uint64_t j = ind.Uint(vform, i); int tab_idx = static_cast(j >> 4); int j_idx = static_cast(j & 15); if ((tab_idx < 4) && (tab[tab_idx] != NULL)) { result[i] = tab[tab_idx]->Uint(kFormat16B, j_idx); } } dst.SetUintArray(vform, result); return dst; } LogicVRegister Simulator::tbl(VectorFormat vform, LogicVRegister dst, const LogicVRegister& tab, const LogicVRegister& ind) { return Table(vform, dst, ind, true, &tab); } LogicVRegister Simulator::tbl(VectorFormat vform, LogicVRegister dst, const LogicVRegister& tab, const LogicVRegister& tab2, const LogicVRegister& ind) { return Table(vform, dst, ind, true, &tab, &tab2); } LogicVRegister Simulator::tbl(VectorFormat vform, LogicVRegister dst, const LogicVRegister& tab, const LogicVRegister& tab2, const LogicVRegister& tab3, const LogicVRegister& ind) { return Table(vform, dst, ind, true, &tab, &tab2, &tab3); } LogicVRegister Simulator::tbl(VectorFormat vform, LogicVRegister dst, const LogicVRegister& tab, const LogicVRegister& tab2, const LogicVRegister& tab3, const LogicVRegister& tab4, const LogicVRegister& ind) { return Table(vform, dst, ind, true, &tab, &tab2, &tab3, &tab4); } LogicVRegister Simulator::tbx(VectorFormat vform, LogicVRegister dst, const LogicVRegister& tab, const LogicVRegister& ind) { return Table(vform, dst, ind, false, &tab); } LogicVRegister Simulator::tbx(VectorFormat vform, LogicVRegister dst, const LogicVRegister& tab, const LogicVRegister& tab2, const LogicVRegister& ind) { return Table(vform, dst, ind, false, &tab, &tab2); } LogicVRegister Simulator::tbx(VectorFormat vform, LogicVRegister dst, const LogicVRegister& tab, const LogicVRegister& tab2, const LogicVRegister& tab3, const LogicVRegister& ind) { return Table(vform, dst, ind, false, &tab, &tab2, &tab3); } LogicVRegister Simulator::tbx(VectorFormat vform, LogicVRegister dst, const LogicVRegister& tab, const LogicVRegister& tab2, const LogicVRegister& tab3, const LogicVRegister& tab4, const LogicVRegister& ind) { return Table(vform, dst, ind, false, &tab, &tab2, &tab3, &tab4); } LogicVRegister Simulator::uqshrn(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src, int shift) { return shrn(vform, dst, src, shift).UnsignedSaturate(vform); } LogicVRegister Simulator::uqshrn2(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src, int shift) { return shrn2(vform, dst, src, shift).UnsignedSaturate(vform); } LogicVRegister Simulator::uqrshrn(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src, int shift) { return rshrn(vform, dst, src, shift).UnsignedSaturate(vform); } LogicVRegister Simulator::uqrshrn2(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src, int shift) { return rshrn2(vform, dst, src, shift).UnsignedSaturate(vform); } LogicVRegister Simulator::sqshrn(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src, int shift) { SimVRegister temp; VectorFormat vformsrc = VectorFormatDoubleWidth(vform); VectorFormat vformdst = vform; LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift); return sqxtn(vformdst, dst, shifted_src); } LogicVRegister Simulator::sqshrn2(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src, int shift) { SimVRegister temp; VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)); VectorFormat vformdst = vform; LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift); return sqxtn(vformdst, dst, shifted_src); } LogicVRegister Simulator::sqrshrn(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src, int shift) { SimVRegister temp; VectorFormat vformsrc = VectorFormatDoubleWidth(vform); VectorFormat vformdst = vform; LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc); return sqxtn(vformdst, dst, shifted_src); } LogicVRegister Simulator::sqrshrn2(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src, int shift) { SimVRegister temp; VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)); VectorFormat vformdst = vform; LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc); return sqxtn(vformdst, dst, shifted_src); } LogicVRegister Simulator::sqshrun(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src, int shift) { SimVRegister temp; VectorFormat vformsrc = VectorFormatDoubleWidth(vform); VectorFormat vformdst = vform; LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift); return sqxtun(vformdst, dst, shifted_src); } LogicVRegister Simulator::sqshrun2(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src, int shift) { SimVRegister temp; VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)); VectorFormat vformdst = vform; LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift); return sqxtun(vformdst, dst, shifted_src); } LogicVRegister Simulator::sqrshrun(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src, int shift) { SimVRegister temp; VectorFormat vformsrc = VectorFormatDoubleWidth(vform); VectorFormat vformdst = vform; LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc); return sqxtun(vformdst, dst, shifted_src); } LogicVRegister Simulator::sqrshrun2(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src, int shift) { SimVRegister temp; VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)); VectorFormat vformdst = vform; LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc); return sqxtun(vformdst, dst, shifted_src); } LogicVRegister Simulator::uaddl(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2) { SimVRegister temp1, temp2; uxtl(vform, temp1, src1); uxtl(vform, temp2, src2); add(vform, dst, temp1, temp2); return dst; } LogicVRegister Simulator::uaddl2(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2) { SimVRegister temp1, temp2; uxtl2(vform, temp1, src1); uxtl2(vform, temp2, src2); add(vform, dst, temp1, temp2); return dst; } LogicVRegister Simulator::uaddw(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2) { SimVRegister temp; uxtl(vform, temp, src2); add(vform, dst, src1, temp); return dst; } LogicVRegister Simulator::uaddw2(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2) { SimVRegister temp; uxtl2(vform, temp, src2); add(vform, dst, src1, temp); return dst; } LogicVRegister Simulator::saddl(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2) { SimVRegister temp1, temp2; sxtl(vform, temp1, src1); sxtl(vform, temp2, src2); add(vform, dst, temp1, temp2); return dst; } LogicVRegister Simulator::saddl2(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2) { SimVRegister temp1, temp2; sxtl2(vform, temp1, src1); sxtl2(vform, temp2, src2); add(vform, dst, temp1, temp2); return dst; } LogicVRegister Simulator::saddw(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2) { SimVRegister temp; sxtl(vform, temp, src2); add(vform, dst, src1, temp); return dst; } LogicVRegister Simulator::saddw2(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2) { SimVRegister temp; sxtl2(vform, temp, src2); add(vform, dst, src1, temp); return dst; } LogicVRegister Simulator::usubl(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2) { SimVRegister temp1, temp2; uxtl(vform, temp1, src1); uxtl(vform, temp2, src2); sub(vform, dst, temp1, temp2); return dst; } LogicVRegister Simulator::usubl2(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2) { SimVRegister temp1, temp2; uxtl2(vform, temp1, src1); uxtl2(vform, temp2, src2); sub(vform, dst, temp1, temp2); return dst; } LogicVRegister Simulator::usubw(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2) { SimVRegister temp; uxtl(vform, temp, src2); sub(vform, dst, src1, temp); return dst; } LogicVRegister Simulator::usubw2(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2) { SimVRegister temp; uxtl2(vform, temp, src2); sub(vform, dst, src1, temp); return dst; } LogicVRegister Simulator::ssubl(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2) { SimVRegister temp1, temp2; sxtl(vform, temp1, src1); sxtl(vform, temp2, src2); sub(vform, dst, temp1, temp2); return dst; } LogicVRegister Simulator::ssubl2(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2) { SimVRegister temp1, temp2; sxtl2(vform, temp1, src1); sxtl2(vform, temp2, src2); sub(vform, dst, temp1, temp2); return dst; } LogicVRegister Simulator::ssubw(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2) { SimVRegister temp; sxtl(vform, temp, src2); sub(vform, dst, src1, temp); return dst; } LogicVRegister Simulator::ssubw2(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2) { SimVRegister temp; sxtl2(vform, temp, src2); sub(vform, dst, src1, temp); return dst; } LogicVRegister Simulator::uabal(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2) { SimVRegister temp1, temp2; uxtl(vform, temp1, src1); uxtl(vform, temp2, src2); uaba(vform, dst, temp1, temp2); return dst; } LogicVRegister Simulator::uabal2(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2) { SimVRegister temp1, temp2; uxtl2(vform, temp1, src1); uxtl2(vform, temp2, src2); uaba(vform, dst, temp1, temp2); return dst; } LogicVRegister Simulator::sabal(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2) { SimVRegister temp1, temp2; sxtl(vform, temp1, src1); sxtl(vform, temp2, src2); saba(vform, dst, temp1, temp2); return dst; } LogicVRegister Simulator::sabal2(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2) { SimVRegister temp1, temp2; sxtl2(vform, temp1, src1); sxtl2(vform, temp2, src2); saba(vform, dst, temp1, temp2); return dst; } LogicVRegister Simulator::uabdl(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2) { SimVRegister temp1, temp2; uxtl(vform, temp1, src1); uxtl(vform, temp2, src2); absdiff(vform, dst, temp1, temp2, false); return dst; } LogicVRegister Simulator::uabdl2(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2) { SimVRegister temp1, temp2; uxtl2(vform, temp1, src1); uxtl2(vform, temp2, src2); absdiff(vform, dst, temp1, temp2, false); return dst; } LogicVRegister Simulator::sabdl(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2) { SimVRegister temp1, temp2; sxtl(vform, temp1, src1); sxtl(vform, temp2, src2); absdiff(vform, dst, temp1, temp2, true); return dst; } LogicVRegister Simulator::sabdl2(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2) { SimVRegister temp1, temp2; sxtl2(vform, temp1, src1); sxtl2(vform, temp2, src2); absdiff(vform, dst, temp1, temp2, true); return dst; } LogicVRegister Simulator::umull(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2) { SimVRegister temp1, temp2; uxtl(vform, temp1, src1); uxtl(vform, temp2, src2); mul(vform, dst, temp1, temp2); return dst; } LogicVRegister Simulator::umull2(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2) { SimVRegister temp1, temp2; uxtl2(vform, temp1, src1); uxtl2(vform, temp2, src2); mul(vform, dst, temp1, temp2); return dst; } LogicVRegister Simulator::smull(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2) { SimVRegister temp1, temp2; sxtl(vform, temp1, src1); sxtl(vform, temp2, src2); mul(vform, dst, temp1, temp2); return dst; } LogicVRegister Simulator::smull2(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2) { SimVRegister temp1, temp2; sxtl2(vform, temp1, src1); sxtl2(vform, temp2, src2); mul(vform, dst, temp1, temp2); return dst; } LogicVRegister Simulator::umlsl(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2) { SimVRegister temp1, temp2; uxtl(vform, temp1, src1); uxtl(vform, temp2, src2); mls(vform, dst, temp1, temp2); return dst; } LogicVRegister Simulator::umlsl2(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2) { SimVRegister temp1, temp2; uxtl2(vform, temp1, src1); uxtl2(vform, temp2, src2); mls(vform, dst, temp1, temp2); return dst; } LogicVRegister Simulator::smlsl(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2) { SimVRegister temp1, temp2; sxtl(vform, temp1, src1); sxtl(vform, temp2, src2); mls(vform, dst, temp1, temp2); return dst; } LogicVRegister Simulator::smlsl2(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2) { SimVRegister temp1, temp2; sxtl2(vform, temp1, src1); sxtl2(vform, temp2, src2); mls(vform, dst, temp1, temp2); return dst; } LogicVRegister Simulator::umlal(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2) { SimVRegister temp1, temp2; uxtl(vform, temp1, src1); uxtl(vform, temp2, src2); mla(vform, dst, temp1, temp2); return dst; } LogicVRegister Simulator::umlal2(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2) { SimVRegister temp1, temp2; uxtl2(vform, temp1, src1); uxtl2(vform, temp2, src2); mla(vform, dst, temp1, temp2); return dst; } LogicVRegister Simulator::smlal(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2) { SimVRegister temp1, temp2; sxtl(vform, temp1, src1); sxtl(vform, temp2, src2); mla(vform, dst, temp1, temp2); return dst; } LogicVRegister Simulator::smlal2(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2) { SimVRegister temp1, temp2; sxtl2(vform, temp1, src1); sxtl2(vform, temp2, src2); mla(vform, dst, temp1, temp2); return dst; } LogicVRegister Simulator::sqdmlal(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2) { SimVRegister temp; LogicVRegister product = sqdmull(vform, temp, src1, src2); return add(vform, dst, dst, product).SignedSaturate(vform); } LogicVRegister Simulator::sqdmlal2(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2) { SimVRegister temp; LogicVRegister product = sqdmull2(vform, temp, src1, src2); return add(vform, dst, dst, product).SignedSaturate(vform); } LogicVRegister Simulator::sqdmlsl(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2) { SimVRegister temp; LogicVRegister product = sqdmull(vform, temp, src1, src2); return sub(vform, dst, dst, product).SignedSaturate(vform); } LogicVRegister Simulator::sqdmlsl2(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2) { SimVRegister temp; LogicVRegister product = sqdmull2(vform, temp, src1, src2); return sub(vform, dst, dst, product).SignedSaturate(vform); } LogicVRegister Simulator::sqdmull(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2) { SimVRegister temp; LogicVRegister product = smull(vform, temp, src1, src2); return add(vform, dst, product, product).SignedSaturate(vform); } LogicVRegister Simulator::sqdmull2(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2) { SimVRegister temp; LogicVRegister product = smull2(vform, temp, src1, src2); return add(vform, dst, product, product).SignedSaturate(vform); } LogicVRegister Simulator::sqrdmulh(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2, bool round) { // 2 * INT_32_MIN * INT_32_MIN causes int64_t to overflow. // To avoid this, we use (src1 * src2 + 1 << (esize - 2)) >> (esize - 1) // which is same as (2 * src1 * src2 + 1 << (esize - 1)) >> esize. int esize = LaneSizeInBitsFromFormat(vform); int round_const = round ? (1 << (esize - 2)) : 0; int64_t product; dst.ClearForWrite(vform); for (int i = 0; i < LaneCountFromFormat(vform); i++) { product = src1.Int(vform, i) * src2.Int(vform, i); product += round_const; product = product >> (esize - 1); if (product > MaxIntFromFormat(vform)) { product = MaxIntFromFormat(vform); } else if (product < MinIntFromFormat(vform)) { product = MinIntFromFormat(vform); } dst.SetInt(vform, i, product); } return dst; } LogicVRegister Simulator::sqdmulh(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2) { return sqrdmulh(vform, dst, src1, src2, false); } LogicVRegister Simulator::addhn(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2) { SimVRegister temp; add(VectorFormatDoubleWidth(vform), temp, src1, src2); shrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform)); return dst; } LogicVRegister Simulator::addhn2(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2) { SimVRegister temp; add(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2); shrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform)); return dst; } LogicVRegister Simulator::raddhn(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2) { SimVRegister temp; add(VectorFormatDoubleWidth(vform), temp, src1, src2); rshrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform)); return dst; } LogicVRegister Simulator::raddhn2(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2) { SimVRegister temp; add(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2); rshrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform)); return dst; } LogicVRegister Simulator::subhn(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2) { SimVRegister temp; sub(VectorFormatDoubleWidth(vform), temp, src1, src2); shrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform)); return dst; } LogicVRegister Simulator::subhn2(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2) { SimVRegister temp; sub(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2); shrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform)); return dst; } LogicVRegister Simulator::rsubhn(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2) { SimVRegister temp; sub(VectorFormatDoubleWidth(vform), temp, src1, src2); rshrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform)); return dst; } LogicVRegister Simulator::rsubhn2(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2) { SimVRegister temp; sub(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2); rshrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform)); return dst; } LogicVRegister Simulator::trn1(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2) { uint64_t result[16]; int laneCount = LaneCountFromFormat(vform); int pairs = laneCount / 2; for (int i = 0; i < pairs; ++i) { result[2 * i] = src1.Uint(vform, 2 * i); result[(2 * i) + 1] = src2.Uint(vform, 2 * i); } dst.ClearForWrite(vform); for (int i = 0; i < laneCount; ++i) { dst.SetUint(vform, i, result[i]); } return dst; } LogicVRegister Simulator::trn2(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2) { uint64_t result[16]; int laneCount = LaneCountFromFormat(vform); int pairs = laneCount / 2; for (int i = 0; i < pairs; ++i) { result[2 * i] = src1.Uint(vform, (2 * i) + 1); result[(2 * i) + 1] = src2.Uint(vform, (2 * i) + 1); } dst.ClearForWrite(vform); for (int i = 0; i < laneCount; ++i) { dst.SetUint(vform, i, result[i]); } return dst; } LogicVRegister Simulator::zip1(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2) { uint64_t result[16]; int laneCount = LaneCountFromFormat(vform); int pairs = laneCount / 2; for (int i = 0; i < pairs; ++i) { result[2 * i] = src1.Uint(vform, i); result[(2 * i) + 1] = src2.Uint(vform, i); } dst.ClearForWrite(vform); for (int i = 0; i < laneCount; ++i) { dst.SetUint(vform, i, result[i]); } return dst; } LogicVRegister Simulator::zip2(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2) { uint64_t result[16]; int laneCount = LaneCountFromFormat(vform); int pairs = laneCount / 2; for (int i = 0; i < pairs; ++i) { result[2 * i] = src1.Uint(vform, pairs + i); result[(2 * i) + 1] = src2.Uint(vform, pairs + i); } dst.ClearForWrite(vform); for (int i = 0; i < laneCount; ++i) { dst.SetUint(vform, i, result[i]); } return dst; } LogicVRegister Simulator::uzp1(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2) { uint64_t result[32]; int laneCount = LaneCountFromFormat(vform); for (int i = 0; i < laneCount; ++i) { result[i] = src1.Uint(vform, i); result[laneCount + i] = src2.Uint(vform, i); } dst.ClearForWrite(vform); for (int i = 0; i < laneCount; ++i) { dst.SetUint(vform, i, result[2 * i]); } return dst; } LogicVRegister Simulator::uzp2(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2) { uint64_t result[32]; int laneCount = LaneCountFromFormat(vform); for (int i = 0; i < laneCount; ++i) { result[i] = src1.Uint(vform, i); result[laneCount + i] = src2.Uint(vform, i); } dst.ClearForWrite(vform); for (int i = 0; i < laneCount; ++i) { dst.SetUint(vform, i, result[(2 * i) + 1]); } return dst; } template T Simulator::FPAdd(T op1, T op2) { T result = FPProcessNaNs(op1, op2); if (std::isnan(result)) return result; if (std::isinf(op1) && std::isinf(op2) && (op1 != op2)) { // inf + -inf returns the default NaN. FPProcessException(); return FPDefaultNaN(); } else { // Other cases should be handled by standard arithmetic. return op1 + op2; } } template T Simulator::FPSub(T op1, T op2) { // NaNs should be handled elsewhere. VIXL_ASSERT(!std::isnan(op1) && !std::isnan(op2)); if (std::isinf(op1) && std::isinf(op2) && (op1 == op2)) { // inf - inf returns the default NaN. FPProcessException(); return FPDefaultNaN(); } else { // Other cases should be handled by standard arithmetic. return op1 - op2; } } template T Simulator::FPMul(T op1, T op2) { // NaNs should be handled elsewhere. VIXL_ASSERT(!std::isnan(op1) && !std::isnan(op2)); if ((std::isinf(op1) && (op2 == 0.0)) || (std::isinf(op2) && (op1 == 0.0))) { // inf * 0.0 returns the default NaN. FPProcessException(); return FPDefaultNaN(); } else { // Other cases should be handled by standard arithmetic. return op1 * op2; } } template T Simulator::FPMulx(T op1, T op2) { if ((std::isinf(op1) && (op2 == 0.0)) || (std::isinf(op2) && (op1 == 0.0))) { // inf * 0.0 returns +/-2.0. T two = 2.0; return copysign(1.0, op1) * copysign(1.0, op2) * two; } return FPMul(op1, op2); } template T Simulator::FPMulAdd(T a, T op1, T op2) { T result = FPProcessNaNs3(a, op1, op2); T sign_a = copysign(1.0, a); T sign_prod = copysign(1.0, op1) * copysign(1.0, op2); bool isinf_prod = std::isinf(op1) || std::isinf(op2); bool operation_generates_nan = (std::isinf(op1) && (op2 == 0.0)) || // inf * 0.0 (std::isinf(op2) && (op1 == 0.0)) || // 0.0 * inf (std::isinf(a) && isinf_prod && (sign_a != sign_prod)); // inf - inf if (std::isnan(result)) { // Generated NaNs override quiet NaNs propagated from a. if (operation_generates_nan && IsQuietNaN(a)) { FPProcessException(); return FPDefaultNaN(); } else { return result; } } // If the operation would produce a NaN, return the default NaN. if (operation_generates_nan) { FPProcessException(); return FPDefaultNaN(); } // Work around broken fma implementations for exact zero results: The sign of // exact 0.0 results is positive unless both a and op1 * op2 are negative. if (((op1 == 0.0) || (op2 == 0.0)) && (a == 0.0)) { return ((sign_a < 0) && (sign_prod < 0)) ? -0.0 : 0.0; } result = FusedMultiplyAdd(op1, op2, a); VIXL_ASSERT(!std::isnan(result)); // Work around broken fma implementations for rounded zero results: If a is // 0.0, the sign of the result is the sign of op1 * op2 before rounding. if ((a == 0.0) && (result == 0.0)) { return copysign(0.0, sign_prod); } return result; } template T Simulator::FPDiv(T op1, T op2) { // NaNs should be handled elsewhere. VIXL_ASSERT(!std::isnan(op1) && !std::isnan(op2)); if ((std::isinf(op1) && std::isinf(op2)) || ((op1 == 0.0) && (op2 == 0.0))) { // inf / inf and 0.0 / 0.0 return the default NaN. FPProcessException(); return FPDefaultNaN(); } else { if (op2 == 0.0) { FPProcessException(); if (!std::isnan(op1)) { double op1_sign = copysign(1.0, op1); double op2_sign = copysign(1.0, op2); return static_cast(op1_sign * op2_sign * kFP64PositiveInfinity); } } // Other cases should be handled by standard arithmetic. return op1 / op2; } } template T Simulator::FPSqrt(T op) { if (std::isnan(op)) { return FPProcessNaN(op); } else if (op < 0.0) { FPProcessException(); return FPDefaultNaN(); } else { return sqrt(op); } } template T Simulator::FPMax(T a, T b) { T result = FPProcessNaNs(a, b); if (std::isnan(result)) return result; if ((a == 0.0) && (b == 0.0) && (copysign(1.0, a) != copysign(1.0, b))) { // a and b are zero, and the sign differs: return +0.0. return 0.0; } else { return (a > b) ? a : b; } } template T Simulator::FPMaxNM(T a, T b) { if (IsQuietNaN(a) && !IsQuietNaN(b)) { a = kFP64NegativeInfinity; } else if (!IsQuietNaN(a) && IsQuietNaN(b)) { b = kFP64NegativeInfinity; } T result = FPProcessNaNs(a, b); return std::isnan(result) ? result : FPMax(a, b); } template T Simulator::FPMin(T a, T b) { T result = FPProcessNaNs(a, b); if (std::isnan(result)) return result; if ((a == 0.0) && (b == 0.0) && (copysign(1.0, a) != copysign(1.0, b))) { // a and b are zero, and the sign differs: return -0.0. return -0.0; } else { return (a < b) ? a : b; } } template T Simulator::FPMinNM(T a, T b) { if (IsQuietNaN(a) && !IsQuietNaN(b)) { a = kFP64PositiveInfinity; } else if (!IsQuietNaN(a) && IsQuietNaN(b)) { b = kFP64PositiveInfinity; } T result = FPProcessNaNs(a, b); return std::isnan(result) ? result : FPMin(a, b); } template T Simulator::FPRecipStepFused(T op1, T op2) { const T two = 2.0; if ((std::isinf(op1) && (op2 == 0.0)) || ((op1 == 0.0) && (std::isinf(op2)))) { return two; } else if (std::isinf(op1) || std::isinf(op2)) { // Return +inf if signs match, otherwise -inf. return ((op1 >= 0.0) == (op2 >= 0.0)) ? kFP64PositiveInfinity : kFP64NegativeInfinity; } else { return FusedMultiplyAdd(op1, op2, two); } } template T Simulator::FPRSqrtStepFused(T op1, T op2) { const T one_point_five = 1.5; const T two = 2.0; if ((std::isinf(op1) && (op2 == 0.0)) || ((op1 == 0.0) && (std::isinf(op2)))) { return one_point_five; } else if (std::isinf(op1) || std::isinf(op2)) { // Return +inf if signs match, otherwise -inf. return ((op1 >= 0.0) == (op2 >= 0.0)) ? kFP64PositiveInfinity : kFP64NegativeInfinity; } else { // The multiply-add-halve operation must be fully fused, so avoid interim // rounding by checking which operand can be losslessly divided by two // before doing the multiply-add. if (std::isnormal(op1 / two)) { return FusedMultiplyAdd(op1 / two, op2, one_point_five); } else if (std::isnormal(op2 / two)) { return FusedMultiplyAdd(op1, op2 / two, one_point_five); } else { // Neither operand is normal after halving: the result is dominated by // the addition term, so just return that. return one_point_five; } } } double Simulator::FPRoundInt(double value, FPRounding round_mode) { if ((value == 0.0) || (value == kFP64PositiveInfinity) || (value == kFP64NegativeInfinity)) { return value; } else if (std::isnan(value)) { return FPProcessNaN(value); } double int_result = std::floor(value); double error = value - int_result; switch (round_mode) { case FPTieAway: { // Take care of correctly handling the range ]-0.5, -0.0], which must // yield -0.0. if ((-0.5 < value) && (value < 0.0)) { int_result = -0.0; } else if ((error > 0.5) || ((error == 0.5) && (int_result >= 0.0))) { // If the error is greater than 0.5, or is equal to 0.5 and the integer // result is positive, round up. int_result++; } break; } case FPTieEven: { // Take care of correctly handling the range [-0.5, -0.0], which must // yield -0.0. if ((-0.5 <= value) && (value < 0.0)) { int_result = -0.0; // If the error is greater than 0.5, or is equal to 0.5 and the integer // result is odd, round up. } else if ((error > 0.5) || ((error == 0.5) && (std::fmod(int_result, 2) != 0))) { int_result++; } break; } case FPZero: { // If value>0 then we take floor(value) // otherwise, ceil(value). if (value < 0) { int_result = ceil(value); } break; } case FPNegativeInfinity: { // We always use floor(value). break; } case FPPositiveInfinity: { // Take care of correctly handling the range ]-1.0, -0.0], which must // yield -0.0. if ((-1.0 < value) && (value < 0.0)) { int_result = -0.0; // If the error is non-zero, round up. } else if (error > 0.0) { int_result++; } break; } default: VIXL_UNIMPLEMENTED(); } return int_result; } int32_t Simulator::FPToInt32(double value, FPRounding rmode) { value = FPRoundInt(value, rmode); if (value >= kWMaxInt) { return kWMaxInt; } else if (value < kWMinInt) { return kWMinInt; } return std::isnan(value) ? 0 : static_cast(value); } int64_t Simulator::FPToInt64(double value, FPRounding rmode) { value = FPRoundInt(value, rmode); if (value >= kXMaxInt) { return kXMaxInt; } else if (value < kXMinInt) { return kXMinInt; } return std::isnan(value) ? 0 : static_cast(value); } uint32_t Simulator::FPToUInt32(double value, FPRounding rmode) { value = FPRoundInt(value, rmode); if (value >= kWMaxUInt) { return kWMaxUInt; } else if (value < 0.0) { return 0; } return std::isnan(value) ? 0 : static_cast(value); } uint64_t Simulator::FPToUInt64(double value, FPRounding rmode) { value = FPRoundInt(value, rmode); if (value >= kXMaxUInt) { return kXMaxUInt; } else if (value < 0.0) { return 0; } return std::isnan(value) ? 0 : static_cast(value); } #define DEFINE_NEON_FP_VECTOR_OP(FN, OP, PROCNAN) \ template \ LogicVRegister Simulator::FN(VectorFormat vform, \ LogicVRegister dst, \ const LogicVRegister& src1, \ const LogicVRegister& src2) { \ dst.ClearForWrite(vform); \ for (int i = 0; i < LaneCountFromFormat(vform); i++) { \ T op1 = src1.Float(i); \ T op2 = src2.Float(i); \ T result; \ if (PROCNAN) { \ result = FPProcessNaNs(op1, op2); \ if (!std::isnan(result)) { \ result = OP(op1, op2); \ } \ } else { \ result = OP(op1, op2); \ } \ dst.SetFloat(i, result); \ } \ return dst; \ } \ \ LogicVRegister Simulator::FN(VectorFormat vform, \ LogicVRegister dst, \ const LogicVRegister& src1, \ const LogicVRegister& src2) { \ if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { \ FN(vform, dst, src1, src2); \ } else { \ VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); \ FN(vform, dst, src1, src2); \ } \ return dst; \ } NEON_FP3SAME_LIST(DEFINE_NEON_FP_VECTOR_OP) #undef DEFINE_NEON_FP_VECTOR_OP LogicVRegister Simulator::fnmul(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2) { SimVRegister temp; LogicVRegister product = fmul(vform, temp, src1, src2); return fneg(vform, dst, product); } template LogicVRegister Simulator::frecps(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2) { dst.ClearForWrite(vform); for (int i = 0; i < LaneCountFromFormat(vform); i++) { T op1 = -src1.Float(i); T op2 = src2.Float(i); T result = FPProcessNaNs(op1, op2); dst.SetFloat(i, std::isnan(result) ? result : FPRecipStepFused(op1, op2)); } return dst; } LogicVRegister Simulator::frecps(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2) { if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { frecps(vform, dst, src1, src2); } else { VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); frecps(vform, dst, src1, src2); } return dst; } template LogicVRegister Simulator::frsqrts(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2) { dst.ClearForWrite(vform); for (int i = 0; i < LaneCountFromFormat(vform); i++) { T op1 = -src1.Float(i); T op2 = src2.Float(i); T result = FPProcessNaNs(op1, op2); dst.SetFloat(i, std::isnan(result) ? result : FPRSqrtStepFused(op1, op2)); } return dst; } LogicVRegister Simulator::frsqrts(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2) { if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { frsqrts(vform, dst, src1, src2); } else { VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); frsqrts(vform, dst, src1, src2); } return dst; } template LogicVRegister Simulator::fcmp(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2, Condition cond) { dst.ClearForWrite(vform); for (int i = 0; i < LaneCountFromFormat(vform); i++) { bool result = false; T op1 = src1.Float(i); T op2 = src2.Float(i); T nan_result = FPProcessNaNs(op1, op2); if (!std::isnan(nan_result)) { switch (cond) { case eq: result = (op1 == op2); break; case ge: result = (op1 >= op2); break; case gt: result = (op1 > op2); break; case le: result = (op1 <= op2); break; case lt: result = (op1 < op2); break; default: VIXL_UNREACHABLE(); break; } } dst.SetUint(vform, i, result ? MaxUintFromFormat(vform) : 0); } return dst; } LogicVRegister Simulator::fcmp(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2, Condition cond) { if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { fcmp(vform, dst, src1, src2, cond); } else { VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); fcmp(vform, dst, src1, src2, cond); } return dst; } LogicVRegister Simulator::fcmp_zero(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src, Condition cond) { SimVRegister temp; if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { LogicVRegister zero_reg = dup_immediate(vform, temp, FloatToRawbits(0.0)); fcmp(vform, dst, src, zero_reg, cond); } else { VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); LogicVRegister zero_reg = dup_immediate(vform, temp, DoubleToRawbits(0.0)); fcmp(vform, dst, src, zero_reg, cond); } return dst; } LogicVRegister Simulator::fabscmp(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2, Condition cond) { SimVRegister temp1, temp2; if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { LogicVRegister abs_src1 = fabs_(vform, temp1, src1); LogicVRegister abs_src2 = fabs_(vform, temp2, src2); fcmp(vform, dst, abs_src1, abs_src2, cond); } else { VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); LogicVRegister abs_src1 = fabs_(vform, temp1, src1); LogicVRegister abs_src2 = fabs_(vform, temp2, src2); fcmp(vform, dst, abs_src1, abs_src2, cond); } return dst; } template LogicVRegister Simulator::fmla(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2) { dst.ClearForWrite(vform); for (int i = 0; i < LaneCountFromFormat(vform); i++) { T op1 = src1.Float(i); T op2 = src2.Float(i); T acc = dst.Float(i); T result = FPMulAdd(acc, op1, op2); dst.SetFloat(i, result); } return dst; } LogicVRegister Simulator::fmla(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2) { if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { fmla(vform, dst, src1, src2); } else { VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); fmla(vform, dst, src1, src2); } return dst; } template LogicVRegister Simulator::fmls(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2) { dst.ClearForWrite(vform); for (int i = 0; i < LaneCountFromFormat(vform); i++) { T op1 = -src1.Float(i); T op2 = src2.Float(i); T acc = dst.Float(i); T result = FPMulAdd(acc, op1, op2); dst.SetFloat(i, result); } return dst; } LogicVRegister Simulator::fmls(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2) { if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { fmls(vform, dst, src1, src2); } else { VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); fmls(vform, dst, src1, src2); } return dst; } template LogicVRegister Simulator::fneg(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src) { dst.ClearForWrite(vform); for (int i = 0; i < LaneCountFromFormat(vform); i++) { T op = src.Float(i); op = -op; dst.SetFloat(i, op); } return dst; } LogicVRegister Simulator::fneg(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src) { if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { fneg(vform, dst, src); } else { VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); fneg(vform, dst, src); } return dst; } template LogicVRegister Simulator::fabs_(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src) { dst.ClearForWrite(vform); for (int i = 0; i < LaneCountFromFormat(vform); i++) { T op = src.Float(i); if (copysign(1.0, op) < 0.0) { op = -op; } dst.SetFloat(i, op); } return dst; } LogicVRegister Simulator::fabs_(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src) { if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { fabs_(vform, dst, src); } else { VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); fabs_(vform, dst, src); } return dst; } LogicVRegister Simulator::fabd(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2) { SimVRegister temp; fsub(vform, temp, src1, src2); fabs_(vform, dst, temp); return dst; } LogicVRegister Simulator::fsqrt(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src) { dst.ClearForWrite(vform); if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { for (int i = 0; i < LaneCountFromFormat(vform); i++) { float result = FPSqrt(src.Float(i)); dst.SetFloat(i, result); } } else { VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); for (int i = 0; i < LaneCountFromFormat(vform); i++) { double result = FPSqrt(src.Float(i)); dst.SetFloat(i, result); } } return dst; } #define DEFINE_NEON_FP_PAIR_OP(FNP, FN, OP) \ LogicVRegister Simulator::FNP(VectorFormat vform, \ LogicVRegister dst, \ const LogicVRegister& src1, \ const LogicVRegister& src2) { \ SimVRegister temp1, temp2; \ uzp1(vform, temp1, src1, src2); \ uzp2(vform, temp2, src1, src2); \ FN(vform, dst, temp1, temp2); \ return dst; \ } \ \ LogicVRegister Simulator::FNP(VectorFormat vform, \ LogicVRegister dst, \ const LogicVRegister& src) { \ if (vform == kFormatS) { \ float result = OP(src.Float(0), src.Float(1)); \ dst.SetFloat(0, result); \ } else { \ VIXL_ASSERT(vform == kFormatD); \ double result = OP(src.Float(0), src.Float(1)); \ dst.SetFloat(0, result); \ } \ dst.ClearForWrite(vform); \ return dst; \ } NEON_FPPAIRWISE_LIST(DEFINE_NEON_FP_PAIR_OP) #undef DEFINE_NEON_FP_PAIR_OP LogicVRegister Simulator::fminmaxv(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src, FPMinMaxOp Op) { VIXL_ASSERT(vform == kFormat4S); USE(vform); float result1 = (this->*Op)(src.Float(0), src.Float(1)); float result2 = (this->*Op)(src.Float(2), src.Float(3)); float result = (this->*Op)(result1, result2); dst.ClearForWrite(kFormatS); dst.SetFloat(0, result); return dst; } LogicVRegister Simulator::fmaxv(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src) { return fminmaxv(vform, dst, src, &Simulator::FPMax); } LogicVRegister Simulator::fminv(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src) { return fminmaxv(vform, dst, src, &Simulator::FPMin); } LogicVRegister Simulator::fmaxnmv(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src) { return fminmaxv(vform, dst, src, &Simulator::FPMaxNM); } LogicVRegister Simulator::fminnmv(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src) { return fminmaxv(vform, dst, src, &Simulator::FPMinNM); } LogicVRegister Simulator::fmul(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2, int index) { dst.ClearForWrite(vform); SimVRegister temp; if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index); fmul(vform, dst, src1, index_reg); } else { VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index); fmul(vform, dst, src1, index_reg); } return dst; } LogicVRegister Simulator::fmla(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2, int index) { dst.ClearForWrite(vform); SimVRegister temp; if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index); fmla(vform, dst, src1, index_reg); } else { VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index); fmla(vform, dst, src1, index_reg); } return dst; } LogicVRegister Simulator::fmls(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2, int index) { dst.ClearForWrite(vform); SimVRegister temp; if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index); fmls(vform, dst, src1, index_reg); } else { VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index); fmls(vform, dst, src1, index_reg); } return dst; } LogicVRegister Simulator::fmulx(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2, int index) { dst.ClearForWrite(vform); SimVRegister temp; if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index); fmulx(vform, dst, src1, index_reg); } else { VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index); fmulx(vform, dst, src1, index_reg); } return dst; } LogicVRegister Simulator::frint(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src, FPRounding rounding_mode, bool inexact_exception) { dst.ClearForWrite(vform); if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { for (int i = 0; i < LaneCountFromFormat(vform); i++) { float input = src.Float(i); float rounded = FPRoundInt(input, rounding_mode); if (inexact_exception && !std::isnan(input) && (input != rounded)) { FPProcessException(); } dst.SetFloat(i, rounded); } } else { VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); for (int i = 0; i < LaneCountFromFormat(vform); i++) { double input = src.Float(i); double rounded = FPRoundInt(input, rounding_mode); if (inexact_exception && !std::isnan(input) && (input != rounded)) { FPProcessException(); } dst.SetFloat(i, rounded); } } return dst; } LogicVRegister Simulator::fcvts(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src, FPRounding rounding_mode, int fbits) { dst.ClearForWrite(vform); if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { for (int i = 0; i < LaneCountFromFormat(vform); i++) { float op = src.Float(i) * std::pow(2.0f, fbits); dst.SetInt(vform, i, FPToInt32(op, rounding_mode)); } } else { VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); for (int i = 0; i < LaneCountFromFormat(vform); i++) { double op = src.Float(i) * std::pow(2.0, fbits); dst.SetInt(vform, i, FPToInt64(op, rounding_mode)); } } return dst; } LogicVRegister Simulator::fcvtu(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src, FPRounding rounding_mode, int fbits) { dst.ClearForWrite(vform); if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { for (int i = 0; i < LaneCountFromFormat(vform); i++) { float op = src.Float(i) * std::pow(2.0f, fbits); dst.SetUint(vform, i, FPToUInt32(op, rounding_mode)); } } else { VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); for (int i = 0; i < LaneCountFromFormat(vform); i++) { double op = src.Float(i) * std::pow(2.0, fbits); dst.SetUint(vform, i, FPToUInt64(op, rounding_mode)); } } return dst; } LogicVRegister Simulator::fcvtl(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src) { if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) { dst.SetFloat(i, FPToFloat(src.Float(i))); } } else { VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) { dst.SetFloat(i, FPToDouble(src.Float(i))); } } return dst; } LogicVRegister Simulator::fcvtl2(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src) { int lane_count = LaneCountFromFormat(vform); if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { for (int i = 0; i < lane_count; i++) { dst.SetFloat(i, FPToFloat(src.Float(i + lane_count))); } } else { VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); for (int i = 0; i < lane_count; i++) { dst.SetFloat(i, FPToDouble(src.Float(i + lane_count))); } } return dst; } LogicVRegister Simulator::fcvtn(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src) { if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { for (int i = 0; i < LaneCountFromFormat(vform); i++) { dst.SetFloat(i, FPToFloat16(src.Float(i), FPTieEven)); } } else { VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize); for (int i = 0; i < LaneCountFromFormat(vform); i++) { dst.SetFloat(i, FPToFloat(src.Float(i), FPTieEven)); } } return dst; } LogicVRegister Simulator::fcvtn2(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src) { int lane_count = LaneCountFromFormat(vform) / 2; if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { for (int i = lane_count - 1; i >= 0; i--) { dst.SetFloat(i + lane_count, FPToFloat16(src.Float(i), FPTieEven)); } } else { VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize); for (int i = lane_count - 1; i >= 0; i--) { dst.SetFloat(i + lane_count, FPToFloat(src.Float(i), FPTieEven)); } } return dst; } LogicVRegister Simulator::fcvtxn(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src) { dst.ClearForWrite(vform); VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize); for (int i = 0; i < LaneCountFromFormat(vform); i++) { dst.SetFloat(i, FPToFloat(src.Float(i), FPRoundOdd)); } return dst; } LogicVRegister Simulator::fcvtxn2(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src) { VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize); int lane_count = LaneCountFromFormat(vform) / 2; for (int i = lane_count - 1; i >= 0; i--) { dst.SetFloat(i + lane_count, FPToFloat(src.Float(i), FPRoundOdd)); } return dst; } // Based on reference C function recip_sqrt_estimate from ARM ARM. double Simulator::recip_sqrt_estimate(double a) { int q0, q1, s; double r; if (a < 0.5) { q0 = static_cast(a * 512.0); r = 1.0 / sqrt((static_cast(q0) + 0.5) / 512.0); } else { q1 = static_cast(a * 256.0); r = 1.0 / sqrt((static_cast(q1) + 0.5) / 256.0); } s = static_cast(256.0 * r + 0.5); return static_cast(s) / 256.0; } static inline uint64_t Bits(uint64_t val, int start_bit, int end_bit) { return ExtractUnsignedBitfield64(start_bit, end_bit, val); } template T Simulator::FPRecipSqrtEstimate(T op) { if (std::isnan(op)) { return FPProcessNaN(op); } else if (op == 0.0) { if (copysign(1.0, op) < 0.0) { return kFP64NegativeInfinity; } else { return kFP64PositiveInfinity; } } else if (copysign(1.0, op) < 0.0) { FPProcessException(); return FPDefaultNaN(); } else if (std::isinf(op)) { return 0.0; } else { uint64_t fraction; int exp, result_exp; if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof) exp = FloatExp(op); fraction = FloatMantissa(op); fraction <<= 29; } else { exp = DoubleExp(op); fraction = DoubleMantissa(op); } if (exp == 0) { while (Bits(fraction, 51, 51) == 0) { fraction = Bits(fraction, 50, 0) << 1; exp -= 1; } fraction = Bits(fraction, 50, 0) << 1; } double scaled; if (Bits(exp, 0, 0) == 0) { scaled = DoublePack(0, 1022, Bits(fraction, 51, 44) << 44); } else { scaled = DoublePack(0, 1021, Bits(fraction, 51, 44) << 44); } if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof) result_exp = (380 - exp) / 2; } else { result_exp = (3068 - exp) / 2; } uint64_t estimate = DoubleToRawbits(recip_sqrt_estimate(scaled)); if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof) uint32_t exp_bits = static_cast(Bits(result_exp, 7, 0)); uint32_t est_bits = static_cast(Bits(estimate, 51, 29)); return FloatPack(0, exp_bits, est_bits); } else { return DoublePack(0, Bits(result_exp, 10, 0), Bits(estimate, 51, 0)); } } } LogicVRegister Simulator::frsqrte(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src) { dst.ClearForWrite(vform); if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { for (int i = 0; i < LaneCountFromFormat(vform); i++) { float input = src.Float(i); dst.SetFloat(i, FPRecipSqrtEstimate(input)); } } else { VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); for (int i = 0; i < LaneCountFromFormat(vform); i++) { double input = src.Float(i); dst.SetFloat(i, FPRecipSqrtEstimate(input)); } } return dst; } template T Simulator::FPRecipEstimate(T op, FPRounding rounding) { uint32_t sign; if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof) sign = FloatSign(op); } else { sign = DoubleSign(op); } if (std::isnan(op)) { return FPProcessNaN(op); } else if (std::isinf(op)) { return (sign == 1) ? -0.0 : 0.0; } else if (op == 0.0) { FPProcessException(); // FPExc_DivideByZero exception. return (sign == 1) ? kFP64NegativeInfinity : kFP64PositiveInfinity; } else if (((sizeof(T) == sizeof(float)) && // NOLINT(runtime/sizeof) (std::fabs(op) < std::pow(2.0, -128.0))) || ((sizeof(T) == sizeof(double)) && // NOLINT(runtime/sizeof) (std::fabs(op) < std::pow(2.0, -1024.0)))) { bool overflow_to_inf = false; switch (rounding) { case FPTieEven: overflow_to_inf = true; break; case FPPositiveInfinity: overflow_to_inf = (sign == 0); break; case FPNegativeInfinity: overflow_to_inf = (sign == 1); break; case FPZero: overflow_to_inf = false; break; default: break; } FPProcessException(); // FPExc_Overflow and FPExc_Inexact. if (overflow_to_inf) { return (sign == 1) ? kFP64NegativeInfinity : kFP64PositiveInfinity; } else { // Return FPMaxNormal(sign). if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof) return FloatPack(sign, 0xfe, 0x07fffff); } else { return DoublePack(sign, 0x7fe, 0x0fffffffffffffl); } } } else { uint64_t fraction; int exp, result_exp; uint32_t sign; if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof) sign = FloatSign(op); exp = FloatExp(op); fraction = FloatMantissa(op); fraction <<= 29; } else { sign = DoubleSign(op); exp = DoubleExp(op); fraction = DoubleMantissa(op); } if (exp == 0) { if (Bits(fraction, 51, 51) == 0) { exp -= 1; fraction = Bits(fraction, 49, 0) << 2; } else { fraction = Bits(fraction, 50, 0) << 1; } } double scaled = DoublePack(0, 1022, Bits(fraction, 51, 44) << 44); if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof) result_exp = (253 - exp); // In range 253-254 = -1 to 253+1 = 254. } else { result_exp = (2045 - exp); // In range 2045-2046 = -1 to 2045+1 = 2046. } double estimate = recip_estimate(scaled); fraction = DoubleMantissa(estimate); if (result_exp == 0) { fraction = (UINT64_C(1) << 51) | Bits(fraction, 51, 1); } else if (result_exp == -1) { fraction = (UINT64_C(1) << 50) | Bits(fraction, 51, 2); result_exp = 0; } if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof) uint32_t exp_bits = static_cast(Bits(result_exp, 7, 0)); uint32_t frac_bits = static_cast(Bits(fraction, 51, 29)); return FloatPack(sign, exp_bits, frac_bits); } else { return DoublePack(sign, Bits(result_exp, 10, 0), Bits(fraction, 51, 0)); } } } LogicVRegister Simulator::frecpe(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src, FPRounding round) { dst.ClearForWrite(vform); if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { for (int i = 0; i < LaneCountFromFormat(vform); i++) { float input = src.Float(i); dst.SetFloat(i, FPRecipEstimate(input, round)); } } else { VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); for (int i = 0; i < LaneCountFromFormat(vform); i++) { double input = src.Float(i); dst.SetFloat(i, FPRecipEstimate(input, round)); } } return dst; } LogicVRegister Simulator::ursqrte(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src) { dst.ClearForWrite(vform); uint64_t operand; uint32_t result; double dp_operand, dp_result; for (int i = 0; i < LaneCountFromFormat(vform); i++) { operand = src.Uint(vform, i); if (operand <= 0x3FFFFFFF) { result = 0xFFFFFFFF; } else { dp_operand = operand * std::pow(2.0, -32); dp_result = recip_sqrt_estimate(dp_operand) * std::pow(2.0, 31); result = static_cast(dp_result); } dst.SetUint(vform, i, result); } return dst; } // Based on reference C function recip_estimate from ARM ARM. double Simulator::recip_estimate(double a) { int q, s; double r; q = static_cast(a * 512.0); r = 1.0 / ((static_cast(q) + 0.5) / 512.0); s = static_cast(256.0 * r + 0.5); return static_cast(s) / 256.0; } LogicVRegister Simulator::urecpe(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src) { dst.ClearForWrite(vform); uint64_t operand; uint32_t result; double dp_operand, dp_result; for (int i = 0; i < LaneCountFromFormat(vform); i++) { operand = src.Uint(vform, i); if (operand <= 0x7FFFFFFF) { result = 0xFFFFFFFF; } else { dp_operand = operand * std::pow(2.0, -32); dp_result = recip_estimate(dp_operand) * std::pow(2.0, 31); result = static_cast(dp_result); } dst.SetUint(vform, i, result); } return dst; } template LogicVRegister Simulator::frecpx(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src) { dst.ClearForWrite(vform); for (int i = 0; i < LaneCountFromFormat(vform); i++) { T op = src.Float(i); T result; if (std::isnan(op)) { result = FPProcessNaN(op); } else { int exp; uint32_t sign; if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof) sign = FloatSign(op); exp = FloatExp(op); exp = (exp == 0) ? (0xFF - 1) : static_cast(Bits(~exp, 7, 0)); result = FloatPack(sign, exp, 0); } else { sign = DoubleSign(op); exp = DoubleExp(op); exp = (exp == 0) ? (0x7FF - 1) : static_cast(Bits(~exp, 10, 0)); result = DoublePack(sign, exp, 0); } } dst.SetFloat(i, result); } return dst; } LogicVRegister Simulator::frecpx(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src) { if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { frecpx(vform, dst, src); } else { VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); frecpx(vform, dst, src); } return dst; } LogicVRegister Simulator::scvtf(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src, int fbits, FPRounding round) { for (int i = 0; i < LaneCountFromFormat(vform); i++) { if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { float result = FixedToFloat(src.Int(kFormatS, i), fbits, round); dst.SetFloat(i, result); } else { VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); double result = FixedToDouble(src.Int(kFormatD, i), fbits, round); dst.SetFloat(i, result); } } return dst; } LogicVRegister Simulator::ucvtf(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src, int fbits, FPRounding round) { for (int i = 0; i < LaneCountFromFormat(vform); i++) { if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { float result = UFixedToFloat(src.Uint(kFormatS, i), fbits, round); dst.SetFloat(i, result); } else { VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); double result = UFixedToDouble(src.Uint(kFormatD, i), fbits, round); dst.SetFloat(i, result); } } return dst; } } // namespace aarch64 } // namespace vixl #endif // VIXL_INCLUDE_SIMULATOR_AARCH64