// Copyright 2019, VIXL authors // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are met: // // * Redistributions of source code must retain the above copyright notice, // this list of conditions and the following disclaimer. // * Redistributions in binary form must reproduce the above copyright notice, // this list of conditions and the following disclaimer in the documentation // and/or other materials provided with the distribution. // * Neither the name of ARM Limited nor the names of its contributors may be // used to endorse or promote products derived from this software without // specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND // ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED // WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "macro-assembler-aarch64.h" namespace vixl { namespace aarch64 { void MacroAssembler::AddSubHelper(AddSubHelperOption option, const ZRegister& zd, const ZRegister& zn, IntegerOperand imm) { VIXL_ASSERT(imm.FitsInLane(zd)); // Simple, encodable cases. if (TrySingleAddSub(option, zd, zn, imm)) return; VIXL_ASSERT((option == kAddImmediate) || (option == kSubImmediate)); bool add_imm = (option == kAddImmediate); // Try to translate Add(..., -imm) to Sub(..., imm) if we can encode it in one // instruction. Also interpret the immediate as signed, so we can convert // Add(zd.VnH(), zn.VnH(), 0xffff...) to Sub(..., 1), etc. IntegerOperand signed_imm(imm.AsIntN(zd.GetLaneSizeInBits())); if (signed_imm.IsNegative()) { AddSubHelperOption n_option = add_imm ? kSubImmediate : kAddImmediate; IntegerOperand n_imm(signed_imm.GetMagnitude()); // IntegerOperand can represent -INT_MIN, so this is always safe. VIXL_ASSERT(n_imm.IsPositiveOrZero()); if (TrySingleAddSub(n_option, zd, zn, n_imm)) return; } // Otherwise, fall back to dup + ADD_z_z/SUB_z_z. UseScratchRegisterScope temps(this); ZRegister scratch = temps.AcquireZ().WithLaneSize(zn.GetLaneSizeInBits()); Dup(scratch, imm); SingleEmissionCheckScope guard(this); if (add_imm) { add(zd, zn, scratch); } else { sub(zd, zn, scratch); } } bool MacroAssembler::TrySingleAddSub(AddSubHelperOption option, const ZRegister& zd, const ZRegister& zn, IntegerOperand imm) { VIXL_ASSERT(imm.FitsInLane(zd)); int imm8; int shift = -1; if (imm.TryEncodeAsShiftedUintNForLane<8, 0>(zd, &imm8, &shift) || imm.TryEncodeAsShiftedUintNForLane<8, 8>(zd, &imm8, &shift)) { MovprfxHelperScope guard(this, zd, zn); switch (option) { case kAddImmediate: add(zd, zd, imm8, shift); return true; case kSubImmediate: sub(zd, zd, imm8, shift); return true; } } return false; } void MacroAssembler::IntWideImmHelper(IntArithImmFn imm_fn, SVEArithPredicatedFn reg_macro, const ZRegister& zd, const ZRegister& zn, IntegerOperand imm, bool is_signed) { if (is_signed) { // E.g. 
MUL_z_zi, SMIN_z_zi, SMAX_z_zi if (imm.IsInt8()) { MovprfxHelperScope guard(this, zd, zn); (this->*imm_fn)(zd, zd, imm.AsInt8()); return; } } else { // E.g. UMIN_z_zi, UMAX_z_zi if (imm.IsUint8()) { MovprfxHelperScope guard(this, zd, zn); (this->*imm_fn)(zd, zd, imm.AsUint8()); return; } } UseScratchRegisterScope temps(this); PRegister pg = temps.AcquireGoverningP(); Ptrue(pg.WithSameLaneSizeAs(zd)); // Try to re-use zd if we can, so we can avoid a movprfx. ZRegister scratch = zd.Aliases(zn) ? temps.AcquireZ().WithLaneSize(zn.GetLaneSizeInBits()) : zd; Dup(scratch, imm); // The vector-form macro for commutative operations will swap the arguments to // avoid movprfx, if necessary. (this->*reg_macro)(zd, pg.Merging(), zn, scratch); } void MacroAssembler::Mul(const ZRegister& zd, const ZRegister& zn, IntegerOperand imm) { VIXL_ASSERT(allow_macro_instructions_); IntArithImmFn imm_fn = &Assembler::mul; SVEArithPredicatedFn reg_fn = &MacroAssembler::Mul; IntWideImmHelper(imm_fn, reg_fn, zd, zn, imm, true); } void MacroAssembler::Smin(const ZRegister& zd, const ZRegister& zn, IntegerOperand imm) { VIXL_ASSERT(allow_macro_instructions_); VIXL_ASSERT(imm.FitsInSignedLane(zd)); IntArithImmFn imm_fn = &Assembler::smin; SVEArithPredicatedFn reg_fn = &MacroAssembler::Smin; IntWideImmHelper(imm_fn, reg_fn, zd, zn, imm, true); } void MacroAssembler::Smax(const ZRegister& zd, const ZRegister& zn, IntegerOperand imm) { VIXL_ASSERT(allow_macro_instructions_); VIXL_ASSERT(imm.FitsInSignedLane(zd)); IntArithImmFn imm_fn = &Assembler::smax; SVEArithPredicatedFn reg_fn = &MacroAssembler::Smax; IntWideImmHelper(imm_fn, reg_fn, zd, zn, imm, true); } void MacroAssembler::Umax(const ZRegister& zd, const ZRegister& zn, IntegerOperand imm) { VIXL_ASSERT(allow_macro_instructions_); VIXL_ASSERT(imm.FitsInUnsignedLane(zd)); IntArithImmFn imm_fn = &Assembler::umax; SVEArithPredicatedFn reg_fn = &MacroAssembler::Umax; IntWideImmHelper(imm_fn, reg_fn, zd, zn, imm, false); } void MacroAssembler::Umin(const ZRegister& zd, const ZRegister& zn, IntegerOperand imm) { VIXL_ASSERT(allow_macro_instructions_); VIXL_ASSERT(imm.FitsInUnsignedLane(zd)); IntArithImmFn imm_fn = &Assembler::umin; SVEArithPredicatedFn reg_fn = &MacroAssembler::Umin; IntWideImmHelper(imm_fn, reg_fn, zd, zn, imm, false); } void MacroAssembler::Addpl(const Register& xd, const Register& xn, int64_t multiplier) { VIXL_ASSERT(allow_macro_instructions_); // This macro relies on `Rdvl` to handle some out-of-range cases. Check that // `VL * multiplier` cannot overflow, for any possible value of VL. VIXL_ASSERT(multiplier <= (INT64_MAX / kZRegMaxSizeInBytes)); VIXL_ASSERT(multiplier >= (INT64_MIN / kZRegMaxSizeInBytes)); if (xd.IsZero()) return; if (xn.IsZero() && xd.IsSP()) { // TODO: This operation doesn't make much sense, but we could support it // with a scratch register if necessary. VIXL_UNIMPLEMENTED(); } // Handling xzr requires an extra move, so defer it until later so we can try // to use `rdvl` instead (via `Addvl`). if (IsInt6(multiplier) && !xn.IsZero()) { SingleEmissionCheckScope guard(this); addpl(xd, xn, static_cast(multiplier)); return; } // If `multiplier` is a multiple of 8, we can use `Addvl` instead. if ((multiplier % kZRegBitsPerPRegBit) == 0) { Addvl(xd, xn, multiplier / kZRegBitsPerPRegBit); return; } if (IsInt6(multiplier)) { VIXL_ASSERT(xn.IsZero()); // Other cases were handled with `addpl`. // There is no simple `rdpl` instruction, and `addpl` cannot accept xzr, so // materialise a zero. 
    MacroEmissionCheckScope guard(this);
    movz(xd, 0);
    addpl(xd, xd, static_cast<int>(multiplier));
    return;
  }

  // TODO: Some probable cases result in rather long sequences. For example,
  // `Addpl(sp, sp, 33)` requires five instructions, even though it's only just
  // outside the encodable range. We should look for ways to cover such cases
  // without drastically increasing the complexity of this logic.

  // For other cases, calculate xn + (PL * multiplier) using discrete
  // instructions. This requires two scratch registers in the general case, so
  // try to re-use the destination as a scratch register.
  UseScratchRegisterScope temps(this);
  temps.Include(xd);
  temps.Exclude(xn);
  Register scratch = temps.AcquireX();
  // There is no `rdpl`, so we have to calculate PL from VL. We can't scale the
  // multiplier because (we already know) it isn't a multiple of 8.
  Rdvl(scratch, multiplier);

  MacroEmissionCheckScope guard(this);
  if (xn.IsZero()) {
    asr(xd, scratch, kZRegBitsPerPRegBitLog2);
  } else if (xd.IsSP() || xn.IsSP()) {
    // TODO: MacroAssembler::Add should be able to handle this.
    asr(scratch, scratch, kZRegBitsPerPRegBitLog2);
    add(xd, xn, scratch);
  } else {
    add(xd, xn, Operand(scratch, ASR, kZRegBitsPerPRegBitLog2));
  }
}

void MacroAssembler::Addvl(const Register& xd,
                           const Register& xn,
                           int64_t multiplier) {
  VIXL_ASSERT(allow_macro_instructions_);
  VIXL_ASSERT(xd.IsX());
  VIXL_ASSERT(xn.IsX());

  // Check that `VL * multiplier` cannot overflow, for any possible value of VL.
  VIXL_ASSERT(multiplier <= (INT64_MAX / kZRegMaxSizeInBytes));
  VIXL_ASSERT(multiplier >= (INT64_MIN / kZRegMaxSizeInBytes));

  if (xd.IsZero()) return;
  if (xn.IsZero() && xd.IsSP()) {
    // TODO: This operation doesn't make much sense, but we could support it
    // with a scratch register if necessary. `rdvl` cannot write into `sp`.
    VIXL_UNIMPLEMENTED();
  }

  if (IsInt6(multiplier)) {
    SingleEmissionCheckScope guard(this);
    if (xn.IsZero()) {
      rdvl(xd, static_cast<int>(multiplier));
    } else {
      addvl(xd, xn, static_cast<int>(multiplier));
    }
    return;
  }

  // TODO: Some probable cases result in rather long sequences. For example,
  // `Addvl(sp, sp, 42)` requires four instructions, even though it's only just
  // outside the encodable range. We should look for ways to cover such cases
  // without drastically increasing the complexity of this logic.

  // For other cases, calculate xn + (VL * multiplier) using discrete
  // instructions. This requires two scratch registers in the general case, so
  // we try to re-use the destination as a scratch register.
  UseScratchRegisterScope temps(this);
  temps.Include(xd);
  temps.Exclude(xn);

  Register a = temps.AcquireX();
  Mov(a, multiplier);

  MacroEmissionCheckScope guard(this);
  Register b = temps.AcquireX();
  rdvl(b, 1);
  if (xn.IsZero()) {
    mul(xd, a, b);
  } else if (xd.IsSP() || xn.IsSP()) {
    mul(a, a, b);
    add(xd, xn, a);
  } else {
    madd(xd, a, b, xn);
  }
}

void MacroAssembler::CalculateSVEAddress(const Register& xd,
                                         const SVEMemOperand& addr,
                                         int vl_divisor_log2) {
  VIXL_ASSERT(allow_macro_instructions_);
  VIXL_ASSERT(!addr.IsScatterGather());
  VIXL_ASSERT(xd.IsX());

  // The lower bound is where a whole Z register is accessed.
  VIXL_ASSERT(!addr.IsMulVl() || (vl_divisor_log2 >= 0));
  // The upper bound is for P register accesses, and for instructions like
  // "st1b { z0.d } [...]", where one byte is accessed for every D-sized lane.
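  // For example, with a MUL VL offset, vl_divisor_log2 describes how much
  // smaller than VL the in-memory access is: a full-width access such as
  // "ld1d { z0.d }" gives vl_divisor_log2 == 0, while "st1b { z0.d }" (one
  // byte per D-sized lane) and P register accesses (PL == VL / 8) give
  // vl_divisor_log2 == 3, matching kZRegBitsPerPRegBitLog2.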
  VIXL_ASSERT(vl_divisor_log2 <= static_cast<int>(kZRegBitsPerPRegBitLog2));

  SVEOffsetModifier mod = addr.GetOffsetModifier();
  Register base = addr.GetScalarBase();

  if (addr.IsEquivalentToScalar()) {
    // For example:
    //   [x0]
    //   [x0, #0]
    //   [x0, xzr, LSL 2]
    Mov(xd, base);
  } else if (addr.IsScalarPlusImmediate()) {
    // For example:
    //   [x0, #42]
    //   [x0, #42, MUL VL]
    int64_t offset = addr.GetImmediateOffset();
    VIXL_ASSERT(offset != 0);  // Handled by IsEquivalentToScalar.
    if (addr.IsMulVl()) {
      int vl_divisor = 1 << vl_divisor_log2;
      // For all possible values of vl_divisor, we can simply use `Addpl`. This
      // will select `addvl` if necessary.
      VIXL_ASSERT((kZRegBitsPerPRegBit % vl_divisor) == 0);
      Addpl(xd, base, offset * (kZRegBitsPerPRegBit / vl_divisor));
    } else {
      // IsScalarPlusImmediate() ensures that no other modifiers can occur.
      VIXL_ASSERT(mod == NO_SVE_OFFSET_MODIFIER);
      Add(xd, base, offset);
    }
  } else if (addr.IsScalarPlusScalar()) {
    // For example:
    //   [x0, x1]
    //   [x0, x1, LSL #4]
    Register offset = addr.GetScalarOffset();
    VIXL_ASSERT(!offset.IsZero());  // Handled by IsEquivalentToScalar.
    if (mod == SVE_LSL) {
      Add(xd, base, Operand(offset, LSL, addr.GetShiftAmount()));
    } else {
      // IsScalarPlusScalar() ensures that no other modifiers can occur.
      VIXL_ASSERT(mod == NO_SVE_OFFSET_MODIFIER);
      Add(xd, base, offset);
    }
  } else {
    // All other forms are scatter-gather addresses, which cannot be evaluated
    // into an X register.
    VIXL_UNREACHABLE();
  }
}

void MacroAssembler::Cpy(const ZRegister& zd,
                         const PRegister& pg,
                         IntegerOperand imm) {
  VIXL_ASSERT(allow_macro_instructions_);
  VIXL_ASSERT(imm.FitsInLane(zd));

  int imm8;
  int shift;
  if (imm.TryEncodeAsShiftedIntNForLane<8, 0>(zd, &imm8, &shift) ||
      imm.TryEncodeAsShiftedIntNForLane<8, 8>(zd, &imm8, &shift)) {
    SingleEmissionCheckScope guard(this);
    cpy(zd, pg, imm8, shift);
    return;
  }

  // The fallbacks rely on `cpy` variants that only support merging predication.
  // If zeroing predication was requested, zero the destination first.
  if (pg.IsZeroing()) {
    SingleEmissionCheckScope guard(this);
    dup(zd, 0);
  }
  PRegisterM pg_m = pg.Merging();

  // Try to encode the immediate using fcpy.
  VIXL_ASSERT(imm.FitsInLane(zd));
  if (zd.GetLaneSizeInBits() >= kHRegSize) {
    double fp_imm = 0.0;
    switch (zd.GetLaneSizeInBits()) {
      case kHRegSize:
        fp_imm =
            FPToDouble(RawbitsToFloat16(imm.AsUint16()), kIgnoreDefaultNaN);
        break;
      case kSRegSize:
        fp_imm = RawbitsToFloat(imm.AsUint32());
        break;
      case kDRegSize:
        fp_imm = RawbitsToDouble(imm.AsUint64());
        break;
      default:
        VIXL_UNREACHABLE();
        break;
    }
    // IsImmFP64 is equivalent to IsImmFP for the same arithmetic value, so
    // we can use IsImmFP64 for all lane sizes.
    if (IsImmFP64(fp_imm)) {
      SingleEmissionCheckScope guard(this);
      fcpy(zd, pg_m, fp_imm);
      return;
    }
  }

  // Fall back to using a scratch register.
  UseScratchRegisterScope temps(this);
  Register scratch = temps.AcquireRegisterToHoldLane(zd);
  Mov(scratch, imm);

  SingleEmissionCheckScope guard(this);
  cpy(zd, pg_m, scratch);
}

// TODO: We implement Fcpy (amongst other things) for all FP types because it
// allows us to preserve user-specified NaNs. We should come up with some
// FPImmediate type to abstract this, and avoid all the duplication below (and
// elsewhere).
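// Illustrative sketch (an addition for exposition, not VIXL code, and not
// compiled): the reason the `Fcpy` fall-backs below go through raw bits and
// `Cpy` is that a bit-level copy preserves an arbitrary NaN payload, whereas
// re-deriving the value through FP arithmetic or conversion need not. The
// helper names here are placeholders; they only mirror the idea of
// `DoubleToRawbits` / `RawbitsToDouble` used elsewhere in this file.
#if 0
#include <cstdint>
#include <cstring>

static uint64_t BitsOfDouble(double value) {
  uint64_t bits;
  std::memcpy(&bits, &value, sizeof(bits));  // Bit-exact; NaN payload intact.
  return bits;
}

static double DoubleFromBits(uint64_t bits) {
  double value;
  std::memcpy(&value, &bits, sizeof(value));
  return value;
}

// Round-tripping through raw bits is exact for every encoding, including
// signalling NaNs and NaNs with non-default payloads, which is what the
// integer `Cpy` path relies on.
static bool RawBitsRoundTripIsExact(uint64_t raw) {
  return BitsOfDouble(DoubleFromBits(raw)) == raw;
}
#endif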
void MacroAssembler::Fcpy(const ZRegister& zd, const PRegisterM& pg, double imm) { VIXL_ASSERT(allow_macro_instructions_); VIXL_ASSERT(pg.IsMerging()); if (IsImmFP64(imm)) { SingleEmissionCheckScope guard(this); fcpy(zd, pg, imm); return; } // As a fall-back, cast the immediate to the required lane size, and try to // encode the bit pattern using `Cpy`. Cpy(zd, pg, FPToRawbitsWithSize(zd.GetLaneSizeInBits(), imm)); } void MacroAssembler::Fcpy(const ZRegister& zd, const PRegisterM& pg, float imm) { VIXL_ASSERT(allow_macro_instructions_); VIXL_ASSERT(pg.IsMerging()); if (IsImmFP32(imm)) { SingleEmissionCheckScope guard(this); fcpy(zd, pg, imm); return; } // As a fall-back, cast the immediate to the required lane size, and try to // encode the bit pattern using `Cpy`. Cpy(zd, pg, FPToRawbitsWithSize(zd.GetLaneSizeInBits(), imm)); } void MacroAssembler::Fcpy(const ZRegister& zd, const PRegisterM& pg, Float16 imm) { VIXL_ASSERT(allow_macro_instructions_); VIXL_ASSERT(pg.IsMerging()); if (IsImmFP16(imm)) { SingleEmissionCheckScope guard(this); fcpy(zd, pg, imm); return; } // As a fall-back, cast the immediate to the required lane size, and try to // encode the bit pattern using `Cpy`. Cpy(zd, pg, FPToRawbitsWithSize(zd.GetLaneSizeInBits(), imm)); } void MacroAssembler::Dup(const ZRegister& zd, IntegerOperand imm) { VIXL_ASSERT(allow_macro_instructions_); VIXL_ASSERT(imm.FitsInLane(zd)); unsigned lane_size = zd.GetLaneSizeInBits(); int imm8; int shift; if (imm.TryEncodeAsShiftedIntNForLane<8, 0>(zd, &imm8, &shift) || imm.TryEncodeAsShiftedIntNForLane<8, 8>(zd, &imm8, &shift)) { SingleEmissionCheckScope guard(this); dup(zd, imm8, shift); } else if (IsImmLogical(imm.AsUintN(lane_size), lane_size)) { SingleEmissionCheckScope guard(this); dupm(zd, imm.AsUintN(lane_size)); } else { UseScratchRegisterScope temps(this); Register scratch = temps.AcquireRegisterToHoldLane(zd); Mov(scratch, imm); SingleEmissionCheckScope guard(this); dup(zd, scratch); } } void MacroAssembler::NoncommutativeArithmeticHelper( const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn, const ZRegister& zm, SVEArithPredicatedFn fn, SVEArithPredicatedFn rev_fn) { if (zd.Aliases(zn)) { // E.g. zd = zd / zm SingleEmissionCheckScope guard(this); (this->*fn)(zd, pg, zn, zm); } else if (zd.Aliases(zm)) { // E.g. zd = zn / zd SingleEmissionCheckScope guard(this); (this->*rev_fn)(zd, pg, zm, zn); } else { // E.g. zd = zn / zm MovprfxHelperScope guard(this, zd, pg, zn); (this->*fn)(zd, pg, zd, zm); } } void MacroAssembler::FPCommutativeArithmeticHelper( const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn, const ZRegister& zm, SVEArithPredicatedFn fn, FPMacroNaNPropagationOption nan_option) { ResolveFPNaNPropagationOption(&nan_option); if (zd.Aliases(zn)) { SingleEmissionCheckScope guard(this); (this->*fn)(zd, pg, zd, zm); } else if (zd.Aliases(zm)) { switch (nan_option) { case FastNaNPropagation: { // Swap the arguments. SingleEmissionCheckScope guard(this); (this->*fn)(zd, pg, zd, zn); return; } case StrictNaNPropagation: { UseScratchRegisterScope temps(this); // Use a scratch register to keep the argument order exactly as // specified. 
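        // Although the operation itself is commutative, AArch64 NaN
        // propagation is not: when an input is NaN, the result's payload is
        // generally taken from the first qualifying operand, so swapping zn
        // and zm (as FastNaNPropagation does) could return a different NaN.
        // StrictNaNPropagation therefore pays for an extra move instead.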
ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zn); { MovprfxHelperScope guard(this, scratch, pg, zn); (this->*fn)(scratch, pg, scratch, zm); } Mov(zd, scratch); return; } case NoFPMacroNaNPropagationSelected: VIXL_UNREACHABLE(); return; } } else { MovprfxHelperScope guard(this, zd, pg, zn); (this->*fn)(zd, pg, zd, zm); } } // Instructions of the form "inst zda, zn, zm, #num", where they are // non-commutative and no reversed form is provided. #define VIXL_SVE_NONCOMM_ARITH_ZZZZI_LIST(V) \ V(Cmla, cmla) \ V(Sqrdcmlah, sqrdcmlah) #define VIXL_DEFINE_MASM_FUNC(MASMFN, ASMFN) \ void MacroAssembler::MASMFN(const ZRegister& zd, \ const ZRegister& za, \ const ZRegister& zn, \ const ZRegister& zm, \ int imm) { \ if ((zd.Aliases(zn) || zd.Aliases(zm)) && !zd.Aliases(za)) { \ UseScratchRegisterScope temps(this); \ VIXL_ASSERT(AreSameLaneSize(zn, zm)); \ ZRegister ztmp = temps.AcquireZ().WithSameLaneSizeAs(zn); \ Mov(ztmp, zd.Aliases(zn) ? zn : zm); \ MovprfxHelperScope guard(this, zd, za); \ ASMFN(zd, \ (zd.Aliases(zn) ? ztmp : zn), \ (zd.Aliases(zm) ? ztmp : zm), \ imm); \ } else { \ MovprfxHelperScope guard(this, zd, za); \ ASMFN(zd, zn, zm, imm); \ } \ } VIXL_SVE_NONCOMM_ARITH_ZZZZI_LIST(VIXL_DEFINE_MASM_FUNC) #undef VIXL_DEFINE_MASM_FUNC // Instructions of the form "inst zda, zn, zm, #num, #num", where they are // non-commutative and no reversed form is provided. #define VIXL_SVE_NONCOMM_ARITH_ZZZZII_LIST(V) \ V(Cmla, cmla) \ V(Sqrdcmlah, sqrdcmlah) // This doesn't handle zm when it's out of the range that can be encoded in // instruction. The range depends on element size: z0-z7 for H, z0-15 for S. #define VIXL_DEFINE_MASM_FUNC(MASMFN, ASMFN) \ void MacroAssembler::MASMFN(const ZRegister& zd, \ const ZRegister& za, \ const ZRegister& zn, \ const ZRegister& zm, \ int index, \ int rot) { \ if ((zd.Aliases(zn) || zd.Aliases(zm)) && !zd.Aliases(za)) { \ UseScratchRegisterScope temps(this); \ ZRegister ztmp = temps.AcquireZ().WithSameLaneSizeAs(zd); \ { \ MovprfxHelperScope guard(this, ztmp, za); \ ASMFN(ztmp, zn, zm, index, rot); \ } \ Mov(zd, ztmp); \ } else { \ MovprfxHelperScope guard(this, zd, za); \ ASMFN(zd, zn, zm, index, rot); \ } \ } VIXL_SVE_NONCOMM_ARITH_ZZZZII_LIST(VIXL_DEFINE_MASM_FUNC) #undef VIXL_DEFINE_MASM_FUNC // Instructions of the form "inst zda, pg, zda, zn", where they are // non-commutative and no reversed form is provided. #define VIXL_SVE_NONCOMM_ARITH_ZPZZ_LIST(V) \ V(Addp, addp) \ V(Faddp, faddp) \ V(Fmaxnmp, fmaxnmp) \ V(Fminnmp, fminnmp) \ V(Fmaxp, fmaxp) \ V(Fminp, fminp) \ V(Fscale, fscale) \ V(Smaxp, smaxp) \ V(Sminp, sminp) \ V(Suqadd, suqadd) \ V(Umaxp, umaxp) \ V(Uminp, uminp) \ V(Usqadd, usqadd) #define VIXL_DEFINE_MASM_FUNC(MASMFN, ASMFN) \ void MacroAssembler::MASMFN(const ZRegister& zd, \ const PRegisterM& pg, \ const ZRegister& zn, \ const ZRegister& zm) { \ VIXL_ASSERT(allow_macro_instructions_); \ if (zd.Aliases(zm) && !zd.Aliases(zn)) { \ UseScratchRegisterScope temps(this); \ ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zm); \ Mov(scratch, zm); \ MovprfxHelperScope guard(this, zd, pg, zn); \ ASMFN(zd, pg, zd, scratch); \ } else { \ MovprfxHelperScope guard(this, zd, pg, zn); \ ASMFN(zd, pg, zd, zm); \ } \ } VIXL_SVE_NONCOMM_ARITH_ZPZZ_LIST(VIXL_DEFINE_MASM_FUNC) #undef VIXL_DEFINE_MASM_FUNC // Instructions of the form "inst zda, pg, zda, zn", where they are // non-commutative and a reversed form is provided. 
#define VIXL_SVE_NONCOMM_ARITH_REVERSE_ZPZZ_LIST(V) \
  V(Asr, asr)                                       \
  V(Fdiv, fdiv)                                     \
  V(Fsub, fsub)                                     \
  V(Lsl, lsl)                                       \
  V(Lsr, lsr)                                       \
  V(Sdiv, sdiv)                                     \
  V(Shsub, shsub)                                   \
  V(Sqrshl, sqrshl)                                 \
  V(Sqshl, sqshl)                                   \
  V(Sqsub, sqsub)                                   \
  V(Srshl, srshl)                                   \
  V(Sub, sub)                                       \
  V(Udiv, udiv)                                     \
  V(Uhsub, uhsub)                                   \
  V(Uqrshl, uqrshl)                                 \
  V(Uqshl, uqshl)                                   \
  V(Uqsub, uqsub)                                   \
  V(Urshl, urshl)

#define VIXL_DEFINE_MASM_FUNC(MASMFN, ASMFN)                          \
  void MacroAssembler::MASMFN(const ZRegister& zd,                    \
                              const PRegisterM& pg,                   \
                              const ZRegister& zn,                    \
                              const ZRegister& zm) {                  \
    VIXL_ASSERT(allow_macro_instructions_);                           \
    NoncommutativeArithmeticHelper(zd,                                \
                                   pg,                                \
                                   zn,                                \
                                   zm,                                \
                                   static_cast<SVEArithPredicatedFn>( \
                                       &Assembler::ASMFN),            \
                                   static_cast<SVEArithPredicatedFn>( \
                                       &Assembler::ASMFN##r));        \
  }
VIXL_SVE_NONCOMM_ARITH_REVERSE_ZPZZ_LIST(VIXL_DEFINE_MASM_FUNC)
#undef VIXL_DEFINE_MASM_FUNC

void MacroAssembler::Fadd(const ZRegister& zd,
                          const PRegisterM& pg,
                          const ZRegister& zn,
                          const ZRegister& zm,
                          FPMacroNaNPropagationOption nan_option) {
  VIXL_ASSERT(allow_macro_instructions_);
  FPCommutativeArithmeticHelper(zd,
                                pg,
                                zn,
                                zm,
                                static_cast<SVEArithPredicatedFn>(
                                    &Assembler::fadd),
                                nan_option);
}

void MacroAssembler::Fabd(const ZRegister& zd,
                          const PRegisterM& pg,
                          const ZRegister& zn,
                          const ZRegister& zm,
                          FPMacroNaNPropagationOption nan_option) {
  VIXL_ASSERT(allow_macro_instructions_);
  FPCommutativeArithmeticHelper(zd,
                                pg,
                                zn,
                                zm,
                                static_cast<SVEArithPredicatedFn>(
                                    &Assembler::fabd),
                                nan_option);
}

void MacroAssembler::Fmul(const ZRegister& zd,
                          const PRegisterM& pg,
                          const ZRegister& zn,
                          const ZRegister& zm,
                          FPMacroNaNPropagationOption nan_option) {
  VIXL_ASSERT(allow_macro_instructions_);
  FPCommutativeArithmeticHelper(zd,
                                pg,
                                zn,
                                zm,
                                static_cast<SVEArithPredicatedFn>(
                                    &Assembler::fmul),
                                nan_option);
}

void MacroAssembler::Fmulx(const ZRegister& zd,
                           const PRegisterM& pg,
                           const ZRegister& zn,
                           const ZRegister& zm,
                           FPMacroNaNPropagationOption nan_option) {
  VIXL_ASSERT(allow_macro_instructions_);
  FPCommutativeArithmeticHelper(zd,
                                pg,
                                zn,
                                zm,
                                static_cast<SVEArithPredicatedFn>(
                                    &Assembler::fmulx),
                                nan_option);
}

void MacroAssembler::Fmax(const ZRegister& zd,
                          const PRegisterM& pg,
                          const ZRegister& zn,
                          const ZRegister& zm,
                          FPMacroNaNPropagationOption nan_option) {
  VIXL_ASSERT(allow_macro_instructions_);
  FPCommutativeArithmeticHelper(zd,
                                pg,
                                zn,
                                zm,
                                static_cast<SVEArithPredicatedFn>(
                                    &Assembler::fmax),
                                nan_option);
}

void MacroAssembler::Fmin(const ZRegister& zd,
                          const PRegisterM& pg,
                          const ZRegister& zn,
                          const ZRegister& zm,
                          FPMacroNaNPropagationOption nan_option) {
  VIXL_ASSERT(allow_macro_instructions_);
  FPCommutativeArithmeticHelper(zd,
                                pg,
                                zn,
                                zm,
                                static_cast<SVEArithPredicatedFn>(
                                    &Assembler::fmin),
                                nan_option);
}

void MacroAssembler::Fmaxnm(const ZRegister& zd,
                            const PRegisterM& pg,
                            const ZRegister& zn,
                            const ZRegister& zm,
                            FPMacroNaNPropagationOption nan_option) {
  VIXL_ASSERT(allow_macro_instructions_);
  FPCommutativeArithmeticHelper(zd,
                                pg,
                                zn,
                                zm,
                                static_cast<SVEArithPredicatedFn>(
                                    &Assembler::fmaxnm),
                                nan_option);
}

void MacroAssembler::Fminnm(const ZRegister& zd,
                            const PRegisterM& pg,
                            const ZRegister& zn,
                            const ZRegister& zm,
                            FPMacroNaNPropagationOption nan_option) {
  VIXL_ASSERT(allow_macro_instructions_);
  FPCommutativeArithmeticHelper(zd,
                                pg,
                                zn,
                                zm,
                                static_cast<SVEArithPredicatedFn>(
                                    &Assembler::fminnm),
                                nan_option);
}

void MacroAssembler::Fdup(const ZRegister& zd, double imm) {
  VIXL_ASSERT(allow_macro_instructions_);

  switch (zd.GetLaneSizeInBits()) {
    case kHRegSize:
      Fdup(zd, Float16(imm));
      break;
    case kSRegSize:
      Fdup(zd, static_cast<float>(imm));
      break;
    case kDRegSize:
      if (IsImmFP64(imm)) {
        SingleEmissionCheckScope guard(this);
        fdup(zd, imm);
      } else {
        Dup(zd, DoubleToRawbits(imm));
      }
      break;
  }
}

void MacroAssembler::Fdup(const ZRegister& zd, float imm) {
VIXL_ASSERT(allow_macro_instructions_); switch (zd.GetLaneSizeInBits()) { case kHRegSize: Fdup(zd, Float16(imm)); break; case kSRegSize: if (IsImmFP32(imm)) { SingleEmissionCheckScope guard(this); fdup(zd, imm); } else { Dup(zd, FloatToRawbits(imm)); } break; case kDRegSize: Fdup(zd, static_cast(imm)); break; } } void MacroAssembler::Fdup(const ZRegister& zd, Float16 imm) { VIXL_ASSERT(allow_macro_instructions_); switch (zd.GetLaneSizeInBits()) { case kHRegSize: if (IsImmFP16(imm)) { SingleEmissionCheckScope guard(this); fdup(zd, imm); } else { Dup(zd, Float16ToRawbits(imm)); } break; case kSRegSize: Fdup(zd, FPToFloat(imm, kIgnoreDefaultNaN)); break; case kDRegSize: Fdup(zd, FPToDouble(imm, kIgnoreDefaultNaN)); break; } } void MacroAssembler::Index(const ZRegister& zd, const Operand& start, const Operand& step) { class IndexOperand : public Operand { public: static IndexOperand Prepare(MacroAssembler* masm, UseScratchRegisterScope* temps, const Operand& op, const ZRegister& zd_inner) { // Look for encodable immediates. int imm; if (op.IsImmediate()) { if (IntegerOperand(op).TryEncodeAsIntNForLane<5>(zd_inner, &imm)) { return IndexOperand(imm); } Register scratch = temps->AcquireRegisterToHoldLane(zd_inner); masm->Mov(scratch, op); return IndexOperand(scratch); } else { // Plain registers can be encoded directly. VIXL_ASSERT(op.IsPlainRegister()); return IndexOperand(op.GetRegister()); } } int GetImm5() const { int64_t imm = GetImmediate(); VIXL_ASSERT(IsInt5(imm)); return static_cast(imm); } private: explicit IndexOperand(const Register& reg) : Operand(reg) {} explicit IndexOperand(int64_t imm) : Operand(imm) {} }; UseScratchRegisterScope temps(this); IndexOperand start_enc = IndexOperand::Prepare(this, &temps, start, zd); IndexOperand step_enc = IndexOperand::Prepare(this, &temps, step, zd); SingleEmissionCheckScope guard(this); if (start_enc.IsImmediate()) { if (step_enc.IsImmediate()) { index(zd, start_enc.GetImm5(), step_enc.GetImm5()); } else { index(zd, start_enc.GetImm5(), step_enc.GetRegister()); } } else { if (step_enc.IsImmediate()) { index(zd, start_enc.GetRegister(), step_enc.GetImm5()); } else { index(zd, start_enc.GetRegister(), step_enc.GetRegister()); } } } void MacroAssembler::Insr(const ZRegister& zdn, IntegerOperand imm) { VIXL_ASSERT(allow_macro_instructions_); VIXL_ASSERT(imm.FitsInLane(zdn)); if (imm.IsZero()) { SingleEmissionCheckScope guard(this); insr(zdn, xzr); return; } UseScratchRegisterScope temps(this); Register scratch = temps.AcquireRegisterToHoldLane(zdn); // TODO: There are many cases where we could optimise immediates, such as by // detecting repeating patterns or FP immediates. We should optimise and // abstract this for use in other SVE mov-immediate-like macros. Mov(scratch, imm); SingleEmissionCheckScope guard(this); insr(zdn, scratch); } void MacroAssembler::Mla(const ZRegister& zd, const PRegisterM& pg, const ZRegister& za, const ZRegister& zn, const ZRegister& zm) { VIXL_ASSERT(allow_macro_instructions_); if (zd.Aliases(za)) { // zda = zda + (zn * zm) SingleEmissionCheckScope guard(this); mla(zd, pg, zn, zm); } else if (zd.Aliases(zn)) { // zdn = za + (zdn * zm) SingleEmissionCheckScope guard(this); mad(zd, pg, zm, za); } else if (zd.Aliases(zm)) { // Multiplication is commutative, so we can swap zn and zm. 
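    // For example, Mla(z0, p0.Merging(), z1, z2, z0) takes this path and
    // emits mad(z0, p0.Merging(), z2, z1), computing z1 + (z0 * z2), which is
    // the requested za + (zn * zm).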
// zdm = za + (zdm * zn) SingleEmissionCheckScope guard(this); mad(zd, pg, zn, za); } else { // zd = za + (zn * zm) ExactAssemblyScope guard(this, 2 * kInstructionSize); movprfx(zd, pg, za); mla(zd, pg, zn, zm); } } void MacroAssembler::Mls(const ZRegister& zd, const PRegisterM& pg, const ZRegister& za, const ZRegister& zn, const ZRegister& zm) { VIXL_ASSERT(allow_macro_instructions_); if (zd.Aliases(za)) { // zda = zda - (zn * zm) SingleEmissionCheckScope guard(this); mls(zd, pg, zn, zm); } else if (zd.Aliases(zn)) { // zdn = za - (zdn * zm) SingleEmissionCheckScope guard(this); msb(zd, pg, zm, za); } else if (zd.Aliases(zm)) { // Multiplication is commutative, so we can swap zn and zm. // zdm = za - (zdm * zn) SingleEmissionCheckScope guard(this); msb(zd, pg, zn, za); } else { // zd = za - (zn * zm) ExactAssemblyScope guard(this, 2 * kInstructionSize); movprfx(zd, pg, za); mls(zd, pg, zn, zm); } } void MacroAssembler::CompareHelper(Condition cond, const PRegisterWithLaneSize& pd, const PRegisterZ& pg, const ZRegister& zn, IntegerOperand imm) { UseScratchRegisterScope temps(this); ZRegister zm = temps.AcquireZ().WithLaneSize(zn.GetLaneSizeInBits()); Dup(zm, imm); SingleEmissionCheckScope guard(this); cmp(cond, pd, pg, zn, zm); } void MacroAssembler::Pfirst(const PRegisterWithLaneSize& pd, const PRegister& pg, const PRegisterWithLaneSize& pn) { VIXL_ASSERT(allow_macro_instructions_); VIXL_ASSERT(pd.IsLaneSizeB()); VIXL_ASSERT(pn.IsLaneSizeB()); if (pd.Is(pn)) { SingleEmissionCheckScope guard(this); pfirst(pd, pg, pn); } else { UseScratchRegisterScope temps(this); PRegister temp_pg = pg; if (pd.Aliases(pg)) { temp_pg = temps.AcquireP(); Mov(temp_pg.VnB(), pg.VnB()); } Mov(pd, pn); SingleEmissionCheckScope guard(this); pfirst(pd, temp_pg, pd); } } void MacroAssembler::Pnext(const PRegisterWithLaneSize& pd, const PRegister& pg, const PRegisterWithLaneSize& pn) { VIXL_ASSERT(allow_macro_instructions_); VIXL_ASSERT(AreSameFormat(pd, pn)); if (pd.Is(pn)) { SingleEmissionCheckScope guard(this); pnext(pd, pg, pn); } else { UseScratchRegisterScope temps(this); PRegister temp_pg = pg; if (pd.Aliases(pg)) { temp_pg = temps.AcquireP(); Mov(temp_pg.VnB(), pg.VnB()); } Mov(pd.VnB(), pn.VnB()); SingleEmissionCheckScope guard(this); pnext(pd, temp_pg, pd); } } void MacroAssembler::Ptrue(const PRegisterWithLaneSize& pd, SVEPredicateConstraint pattern, FlagsUpdate s) { VIXL_ASSERT(allow_macro_instructions_); switch (s) { case LeaveFlags: Ptrue(pd, pattern); return; case SetFlags: Ptrues(pd, pattern); return; } VIXL_UNREACHABLE(); } void MacroAssembler::Sub(const ZRegister& zd, IntegerOperand imm, const ZRegister& zm) { VIXL_ASSERT(allow_macro_instructions_); int imm8; int shift = -1; if (imm.TryEncodeAsShiftedUintNForLane<8, 0>(zd, &imm8, &shift) || imm.TryEncodeAsShiftedUintNForLane<8, 8>(zd, &imm8, &shift)) { MovprfxHelperScope guard(this, zd, zm); subr(zd, zd, imm8, shift); } else { UseScratchRegisterScope temps(this); ZRegister scratch = temps.AcquireZ().WithLaneSize(zm.GetLaneSizeInBits()); Dup(scratch, imm); SingleEmissionCheckScope guard(this); sub(zd, scratch, zm); } } void MacroAssembler::SVELoadBroadcastImmHelper(const ZRegister& zt, const PRegisterZ& pg, const SVEMemOperand& addr, SVELoadBroadcastFn fn, int divisor) { VIXL_ASSERT(addr.IsScalarPlusImmediate()); int64_t imm = addr.GetImmediateOffset(); if ((imm % divisor == 0) && IsUint6(imm / divisor)) { SingleEmissionCheckScope guard(this); (this->*fn)(zt, pg, addr); } else { UseScratchRegisterScope temps(this); Register scratch = 
temps.AcquireX(); CalculateSVEAddress(scratch, addr, zt); SingleEmissionCheckScope guard(this); (this->*fn)(zt, pg, SVEMemOperand(scratch)); } } void MacroAssembler::SVELoadStoreScalarImmHelper(const CPURegister& rt, const SVEMemOperand& addr, SVELoadStoreFn fn) { VIXL_ASSERT(allow_macro_instructions_); VIXL_ASSERT(rt.IsZRegister() || rt.IsPRegister()); if (addr.IsPlainScalar() || (addr.IsScalarPlusImmediate() && IsInt9(addr.GetImmediateOffset()) && addr.IsMulVl())) { SingleEmissionCheckScope guard(this); (this->*fn)(rt, addr); return; } if (addr.IsEquivalentToScalar()) { SingleEmissionCheckScope guard(this); (this->*fn)(rt, SVEMemOperand(addr.GetScalarBase())); return; } UseScratchRegisterScope temps(this); Register scratch = temps.AcquireX(); CalculateSVEAddress(scratch, addr, rt); SingleEmissionCheckScope guard(this); (this->*fn)(rt, SVEMemOperand(scratch)); } template void MacroAssembler::SVELoadStoreNTBroadcastQOHelper( const ZRegister& zt, const Tg& pg, const SVEMemOperand& addr, Tf fn, int imm_bits, int shift_amount, SVEOffsetModifier supported_modifier, int vl_divisor_log2) { VIXL_ASSERT(allow_macro_instructions_); int imm_divisor = 1 << shift_amount; if (addr.IsPlainScalar() || (addr.IsScalarPlusImmediate() && IsIntN(imm_bits, addr.GetImmediateOffset() / imm_divisor) && ((addr.GetImmediateOffset() % imm_divisor) == 0) && (addr.GetOffsetModifier() == supported_modifier))) { SingleEmissionCheckScope guard(this); (this->*fn)(zt, pg, addr); return; } if (addr.IsScalarPlusScalar() && !addr.GetScalarOffset().IsZero() && addr.IsEquivalentToLSL(zt.GetLaneSizeInBytesLog2())) { SingleEmissionCheckScope guard(this); (this->*fn)(zt, pg, addr); return; } if (addr.IsEquivalentToScalar()) { SingleEmissionCheckScope guard(this); (this->*fn)(zt, pg, SVEMemOperand(addr.GetScalarBase())); return; } if (addr.IsMulVl() && (supported_modifier != SVE_MUL_VL) && (vl_divisor_log2 == -1)) { // We don't handle [x0, #imm, MUL VL] if the in-memory access size is not VL // dependent. VIXL_UNIMPLEMENTED(); } UseScratchRegisterScope temps(this); Register scratch = temps.AcquireX(); CalculateSVEAddress(scratch, addr, vl_divisor_log2); SingleEmissionCheckScope guard(this); (this->*fn)(zt, pg, SVEMemOperand(scratch)); } template void MacroAssembler::SVELoadStore1Helper(int msize_in_bytes_log2, const ZRegister& zt, const Tg& pg, const SVEMemOperand& addr, Tf fn) { if (addr.IsPlainScalar() || (addr.IsScalarPlusScalar() && !addr.GetScalarOffset().IsZero() && addr.IsEquivalentToLSL(msize_in_bytes_log2)) || (addr.IsScalarPlusImmediate() && IsInt4(addr.GetImmediateOffset()) && addr.IsMulVl())) { SingleEmissionCheckScope guard(this); (this->*fn)(zt, pg, addr); return; } if (addr.IsEquivalentToScalar()) { SingleEmissionCheckScope guard(this); (this->*fn)(zt, pg, SVEMemOperand(addr.GetScalarBase())); return; } if (addr.IsVectorPlusImmediate()) { uint64_t offset = addr.GetImmediateOffset(); if (IsMultiple(offset, (1 << msize_in_bytes_log2)) && IsUint5(offset >> msize_in_bytes_log2)) { SingleEmissionCheckScope guard(this); (this->*fn)(zt, pg, addr); return; } } if (addr.IsScalarPlusVector()) { VIXL_ASSERT(addr.IsScatterGather()); SingleEmissionCheckScope guard(this); (this->*fn)(zt, pg, addr); return; } UseScratchRegisterScope temps(this); if (addr.IsScatterGather()) { // In scatter-gather modes, zt and zn/zm have the same lane size. 
However, // for 32-bit accesses, the result of each lane's address calculation still // requires 64 bits; we can't naively use `Adr` for the address calculation // because it would truncate each address to 32 bits. if (addr.IsVectorPlusImmediate()) { // Synthesise the immediate in an X register, then use a // scalar-plus-vector access with the original vector. Register scratch = temps.AcquireX(); Mov(scratch, addr.GetImmediateOffset()); SingleEmissionCheckScope guard(this); SVEOffsetModifier om = zt.IsLaneSizeS() ? SVE_UXTW : NO_SVE_OFFSET_MODIFIER; (this->*fn)(zt, pg, SVEMemOperand(scratch, addr.GetVectorBase(), om)); return; } VIXL_UNIMPLEMENTED(); } else { Register scratch = temps.AcquireX(); // TODO: If we have an immediate offset that is a multiple of // msize_in_bytes, we can use Rdvl/Rdpl and a scalar-plus-scalar form to // save an instruction. int vl_divisor_log2 = zt.GetLaneSizeInBytesLog2() - msize_in_bytes_log2; CalculateSVEAddress(scratch, addr, vl_divisor_log2); SingleEmissionCheckScope guard(this); (this->*fn)(zt, pg, SVEMemOperand(scratch)); } } template void MacroAssembler::SVELoadFFHelper(int msize_in_bytes_log2, const ZRegister& zt, const PRegisterZ& pg, const SVEMemOperand& addr, Tf fn) { if (addr.IsScatterGather()) { // Scatter-gather first-fault loads share encodings with normal loads. SVELoadStore1Helper(msize_in_bytes_log2, zt, pg, addr, fn); return; } // Contiguous first-faulting loads have no scalar-plus-immediate form at all, // so we don't do immediate synthesis. // We cannot currently distinguish "[x0]" from "[x0, #0]", and this // is not "scalar-plus-scalar", so we have to permit `IsPlainScalar()` here. if (addr.IsPlainScalar() || (addr.IsScalarPlusScalar() && addr.IsEquivalentToLSL(msize_in_bytes_log2))) { SingleEmissionCheckScope guard(this); (this->*fn)(zt, pg, addr); return; } VIXL_UNIMPLEMENTED(); } void MacroAssembler::Ld1b(const ZRegister& zt, const PRegisterZ& pg, const SVEMemOperand& addr) { VIXL_ASSERT(allow_macro_instructions_); SVELoadStore1Helper(kBRegSizeInBytesLog2, zt, pg, addr, static_cast(&Assembler::ld1b)); } void MacroAssembler::Ld1h(const ZRegister& zt, const PRegisterZ& pg, const SVEMemOperand& addr) { VIXL_ASSERT(allow_macro_instructions_); SVELoadStore1Helper(kHRegSizeInBytesLog2, zt, pg, addr, static_cast(&Assembler::ld1h)); } void MacroAssembler::Ld1w(const ZRegister& zt, const PRegisterZ& pg, const SVEMemOperand& addr) { VIXL_ASSERT(allow_macro_instructions_); SVELoadStore1Helper(kWRegSizeInBytesLog2, zt, pg, addr, static_cast(&Assembler::ld1w)); } void MacroAssembler::Ld1d(const ZRegister& zt, const PRegisterZ& pg, const SVEMemOperand& addr) { VIXL_ASSERT(allow_macro_instructions_); SVELoadStore1Helper(kDRegSizeInBytesLog2, zt, pg, addr, static_cast(&Assembler::ld1d)); } void MacroAssembler::Ld1sb(const ZRegister& zt, const PRegisterZ& pg, const SVEMemOperand& addr) { VIXL_ASSERT(allow_macro_instructions_); SVELoadStore1Helper(kBRegSizeInBytesLog2, zt, pg, addr, static_cast(&Assembler::ld1sb)); } void MacroAssembler::Ld1sh(const ZRegister& zt, const PRegisterZ& pg, const SVEMemOperand& addr) { VIXL_ASSERT(allow_macro_instructions_); SVELoadStore1Helper(kHRegSizeInBytesLog2, zt, pg, addr, static_cast(&Assembler::ld1sh)); } void MacroAssembler::Ld1sw(const ZRegister& zt, const PRegisterZ& pg, const SVEMemOperand& addr) { VIXL_ASSERT(allow_macro_instructions_); SVELoadStore1Helper(kSRegSizeInBytesLog2, zt, pg, addr, static_cast(&Assembler::ld1sw)); } void MacroAssembler::St1b(const ZRegister& zt, const PRegister& pg, const 
SVEMemOperand& addr) { VIXL_ASSERT(allow_macro_instructions_); SVELoadStore1Helper(kBRegSizeInBytesLog2, zt, pg, addr, static_cast(&Assembler::st1b)); } void MacroAssembler::St1h(const ZRegister& zt, const PRegister& pg, const SVEMemOperand& addr) { VIXL_ASSERT(allow_macro_instructions_); SVELoadStore1Helper(kHRegSizeInBytesLog2, zt, pg, addr, static_cast(&Assembler::st1h)); } void MacroAssembler::St1w(const ZRegister& zt, const PRegister& pg, const SVEMemOperand& addr) { VIXL_ASSERT(allow_macro_instructions_); SVELoadStore1Helper(kSRegSizeInBytesLog2, zt, pg, addr, static_cast(&Assembler::st1w)); } void MacroAssembler::St1d(const ZRegister& zt, const PRegister& pg, const SVEMemOperand& addr) { VIXL_ASSERT(allow_macro_instructions_); SVELoadStore1Helper(kDRegSizeInBytesLog2, zt, pg, addr, static_cast(&Assembler::st1d)); } void MacroAssembler::Ldff1b(const ZRegister& zt, const PRegisterZ& pg, const SVEMemOperand& addr) { VIXL_ASSERT(allow_macro_instructions_); SVELoadFFHelper(kBRegSizeInBytesLog2, zt, pg, addr, static_cast(&Assembler::ldff1b)); } void MacroAssembler::Ldff1h(const ZRegister& zt, const PRegisterZ& pg, const SVEMemOperand& addr) { VIXL_ASSERT(allow_macro_instructions_); SVELoadFFHelper(kHRegSizeInBytesLog2, zt, pg, addr, static_cast(&Assembler::ldff1h)); } void MacroAssembler::Ldff1w(const ZRegister& zt, const PRegisterZ& pg, const SVEMemOperand& addr) { VIXL_ASSERT(allow_macro_instructions_); SVELoadFFHelper(kSRegSizeInBytesLog2, zt, pg, addr, static_cast(&Assembler::ldff1w)); } void MacroAssembler::Ldff1d(const ZRegister& zt, const PRegisterZ& pg, const SVEMemOperand& addr) { VIXL_ASSERT(allow_macro_instructions_); SVELoadFFHelper(kDRegSizeInBytesLog2, zt, pg, addr, static_cast(&Assembler::ldff1d)); } void MacroAssembler::Ldff1sb(const ZRegister& zt, const PRegisterZ& pg, const SVEMemOperand& addr) { VIXL_ASSERT(allow_macro_instructions_); SVELoadFFHelper(kBRegSizeInBytesLog2, zt, pg, addr, static_cast(&Assembler::ldff1sb)); } void MacroAssembler::Ldff1sh(const ZRegister& zt, const PRegisterZ& pg, const SVEMemOperand& addr) { VIXL_ASSERT(allow_macro_instructions_); SVELoadFFHelper(kHRegSizeInBytesLog2, zt, pg, addr, static_cast(&Assembler::ldff1sh)); } void MacroAssembler::Ldff1sw(const ZRegister& zt, const PRegisterZ& pg, const SVEMemOperand& addr) { VIXL_ASSERT(allow_macro_instructions_); SVELoadFFHelper(kSRegSizeInBytesLog2, zt, pg, addr, static_cast(&Assembler::ldff1sw)); } #define VIXL_SVE_LD1R_LIST(V) \ V(qb, 4) V(qh, 4) V(qw, 4) V(qd, 4) V(ob, 5) V(oh, 5) V(ow, 5) V(od, 5) #define VIXL_DEFINE_MASM_FUNC(SZ, SH) \ void MacroAssembler::Ld1r##SZ(const ZRegister& zt, \ const PRegisterZ& pg, \ const SVEMemOperand& addr) { \ VIXL_ASSERT(allow_macro_instructions_); \ SVELoadStoreNTBroadcastQOHelper(zt, \ pg, \ addr, \ &MacroAssembler::ld1r##SZ, \ 4, \ SH, \ NO_SVE_OFFSET_MODIFIER, \ -1); \ } VIXL_SVE_LD1R_LIST(VIXL_DEFINE_MASM_FUNC) #undef VIXL_DEFINE_MASM_FUNC #undef VIXL_SVE_LD1R_LIST void MacroAssembler::Ldnt1b(const ZRegister& zt, const PRegisterZ& pg, const SVEMemOperand& addr) { VIXL_ASSERT(allow_macro_instructions_); if (addr.IsVectorPlusScalar()) { SingleEmissionCheckScope guard(this); ldnt1b(zt, pg, addr); } else { SVELoadStoreNTBroadcastQOHelper(zt, pg, addr, &MacroAssembler::ldnt1b, 4, 0, SVE_MUL_VL); } } void MacroAssembler::Ldnt1d(const ZRegister& zt, const PRegisterZ& pg, const SVEMemOperand& addr) { VIXL_ASSERT(allow_macro_instructions_); if (addr.IsVectorPlusScalar()) { SingleEmissionCheckScope guard(this); ldnt1d(zt, pg, addr); } else { 
SVELoadStoreNTBroadcastQOHelper(zt, pg, addr, &MacroAssembler::ldnt1d, 4, 0, SVE_MUL_VL); } } void MacroAssembler::Ldnt1h(const ZRegister& zt, const PRegisterZ& pg, const SVEMemOperand& addr) { VIXL_ASSERT(allow_macro_instructions_); if (addr.IsVectorPlusScalar()) { SingleEmissionCheckScope guard(this); ldnt1h(zt, pg, addr); } else { SVELoadStoreNTBroadcastQOHelper(zt, pg, addr, &MacroAssembler::ldnt1h, 4, 0, SVE_MUL_VL); } } void MacroAssembler::Ldnt1w(const ZRegister& zt, const PRegisterZ& pg, const SVEMemOperand& addr) { VIXL_ASSERT(allow_macro_instructions_); if (addr.IsVectorPlusScalar()) { SingleEmissionCheckScope guard(this); ldnt1w(zt, pg, addr); } else { SVELoadStoreNTBroadcastQOHelper(zt, pg, addr, &MacroAssembler::ldnt1w, 4, 0, SVE_MUL_VL); } } void MacroAssembler::Stnt1b(const ZRegister& zt, const PRegister& pg, const SVEMemOperand& addr) { VIXL_ASSERT(allow_macro_instructions_); if (addr.IsVectorPlusScalar()) { SingleEmissionCheckScope guard(this); stnt1b(zt, pg, addr); } else { SVELoadStoreNTBroadcastQOHelper(zt, pg, addr, &MacroAssembler::stnt1b, 4, 0, SVE_MUL_VL); } } void MacroAssembler::Stnt1d(const ZRegister& zt, const PRegister& pg, const SVEMemOperand& addr) { VIXL_ASSERT(allow_macro_instructions_); if (addr.IsVectorPlusScalar()) { SingleEmissionCheckScope guard(this); stnt1d(zt, pg, addr); } else { SVELoadStoreNTBroadcastQOHelper(zt, pg, addr, &MacroAssembler::stnt1d, 4, 0, SVE_MUL_VL); } } void MacroAssembler::Stnt1h(const ZRegister& zt, const PRegister& pg, const SVEMemOperand& addr) { VIXL_ASSERT(allow_macro_instructions_); if (addr.IsVectorPlusScalar()) { SingleEmissionCheckScope guard(this); stnt1h(zt, pg, addr); } else { SVELoadStoreNTBroadcastQOHelper(zt, pg, addr, &MacroAssembler::stnt1h, 4, 0, SVE_MUL_VL); } } void MacroAssembler::Stnt1w(const ZRegister& zt, const PRegister& pg, const SVEMemOperand& addr) { VIXL_ASSERT(allow_macro_instructions_); if (addr.IsVectorPlusScalar()) { SingleEmissionCheckScope guard(this); stnt1w(zt, pg, addr); } else { SVELoadStoreNTBroadcastQOHelper(zt, pg, addr, &MacroAssembler::stnt1w, 4, 0, SVE_MUL_VL); } } void MacroAssembler::SVEDotIndexHelper(ZZZImmFn fn, const ZRegister& zd, const ZRegister& za, const ZRegister& zn, const ZRegister& zm, int index) { if (zd.Aliases(za)) { // zda = zda + (zn . zm) SingleEmissionCheckScope guard(this); (this->*fn)(zd, zn, zm, index); } else if (zd.Aliases(zn) || zd.Aliases(zm)) { // zdn = za + (zdn . zm[index]) // zdm = za + (zn . zdm[index]) // zdnm = za + (zdnm . zdnm[index]) UseScratchRegisterScope temps(this); ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zd); { MovprfxHelperScope guard(this, scratch, za); (this->*fn)(scratch, zn, zm, index); } Mov(zd, scratch); } else { // zd = za + (zn . zm) MovprfxHelperScope guard(this, zd, za); (this->*fn)(zd, zn, zm, index); } } void MacroAssembler::FourRegDestructiveHelper(Int3ArithFn fn, const ZRegister& zd, const ZRegister& za, const ZRegister& zn, const ZRegister& zm) { if (!zd.Aliases(za) && (zd.Aliases(zn) || zd.Aliases(zm))) { // zd = za . zd . zm // zd = za . zn . zd // zd = za . zd . 
zd UseScratchRegisterScope temps(this); ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zd); { MovprfxHelperScope guard(this, scratch, za); (this->*fn)(scratch, zn, zm); } Mov(zd, scratch); } else { MovprfxHelperScope guard(this, zd, za); (this->*fn)(zd, zn, zm); } } void MacroAssembler::FourRegDestructiveHelper(Int4ArithFn fn, const ZRegister& zd, const ZRegister& za, const ZRegister& zn, const ZRegister& zm) { if (!zd.Aliases(za) && (zd.Aliases(zn) || zd.Aliases(zm))) { // zd = za . zd . zm // zd = za . zn . zd // zd = za . zd . zd UseScratchRegisterScope temps(this); ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zd); { MovprfxHelperScope guard(this, scratch, za); (this->*fn)(scratch, scratch, zn, zm); } Mov(zd, scratch); } else { MovprfxHelperScope guard(this, zd, za); (this->*fn)(zd, zd, zn, zm); } } void MacroAssembler::FourRegOneImmDestructiveHelper(ZZZImmFn fn, const ZRegister& zd, const ZRegister& za, const ZRegister& zn, const ZRegister& zm, int imm) { if (!zd.Aliases(za) && (zd.Aliases(zn) || zd.Aliases(zm))) { // zd = za . zd . zm[i] // zd = za . zn . zd[i] // zd = za . zd . zd[i] UseScratchRegisterScope temps(this); ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zd); { MovprfxHelperScope guard(this, scratch, za); (this->*fn)(scratch, zn, zm, imm); } Mov(zd, scratch); } else { // zd = za . zn . zm[i] MovprfxHelperScope guard(this, zd, za); (this->*fn)(zd, zn, zm, imm); } } void MacroAssembler::AbsoluteDifferenceAccumulate(Int3ArithFn fn, const ZRegister& zd, const ZRegister& za, const ZRegister& zn, const ZRegister& zm) { if (zn.Aliases(zm)) { // If zn == zm, the difference is zero. if (!zd.Aliases(za)) { Mov(zd, za); } } else if (zd.Aliases(za)) { SingleEmissionCheckScope guard(this); (this->*fn)(zd, zn, zm); } else if (zd.Aliases(zn)) { UseScratchRegisterScope temps(this); ZRegister ztmp = temps.AcquireZ().WithLaneSize(zn.GetLaneSizeInBits()); Mov(ztmp, zn); MovprfxHelperScope guard(this, zd, za); (this->*fn)(zd, ztmp, zm); } else if (zd.Aliases(zm)) { UseScratchRegisterScope temps(this); ZRegister ztmp = temps.AcquireZ().WithLaneSize(zn.GetLaneSizeInBits()); Mov(ztmp, zm); MovprfxHelperScope guard(this, zd, za); (this->*fn)(zd, zn, ztmp); } else { MovprfxHelperScope guard(this, zd, za); (this->*fn)(zd, zn, zm); } } #define VIXL_SVE_4REG_LIST(V) \ V(Saba, saba, AbsoluteDifferenceAccumulate) \ V(Uaba, uaba, AbsoluteDifferenceAccumulate) \ V(Sabalb, sabalb, AbsoluteDifferenceAccumulate) \ V(Sabalt, sabalt, AbsoluteDifferenceAccumulate) \ V(Uabalb, uabalb, AbsoluteDifferenceAccumulate) \ V(Uabalt, uabalt, AbsoluteDifferenceAccumulate) \ V(Sdot, sdot, FourRegDestructiveHelper) \ V(Udot, udot, FourRegDestructiveHelper) \ V(Adclb, adclb, FourRegDestructiveHelper) \ V(Adclt, adclt, FourRegDestructiveHelper) \ V(Sbclb, sbclb, FourRegDestructiveHelper) \ V(Sbclt, sbclt, FourRegDestructiveHelper) \ V(Smlalb, smlalb, FourRegDestructiveHelper) \ V(Smlalt, smlalt, FourRegDestructiveHelper) \ V(Smlslb, smlslb, FourRegDestructiveHelper) \ V(Smlslt, smlslt, FourRegDestructiveHelper) \ V(Umlalb, umlalb, FourRegDestructiveHelper) \ V(Umlalt, umlalt, FourRegDestructiveHelper) \ V(Umlslb, umlslb, FourRegDestructiveHelper) \ V(Umlslt, umlslt, FourRegDestructiveHelper) \ V(Bcax, bcax, FourRegDestructiveHelper) \ V(Bsl, bsl, FourRegDestructiveHelper) \ V(Bsl1n, bsl1n, FourRegDestructiveHelper) \ V(Bsl2n, bsl2n, FourRegDestructiveHelper) \ V(Eor3, eor3, FourRegDestructiveHelper) \ V(Nbsl, nbsl, FourRegDestructiveHelper) \ V(Fmlalb, fmlalb, FourRegDestructiveHelper) \ 
V(Fmlalt, fmlalt, FourRegDestructiveHelper) \ V(Fmlslb, fmlslb, FourRegDestructiveHelper) \ V(Fmlslt, fmlslt, FourRegDestructiveHelper) \ V(Sqdmlalb, sqdmlalb, FourRegDestructiveHelper) \ V(Sqdmlalbt, sqdmlalbt, FourRegDestructiveHelper) \ V(Sqdmlalt, sqdmlalt, FourRegDestructiveHelper) \ V(Sqdmlslb, sqdmlslb, FourRegDestructiveHelper) \ V(Sqdmlslbt, sqdmlslbt, FourRegDestructiveHelper) \ V(Sqdmlslt, sqdmlslt, FourRegDestructiveHelper) \ V(Sqrdmlah, sqrdmlah, FourRegDestructiveHelper) \ V(Sqrdmlsh, sqrdmlsh, FourRegDestructiveHelper) \ V(Fmmla, fmmla, FourRegDestructiveHelper) \ V(Smmla, smmla, FourRegDestructiveHelper) \ V(Ummla, ummla, FourRegDestructiveHelper) \ V(Usmmla, usmmla, FourRegDestructiveHelper) \ V(Usdot, usdot, FourRegDestructiveHelper) #define VIXL_DEFINE_MASM_FUNC(MASMFN, ASMFN, HELPER) \ void MacroAssembler::MASMFN(const ZRegister& zd, \ const ZRegister& za, \ const ZRegister& zn, \ const ZRegister& zm) { \ VIXL_ASSERT(allow_macro_instructions_); \ HELPER(&Assembler::ASMFN, zd, za, zn, zm); \ } VIXL_SVE_4REG_LIST(VIXL_DEFINE_MASM_FUNC) #undef VIXL_DEFINE_MASM_FUNC #define VIXL_SVE_4REG_1IMM_LIST(V) \ V(Fmla, fmla, FourRegOneImmDestructiveHelper) \ V(Fmls, fmls, FourRegOneImmDestructiveHelper) \ V(Fmlalb, fmlalb, FourRegOneImmDestructiveHelper) \ V(Fmlalt, fmlalt, FourRegOneImmDestructiveHelper) \ V(Fmlslb, fmlslb, FourRegOneImmDestructiveHelper) \ V(Fmlslt, fmlslt, FourRegOneImmDestructiveHelper) \ V(Mla, mla, FourRegOneImmDestructiveHelper) \ V(Mls, mls, FourRegOneImmDestructiveHelper) \ V(Smlalb, smlalb, FourRegOneImmDestructiveHelper) \ V(Smlalt, smlalt, FourRegOneImmDestructiveHelper) \ V(Smlslb, smlslb, FourRegOneImmDestructiveHelper) \ V(Smlslt, smlslt, FourRegOneImmDestructiveHelper) \ V(Sqdmlalb, sqdmlalb, FourRegOneImmDestructiveHelper) \ V(Sqdmlalt, sqdmlalt, FourRegOneImmDestructiveHelper) \ V(Sqdmlslb, sqdmlslb, FourRegOneImmDestructiveHelper) \ V(Sqdmlslt, sqdmlslt, FourRegOneImmDestructiveHelper) \ V(Sqrdmlah, sqrdmlah, FourRegOneImmDestructiveHelper) \ V(Sqrdmlsh, sqrdmlsh, FourRegOneImmDestructiveHelper) \ V(Umlalb, umlalb, FourRegOneImmDestructiveHelper) \ V(Umlalt, umlalt, FourRegOneImmDestructiveHelper) \ V(Umlslb, umlslb, FourRegOneImmDestructiveHelper) \ V(Umlslt, umlslt, FourRegOneImmDestructiveHelper) #define VIXL_DEFINE_MASM_FUNC(MASMFN, ASMFN, HELPER) \ void MacroAssembler::MASMFN(const ZRegister& zd, \ const ZRegister& za, \ const ZRegister& zn, \ const ZRegister& zm, \ int imm) { \ VIXL_ASSERT(allow_macro_instructions_); \ HELPER(&Assembler::ASMFN, zd, za, zn, zm, imm); \ } VIXL_SVE_4REG_1IMM_LIST(VIXL_DEFINE_MASM_FUNC) #undef VIXL_DEFINE_MASM_FUNC void MacroAssembler::Sdot(const ZRegister& zd, const ZRegister& za, const ZRegister& zn, const ZRegister& zm, int index) { VIXL_ASSERT(allow_macro_instructions_); SVEDotIndexHelper(&Assembler::sdot, zd, za, zn, zm, index); } void MacroAssembler::Udot(const ZRegister& zd, const ZRegister& za, const ZRegister& zn, const ZRegister& zm, int index) { VIXL_ASSERT(allow_macro_instructions_); SVEDotIndexHelper(&Assembler::udot, zd, za, zn, zm, index); } void MacroAssembler::Sudot(const ZRegister& zd, const ZRegister& za, const ZRegister& zn, const ZRegister& zm, int index) { VIXL_ASSERT(allow_macro_instructions_); SVEDotIndexHelper(&Assembler::sudot, zd, za, zn, zm, index); } void MacroAssembler::Usdot(const ZRegister& zd, const ZRegister& za, const ZRegister& zn, const ZRegister& zm, int index) { VIXL_ASSERT(allow_macro_instructions_); SVEDotIndexHelper(&Assembler::usdot, zd, za, zn, zm, index); } void 
MacroAssembler::Cdot(const ZRegister& zd, const ZRegister& za, const ZRegister& zn, const ZRegister& zm, int index, int rot) { // This doesn't handle zm when it's out of the range that can be encoded in // instruction. The range depends on element size: z0-z7 for B, z0-15 for H. if ((zd.Aliases(zn) || zd.Aliases(zm)) && !zd.Aliases(za)) { UseScratchRegisterScope temps(this); ZRegister ztmp = temps.AcquireZ().WithSameLaneSizeAs(zd); { MovprfxHelperScope guard(this, ztmp, za); cdot(ztmp, zn, zm, index, rot); } Mov(zd, ztmp); } else { MovprfxHelperScope guard(this, zd, za); cdot(zd, zn, zm, index, rot); } } void MacroAssembler::Cdot(const ZRegister& zd, const ZRegister& za, const ZRegister& zn, const ZRegister& zm, int rot) { if ((zd.Aliases(zn) || zd.Aliases(zm)) && !zd.Aliases(za)) { UseScratchRegisterScope temps(this); VIXL_ASSERT(AreSameLaneSize(zn, zm)); ZRegister ztmp = temps.AcquireZ().WithSameLaneSizeAs(zn); Mov(ztmp, zd.Aliases(zn) ? zn : zm); MovprfxHelperScope guard(this, zd, za); cdot(zd, (zd.Aliases(zn) ? ztmp : zn), (zd.Aliases(zm) ? ztmp : zm), rot); } else { MovprfxHelperScope guard(this, zd, za); cdot(zd, zn, zm, rot); } } void MacroAssembler::FPMulAddHelper(const ZRegister& zd, const PRegisterM& pg, const ZRegister& za, const ZRegister& zn, const ZRegister& zm, SVEMulAddPredicatedZdaFn fn_zda, SVEMulAddPredicatedZdnFn fn_zdn, FPMacroNaNPropagationOption nan_option) { ResolveFPNaNPropagationOption(&nan_option); if (zd.Aliases(za)) { // zda = (-)zda + ((-)zn * zm) for fmla, fmls, fnmla and fnmls. SingleEmissionCheckScope guard(this); (this->*fn_zda)(zd, pg, zn, zm); } else if (zd.Aliases(zn)) { // zdn = (-)za + ((-)zdn * zm) for fmad, fmsb, fnmad and fnmsb. SingleEmissionCheckScope guard(this); (this->*fn_zdn)(zd, pg, zm, za); } else if (zd.Aliases(zm)) { switch (nan_option) { case FastNaNPropagation: { // We treat multiplication as commutative in the fast mode, so we can // swap zn and zm. // zdm = (-)za + ((-)zdm * zn) for fmad, fmsb, fnmad and fnmsb. SingleEmissionCheckScope guard(this); (this->*fn_zdn)(zd, pg, zn, za); return; } case StrictNaNPropagation: { UseScratchRegisterScope temps(this); // Use a scratch register to keep the argument order exactly as // specified. ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zn); { MovprfxHelperScope guard(this, scratch, pg, za); // scratch = (-)za + ((-)zn * zm) (this->*fn_zda)(scratch, pg, zn, zm); } Mov(zd, scratch); return; } case NoFPMacroNaNPropagationSelected: VIXL_UNREACHABLE(); return; } } else { // zd = (-)za + ((-)zn * zm) for fmla, fmls, fnmla and fnmls. 
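    // In this case the expansion is roughly a predicated movprfx of za into
    // zd followed by the zda-form instruction, e.g. for Fmla:
    // "movprfx zd, pg/m, za" then "fmla zd, pg/m, zn, zm".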
MovprfxHelperScope guard(this, zd, pg, za); (this->*fn_zda)(zd, pg, zn, zm); } } void MacroAssembler::Fmla(const ZRegister& zd, const PRegisterM& pg, const ZRegister& za, const ZRegister& zn, const ZRegister& zm, FPMacroNaNPropagationOption nan_option) { VIXL_ASSERT(allow_macro_instructions_); FPMulAddHelper(zd, pg, za, zn, zm, &Assembler::fmla, &Assembler::fmad, nan_option); } void MacroAssembler::Fmls(const ZRegister& zd, const PRegisterM& pg, const ZRegister& za, const ZRegister& zn, const ZRegister& zm, FPMacroNaNPropagationOption nan_option) { VIXL_ASSERT(allow_macro_instructions_); FPMulAddHelper(zd, pg, za, zn, zm, &Assembler::fmls, &Assembler::fmsb, nan_option); } void MacroAssembler::Fnmla(const ZRegister& zd, const PRegisterM& pg, const ZRegister& za, const ZRegister& zn, const ZRegister& zm, FPMacroNaNPropagationOption nan_option) { VIXL_ASSERT(allow_macro_instructions_); FPMulAddHelper(zd, pg, za, zn, zm, &Assembler::fnmla, &Assembler::fnmad, nan_option); } void MacroAssembler::Fnmls(const ZRegister& zd, const PRegisterM& pg, const ZRegister& za, const ZRegister& zn, const ZRegister& zm, FPMacroNaNPropagationOption nan_option) { VIXL_ASSERT(allow_macro_instructions_); FPMulAddHelper(zd, pg, za, zn, zm, &Assembler::fnmls, &Assembler::fnmsb, nan_option); } void MacroAssembler::Ftmad(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm, int imm3) { VIXL_ASSERT(allow_macro_instructions_); if (zd.Aliases(zm) && !zd.Aliases(zn)) { UseScratchRegisterScope temps(this); ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zm); Mov(scratch, zm); MovprfxHelperScope guard(this, zd, zn); ftmad(zd, zd, scratch, imm3); } else { MovprfxHelperScope guard(this, zd, zn); ftmad(zd, zd, zm, imm3); } } void MacroAssembler::Fcadd(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn, const ZRegister& zm, int rot) { VIXL_ASSERT(allow_macro_instructions_); if (zd.Aliases(zm) && !zd.Aliases(zn)) { UseScratchRegisterScope temps(this); ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zd); { MovprfxHelperScope guard(this, scratch, pg, zn); fcadd(scratch, pg, scratch, zm, rot); } Mov(zd, scratch); } else { MovprfxHelperScope guard(this, zd, pg, zn); fcadd(zd, pg, zd, zm, rot); } } void MacroAssembler::Fcmla(const ZRegister& zd, const PRegisterM& pg, const ZRegister& za, const ZRegister& zn, const ZRegister& zm, int rot) { VIXL_ASSERT(allow_macro_instructions_); if ((zd.Aliases(zn) || zd.Aliases(zm)) && !zd.Aliases(za)) { UseScratchRegisterScope temps(this); ZRegister ztmp = temps.AcquireZ().WithSameLaneSizeAs(zd); { MovprfxHelperScope guard(this, ztmp, za); fcmla(ztmp, pg, zn, zm, rot); } Mov(zd, pg, ztmp); } else { MovprfxHelperScope guard(this, zd, pg, za); fcmla(zd, pg, zn, zm, rot); } } void MacroAssembler::Splice(const ZRegister& zd, const PRegister& pg, const ZRegister& zn, const ZRegister& zm) { VIXL_ASSERT(allow_macro_instructions_); if (CPUHas(CPUFeatures::kSVE2) && AreConsecutive(zn, zm) && !zd.Aliases(zn)) { SingleEmissionCheckScope guard(this); splice(zd, pg, zn, zm); } else if (zd.Aliases(zm) && !zd.Aliases(zn)) { UseScratchRegisterScope temps(this); ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zd); { MovprfxHelperScope guard(this, scratch, zn); splice(scratch, pg, scratch, zm); } Mov(zd, scratch); } else { MovprfxHelperScope guard(this, zd, zn); splice(zd, pg, zd, zm); } } void MacroAssembler::Clasta(const ZRegister& zd, const PRegister& pg, const ZRegister& zn, const ZRegister& zm) { VIXL_ASSERT(allow_macro_instructions_); if (zd.Aliases(zm) && 
!zd.Aliases(zn)) { UseScratchRegisterScope temps(this); ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zd); { MovprfxHelperScope guard(this, scratch, zn); clasta(scratch, pg, scratch, zm); } Mov(zd, scratch); } else { MovprfxHelperScope guard(this, zd, zn); clasta(zd, pg, zd, zm); } } void MacroAssembler::Clastb(const ZRegister& zd, const PRegister& pg, const ZRegister& zn, const ZRegister& zm) { VIXL_ASSERT(allow_macro_instructions_); if (zd.Aliases(zm) && !zd.Aliases(zn)) { UseScratchRegisterScope temps(this); ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zd); { MovprfxHelperScope guard(this, scratch, zn); clastb(scratch, pg, scratch, zm); } Mov(zd, scratch); } else { MovprfxHelperScope guard(this, zd, zn); clastb(zd, pg, zd, zm); } } void MacroAssembler::ShiftRightAccumulate(IntArithImmFn fn, const ZRegister& zd, const ZRegister& za, const ZRegister& zn, int shift) { VIXL_ASSERT(allow_macro_instructions_); if (!zd.Aliases(za) && zd.Aliases(zn)) { UseScratchRegisterScope temps(this); ZRegister ztmp = temps.AcquireZ().WithSameLaneSizeAs(zn); Mov(ztmp, zn); { MovprfxHelperScope guard(this, zd, za); (this->*fn)(zd, ztmp, shift); } } else { MovprfxHelperScope guard(this, zd, za); (this->*fn)(zd, zn, shift); } } void MacroAssembler::Srsra(const ZRegister& zd, const ZRegister& za, const ZRegister& zn, int shift) { ShiftRightAccumulate(&Assembler::srsra, zd, za, zn, shift); } void MacroAssembler::Ssra(const ZRegister& zd, const ZRegister& za, const ZRegister& zn, int shift) { ShiftRightAccumulate(&Assembler::ssra, zd, za, zn, shift); } void MacroAssembler::Ursra(const ZRegister& zd, const ZRegister& za, const ZRegister& zn, int shift) { ShiftRightAccumulate(&Assembler::ursra, zd, za, zn, shift); } void MacroAssembler::Usra(const ZRegister& zd, const ZRegister& za, const ZRegister& zn, int shift) { ShiftRightAccumulate(&Assembler::usra, zd, za, zn, shift); } void MacroAssembler::ComplexAddition(ZZZImmFn fn, const ZRegister& zd, const ZRegister& zn, const ZRegister& zm, int rot) { VIXL_ASSERT(allow_macro_instructions_); if (!zd.Aliases(zn) && zd.Aliases(zm)) { UseScratchRegisterScope temps(this); ZRegister ztmp = temps.AcquireZ().WithSameLaneSizeAs(zm); Mov(ztmp, zm); { MovprfxHelperScope guard(this, zd, zn); (this->*fn)(zd, zd, ztmp, rot); } } else { MovprfxHelperScope guard(this, zd, zn); (this->*fn)(zd, zd, zm, rot); } } void MacroAssembler::Cadd(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm, int rot) { ComplexAddition(&Assembler::cadd, zd, zn, zm, rot); } void MacroAssembler::Sqcadd(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm, int rot) { ComplexAddition(&Assembler::sqcadd, zd, zn, zm, rot); } } // namespace aarch64 } // namespace vixl
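
// Illustrative sketch (an addition for exposition, not VIXL code, and not
// compiled): the alias-resolution pattern that recurs throughout this file.
// SVE destructive forms overwrite one source operand, so when the requested
// destination aliases an input that must still be read, the macro assembler
// either builds the result in a scratch register or prefixes the operation
// with movprfx. Plain integers stand in for Z register numbers below; the
// printed text is only a rough picture of the emitted sequence.
#if 0
#include <cstdio>

static void PlanDestructiveOp(int zd, int za, int zn, int zm) {
  const int kScratch = 31;  // Placeholder for an acquired scratch register.
  if ((zd != za) && ((zd == zn) || (zd == zm))) {
    // zd would be clobbered before it is read: build the result elsewhere,
    // then copy it into place (cf. FourRegDestructiveHelper).
    std::printf("movprfx z%d, z%d\n", kScratch, za);
    std::printf("<op>    z%d, z%d, z%d\n", kScratch, zn, zm);
    std::printf("mov     z%d, z%d\n", zd, kScratch);
  } else {
    // Safe to accumulate in place; movprfx is only needed if zd != za.
    if (zd != za) std::printf("movprfx z%d, z%d\n", zd, za);
    std::printf("<op>    z%d, z%d, z%d\n", zd, zn, zm);
  }
}
#endif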