/*---------------------------------------------------------------*/ /*--- begin host_arm64_isel.c ---*/ /*---------------------------------------------------------------*/ /* This file is part of Valgrind, a dynamic binary instrumentation framework. Copyright (C) 2013-2013 OpenWorks info@open-works.net This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. The GNU General Public License is contained in the file COPYING. */ #include "libvex_basictypes.h" #include "libvex_ir.h" #include "libvex.h" #include "ir_match.h" #include "main_util.h" #include "main_globals.h" #include "host_generic_regs.h" #include "host_generic_simd64.h" // for 32-bit SIMD helpers #include "host_arm64_defs.h" //ZZ /*---------------------------------------------------------*/ //ZZ /*--- ARMvfp control word stuff ---*/ //ZZ /*---------------------------------------------------------*/ //ZZ //ZZ /* Vex-generated code expects to run with the FPU set as follows: all //ZZ exceptions masked, round-to-nearest, non-vector mode, with the NZCV //ZZ flags cleared, and FZ (flush to zero) disabled. Curiously enough, //ZZ this corresponds to a FPSCR value of zero. //ZZ //ZZ fpscr should therefore be zero on entry to Vex-generated code, and //ZZ should be unchanged at exit. (Or at least the bottom 28 bits //ZZ should be zero). //ZZ */ //ZZ //ZZ #define DEFAULT_FPSCR 0 /*---------------------------------------------------------*/ /*--- ISelEnv ---*/ /*---------------------------------------------------------*/ /* This carries around: - A mapping from IRTemp to IRType, giving the type of any IRTemp we might encounter. This is computed before insn selection starts, and does not change. - A mapping from IRTemp to HReg. This tells the insn selector which virtual register is associated with each IRTemp temporary. This is computed before insn selection starts, and does not change. We expect this mapping to map precisely the same set of IRTemps as the type mapping does. |vregmap| holds the primary register for the IRTemp. |vregmapHI| is only used for 128-bit integer-typed IRTemps. It holds the identity of a second 64-bit virtual HReg, which holds the high half of the value. - The code array, that is, the insns selected so far. - A counter, for generating new virtual registers. - The host hardware capabilities word. This is set at the start and does not change. - A Bool for indicating whether we may generate chain-me instructions for control flow transfers, or whether we must use XAssisted. - The maximum guest address of any guest insn in this block. Actually, the address of the highest-addressed byte from any insn in this block. Is set at the start and does not change. This is used for detecting jumps which are definitely forward-edges from this block, and therefore can be made (chained) to the fast entry point of the destination, thereby avoiding the destination's event check. 
- An IRExpr*, which may be NULL, holding the IR expression (an IRRoundingMode-encoded value) to which the FPU's rounding mode was most recently set. Setting to NULL is always safe. Used to avoid redundant settings of the FPU's rounding mode, as described in set_FPCR_rounding_mode below. Note, this is all (well, mostly) host-independent. */ typedef struct { /* Constant -- are set at the start and do not change. */ IRTypeEnv* type_env; HReg* vregmap; HReg* vregmapHI; Int n_vregmap; UInt hwcaps; Bool chainingAllowed; Addr64 max_ga; /* These are modified as we go along. */ HInstrArray* code; Int vreg_ctr; IRExpr* previous_rm; } ISelEnv; static HReg lookupIRTemp ( ISelEnv* env, IRTemp tmp ) { vassert(tmp >= 0); vassert(tmp < env->n_vregmap); return env->vregmap[tmp]; } static void addInstr ( ISelEnv* env, ARM64Instr* instr ) { addHInstr(env->code, instr); if (vex_traceflags & VEX_TRACE_VCODE) { ppARM64Instr(instr); vex_printf("\n"); } } static HReg newVRegI ( ISelEnv* env ) { HReg reg = mkHReg(env->vreg_ctr, HRcInt64, True/*virtual reg*/); env->vreg_ctr++; return reg; } static HReg newVRegD ( ISelEnv* env ) { HReg reg = mkHReg(env->vreg_ctr, HRcFlt64, True/*virtual reg*/); env->vreg_ctr++; return reg; } //ZZ static HReg newVRegF ( ISelEnv* env ) //ZZ { //ZZ HReg reg = mkHReg(env->vreg_ctr, HRcFlt32, True/*virtual reg*/); //ZZ env->vreg_ctr++; //ZZ return reg; //ZZ } static HReg newVRegV ( ISelEnv* env ) { HReg reg = mkHReg(env->vreg_ctr, HRcVec128, True/*virtual reg*/); env->vreg_ctr++; return reg; } //ZZ /* These are duplicated in guest_arm_toIR.c */ //ZZ static IRExpr* unop ( IROp op, IRExpr* a ) //ZZ { //ZZ return IRExpr_Unop(op, a); //ZZ } //ZZ //ZZ static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 ) //ZZ { //ZZ return IRExpr_Binop(op, a1, a2); //ZZ } //ZZ //ZZ static IRExpr* bind ( Int binder ) //ZZ { //ZZ return IRExpr_Binder(binder); //ZZ } /*---------------------------------------------------------*/ /*--- ISEL: Forward declarations ---*/ /*---------------------------------------------------------*/ /* These are organised as iselXXX and iselXXX_wrk pairs. The iselXXX_wrk do the real work, but are not to be called directly. For each XXX, iselXXX calls its iselXXX_wrk counterpart, then checks that all returned registers are virtual. You should not call the _wrk version directly. Because some forms of ARM64 memory amodes are implicitly scaled by the access size, iselIntExpr_AMode takes an IRType which tells it the type of the access for which the amode is to be used. This type needs to be correct, else you'll get incorrect code. 
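   For example (illustrative): for a 64-bit access, the address
   expression Add64(t1, 40) is encoded with the offset scaled down to
   5 (scale 8), whereas for an 8-bit access the same expression keeps
   the offset 40 (scale 1).  Passing the wrong type therefore yields
   an amode which addresses the wrong location.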
*/

static ARM64AMode* iselIntExpr_AMode_wrk ( ISelEnv* env,
                                           IRExpr* e, IRType dty );
static ARM64AMode* iselIntExpr_AMode     ( ISelEnv* env,
                                           IRExpr* e, IRType dty );

static ARM64RIA*   iselIntExpr_RIA_wrk   ( ISelEnv* env, IRExpr* e );
static ARM64RIA*   iselIntExpr_RIA       ( ISelEnv* env, IRExpr* e );

static ARM64RIL*   iselIntExpr_RIL_wrk   ( ISelEnv* env, IRExpr* e );
static ARM64RIL*   iselIntExpr_RIL       ( ISelEnv* env, IRExpr* e );

static ARM64RI6*   iselIntExpr_RI6_wrk   ( ISelEnv* env, IRExpr* e );
static ARM64RI6*   iselIntExpr_RI6       ( ISelEnv* env, IRExpr* e );

static ARM64CondCode iselCondCode_wrk    ( ISelEnv* env, IRExpr* e );
static ARM64CondCode iselCondCode        ( ISelEnv* env, IRExpr* e );

static HReg        iselIntExpr_R_wrk     ( ISelEnv* env, IRExpr* e );
static HReg        iselIntExpr_R         ( ISelEnv* env, IRExpr* e );

static void        iselInt128Expr_wrk    ( /*OUT*/HReg* rHi, HReg* rLo,
                                           ISelEnv* env, IRExpr* e );
static void        iselInt128Expr        ( /*OUT*/HReg* rHi, HReg* rLo,
                                           ISelEnv* env, IRExpr* e );

//ZZ static void        iselInt64Expr_wrk ( HReg* rHi, HReg* rLo,
//ZZ                                        ISelEnv* env, IRExpr* e );
//ZZ static void        iselInt64Expr     ( HReg* rHi, HReg* rLo,
//ZZ                                        ISelEnv* env, IRExpr* e );

static HReg        iselDblExpr_wrk       ( ISelEnv* env, IRExpr* e );
static HReg        iselDblExpr           ( ISelEnv* env, IRExpr* e );

static HReg        iselFltExpr_wrk       ( ISelEnv* env, IRExpr* e );
static HReg        iselFltExpr           ( ISelEnv* env, IRExpr* e );

//ZZ static HReg        iselNeon64Expr_wrk ( ISelEnv* env, IRExpr* e );
//ZZ static HReg        iselNeon64Expr     ( ISelEnv* env, IRExpr* e );

static HReg        iselV128Expr_wrk      ( ISelEnv* env, IRExpr* e );
static HReg        iselV128Expr          ( ISelEnv* env, IRExpr* e );

static ARM64RIL* mb_mkARM64RIL_I ( ULong imm64 );


/*---------------------------------------------------------*/
/*--- ISEL: Misc helpers                                 ---*/
/*---------------------------------------------------------*/

/* Generate an amode suitable for a 64-bit sized access relative to
   the baseblock register (X21).  This generates an RI12 amode, which
   means it's scaled by the access size, which is why the access size
   -- 64 bit -- is stated explicitly here.  Consequently |off| needs
   to be divisible by 8. */
static ARM64AMode* mk_baseblock_64bit_access_amode ( UInt off )
{
   vassert(off < (8 << 12)); /* otherwise it's unrepresentable */
   vassert((off & 7) == 0);  /* ditto */
   return ARM64AMode_RI12(hregARM64_X21(), off >> 3, 8/*scale*/);
}

/* Ditto, for 32 bit accesses. */
static ARM64AMode* mk_baseblock_32bit_access_amode ( UInt off )
{
   vassert(off < (4 << 12)); /* otherwise it's unrepresentable */
   vassert((off & 3) == 0);  /* ditto */
   return ARM64AMode_RI12(hregARM64_X21(), off >> 2, 4/*scale*/);
}

/* Ditto, for 16 bit accesses. */
static ARM64AMode* mk_baseblock_16bit_access_amode ( UInt off )
{
   vassert(off < (2 << 12)); /* otherwise it's unrepresentable */
   vassert((off & 1) == 0);  /* ditto */
   return ARM64AMode_RI12(hregARM64_X21(), off >> 1, 2/*scale*/);
}

/* Ditto, for 8 bit accesses. */
static ARM64AMode* mk_baseblock_8bit_access_amode ( UInt off )
{
   vassert(off < (1 << 12)); /* otherwise it's unrepresentable */
   return ARM64AMode_RI12(hregARM64_X21(), off >> 0, 1/*scale*/);
}

static HReg mk_baseblock_128bit_access_addr ( ISelEnv* env, UInt off )
{
   vassert(off < (1<<12));
   HReg r = newVRegI(env);
   addInstr(env, ARM64Instr_Arith(r, hregARM64_X21(),
                                     ARM64RIA_I12(off,0), True/*isAdd*/));
   return r;
}

static HReg get_baseblock_register ( void )
{
   return hregARM64_X21();
}

/* Generate code to zero extend a 32 bit value in 'src' to 64 bits, in
   a new register, and return the new register.
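   (As the function below shows, on ARM64 this takes a single AND
   against the bitfield immediate 0xFFFFFFFF, encoded as
   ARM64RIL_I13(1, 0, 31).)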
*/
static HReg widen_z_32_to_64 ( ISelEnv* env, HReg src )
{
   HReg dst = newVRegI(env);
   ARM64RIL* mask = ARM64RIL_I13(1, 0, 31); /* encodes 0xFFFFFFFF */
   addInstr(env, ARM64Instr_Logic(dst, src, mask, ARM64lo_AND));
   return dst;
}

/* Generate code to sign extend a 16 bit value in 'src' to 64 bits,
   in a new register, and return the new register. */
static HReg widen_s_16_to_64 ( ISelEnv* env, HReg src )
{
   HReg dst = newVRegI(env);
   ARM64RI6* n48 = ARM64RI6_I6(48);
   addInstr(env, ARM64Instr_Shift(dst, src, n48, ARM64sh_SHL));
   addInstr(env, ARM64Instr_Shift(dst, dst, n48, ARM64sh_SAR));
   return dst;
}

/* Generate code to zero extend a 16 bit value in 'src' to 64 bits,
   in a new register, and return the new register. */
static HReg widen_z_16_to_64 ( ISelEnv* env, HReg src )
{
   HReg dst = newVRegI(env);
   ARM64RI6* n48 = ARM64RI6_I6(48);
   addInstr(env, ARM64Instr_Shift(dst, src, n48, ARM64sh_SHL));
   addInstr(env, ARM64Instr_Shift(dst, dst, n48, ARM64sh_SHR));
   return dst;
}

/* Generate code to sign extend a 32 bit value in 'src' to 64 bits,
   in a new register, and return the new register. */
static HReg widen_s_32_to_64 ( ISelEnv* env, HReg src )
{
   HReg dst = newVRegI(env);
   ARM64RI6* n32 = ARM64RI6_I6(32);
   addInstr(env, ARM64Instr_Shift(dst, src, n32, ARM64sh_SHL));
   addInstr(env, ARM64Instr_Shift(dst, dst, n32, ARM64sh_SAR));
   return dst;
}

/* Generate code to sign extend an 8 bit value in 'src' to 64 bits,
   in a new register, and return the new register. */
static HReg widen_s_8_to_64 ( ISelEnv* env, HReg src )
{
   HReg dst = newVRegI(env);
   ARM64RI6* n56 = ARM64RI6_I6(56);
   addInstr(env, ARM64Instr_Shift(dst, src, n56, ARM64sh_SHL));
   addInstr(env, ARM64Instr_Shift(dst, dst, n56, ARM64sh_SAR));
   return dst;
}

static HReg widen_z_8_to_64 ( ISelEnv* env, HReg src )
{
   HReg dst = newVRegI(env);
   ARM64RI6* n56 = ARM64RI6_I6(56);
   addInstr(env, ARM64Instr_Shift(dst, src, n56, ARM64sh_SHL));
   addInstr(env, ARM64Instr_Shift(dst, dst, n56, ARM64sh_SHR));
   return dst;
}

/* Is this IRExpr_Const(IRConst_U64(0)) ? */
static Bool isZeroU64 ( IRExpr* e )
{
   if (e->tag != Iex_Const) return False;
   IRConst* con = e->Iex.Const.con;
   vassert(con->tag == Ico_U64);
   return con->Ico.U64 == 0;
}


/*---------------------------------------------------------*/
/*--- ISEL: FP rounding mode helpers                     ---*/
/*---------------------------------------------------------*/

/* Set the FP rounding mode: 'mode' is an I32-typed expression
   denoting a value in the range 0 .. 3, indicating a round mode
   encoded as per type IRRoundingMode -- the first four values only
   (Irrm_NEAREST, Irrm_NegINF, Irrm_PosINF, Irrm_ZERO).  Set the
   ARM64 FPCR to have the same rounding.

   For speed & simplicity, we're setting the *entire* FPCR here.

   Setting the rounding mode is expensive.  So this function tries to
   avoid repeatedly setting the rounding mode to the same thing by
   first comparing 'mode' to the 'mode' tree supplied in the previous
   call to this function, if any.  (The previous value is stored in
   env->previous_rm.)  If 'mode' is a single IR temporary 't' and
   env->previous_rm is also just 't', then the setting is skipped.
   This is safe because of the SSA property of IR: an IR temporary
   can only be defined once and so will have the same value
   regardless of where it appears in the block.  Cool stuff, SSA.

   A safety condition: all attempts to set the RM must be aware of
   this mechanism - by being routed through the functions here.
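   For example (with a hypothetical IR temporary t5): if a block
   performs two F64 operations whose rounding-mode argument is t5
   both times, the first call here emits the FPCR write and records
   t5 in env->previous_rm; the second call then sees
   mode == t5 == env->previous_rm and emits nothing.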
   Of course this only helps in blocks where the RM is set more than
   once and it is set to the same value each time, *and* that value
   is held in the same IR temporary each time.  In order to assure
   the latter as much as possible, the IR optimiser takes care to do
   CSE on any block with any sign of floating point activity.
*/
static void set_FPCR_rounding_mode ( ISelEnv* env, IRExpr* mode )
{
   vassert(typeOfIRExpr(env->type_env,mode) == Ity_I32);

   /* Do we need to do anything? */
   if (env->previous_rm
       && env->previous_rm->tag == Iex_RdTmp
       && mode->tag == Iex_RdTmp
       && env->previous_rm->Iex.RdTmp.tmp == mode->Iex.RdTmp.tmp) {
      /* no - setting it to what it was before. */
      vassert(typeOfIRExpr(env->type_env, env->previous_rm) == Ity_I32);
      return;
   }

   /* No luck - we'd better set it, and remember what we set it to. */
   env->previous_rm = mode;

   /* Only supporting the rounding-mode bits - the rest of FPCR is
      set to zero - so we can set the whole register at once
      (faster). */

   /* This isn't simple, because 'mode' carries an IR rounding
      encoding, and we need to translate that to an ARM64 FP one:
      The IR encoding:
         00  to nearest (the default)
         10  to +infinity
         01  to -infinity
         11  to zero
      The ARM64 FP encoding:
         00  to nearest
         01  to +infinity
         10  to -infinity
         11  to zero
      Easy enough to do; just swap the two bits.
   */
   HReg irrm = iselIntExpr_R(env, mode);
   HReg tL   = newVRegI(env);
   HReg tR   = newVRegI(env);
   HReg t3   = newVRegI(env);
   /* tL = irrm << 1;
      tR = irrm >> 1;  if we're lucky, these will issue together
      tL &= 2;
      tR &= 1;         ditto
      t3 = tL | tR;
      t3 <<= 22;
      msr fpcr, t3
   */
   ARM64RIL* ril_one = mb_mkARM64RIL_I(1);
   ARM64RIL* ril_two = mb_mkARM64RIL_I(2);
   vassert(ril_one && ril_two);
   addInstr(env, ARM64Instr_Shift(tL, irrm, ARM64RI6_I6(1), ARM64sh_SHL));
   addInstr(env, ARM64Instr_Shift(tR, irrm, ARM64RI6_I6(1), ARM64sh_SHR));
   addInstr(env, ARM64Instr_Logic(tL, tL, ril_two, ARM64lo_AND));
   addInstr(env, ARM64Instr_Logic(tR, tR, ril_one, ARM64lo_AND));
   addInstr(env, ARM64Instr_Logic(t3, tL, ARM64RIL_R(tR), ARM64lo_OR));
   addInstr(env, ARM64Instr_Shift(t3, t3, ARM64RI6_I6(22), ARM64sh_SHL));
   addInstr(env, ARM64Instr_FPCR(True/*toFPCR*/, t3));
}


/*---------------------------------------------------------*/
/*--- ISEL: Function call helpers                        ---*/
/*---------------------------------------------------------*/

/* Used only in doHelperCall.  See big comment in doHelperCall re
   handling of register-parameter args.  This function figures out
   whether evaluation of an expression might require use of a fixed
   register.  If in doubt return True (safe but suboptimal).
*/
static Bool mightRequireFixedRegs ( IRExpr* e )
{
   if (UNLIKELY(is_IRExpr_VECRET_or_BBPTR(e))) {
      // These are always "safe" -- either a copy of SP in some
      // arbitrary vreg, or a copy of x21, respectively.
      return False;
   }
   /* Else it's a "normal" expression. */
   switch (e->tag) {
      case Iex_RdTmp: case Iex_Const: case Iex_Get:
         return False;
      default:
         return True;
   }
}

/* Do a complete function call.  |guard| is a Ity_Bit expression
   indicating whether or not the call happens.  If guard==NULL, the
   call is unconditional.  |retloc| is set to indicate where the
   return value is after the call.  The caller (of this fn) must
   generate code to add |stackAdjustAfterCall| to the stack pointer
   after the call is done.  Returns True iff it managed to handle
   this combination of arg/return types, else returns False.
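   For example (illustrative): for a helper returning a V128, this
   routine reserves 16 bytes on the stack for the result, sets
   *retloc to the sp-relative RLPri_V128SpRel at offset 0, and sets
   *stackAdjustAfterCall to 16, so that the caller can release the
   space once the value has been retrieved.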
*/
static Bool doHelperCall ( /*OUT*/UInt*   stackAdjustAfterCall,
                           /*OUT*/RetLoc* retloc,
                           ISelEnv* env,
                           IRExpr* guard,
                           IRCallee* cee, IRType retTy, IRExpr** args )
{
   ARM64CondCode cc;
   HReg          argregs[ARM64_N_ARGREGS];
   HReg          tmpregs[ARM64_N_ARGREGS];
   Bool          go_fast;
   Int           n_args, i, nextArgReg;
   ULong         target;

   vassert(ARM64_N_ARGREGS == 8);

   /* Set default returns.  We'll update them later if needed. */
   *stackAdjustAfterCall = 0;
   *retloc               = mk_RetLoc_INVALID();

   /* These are used for cross-checking that IR-level constraints on
      the use of IRExpr_VECRET() and IRExpr_BBPTR() are observed. */
   UInt nVECRETs = 0;
   UInt nBBPTRs  = 0;

   /* Marshal args for a call and do the call.

      This function only deals with a tiny set of possibilities, which
      cover all helpers in practice.  The restrictions are that only
      arguments in registers are supported, hence only
      ARM64_N_ARGREGS x 64 integer bits in total can be passed.  In
      fact the only supported arg type is I64.

      The return type can be I{64,32} or V128.  In the V128 case, it
      is expected that |args| will contain the special node
      IRExpr_VECRET(), in which case this routine generates code to
      allocate space on the stack for the vector return value.  Since
      we are not passing any scalars on the stack, it is enough to
      preallocate the return space before marshalling any arguments,
      in this case.

      |args| may also contain IRExpr_BBPTR(), in which case the value
      in x21 is passed as the corresponding argument.

      Generating code which is both efficient and correct when
      parameters are to be passed in registers is difficult, for the
      reasons elaborated in detail in comments attached to
      doHelperCall() in priv/host-x86/isel.c.  Here, we use a variant
      of the method described in those comments.

      The problem is split into two cases: the fast scheme and the
      slow scheme.  In the fast scheme, arguments are computed
      directly into the target (real) registers.  This is only safe
      when we can be sure that computation of each argument will not
      trash any real registers set by computation of any other
      argument.

      In the slow scheme, all args are first computed into vregs, and
      once they are all done, they are moved to the relevant real
      regs.  This always gives correct code, but it also gives a
      bunch of vreg-to-rreg moves which are usually redundant but are
      hard for the register allocator to get rid of.

      To decide which scheme to use, all argument expressions are
      first examined.  If they are all so simple that it is clear
      they will be evaluated without use of any fixed registers, use
      the fast scheme, else use the slow scheme.  Note also that only
      unconditional calls may use the fast scheme, since having to
      compute a condition expression could itself trash real
      registers.

      Note this requires being able to examine an expression and
      determine whether or not evaluation of it might use a fixed
      register.  That requires knowledge of how the rest of this insn
      selector works.  Currently just the following 3 are regarded as
      safe -- hopefully they cover the majority of arguments in
      practice: IRExpr_RdTmp IRExpr_Const IRExpr_Get.
   */

   /* Note that the cee->regparms field is meaningless on ARM64 hosts
      (since there is only one calling convention) and so we always
      ignore it.
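      As a concrete illustration: an unconditional call whose
      arguments are all IRExpr_RdTmp qualifies for the fast scheme
      and has each argument computed directly into x0..x7, whereas
      the same call with a non-trivial guard, or with any argument
      that might itself need fixed registers, goes via the slow
      scheme's temporaries.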
*/ n_args = 0; for (i = 0; args[i]; i++) { IRExpr* arg = args[i]; if (UNLIKELY(arg->tag == Iex_VECRET)) { nVECRETs++; } else if (UNLIKELY(arg->tag == Iex_BBPTR)) { nBBPTRs++; } n_args++; } /* If this fails, the IR is ill-formed */ vassert(nBBPTRs == 0 || nBBPTRs == 1); /* If we have a VECRET, allocate space on the stack for the return value, and record the stack pointer after that. */ HReg r_vecRetAddr = INVALID_HREG; if (nVECRETs == 1) { vassert(retTy == Ity_V128 || retTy == Ity_V256); vassert(retTy != Ity_V256); // we don't handle that yet (if ever) r_vecRetAddr = newVRegI(env); addInstr(env, ARM64Instr_AddToSP(-16)); addInstr(env, ARM64Instr_FromSP(r_vecRetAddr)); } else { // If either of these fail, the IR is ill-formed vassert(retTy != Ity_V128 && retTy != Ity_V256); vassert(nVECRETs == 0); } argregs[0] = hregARM64_X0(); argregs[1] = hregARM64_X1(); argregs[2] = hregARM64_X2(); argregs[3] = hregARM64_X3(); argregs[4] = hregARM64_X4(); argregs[5] = hregARM64_X5(); argregs[6] = hregARM64_X6(); argregs[7] = hregARM64_X7(); tmpregs[0] = tmpregs[1] = tmpregs[2] = tmpregs[3] = INVALID_HREG; tmpregs[4] = tmpregs[5] = tmpregs[6] = tmpregs[7] = INVALID_HREG; /* First decide which scheme (slow or fast) is to be used. First assume the fast scheme, and select slow if any contraindications (wow) appear. */ go_fast = True; if (guard) { if (guard->tag == Iex_Const && guard->Iex.Const.con->tag == Ico_U1 && guard->Iex.Const.con->Ico.U1 == True) { /* unconditional */ } else { /* Not manifestly unconditional -- be conservative. */ go_fast = False; } } if (go_fast) { for (i = 0; i < n_args; i++) { if (mightRequireFixedRegs(args[i])) { go_fast = False; break; } } } if (go_fast) { if (retTy == Ity_V128 || retTy == Ity_V256) go_fast = False; } /* At this point the scheme to use has been established. Generate code to get the arg values into the argument rregs. If we run out of arg regs, give up. */ if (go_fast) { /* FAST SCHEME */ nextArgReg = 0; for (i = 0; i < n_args; i++) { IRExpr* arg = args[i]; IRType aTy = Ity_INVALID; if (LIKELY(!is_IRExpr_VECRET_or_BBPTR(arg))) aTy = typeOfIRExpr(env->type_env, args[i]); if (nextArgReg >= ARM64_N_ARGREGS) return False; /* out of argregs */ if (aTy == Ity_I64) { addInstr(env, ARM64Instr_MovI( argregs[nextArgReg], iselIntExpr_R(env, args[i]) )); nextArgReg++; } else if (arg->tag == Iex_BBPTR) { vassert(0); //ATC addInstr(env, ARM64Instr_MovI( argregs[nextArgReg], hregARM64_X21() )); nextArgReg++; } else if (arg->tag == Iex_VECRET) { // because of the go_fast logic above, we can't get here, // since vector return values makes us use the slow path // instead. vassert(0); } else return False; /* unhandled arg type */ } /* Fast scheme only applies for unconditional calls. Hence: */ cc = ARM64cc_AL; } else { /* SLOW SCHEME; move via temporaries */ nextArgReg = 0; for (i = 0; i < n_args; i++) { IRExpr* arg = args[i]; IRType aTy = Ity_INVALID; if (LIKELY(!is_IRExpr_VECRET_or_BBPTR(arg))) aTy = typeOfIRExpr(env->type_env, args[i]); if (nextArgReg >= ARM64_N_ARGREGS) return False; /* out of argregs */ if (aTy == Ity_I64) { tmpregs[nextArgReg] = iselIntExpr_R(env, args[i]); nextArgReg++; } else if (arg->tag == Iex_BBPTR) { vassert(0); //ATC tmpregs[nextArgReg] = hregARM64_X21(); nextArgReg++; } else if (arg->tag == Iex_VECRET) { vassert(!hregIsInvalid(r_vecRetAddr)); tmpregs[nextArgReg] = r_vecRetAddr; nextArgReg++; } else return False; /* unhandled arg type */ } /* Now we can compute the condition. 
We can't do it earlier because the argument computations could trash the condition codes. Be a bit clever to handle the common case where the guard is 1:Bit. */ cc = ARM64cc_AL; if (guard) { if (guard->tag == Iex_Const && guard->Iex.Const.con->tag == Ico_U1 && guard->Iex.Const.con->Ico.U1 == True) { /* unconditional -- do nothing */ } else { cc = iselCondCode( env, guard ); } } /* Move the args to their final destinations. */ for (i = 0; i < nextArgReg; i++) { vassert(!(hregIsInvalid(tmpregs[i]))); /* None of these insns, including any spill code that might be generated, may alter the condition codes. */ addInstr( env, ARM64Instr_MovI( argregs[i], tmpregs[i] ) ); } } /* Should be assured by checks above */ vassert(nextArgReg <= ARM64_N_ARGREGS); /* Do final checks, set the return values, and generate the call instruction proper. */ vassert(nBBPTRs == 0 || nBBPTRs == 1); vassert(nVECRETs == (retTy == Ity_V128 || retTy == Ity_V256) ? 1 : 0); vassert(*stackAdjustAfterCall == 0); vassert(is_RetLoc_INVALID(*retloc)); switch (retTy) { case Ity_INVALID: /* Function doesn't return a value. */ *retloc = mk_RetLoc_simple(RLPri_None); break; case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8: *retloc = mk_RetLoc_simple(RLPri_Int); break; case Ity_V128: *retloc = mk_RetLoc_spRel(RLPri_V128SpRel, 0); *stackAdjustAfterCall = 16; break; case Ity_V256: vassert(0); // ATC *retloc = mk_RetLoc_spRel(RLPri_V256SpRel, 0); *stackAdjustAfterCall = 32; break; default: /* IR can denote other possible return types, but we don't handle those here. */ vassert(0); } /* Finally, generate the call itself. This needs the *retloc value set in the switch above, which is why it's at the end. */ /* nextArgReg doles out argument registers. Since these are assigned in the order x0 .. x7, its numeric value at this point, which must be between 0 and 8 inclusive, is going to be equal to the number of arg regs in use for the call. Hence bake that number into the call (we'll need to know it when doing register allocation, to know what regs the call reads.) */ target = (HWord)Ptr_to_ULong(cee->addr); addInstr(env, ARM64Instr_Call( cc, target, nextArgReg, *retloc )); return True; /* success */ } /*---------------------------------------------------------*/ /*--- ISEL: Integer expressions (64/32 bit) ---*/ /*---------------------------------------------------------*/ /* Select insns for an integer-typed expression, and add them to the code list. Return a reg holding the result. This reg will be a virtual register. THE RETURNED REG MUST NOT BE MODIFIED. If you want to modify it, ask for a new vreg, copy it in there, and modify the copy. The register allocator will do its best to map both vregs to the same real register, so the copies will often disappear later in the game. This should handle expressions of 64- and 32-bit type. All results are returned in a 64-bit register. For 32-bit expressions, the upper 32 bits are arbitrary, so you should mask or sign extend partial values if necessary. */ /* --------------------- AMode --------------------- */ /* Return an AMode which computes the value of the specified expression, possibly also adding insns to the code list as a result. The expression may only be a 64-bit one. 
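   To illustrate the cases below (with hypothetical temporaries):
   Add64(t1, 40) for an 8-byte access becomes the scaled amode
   RI12(t1, 5, 8); Sub64(t1, 5) becomes the unscaled RI9(t1, -5);
   Add64(t1, t2) becomes RR(t1, t2); and anything else is evaluated
   into a register r and used as RI9(r, 0).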
*/ static Bool isValidScale ( UChar scale ) { switch (scale) { case 1: case 2: case 4: case 8: /* case 16: ??*/ return True; default: return False; } } static Bool sane_AMode ( ARM64AMode* am ) { switch (am->tag) { case ARM64am_RI9: return toBool( hregClass(am->ARM64am.RI9.reg) == HRcInt64 && (hregIsVirtual(am->ARM64am.RI9.reg) /* || sameHReg(am->ARM64am.RI9.reg, hregARM64_X21()) */ ) && am->ARM64am.RI9.simm9 >= -256 && am->ARM64am.RI9.simm9 <= 255 ); case ARM64am_RI12: return toBool( hregClass(am->ARM64am.RI12.reg) == HRcInt64 && (hregIsVirtual(am->ARM64am.RI12.reg) /* || sameHReg(am->ARM64am.RI12.reg, hregARM64_X21()) */ ) && am->ARM64am.RI12.uimm12 < 4096 && isValidScale(am->ARM64am.RI12.szB) ); case ARM64am_RR: return toBool( hregClass(am->ARM64am.RR.base) == HRcInt64 && hregIsVirtual(am->ARM64am.RR.base) && hregClass(am->ARM64am.RR.index) == HRcInt64 && hregIsVirtual(am->ARM64am.RR.index) ); default: vpanic("sane_AMode: unknown ARM64 AMode1 tag"); } } static ARM64AMode* iselIntExpr_AMode ( ISelEnv* env, IRExpr* e, IRType dty ) { ARM64AMode* am = iselIntExpr_AMode_wrk(env, e, dty); vassert(sane_AMode(am)); return am; } static ARM64AMode* iselIntExpr_AMode_wrk ( ISelEnv* env, IRExpr* e, IRType dty ) { IRType ty = typeOfIRExpr(env->type_env,e); vassert(ty == Ity_I64); ULong szBbits = 0; switch (dty) { case Ity_I64: szBbits = 3; break; case Ity_I32: szBbits = 2; break; case Ity_I16: szBbits = 1; break; case Ity_I8: szBbits = 0; break; default: vassert(0); } /* {Add64,Sub64}(expr,simm9). We don't care about |dty| here since we're going to create an amode suitable for LDU* or STU* instructions, which use unscaled immediate offsets. */ if (e->tag == Iex_Binop && (e->Iex.Binop.op == Iop_Add64 || e->Iex.Binop.op == Iop_Sub64) && e->Iex.Binop.arg2->tag == Iex_Const && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U64) { Long simm = (Long)e->Iex.Binop.arg2->Iex.Const.con->Ico.U64; if (simm >= -255 && simm <= 255) { /* Although the gating condition might seem to be simm >= -256 && simm <= 255 we will need to negate simm in the case where the op is Sub64. Hence limit the lower value to -255 in order that its negation is representable. */ HReg reg = iselIntExpr_R(env, e->Iex.Binop.arg1); if (e->Iex.Binop.op == Iop_Sub64) simm = -simm; return ARM64AMode_RI9(reg, (Int)simm); } } /* Add64(expr, uimm12 * transfer-size) */ if (e->tag == Iex_Binop && e->Iex.Binop.op == Iop_Add64 && e->Iex.Binop.arg2->tag == Iex_Const && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U64) { ULong uimm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U64; ULong szB = 1 << szBbits; if (0 == (uimm & (szB-1)) /* "uimm is szB-aligned" */ && (uimm >> szBbits) < 4096) { HReg reg = iselIntExpr_R(env, e->Iex.Binop.arg1); return ARM64AMode_RI12(reg, (UInt)(uimm >> szBbits), (UChar)szB); } } /* Add64(expr1, expr2) */ if (e->tag == Iex_Binop && e->Iex.Binop.op == Iop_Add64) { HReg reg1 = iselIntExpr_R(env, e->Iex.Binop.arg1); HReg reg2 = iselIntExpr_R(env, e->Iex.Binop.arg2); return ARM64AMode_RR(reg1, reg2); } /* Doesn't match anything in particular. Generate it into a register and use that. */ HReg reg = iselIntExpr_R(env, e); return ARM64AMode_RI9(reg, 0); } //ZZ /* --------------------- AModeV --------------------- */ //ZZ //ZZ /* Return an AModeV which computes the value of the specified //ZZ expression, possibly also adding insns to the code list as a //ZZ result. The expression may only be a 32-bit one. 
//ZZ */ //ZZ //ZZ static Bool sane_AModeV ( ARMAModeV* am ) //ZZ { //ZZ return toBool( hregClass(am->reg) == HRcInt32 //ZZ && hregIsVirtual(am->reg) //ZZ && am->simm11 >= -1020 && am->simm11 <= 1020 //ZZ && 0 == (am->simm11 & 3) ); //ZZ } //ZZ //ZZ static ARMAModeV* iselIntExpr_AModeV ( ISelEnv* env, IRExpr* e ) //ZZ { //ZZ ARMAModeV* am = iselIntExpr_AModeV_wrk(env, e); //ZZ vassert(sane_AModeV(am)); //ZZ return am; //ZZ } //ZZ //ZZ static ARMAModeV* iselIntExpr_AModeV_wrk ( ISelEnv* env, IRExpr* e ) //ZZ { //ZZ IRType ty = typeOfIRExpr(env->type_env,e); //ZZ vassert(ty == Ity_I32); //ZZ //ZZ /* {Add32,Sub32}(expr, simm8 << 2) */ //ZZ if (e->tag == Iex_Binop //ZZ && (e->Iex.Binop.op == Iop_Add32 || e->Iex.Binop.op == Iop_Sub32) //ZZ && e->Iex.Binop.arg2->tag == Iex_Const //ZZ && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U32) { //ZZ Int simm = (Int)e->Iex.Binop.arg2->Iex.Const.con->Ico.U32; //ZZ if (simm >= -1020 && simm <= 1020 && 0 == (simm & 3)) { //ZZ HReg reg; //ZZ if (e->Iex.Binop.op == Iop_Sub32) //ZZ simm = -simm; //ZZ reg = iselIntExpr_R(env, e->Iex.Binop.arg1); //ZZ return mkARMAModeV(reg, simm); //ZZ } //ZZ } //ZZ //ZZ /* Doesn't match anything in particular. Generate it into //ZZ a register and use that. */ //ZZ { //ZZ HReg reg = iselIntExpr_R(env, e); //ZZ return mkARMAModeV(reg, 0); //ZZ } //ZZ //ZZ } //ZZ //ZZ /* -------------------- AModeN -------------------- */ //ZZ //ZZ static ARMAModeN* iselIntExpr_AModeN ( ISelEnv* env, IRExpr* e ) //ZZ { //ZZ return iselIntExpr_AModeN_wrk(env, e); //ZZ } //ZZ //ZZ static ARMAModeN* iselIntExpr_AModeN_wrk ( ISelEnv* env, IRExpr* e ) //ZZ { //ZZ HReg reg = iselIntExpr_R(env, e); //ZZ return mkARMAModeN_R(reg); //ZZ } //ZZ //ZZ //ZZ /* --------------------- RI84 --------------------- */ //ZZ //ZZ /* Select instructions to generate 'e' into a RI84. If mayInv is //ZZ true, then the caller will also accept an I84 form that denotes //ZZ 'not e'. In this case didInv may not be NULL, and *didInv is set //ZZ to True. This complication is so as to allow generation of an RI84 //ZZ which is suitable for use in either an AND or BIC instruction, //ZZ without knowing (before this call) which one. //ZZ */ //ZZ static ARMRI84* iselIntExpr_RI84 ( /*OUT*/Bool* didInv, Bool mayInv, //ZZ ISelEnv* env, IRExpr* e ) //ZZ { //ZZ ARMRI84* ri; //ZZ if (mayInv) //ZZ vassert(didInv != NULL); //ZZ ri = iselIntExpr_RI84_wrk(didInv, mayInv, env, e); //ZZ /* sanity checks ... */ //ZZ switch (ri->tag) { //ZZ case ARMri84_I84: //ZZ return ri; //ZZ case ARMri84_R: //ZZ vassert(hregClass(ri->ARMri84.R.reg) == HRcInt32); //ZZ vassert(hregIsVirtual(ri->ARMri84.R.reg)); //ZZ return ri; //ZZ default: //ZZ vpanic("iselIntExpr_RI84: unknown arm RI84 tag"); //ZZ } //ZZ } //ZZ //ZZ /* DO NOT CALL THIS DIRECTLY ! 
*/ //ZZ static ARMRI84* iselIntExpr_RI84_wrk ( /*OUT*/Bool* didInv, Bool mayInv, //ZZ ISelEnv* env, IRExpr* e ) //ZZ { //ZZ IRType ty = typeOfIRExpr(env->type_env,e); //ZZ vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8); //ZZ //ZZ if (didInv) *didInv = False; //ZZ //ZZ /* special case: immediate */ //ZZ if (e->tag == Iex_Const) { //ZZ UInt u, u8 = 0x100, u4 = 0x10; /* both invalid */ //ZZ switch (e->Iex.Const.con->tag) { //ZZ case Ico_U32: u = e->Iex.Const.con->Ico.U32; break; //ZZ case Ico_U16: u = 0xFFFF & (e->Iex.Const.con->Ico.U16); break; //ZZ case Ico_U8: u = 0xFF & (e->Iex.Const.con->Ico.U8); break; //ZZ default: vpanic("iselIntExpr_RI84.Iex_Const(armh)"); //ZZ } //ZZ if (fitsIn8x4(&u8, &u4, u)) { //ZZ return ARMRI84_I84( (UShort)u8, (UShort)u4 ); //ZZ } //ZZ if (mayInv && fitsIn8x4(&u8, &u4, ~u)) { //ZZ vassert(didInv); //ZZ *didInv = True; //ZZ return ARMRI84_I84( (UShort)u8, (UShort)u4 ); //ZZ } //ZZ /* else fail, fall through to default case */ //ZZ } //ZZ //ZZ /* default case: calculate into a register and return that */ //ZZ { //ZZ HReg r = iselIntExpr_R ( env, e ); //ZZ return ARMRI84_R(r); //ZZ } //ZZ } /* --------------------- RIA --------------------- */ /* Select instructions to generate 'e' into a RIA. */ static ARM64RIA* iselIntExpr_RIA ( ISelEnv* env, IRExpr* e ) { ARM64RIA* ri = iselIntExpr_RIA_wrk(env, e); /* sanity checks ... */ switch (ri->tag) { case ARM64riA_I12: vassert(ri->ARM64riA.I12.imm12 < 4096); vassert(ri->ARM64riA.I12.shift == 0 || ri->ARM64riA.I12.shift == 12); return ri; case ARM64riA_R: vassert(hregClass(ri->ARM64riA.R.reg) == HRcInt64); vassert(hregIsVirtual(ri->ARM64riA.R.reg)); return ri; default: vpanic("iselIntExpr_RIA: unknown arm RIA tag"); } } /* DO NOT CALL THIS DIRECTLY ! */ static ARM64RIA* iselIntExpr_RIA_wrk ( ISelEnv* env, IRExpr* e ) { IRType ty = typeOfIRExpr(env->type_env,e); vassert(ty == Ity_I64 || ty == Ity_I32); /* special case: immediate */ if (e->tag == Iex_Const) { ULong u = 0xF000000ULL; /* invalid */ switch (e->Iex.Const.con->tag) { case Ico_U64: u = e->Iex.Const.con->Ico.U64; break; case Ico_U32: u = e->Iex.Const.con->Ico.U32; break; default: vpanic("iselIntExpr_RIA.Iex_Const(arm64)"); } if (0 == (u & ~(0xFFFULL << 0))) return ARM64RIA_I12((UShort)((u >> 0) & 0xFFFULL), 0); if (0 == (u & ~(0xFFFULL << 12))) return ARM64RIA_I12((UShort)((u >> 12) & 0xFFFULL), 12); /* else fail, fall through to default case */ } /* default case: calculate into a register and return that */ { HReg r = iselIntExpr_R ( env, e ); return ARM64RIA_R(r); } } /* --------------------- RIL --------------------- */ /* Select instructions to generate 'e' into a RIL. At this point we have to deal with the strange bitfield-immediate encoding for logic instructions. */ // The following four functions // CountLeadingZeros CountTrailingZeros CountSetBits isImmLogical // are copied, with modifications, from // https://github.com/armvixl/vixl/blob/master/src/a64/assembler-a64.cc // which has the following copyright notice: /* Copyright 2013, ARM Limited All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
* Neither the name of ARM Limited nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ static Int CountLeadingZeros(ULong value, Int width) { vassert(width == 32 || width == 64); Int count = 0; ULong bit_test = 1ULL << (width - 1); while ((count < width) && ((bit_test & value) == 0)) { count++; bit_test >>= 1; } return count; } static Int CountTrailingZeros(ULong value, Int width) { vassert(width == 32 || width == 64); Int count = 0; while ((count < width) && (((value >> count) & 1) == 0)) { count++; } return count; } static Int CountSetBits(ULong value, Int width) { // TODO: Other widths could be added here, as the implementation already // supports them. vassert(width == 32 || width == 64); // Mask out unused bits to ensure that they are not counted. value &= (0xffffffffffffffffULL >> (64-width)); // Add up the set bits. // The algorithm works by adding pairs of bit fields together iteratively, // where the size of each bit field doubles each time. // An example for an 8-bit value: // Bits: h g f e d c b a // \ | \ | \ | \ | // value = h+g f+e d+c b+a // \ | \ | // value = h+g+f+e d+c+b+a // \ | // value = h+g+f+e+d+c+b+a value = ((value >> 1) & 0x5555555555555555ULL) + (value & 0x5555555555555555ULL); value = ((value >> 2) & 0x3333333333333333ULL) + (value & 0x3333333333333333ULL); value = ((value >> 4) & 0x0f0f0f0f0f0f0f0fULL) + (value & 0x0f0f0f0f0f0f0f0fULL); value = ((value >> 8) & 0x00ff00ff00ff00ffULL) + (value & 0x00ff00ff00ff00ffULL); value = ((value >> 16) & 0x0000ffff0000ffffULL) + (value & 0x0000ffff0000ffffULL); value = ((value >> 32) & 0x00000000ffffffffULL) + (value & 0x00000000ffffffffULL); return value; } static Bool isImmLogical ( /*OUT*/UInt* n, /*OUT*/UInt* imm_s, /*OUT*/UInt* imm_r, ULong value, UInt width ) { // Test if a given value can be encoded in the immediate field of a // logical instruction. // If it can be encoded, the function returns true, and values // pointed to by n, imm_s and imm_r are updated with immediates // encoded in the format required by the corresponding fields in the // logical instruction. If it can not be encoded, the function // returns false, and the values pointed to by n, imm_s and imm_r // are undefined. 
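   // Worked example (illustrative): value == 0x00FF00FF00FF00FF is
   // the 16-bit element 0x00FF replicated four times, so it is
   // encodable, with n == 0, imm_r == 0 and imm_s == 0x27 (binary
   // 100111: the leading '10' selects element size 16 and S == 7
   // says the low 8 bits of the element are set).  By contrast, 0
   // and ~0 have no encoding and make this function return false.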
vassert(n != NULL && imm_s != NULL && imm_r != NULL); vassert(width == 32 || width == 64); // Logical immediates are encoded using parameters n, imm_s and imm_r using // the following table: // // N imms immr size S R // 1 ssssss rrrrrr 64 UInt(ssssss) UInt(rrrrrr) // 0 0sssss xrrrrr 32 UInt(sssss) UInt(rrrrr) // 0 10ssss xxrrrr 16 UInt(ssss) UInt(rrrr) // 0 110sss xxxrrr 8 UInt(sss) UInt(rrr) // 0 1110ss xxxxrr 4 UInt(ss) UInt(rr) // 0 11110s xxxxxr 2 UInt(s) UInt(r) // (s bits must not be all set) // // A pattern is constructed of size bits, where the least significant S+1 // bits are set. The pattern is rotated right by R, and repeated across a // 32 or 64-bit value, depending on destination register width. // // To test if an arbitrary immediate can be encoded using this scheme, an // iterative algorithm is used. // // TODO: This code does not consider using X/W register overlap to support // 64-bit immediates where the top 32-bits are zero, and the bottom 32-bits // are an encodable logical immediate. // 1. If the value has all set or all clear bits, it can't be encoded. if ((value == 0) || (value == 0xffffffffffffffffULL) || ((width == 32) && (value == 0xffffffff))) { return False; } UInt lead_zero = CountLeadingZeros(value, width); UInt lead_one = CountLeadingZeros(~value, width); UInt trail_zero = CountTrailingZeros(value, width); UInt trail_one = CountTrailingZeros(~value, width); UInt set_bits = CountSetBits(value, width); // The fixed bits in the immediate s field. // If width == 64 (X reg), start at 0xFFFFFF80. // If width == 32 (W reg), start at 0xFFFFFFC0, as the iteration for 64-bit // widths won't be executed. Int imm_s_fixed = (width == 64) ? -128 : -64; Int imm_s_mask = 0x3F; for (;;) { // 2. If the value is two bits wide, it can be encoded. if (width == 2) { *n = 0; *imm_s = 0x3C; *imm_r = (value & 3) - 1; return True; } *n = (width == 64) ? 1 : 0; *imm_s = ((imm_s_fixed | (set_bits - 1)) & imm_s_mask); if ((lead_zero + set_bits) == width) { *imm_r = 0; } else { *imm_r = (lead_zero > 0) ? (width - trail_zero) : lead_one; } // 3. If the sum of leading zeros, trailing zeros and set bits is equal to // the bit width of the value, it can be encoded. if (lead_zero + trail_zero + set_bits == width) { return True; } // 4. If the sum of leading ones, trailing ones and unset bits in the // value is equal to the bit width of the value, it can be encoded. if (lead_one + trail_one + (width - set_bits) == width) { return True; } // 5. If the most-significant half of the bitwise value is equal to the // least-significant half, return to step 2 using the least-significant // half of the value. ULong mask = (1ULL << (width >> 1)) - 1; if ((value & mask) == ((value >> (width >> 1)) & mask)) { width >>= 1; set_bits >>= 1; imm_s_fixed >>= 1; continue; } // 6. Otherwise, the value can't be encoded. return False; } } /* Create a RIL for the given immediate, if it is representable, or return NULL if not. */ static ARM64RIL* mb_mkARM64RIL_I ( ULong imm64 ) { UInt n = 0, imm_s = 0, imm_r = 0; Bool ok = isImmLogical(&n, &imm_s, &imm_r, imm64, 64); if (!ok) return NULL; vassert(n < 2 && imm_s < 64 && imm_r < 64); return ARM64RIL_I13(n, imm_r, imm_s); } /* So, finally .. */ static ARM64RIL* iselIntExpr_RIL ( ISelEnv* env, IRExpr* e ) { ARM64RIL* ri = iselIntExpr_RIL_wrk(env, e); /* sanity checks ... 
*/
   switch (ri->tag) {
      case ARM64riL_I13:
         vassert(ri->ARM64riL.I13.bitN < 2);
         vassert(ri->ARM64riL.I13.immR < 64);
         vassert(ri->ARM64riL.I13.immS < 64);
         return ri;
      case ARM64riL_R:
         vassert(hregClass(ri->ARM64riL.R.reg) == HRcInt64);
         vassert(hregIsVirtual(ri->ARM64riL.R.reg));
         return ri;
      default:
         vpanic("iselIntExpr_RIL: unknown arm RIL tag");
   }
}

/* DO NOT CALL THIS DIRECTLY ! */
static ARM64RIL* iselIntExpr_RIL_wrk ( ISelEnv* env, IRExpr* e )
{
   IRType ty = typeOfIRExpr(env->type_env,e);
   vassert(ty == Ity_I64 || ty == Ity_I32);

   /* special case: immediate */
   if (e->tag == Iex_Const) {
      ARM64RIL* maybe = NULL;
      if (ty == Ity_I64) {
         vassert(e->Iex.Const.con->tag == Ico_U64);
         maybe = mb_mkARM64RIL_I(e->Iex.Const.con->Ico.U64);
      } else {
         vassert(ty == Ity_I32);
         vassert(e->Iex.Const.con->tag == Ico_U32);
         UInt  u32 = e->Iex.Const.con->Ico.U32;
         ULong u64 = (ULong)u32;
         /* First try with 32 leading zeroes. */
         maybe = mb_mkARM64RIL_I(u64);
         /* If that doesn't work, try with 2 copies, since it doesn't
            matter what winds up in the upper 32 bits. */
         if (!maybe) {
            maybe = mb_mkARM64RIL_I((u64 << 32) | u64);
         }
      }
      if (maybe) return maybe;
      /* else fail, fall through to default case */
   }

   /* default case: calculate into a register and return that */
   {
      HReg r = iselIntExpr_R ( env, e );
      return ARM64RIL_R(r);
   }
}


/* --------------------- RI6 --------------------- */

/* Select instructions to generate 'e' into a RI6. */

static ARM64RI6* iselIntExpr_RI6 ( ISelEnv* env, IRExpr* e )
{
   ARM64RI6* ri = iselIntExpr_RI6_wrk(env, e);
   /* sanity checks ... */
   switch (ri->tag) {
      case ARM64ri6_I6:
         vassert(ri->ARM64ri6.I6.imm6 < 64);
         vassert(ri->ARM64ri6.I6.imm6 > 0);
         return ri;
      case ARM64ri6_R:
         vassert(hregClass(ri->ARM64ri6.R.reg) == HRcInt64);
         vassert(hregIsVirtual(ri->ARM64ri6.R.reg));
         return ri;
      default:
         vpanic("iselIntExpr_RI6: unknown arm RI6 tag");
   }
}

/* DO NOT CALL THIS DIRECTLY ! */
static ARM64RI6* iselIntExpr_RI6_wrk ( ISelEnv* env, IRExpr* e )
{
   IRType ty = typeOfIRExpr(env->type_env,e);
   vassert(ty == Ity_I64 || ty == Ity_I8);

   /* special case: immediate */
   if (e->tag == Iex_Const) {
      switch (e->Iex.Const.con->tag) {
         case Ico_U8: {
            UInt u = e->Iex.Const.con->Ico.U8;
            if (u > 0 && u < 64) return ARM64RI6_I6(u);
            break;
         }
         default:
            break;
      }
      /* else fail, fall through to default case */
   }

   /* default case: calculate into a register and return that */
   {
      HReg r = iselIntExpr_R ( env, e );
      return ARM64RI6_R(r);
   }
}


/* ------------------- CondCode ------------------- */

/* Generate code to evaluate a bit-typed expression, returning the
   condition code which would correspond when the expression would
   notionally have returned 1. */

static ARM64CondCode iselCondCode ( ISelEnv* env, IRExpr* e )
{
   ARM64CondCode cc = iselCondCode_wrk(env,e);
   vassert(cc != ARM64cc_NV);
   return cc;
}

static ARM64CondCode iselCondCode_wrk ( ISelEnv* env, IRExpr* e )
{
   vassert(e);
   vassert(typeOfIRExpr(env->type_env,e) == Ity_I1);

   /* var */
   if (e->tag == Iex_RdTmp) {
      HReg rTmp = lookupIRTemp(env, e->Iex.RdTmp.tmp);
      /* Cmp doesn't modify rTmp; so this is OK.
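         (The ARM64Instr_Test below is a TST, which writes only the
         NZCV flags and leaves rTmp itself unchanged.)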
*/ ARM64RIL* one = mb_mkARM64RIL_I(1); vassert(one); addInstr(env, ARM64Instr_Test(rTmp, one)); return ARM64cc_NE; } /* Not1(e) */ if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_Not1) { /* Generate code for the arg, and negate the test condition */ ARM64CondCode cc = iselCondCode(env, e->Iex.Unop.arg); if (cc == ARM64cc_AL || cc == ARM64cc_NV) { return ARM64cc_AL; } else { return 1 ^ cc; } } /* --- patterns rooted at: 64to1 --- */ if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_64to1) { HReg rTmp = iselIntExpr_R(env, e->Iex.Unop.arg); ARM64RIL* one = mb_mkARM64RIL_I(1); vassert(one); /* '1' must be representable */ addInstr(env, ARM64Instr_Test(rTmp, one)); return ARM64cc_NE; } /* --- patterns rooted at: CmpNEZ8 --- */ if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_CmpNEZ8) { HReg r1 = iselIntExpr_R(env, e->Iex.Unop.arg); ARM64RIL* xFF = mb_mkARM64RIL_I(0xFF); addInstr(env, ARM64Instr_Test(r1, xFF)); return ARM64cc_NE; } /* --- patterns rooted at: CmpNEZ64 --- */ if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_CmpNEZ64) { HReg r1 = iselIntExpr_R(env, e->Iex.Unop.arg); ARM64RIA* zero = ARM64RIA_I12(0,0); addInstr(env, ARM64Instr_Cmp(r1, zero, True/*is64*/)); return ARM64cc_NE; } /* --- patterns rooted at: CmpNEZ32 --- */ if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_CmpNEZ32) { HReg r1 = iselIntExpr_R(env, e->Iex.Unop.arg); ARM64RIA* zero = ARM64RIA_I12(0,0); addInstr(env, ARM64Instr_Cmp(r1, zero, False/*!is64*/)); return ARM64cc_NE; } /* --- Cmp*64*(x,y) --- */ if (e->tag == Iex_Binop && (e->Iex.Binop.op == Iop_CmpEQ64 || e->Iex.Binop.op == Iop_CmpNE64 || e->Iex.Binop.op == Iop_CmpLT64S || e->Iex.Binop.op == Iop_CmpLT64U || e->Iex.Binop.op == Iop_CmpLE64S || e->Iex.Binop.op == Iop_CmpLE64U)) { HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1); ARM64RIA* argR = iselIntExpr_RIA(env, e->Iex.Binop.arg2); addInstr(env, ARM64Instr_Cmp(argL, argR, True/*is64*/)); switch (e->Iex.Binop.op) { case Iop_CmpEQ64: return ARM64cc_EQ; case Iop_CmpNE64: return ARM64cc_NE; case Iop_CmpLT64S: return ARM64cc_LT; case Iop_CmpLT64U: return ARM64cc_CC; case Iop_CmpLE64S: return ARM64cc_LE; case Iop_CmpLE64U: return ARM64cc_LS; default: vpanic("iselCondCode(arm64): CmpXX64"); } } /* --- Cmp*32*(x,y) --- */ if (e->tag == Iex_Binop && (e->Iex.Binop.op == Iop_CmpEQ32 || e->Iex.Binop.op == Iop_CmpNE32 || e->Iex.Binop.op == Iop_CmpLT32S || e->Iex.Binop.op == Iop_CmpLT32U || e->Iex.Binop.op == Iop_CmpLE32S || e->Iex.Binop.op == Iop_CmpLE32U)) { HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1); ARM64RIA* argR = iselIntExpr_RIA(env, e->Iex.Binop.arg2); addInstr(env, ARM64Instr_Cmp(argL, argR, False/*!is64*/)); switch (e->Iex.Binop.op) { case Iop_CmpEQ32: return ARM64cc_EQ; case Iop_CmpNE32: return ARM64cc_NE; case Iop_CmpLT32S: return ARM64cc_LT; case Iop_CmpLT32U: return ARM64cc_CC; case Iop_CmpLE32S: return ARM64cc_LE; case Iop_CmpLE32U: return ARM64cc_LS; default: vpanic("iselCondCode(arm64): CmpXX32"); } } //ZZ /* const */ //ZZ /* Constant 1:Bit */ //ZZ if (e->tag == Iex_Const) { //ZZ HReg r; //ZZ vassert(e->Iex.Const.con->tag == Ico_U1); //ZZ vassert(e->Iex.Const.con->Ico.U1 == True //ZZ || e->Iex.Const.con->Ico.U1 == False); //ZZ r = newVRegI(env); //ZZ addInstr(env, ARMInstr_Imm32(r, 0)); //ZZ addInstr(env, ARMInstr_CmpOrTst(True/*isCmp*/, r, ARMRI84_R(r))); //ZZ return e->Iex.Const.con->Ico.U1 ? ARMcc_EQ : ARMcc_NE; //ZZ } //ZZ //ZZ // JRS 2013-Jan-03: this seems completely nonsensical //ZZ /* --- CasCmpEQ* --- */ //ZZ /* Ist_Cas has a dummy argument to compare with, so comparison is //ZZ always true. 
*/ //ZZ //if (e->tag == Iex_Binop //ZZ // && (e->Iex.Binop.op == Iop_CasCmpEQ32 //ZZ // || e->Iex.Binop.op == Iop_CasCmpEQ16 //ZZ // || e->Iex.Binop.op == Iop_CasCmpEQ8)) { //ZZ // return ARMcc_AL; //ZZ //} ppIRExpr(e); vpanic("iselCondCode"); } /* --------------------- Reg --------------------- */ static HReg iselIntExpr_R ( ISelEnv* env, IRExpr* e ) { HReg r = iselIntExpr_R_wrk(env, e); /* sanity checks ... */ # if 0 vex_printf("\n"); ppIRExpr(e); vex_printf("\n"); # endif vassert(hregClass(r) == HRcInt64); vassert(hregIsVirtual(r)); return r; } /* DO NOT CALL THIS DIRECTLY ! */ static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e ) { IRType ty = typeOfIRExpr(env->type_env,e); vassert(ty == Ity_I64 || ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8); switch (e->tag) { /* --------- TEMP --------- */ case Iex_RdTmp: { return lookupIRTemp(env, e->Iex.RdTmp.tmp); } /* --------- LOAD --------- */ case Iex_Load: { HReg dst = newVRegI(env); if (e->Iex.Load.end != Iend_LE) goto irreducible; if (ty == Ity_I64) { ARM64AMode* amode = iselIntExpr_AMode ( env, e->Iex.Load.addr, ty ); addInstr(env, ARM64Instr_LdSt64(True/*isLoad*/, dst, amode)); return dst; } if (ty == Ity_I32) { ARM64AMode* amode = iselIntExpr_AMode ( env, e->Iex.Load.addr, ty ); addInstr(env, ARM64Instr_LdSt32(True/*isLoad*/, dst, amode)); return dst; } if (ty == Ity_I16) { ARM64AMode* amode = iselIntExpr_AMode ( env, e->Iex.Load.addr, ty ); addInstr(env, ARM64Instr_LdSt16(True/*isLoad*/, dst, amode)); return dst; } if (ty == Ity_I8) { ARM64AMode* amode = iselIntExpr_AMode ( env, e->Iex.Load.addr, ty ); addInstr(env, ARM64Instr_LdSt8(True/*isLoad*/, dst, amode)); return dst; } break; } /* --------- BINARY OP --------- */ case Iex_Binop: { ARM64LogicOp lop = 0; /* invalid */ ARM64ShiftOp sop = 0; /* invalid */ /* Special-case 0-x into a Neg instruction. Not because it's particularly useful but more so as to give value flow using this instruction, so as to check its assembly correctness for implementation of Left32/Left64. 
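         (For example, with an illustrative temporary t7,
         Sub64(0:I64, t7) is selected as a single NEG of t7, rather
         than materialising the zero constant and subtracting.)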
*/ switch (e->Iex.Binop.op) { case Iop_Sub64: if (isZeroU64(e->Iex.Binop.arg1)) { HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2); HReg dst = newVRegI(env); addInstr(env, ARM64Instr_Unary(dst, argR, ARM64un_NEG)); return dst; } break; default: break; } /* ADD/SUB */ switch (e->Iex.Binop.op) { case Iop_Add64: case Iop_Add32: case Iop_Sub64: case Iop_Sub32: { Bool isAdd = e->Iex.Binop.op == Iop_Add64 || e->Iex.Binop.op == Iop_Add32; HReg dst = newVRegI(env); HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1); ARM64RIA* argR = iselIntExpr_RIA(env, e->Iex.Binop.arg2); addInstr(env, ARM64Instr_Arith(dst, argL, argR, isAdd)); return dst; } default: break; } /* AND/OR/XOR */ switch (e->Iex.Binop.op) { case Iop_And64: case Iop_And32: lop = ARM64lo_AND; goto log_binop; case Iop_Or64: case Iop_Or32: lop = ARM64lo_OR; goto log_binop; case Iop_Xor64: case Iop_Xor32: lop = ARM64lo_XOR; goto log_binop; log_binop: { HReg dst = newVRegI(env); HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1); ARM64RIL* argR = iselIntExpr_RIL(env, e->Iex.Binop.arg2); addInstr(env, ARM64Instr_Logic(dst, argL, argR, lop)); return dst; } default: break; } /* SHL/SHR/SAR */ switch (e->Iex.Binop.op) { case Iop_Shr64: sop = ARM64sh_SHR; goto sh_binop; case Iop_Sar64: sop = ARM64sh_SAR; goto sh_binop; case Iop_Shl64: case Iop_Shl32: sop = ARM64sh_SHL; goto sh_binop; sh_binop: { HReg dst = newVRegI(env); HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1); ARM64RI6* argR = iselIntExpr_RI6(env, e->Iex.Binop.arg2); addInstr(env, ARM64Instr_Shift(dst, argL, argR, sop)); return dst; } case Iop_Shr32: case Iop_Sar32: { Bool zx = e->Iex.Binop.op == Iop_Shr32; HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1); ARM64RI6* argR = iselIntExpr_RI6(env, e->Iex.Binop.arg2); HReg dst = zx ? widen_z_32_to_64(env, argL) : widen_s_32_to_64(env, argL); addInstr(env, ARM64Instr_Shift(dst, dst, argR, ARM64sh_SHR)); return dst; } default: break; } /* MUL */ if (e->Iex.Binop.op == Iop_Mul64 || e->Iex.Binop.op == Iop_Mul32) { HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1); HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2); HReg dst = newVRegI(env); addInstr(env, ARM64Instr_Mul(dst, argL, argR, ARM64mul_PLAIN)); return dst; } /* MULL */ if (e->Iex.Binop.op == Iop_MullU32 || e->Iex.Binop.op == Iop_MullS32) { Bool isS = e->Iex.Binop.op == Iop_MullS32; HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1); HReg extL = (isS ? widen_s_32_to_64 : widen_z_32_to_64)(env, argL); HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2); HReg extR = (isS ? widen_s_32_to_64 : widen_z_32_to_64)(env, argR); HReg dst = newVRegI(env); addInstr(env, ARM64Instr_Mul(dst, extL, extR, ARM64mul_PLAIN)); return dst; } /* Handle misc other ops. */ if (e->Iex.Binop.op == Iop_Max32U) { HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1); HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2); HReg dst = newVRegI(env); addInstr(env, ARM64Instr_Cmp(argL, ARM64RIA_R(argR), False/*!is64*/)); addInstr(env, ARM64Instr_CSel(dst, argL, argR, ARM64cc_CS)); return dst; } if (e->Iex.Binop.op == Iop_32HLto64) { HReg hi32s = iselIntExpr_R(env, e->Iex.Binop.arg1); HReg lo32s = iselIntExpr_R(env, e->Iex.Binop.arg2); HReg lo32 = widen_z_32_to_64(env, lo32s); HReg hi32 = newVRegI(env); addInstr(env, ARM64Instr_Shift(hi32, hi32s, ARM64RI6_I6(32), ARM64sh_SHL)); addInstr(env, ARM64Instr_Logic(hi32, hi32, ARM64RIL_R(lo32), ARM64lo_OR)); return hi32; } if (e->Iex.Binop.op == Iop_CmpF64 || e->Iex.Binop.op == Iop_CmpF32) { Bool isD = e->Iex.Binop.op == Iop_CmpF64; HReg dL = (isD ? 
iselDblExpr : iselFltExpr)(env, e->Iex.Binop.arg1); HReg dR = (isD ? iselDblExpr : iselFltExpr)(env, e->Iex.Binop.arg2); HReg dst = newVRegI(env); HReg imm = newVRegI(env); /* Do the compare (FCMP), which sets NZCV in PSTATE. Then create in dst, the IRCmpF64Result encoded result. */ addInstr(env, (isD ? ARM64Instr_VCmpD : ARM64Instr_VCmpS)(dL, dR)); addInstr(env, ARM64Instr_Imm64(dst, 0)); addInstr(env, ARM64Instr_Imm64(imm, 0x40)); // 0x40 = Ircr_EQ addInstr(env, ARM64Instr_CSel(dst, imm, dst, ARM64cc_EQ)); addInstr(env, ARM64Instr_Imm64(imm, 0x01)); // 0x01 = Ircr_LT addInstr(env, ARM64Instr_CSel(dst, imm, dst, ARM64cc_MI)); addInstr(env, ARM64Instr_Imm64(imm, 0x00)); // 0x00 = Ircr_GT addInstr(env, ARM64Instr_CSel(dst, imm, dst, ARM64cc_GT)); addInstr(env, ARM64Instr_Imm64(imm, 0x45)); // 0x45 = Ircr_UN addInstr(env, ARM64Instr_CSel(dst, imm, dst, ARM64cc_VS)); return dst; } { /* local scope */ ARM64CvtOp cvt_op = ARM64cvt_INVALID; Bool srcIsD = False; switch (e->Iex.Binop.op) { case Iop_F64toI64S: cvt_op = ARM64cvt_F64_I64S; srcIsD = True; break; case Iop_F64toI64U: cvt_op = ARM64cvt_F64_I64U; srcIsD = True; break; case Iop_F64toI32S: cvt_op = ARM64cvt_F64_I32S; srcIsD = True; break; case Iop_F64toI32U: cvt_op = ARM64cvt_F64_I32U; srcIsD = True; break; case Iop_F32toI32S: cvt_op = ARM64cvt_F32_I32S; srcIsD = False; break; case Iop_F32toI32U: cvt_op = ARM64cvt_F32_I32U; srcIsD = False; break; case Iop_F32toI64S: cvt_op = ARM64cvt_F32_I64S; srcIsD = False; break; case Iop_F32toI64U: cvt_op = ARM64cvt_F32_I64U; srcIsD = False; break; default: break; } if (cvt_op != ARM64cvt_INVALID) { /* This is all a bit dodgy, because we can't handle a non-constant (not-known-at-JIT-time) rounding mode indication. That's because there's no instruction AFAICS that does this conversion but rounds according to FPCR.RM, so we have to bake the rounding mode into the instruction right now. But that should be OK because (1) the front end attaches a literal Irrm_ value to the conversion binop, and (2) iropt will never float that off via CSE, into a literal. Hence we should always have an Irrm_ value as the first arg. */ IRExpr* arg1 = e->Iex.Binop.arg1; if (arg1->tag != Iex_Const) goto irreducible; IRConst* arg1con = arg1->Iex.Const.con; vassert(arg1con->tag == Ico_U32); // else ill-typed IR UInt irrm = arg1con->Ico.U32; /* Find the ARM-encoded equivalent for |irrm|. */ UInt armrm = 4; /* impossible */ switch (irrm) { case Irrm_NEAREST: armrm = 0; break; case Irrm_NegINF: armrm = 2; break; case Irrm_PosINF: armrm = 1; break; case Irrm_ZERO: armrm = 3; break; default: goto irreducible; } HReg src = (srcIsD ? 
iselDblExpr : iselFltExpr) (env, e->Iex.Binop.arg2); HReg dst = newVRegI(env); addInstr(env, ARM64Instr_VCvtF2I(cvt_op, dst, src, armrm)); return dst; } } /* local scope */ //ZZ if (e->Iex.Binop.op == Iop_GetElem8x8 //ZZ || e->Iex.Binop.op == Iop_GetElem16x4 //ZZ || e->Iex.Binop.op == Iop_GetElem32x2) { //ZZ HReg res = newVRegI(env); //ZZ HReg arg = iselNeon64Expr(env, e->Iex.Binop.arg1); //ZZ UInt index, size; //ZZ if (e->Iex.Binop.arg2->tag != Iex_Const || //ZZ typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) { //ZZ vpanic("ARM target supports GetElem with constant " //ZZ "second argument only\n"); //ZZ } //ZZ index = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8; //ZZ switch (e->Iex.Binop.op) { //ZZ case Iop_GetElem8x8: vassert(index < 8); size = 0; break; //ZZ case Iop_GetElem16x4: vassert(index < 4); size = 1; break; //ZZ case Iop_GetElem32x2: vassert(index < 2); size = 2; break; //ZZ default: vassert(0); //ZZ } //ZZ addInstr(env, ARMInstr_NUnaryS(ARMneon_GETELEMS, //ZZ mkARMNRS(ARMNRS_Reg, res, 0), //ZZ mkARMNRS(ARMNRS_Scalar, arg, index), //ZZ size, False)); //ZZ return res; //ZZ } //ZZ //ZZ if (e->Iex.Binop.op == Iop_GetElem8x16 //ZZ || e->Iex.Binop.op == Iop_GetElem16x8 //ZZ || e->Iex.Binop.op == Iop_GetElem32x4) { //ZZ HReg res = newVRegI(env); //ZZ HReg arg = iselNeonExpr(env, e->Iex.Binop.arg1); //ZZ UInt index, size; //ZZ if (e->Iex.Binop.arg2->tag != Iex_Const || //ZZ typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) { //ZZ vpanic("ARM target supports GetElem with constant " //ZZ "second argument only\n"); //ZZ } //ZZ index = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8; //ZZ switch (e->Iex.Binop.op) { //ZZ case Iop_GetElem8x16: vassert(index < 16); size = 0; break; //ZZ case Iop_GetElem16x8: vassert(index < 8); size = 1; break; //ZZ case Iop_GetElem32x4: vassert(index < 4); size = 2; break; //ZZ default: vassert(0); //ZZ } //ZZ addInstr(env, ARMInstr_NUnaryS(ARMneon_GETELEMS, //ZZ mkARMNRS(ARMNRS_Reg, res, 0), //ZZ mkARMNRS(ARMNRS_Scalar, arg, index), //ZZ size, True)); //ZZ return res; //ZZ } /* All cases involving host-side helper calls. 
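Currently just the integer divides, which are implemented with ARM semantics by the h_calc_{u,s}div{32,64}_w_arm_semantics helpers. Per the AArch64 procedure call standard, the two arguments are moved into X0 and X1, the helper is called, and the 64-bit result is copied back out of X0 into a fresh virtual register.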
*/ void* fn = NULL; switch (e->Iex.Binop.op) { //ZZ case Iop_Add16x2: //ZZ fn = &h_generic_calc_Add16x2; break; //ZZ case Iop_Sub16x2: //ZZ fn = &h_generic_calc_Sub16x2; break; //ZZ case Iop_HAdd16Ux2: //ZZ fn = &h_generic_calc_HAdd16Ux2; break; //ZZ case Iop_HAdd16Sx2: //ZZ fn = &h_generic_calc_HAdd16Sx2; break; //ZZ case Iop_HSub16Ux2: //ZZ fn = &h_generic_calc_HSub16Ux2; break; //ZZ case Iop_HSub16Sx2: //ZZ fn = &h_generic_calc_HSub16Sx2; break; //ZZ case Iop_QAdd16Sx2: //ZZ fn = &h_generic_calc_QAdd16Sx2; break; //ZZ case Iop_QAdd16Ux2: //ZZ fn = &h_generic_calc_QAdd16Ux2; break; //ZZ case Iop_QSub16Sx2: //ZZ fn = &h_generic_calc_QSub16Sx2; break; //ZZ case Iop_Add8x4: //ZZ fn = &h_generic_calc_Add8x4; break; //ZZ case Iop_Sub8x4: //ZZ fn = &h_generic_calc_Sub8x4; break; //ZZ case Iop_HAdd8Ux4: //ZZ fn = &h_generic_calc_HAdd8Ux4; break; //ZZ case Iop_HAdd8Sx4: //ZZ fn = &h_generic_calc_HAdd8Sx4; break; //ZZ case Iop_HSub8Ux4: //ZZ fn = &h_generic_calc_HSub8Ux4; break; //ZZ case Iop_HSub8Sx4: //ZZ fn = &h_generic_calc_HSub8Sx4; break; //ZZ case Iop_QAdd8Sx4: //ZZ fn = &h_generic_calc_QAdd8Sx4; break; //ZZ case Iop_QAdd8Ux4: //ZZ fn = &h_generic_calc_QAdd8Ux4; break; //ZZ case Iop_QSub8Sx4: //ZZ fn = &h_generic_calc_QSub8Sx4; break; //ZZ case Iop_QSub8Ux4: //ZZ fn = &h_generic_calc_QSub8Ux4; break; //ZZ case Iop_Sad8Ux4: //ZZ fn = &h_generic_calc_Sad8Ux4; break; //ZZ case Iop_QAdd32S: //ZZ fn = &h_generic_calc_QAdd32S; break; //ZZ case Iop_QSub32S: //ZZ fn = &h_generic_calc_QSub32S; break; //ZZ case Iop_QSub16Ux2: //ZZ fn = &h_generic_calc_QSub16Ux2; break; case Iop_DivU32: fn = &h_calc_udiv32_w_arm_semantics; break; case Iop_DivS32: fn = &h_calc_sdiv32_w_arm_semantics; break; case Iop_DivU64: fn = &h_calc_udiv64_w_arm_semantics; break; case Iop_DivS64: fn = &h_calc_sdiv64_w_arm_semantics; break; default: break; } if (fn) { HReg regL = iselIntExpr_R(env, e->Iex.Binop.arg1); HReg regR = iselIntExpr_R(env, e->Iex.Binop.arg2); HReg res = newVRegI(env); addInstr(env, ARM64Instr_MovI(hregARM64_X0(), regL)); addInstr(env, ARM64Instr_MovI(hregARM64_X1(), regR)); addInstr(env, ARM64Instr_Call( ARM64cc_AL, (HWord)Ptr_to_ULong(fn), 2, mk_RetLoc_simple(RLPri_Int) )); addInstr(env, ARM64Instr_MovI(res, hregARM64_X0())); return res; } break; } /* --------- UNARY OP --------- */ case Iex_Unop: { switch (e->Iex.Unop.op) { case Iop_16Uto64: { /* This probably doesn't occur often enough to be worth rolling the extension into the load. */ IRExpr* arg = e->Iex.Unop.arg; HReg src = iselIntExpr_R(env, arg); HReg dst = widen_z_16_to_64(env, src); return dst; } case Iop_32Uto64: { IRExpr* arg = e->Iex.Unop.arg; if (arg->tag == Iex_Load) { /* This correctly zero extends because _LdSt32 is defined to do a zero extending load. */ HReg dst = newVRegI(env); ARM64AMode* am = iselIntExpr_AMode(env, arg->Iex.Load.addr, Ity_I32); addInstr(env, ARM64Instr_LdSt32(True/*isLoad*/, dst, am)); return dst; } /* else be lame and mask it */ HReg src = iselIntExpr_R(env, arg); HReg dst = widen_z_32_to_64(env, src); return dst; } case Iop_8Uto32: /* Just freeload on the 8Uto64 case */ case Iop_8Uto64: { IRExpr* arg = e->Iex.Unop.arg; if (arg->tag == Iex_Load) { /* This correctly zero extends because _LdSt8 is defined to do a zero extending load. 
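(An AArch64 LDRB writes the loaded byte to bits 7:0 of its destination register and zeroes bits 63:8, so no explicit masking is needed afterwards.)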
*/ HReg dst = newVRegI(env); ARM64AMode* am = iselIntExpr_AMode(env, arg->Iex.Load.addr, Ity_I8); addInstr(env, ARM64Instr_LdSt8(True/*isLoad*/, dst, am)); return dst; } /* else be lame and mask it */ HReg src = iselIntExpr_R(env, arg); HReg dst = widen_z_8_to_64(env, src); return dst; } case Iop_128HIto64: { HReg rHi, rLo; iselInt128Expr(&rHi,&rLo, env, e->Iex.Unop.arg); return rHi; /* and abandon rLo */ } case Iop_8Sto32: case Iop_8Sto64: { IRExpr* arg = e->Iex.Unop.arg; HReg src = iselIntExpr_R(env, arg); HReg dst = widen_s_8_to_64(env, src); return dst; } case Iop_16Sto32: case Iop_16Sto64: { IRExpr* arg = e->Iex.Unop.arg; HReg src = iselIntExpr_R(env, arg); HReg dst = widen_s_16_to_64(env, src); return dst; } case Iop_32Sto64: { IRExpr* arg = e->Iex.Unop.arg; HReg src = iselIntExpr_R(env, arg); HReg dst = widen_s_32_to_64(env, src); return dst; } case Iop_Not32: case Iop_Not64: { HReg dst = newVRegI(env); HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); addInstr(env, ARM64Instr_Unary(dst, src, ARM64un_NOT)); return dst; } case Iop_Clz64: { HReg dst = newVRegI(env); HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); addInstr(env, ARM64Instr_Unary(dst, src, ARM64un_CLZ)); return dst; } case Iop_Left32: case Iop_Left64: { /* Left64(src) = src | -src. Left32 can use the same implementation since in that case we don't care what the upper 32 bits become. */ HReg dst = newVRegI(env); HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); addInstr(env, ARM64Instr_Unary(dst, src, ARM64un_NEG)); addInstr(env, ARM64Instr_Logic(dst, dst, ARM64RIL_R(src), ARM64lo_OR)); return dst; } case Iop_CmpwNEZ64: { /* CmpwNEZ64(src) = (src == 0) ? 0...0 : 1...1 = Left64(src) >>s 63 */ HReg dst = newVRegI(env); HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); addInstr(env, ARM64Instr_Unary(dst, src, ARM64un_NEG)); addInstr(env, ARM64Instr_Logic(dst, dst, ARM64RIL_R(src), ARM64lo_OR)); addInstr(env, ARM64Instr_Shift(dst, dst, ARM64RI6_I6(63), ARM64sh_SAR)); return dst; } case Iop_CmpwNEZ32: { /* CmpwNEZ32(src) = CmpwNEZ64(src & 0xFFFFFFFF) = Left64(src & 0xFFFFFFFF) >>s 63 */ HReg dst = newVRegI(env); HReg pre = iselIntExpr_R(env, e->Iex.Unop.arg); HReg src = widen_z_32_to_64(env, pre); addInstr(env, ARM64Instr_Unary(dst, src, ARM64un_NEG)); addInstr(env, ARM64Instr_Logic(dst, dst, ARM64RIL_R(src), ARM64lo_OR)); addInstr(env, ARM64Instr_Shift(dst, dst, ARM64RI6_I6(63), ARM64sh_SAR)); return dst; } case Iop_V128to64: case Iop_V128HIto64: { HReg dst = newVRegI(env); HReg src = iselV128Expr(env, e->Iex.Unop.arg); UInt laneNo = (e->Iex.Unop.op == Iop_V128HIto64) ? 1 : 0; addInstr(env, ARM64Instr_VXfromQ(dst, src, laneNo)); return dst; } case Iop_1Sto32: case Iop_1Sto64: { /* As with the iselStmt case for 'tmp:I1 = expr', we could do a lot better here if it ever became necessary. 
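The scheme below materialises the condition as 0 or 1 with a CSel, then smears the chosen bit across the whole register by shifting it up to bit 63 and arithmetically shifting it back down: 1 becomes 0xFFFFFFFFFFFFFFFF and 0 stays 0, which is exactly the 1Sto64 result; 1Sto32 just uses the low 32 bits of that.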
*/ HReg zero = newVRegI(env); HReg one = newVRegI(env); HReg dst = newVRegI(env); addInstr(env, ARM64Instr_Imm64(zero, 0)); addInstr(env, ARM64Instr_Imm64(one, 1)); ARM64CondCode cc = iselCondCode(env, e->Iex.Unop.arg); addInstr(env, ARM64Instr_CSel(dst, one, zero, cc)); addInstr(env, ARM64Instr_Shift(dst, dst, ARM64RI6_I6(63), ARM64sh_SHL)); addInstr(env, ARM64Instr_Shift(dst, dst, ARM64RI6_I6(63), ARM64sh_SAR)); return dst; } case Iop_NarrowUn16to8x8: case Iop_NarrowUn32to16x4: case Iop_NarrowUn64to32x2: { HReg src = iselV128Expr(env, e->Iex.Unop.arg); HReg tmp = newVRegV(env); HReg dst = newVRegI(env); UInt dszBlg2 = 3; /* illegal */ switch (e->Iex.Unop.op) { case Iop_NarrowUn16to8x8: dszBlg2 = 0; break; // 16to8_x8 case Iop_NarrowUn32to16x4: dszBlg2 = 1; break; // 32to16_x4 case Iop_NarrowUn64to32x2: dszBlg2 = 2; break; // 64to32_x2 default: vassert(0); } addInstr(env, ARM64Instr_VNarrowV(dszBlg2, tmp, src)); addInstr(env, ARM64Instr_VXfromQ(dst, tmp, 0/*laneNo*/)); return dst; } //ZZ case Iop_64HIto32: { //ZZ HReg rHi, rLo; //ZZ iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg); //ZZ return rHi; /* and abandon rLo .. poor wee thing :-) */ //ZZ } //ZZ case Iop_64to32: { //ZZ HReg rHi, rLo; //ZZ iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg); //ZZ return rLo; /* similar stupid comment to the above ... */ //ZZ } //ZZ case Iop_64to8: { //ZZ HReg rHi, rLo; //ZZ if (env->hwcaps & VEX_HWCAPS_ARM_NEON) { //ZZ HReg tHi = newVRegI(env); //ZZ HReg tLo = newVRegI(env); //ZZ HReg tmp = iselNeon64Expr(env, e->Iex.Unop.arg); //ZZ addInstr(env, ARMInstr_VXferD(False, tmp, tHi, tLo)); //ZZ rHi = tHi; //ZZ rLo = tLo; //ZZ } else { //ZZ iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg); //ZZ } //ZZ return rLo; //ZZ } case Iop_1Uto64: { /* 1Uto64(tmp). */ HReg dst = newVRegI(env); if (e->Iex.Unop.arg->tag == Iex_RdTmp) { ARM64RIL* one = mb_mkARM64RIL_I(1); HReg src = lookupIRTemp(env, e->Iex.Unop.arg->Iex.RdTmp.tmp); vassert(one); addInstr(env, ARM64Instr_Logic(dst, src, one, ARM64lo_AND)); } else { /* CLONE-01 */ HReg zero = newVRegI(env); HReg one = newVRegI(env); addInstr(env, ARM64Instr_Imm64(zero, 0)); addInstr(env, ARM64Instr_Imm64(one, 1)); ARM64CondCode cc = iselCondCode(env, e->Iex.Unop.arg); addInstr(env, ARM64Instr_CSel(dst, one, zero, cc)); } return dst; } //ZZ case Iop_1Uto8: { //ZZ HReg dst = newVRegI(env); //ZZ ARMCondCode cond = iselCondCode(env, e->Iex.Unop.arg); //ZZ addInstr(env, ARMInstr_Mov(dst, ARMRI84_I84(0,0))); //ZZ addInstr(env, ARMInstr_CMov(cond, dst, ARMRI84_I84(1,0))); //ZZ return dst; //ZZ } //ZZ //ZZ case Iop_1Sto32: { //ZZ HReg dst = newVRegI(env); //ZZ ARMCondCode cond = iselCondCode(env, e->Iex.Unop.arg); //ZZ ARMRI5* amt = ARMRI5_I5(31); //ZZ /* This is really rough. We could do much better here; //ZZ perhaps mvn{cond} dst, #0 as the second insn? //ZZ (same applies to 1Sto64) */ //ZZ addInstr(env, ARMInstr_Mov(dst, ARMRI84_I84(0,0))); //ZZ addInstr(env, ARMInstr_CMov(cond, dst, ARMRI84_I84(1,0))); //ZZ addInstr(env, ARMInstr_Shift(ARMsh_SHL, dst, dst, amt)); //ZZ addInstr(env, ARMInstr_Shift(ARMsh_SAR, dst, dst, amt)); //ZZ return dst; //ZZ } //ZZ //ZZ case Iop_Clz32: { //ZZ /* Count leading zeroes; easy on ARM. 
*/ //ZZ HReg dst = newVRegI(env); //ZZ HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); //ZZ addInstr(env, ARMInstr_Unary(ARMun_CLZ, dst, src)); //ZZ return dst; //ZZ } //ZZ //ZZ case Iop_CmpwNEZ32: { //ZZ HReg dst = newVRegI(env); //ZZ HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); //ZZ addInstr(env, ARMInstr_Unary(ARMun_NEG, dst, src)); //ZZ addInstr(env, ARMInstr_Alu(ARMalu_OR, dst, dst, ARMRI84_R(src))); //ZZ addInstr(env, ARMInstr_Shift(ARMsh_SAR, dst, dst, ARMRI5_I5(31))); //ZZ return dst; //ZZ } //ZZ //ZZ case Iop_ReinterpF32asI32: { //ZZ HReg dst = newVRegI(env); //ZZ HReg src = iselFltExpr(env, e->Iex.Unop.arg); //ZZ addInstr(env, ARMInstr_VXferS(False/*!toS*/, src, dst)); //ZZ return dst; //ZZ } case Iop_64to32: case Iop_64to16: case Iop_64to8: /* These are no-ops. */ return iselIntExpr_R(env, e->Iex.Unop.arg); default: break; } //ZZ /* All Unop cases involving host-side helper calls. */ //ZZ void* fn = NULL; //ZZ switch (e->Iex.Unop.op) { //ZZ case Iop_CmpNEZ16x2: //ZZ fn = &h_generic_calc_CmpNEZ16x2; break; //ZZ case Iop_CmpNEZ8x4: //ZZ fn = &h_generic_calc_CmpNEZ8x4; break; //ZZ default: //ZZ break; //ZZ } //ZZ //ZZ if (fn) { //ZZ HReg arg = iselIntExpr_R(env, e->Iex.Unop.arg); //ZZ HReg res = newVRegI(env); //ZZ addInstr(env, mk_iMOVds_RR(hregARM_R0(), arg)); //ZZ addInstr(env, ARMInstr_Call( ARMcc_AL, (HWord)Ptr_to_ULong(fn), //ZZ 1, RetLocInt )); //ZZ addInstr(env, mk_iMOVds_RR(res, hregARM_R0())); //ZZ return res; //ZZ } break; } /* --------- GET --------- */ case Iex_Get: { if (ty == Ity_I64 && 0 == (e->Iex.Get.offset & 7) && e->Iex.Get.offset < (8<<12)-8) { HReg dst = newVRegI(env); ARM64AMode* am = mk_baseblock_64bit_access_amode(e->Iex.Get.offset); addInstr(env, ARM64Instr_LdSt64(True/*isLoad*/, dst, am)); return dst; } if (ty == Ity_I32 && 0 == (e->Iex.Get.offset & 3) && e->Iex.Get.offset < (4<<12)-4) { HReg dst = newVRegI(env); ARM64AMode* am = mk_baseblock_32bit_access_amode(e->Iex.Get.offset); addInstr(env, ARM64Instr_LdSt32(True/*isLoad*/, dst, am)); return dst; } if (ty == Ity_I16 && 0 == (e->Iex.Get.offset & 1) && e->Iex.Get.offset < (2<<12)-2) { HReg dst = newVRegI(env); ARM64AMode* am = mk_baseblock_16bit_access_amode(e->Iex.Get.offset); addInstr(env, ARM64Instr_LdSt16(True/*isLoad*/, dst, am)); return dst; } if (ty == Ity_I8 /* && no alignment check */ && e->Iex.Get.offset < (1<<12)-1) { HReg dst = newVRegI(env); ARM64AMode* am = mk_baseblock_8bit_access_amode(e->Iex.Get.offset); addInstr(env, ARM64Instr_LdSt8(True/*isLoad*/, dst, am)); return dst; } break; } /* --------- CCALL --------- */ case Iex_CCall: { HReg dst = newVRegI(env); vassert(ty == e->Iex.CCall.retty); /* be very restrictive for now. Only 64-bit ints allowed for args, and 64 bits for return type. Don't forget to change the RetLoc if more types are allowed in future. */ if (e->Iex.CCall.retty != Ity_I64) goto irreducible; /* Marshal args, do the call, clear stack. 
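doHelperCall does the argument marshalling and emits the call itself. Since only a 64-bit integer return is allowed here, we insist below on a simple RLPri_Int return location with no stack adjustment, and copy the result out of X0.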
*/ UInt addToSp = 0; RetLoc rloc = mk_RetLoc_INVALID(); Bool ok = doHelperCall( &addToSp, &rloc, env, NULL/*guard*/, e->Iex.CCall.cee, e->Iex.CCall.retty, e->Iex.CCall.args ); /* */ if (ok) { vassert(is_sane_RetLoc(rloc)); vassert(rloc.pri == RLPri_Int); vassert(addToSp == 0); addInstr(env, ARM64Instr_MovI(dst, hregARM64_X0())); return dst; } /* else fall through; will hit the irreducible: label */ } /* --------- LITERAL --------- */ /* 64-bit literals */ case Iex_Const: { ULong u = 0; HReg dst = newVRegI(env); switch (e->Iex.Const.con->tag) { case Ico_U64: u = e->Iex.Const.con->Ico.U64; break; case Ico_U32: u = e->Iex.Const.con->Ico.U32; break; case Ico_U16: u = e->Iex.Const.con->Ico.U16; break; case Ico_U8: u = e->Iex.Const.con->Ico.U8; break; default: ppIRExpr(e); vpanic("iselIntExpr_R.Iex_Const(arm64)"); } addInstr(env, ARM64Instr_Imm64(dst, u)); return dst; } /* --------- MULTIPLEX --------- */ case Iex_ITE: { /* ITE(ccexpr, iftrue, iffalse) */ if (ty == Ity_I64 || ty == Ity_I32) { ARM64CondCode cc; HReg r1 = iselIntExpr_R(env, e->Iex.ITE.iftrue); HReg r0 = iselIntExpr_R(env, e->Iex.ITE.iffalse); HReg dst = newVRegI(env); cc = iselCondCode(env, e->Iex.ITE.cond); addInstr(env, ARM64Instr_CSel(dst, r1, r0, cc)); return dst; } break; } default: break; } /* switch (e->tag) */ /* We get here if no pattern matched. */ irreducible: ppIRExpr(e); vpanic("iselIntExpr_R: cannot reduce tree"); } /*---------------------------------------------------------*/ /*--- ISEL: Integer expressions (128 bit) ---*/ /*---------------------------------------------------------*/ /* Compute a 128-bit value into a register pair, which is returned as the first two parameters. As with iselIntExpr_R, these may be either real or virtual regs; in any case they must not be changed by subsequent code emitted by the caller. */ static void iselInt128Expr ( HReg* rHi, HReg* rLo, ISelEnv* env, IRExpr* e ) { iselInt128Expr_wrk(rHi, rLo, env, e); # if 0 vex_printf("\n"); ppIRExpr(e); vex_printf("\n"); # endif vassert(hregClass(*rHi) == HRcInt64); vassert(hregIsVirtual(*rHi)); vassert(hregClass(*rLo) == HRcInt64); vassert(hregIsVirtual(*rLo)); } /* DO NOT CALL THIS DIRECTLY ! */ static void iselInt128Expr_wrk ( HReg* rHi, HReg* rLo, ISelEnv* env, IRExpr* e ) { vassert(e); vassert(typeOfIRExpr(env->type_env,e) == Ity_I128); /* --------- BINARY ops --------- */ if (e->tag == Iex_Binop) { switch (e->Iex.Binop.op) { /* 64 x 64 -> 128 multiply */ case Iop_MullU64: case Iop_MullS64: { Bool syned = toBool(e->Iex.Binop.op == Iop_MullS64); HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1); HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2); HReg dstLo = newVRegI(env); HReg dstHi = newVRegI(env); addInstr(env, ARM64Instr_Mul(dstLo, argL, argR, ARM64mul_PLAIN)); addInstr(env, ARM64Instr_Mul(dstHi, argL, argR, syned ? ARM64mul_SX : ARM64mul_ZX)); *rHi = dstHi; *rLo = dstLo; return; } /* 64HLto128(e1,e2) */ case Iop_64HLto128: *rHi = iselIntExpr_R(env, e->Iex.Binop.arg1); *rLo = iselIntExpr_R(env, e->Iex.Binop.arg2); return; default: break; } } /* if (e->tag == Iex_Binop) */ ppIRExpr(e); vpanic("iselInt128Expr(arm64)"); } //ZZ /* -------------------- 64-bit -------------------- */ //ZZ //ZZ /* Compute a 64-bit value into a register pair, which is returned as //ZZ the first two parameters. As with iselIntExpr_R, these may be //ZZ either real or virtual regs; in any case they must not be changed //ZZ by subsequent code emitted by the caller. 
*/ //ZZ //ZZ static void iselInt64Expr ( HReg* rHi, HReg* rLo, ISelEnv* env, IRExpr* e ) //ZZ { //ZZ iselInt64Expr_wrk(rHi, rLo, env, e); //ZZ # if 0 //ZZ vex_printf("\n"); ppIRExpr(e); vex_printf("\n"); //ZZ # endif //ZZ vassert(hregClass(*rHi) == HRcInt32); //ZZ vassert(hregIsVirtual(*rHi)); //ZZ vassert(hregClass(*rLo) == HRcInt32); //ZZ vassert(hregIsVirtual(*rLo)); //ZZ } //ZZ //ZZ /* DO NOT CALL THIS DIRECTLY ! */ //ZZ static void iselInt64Expr_wrk ( HReg* rHi, HReg* rLo, ISelEnv* env, IRExpr* e ) //ZZ { //ZZ vassert(e); //ZZ vassert(typeOfIRExpr(env->type_env,e) == Ity_I64); //ZZ //ZZ /* 64-bit literal */ //ZZ if (e->tag == Iex_Const) { //ZZ ULong w64 = e->Iex.Const.con->Ico.U64; //ZZ UInt wHi = toUInt(w64 >> 32); //ZZ UInt wLo = toUInt(w64); //ZZ HReg tHi = newVRegI(env); //ZZ HReg tLo = newVRegI(env); //ZZ vassert(e->Iex.Const.con->tag == Ico_U64); //ZZ addInstr(env, ARMInstr_Imm32(tHi, wHi)); //ZZ addInstr(env, ARMInstr_Imm32(tLo, wLo)); //ZZ *rHi = tHi; //ZZ *rLo = tLo; //ZZ return; //ZZ } //ZZ //ZZ /* read 64-bit IRTemp */ //ZZ if (e->tag == Iex_RdTmp) { //ZZ if (env->hwcaps & VEX_HWCAPS_ARM_NEON) { //ZZ HReg tHi = newVRegI(env); //ZZ HReg tLo = newVRegI(env); //ZZ HReg tmp = iselNeon64Expr(env, e); //ZZ addInstr(env, ARMInstr_VXferD(False, tmp, tHi, tLo)); //ZZ *rHi = tHi; //ZZ *rLo = tLo; //ZZ } else { //ZZ lookupIRTemp64( rHi, rLo, env, e->Iex.RdTmp.tmp); //ZZ } //ZZ return; //ZZ } //ZZ //ZZ /* 64-bit load */ //ZZ if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) { //ZZ HReg tLo, tHi, rA; //ZZ vassert(e->Iex.Load.ty == Ity_I64); //ZZ rA = iselIntExpr_R(env, e->Iex.Load.addr); //ZZ tHi = newVRegI(env); //ZZ tLo = newVRegI(env); //ZZ addInstr(env, ARMInstr_LdSt32(ARMcc_AL, True/*isLoad*/, //ZZ tHi, ARMAMode1_RI(rA, 4))); //ZZ addInstr(env, ARMInstr_LdSt32(ARMcc_AL, True/*isLoad*/, //ZZ tLo, ARMAMode1_RI(rA, 0))); //ZZ *rHi = tHi; //ZZ *rLo = tLo; //ZZ return; //ZZ } //ZZ //ZZ /* 64-bit GET */ //ZZ if (e->tag == Iex_Get) { //ZZ ARMAMode1* am0 = ARMAMode1_RI(hregARM_R8(), e->Iex.Get.offset + 0); //ZZ ARMAMode1* am4 = ARMAMode1_RI(hregARM_R8(), e->Iex.Get.offset + 4); //ZZ HReg tHi = newVRegI(env); //ZZ HReg tLo = newVRegI(env); //ZZ addInstr(env, ARMInstr_LdSt32(ARMcc_AL, True/*isLoad*/, tHi, am4)); //ZZ addInstr(env, ARMInstr_LdSt32(ARMcc_AL, True/*isLoad*/, tLo, am0)); //ZZ *rHi = tHi; //ZZ *rLo = tLo; //ZZ return; //ZZ } //ZZ //ZZ /* --------- BINARY ops --------- */ //ZZ if (e->tag == Iex_Binop) { //ZZ switch (e->Iex.Binop.op) { //ZZ //ZZ /* 32 x 32 -> 64 multiply */ //ZZ case Iop_MullS32: //ZZ case Iop_MullU32: { //ZZ HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1); //ZZ HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2); //ZZ HReg tHi = newVRegI(env); //ZZ HReg tLo = newVRegI(env); //ZZ ARMMulOp mop = e->Iex.Binop.op == Iop_MullS32 //ZZ ? 
ARMmul_SX : ARMmul_ZX; //ZZ addInstr(env, mk_iMOVds_RR(hregARM_R2(), argL)); //ZZ addInstr(env, mk_iMOVds_RR(hregARM_R3(), argR)); //ZZ addInstr(env, ARMInstr_Mul(mop)); //ZZ addInstr(env, mk_iMOVds_RR(tHi, hregARM_R1())); //ZZ addInstr(env, mk_iMOVds_RR(tLo, hregARM_R0())); //ZZ *rHi = tHi; //ZZ *rLo = tLo; //ZZ return; //ZZ } //ZZ //ZZ case Iop_Or64: { //ZZ HReg xLo, xHi, yLo, yHi; //ZZ HReg tHi = newVRegI(env); //ZZ HReg tLo = newVRegI(env); //ZZ iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1); //ZZ iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2); //ZZ addInstr(env, ARMInstr_Alu(ARMalu_OR, tHi, xHi, ARMRI84_R(yHi))); //ZZ addInstr(env, ARMInstr_Alu(ARMalu_OR, tLo, xLo, ARMRI84_R(yLo))); //ZZ *rHi = tHi; //ZZ *rLo = tLo; //ZZ return; //ZZ } //ZZ //ZZ case Iop_Add64: { //ZZ HReg xLo, xHi, yLo, yHi; //ZZ HReg tHi = newVRegI(env); //ZZ HReg tLo = newVRegI(env); //ZZ iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1); //ZZ iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2); //ZZ addInstr(env, ARMInstr_Alu(ARMalu_ADDS, tLo, xLo, ARMRI84_R(yLo))); //ZZ addInstr(env, ARMInstr_Alu(ARMalu_ADC, tHi, xHi, ARMRI84_R(yHi))); //ZZ *rHi = tHi; //ZZ *rLo = tLo; //ZZ return; //ZZ } //ZZ //ZZ /* 32HLto64(e1,e2) */ //ZZ case Iop_32HLto64: { //ZZ *rHi = iselIntExpr_R(env, e->Iex.Binop.arg1); //ZZ *rLo = iselIntExpr_R(env, e->Iex.Binop.arg2); //ZZ return; //ZZ } //ZZ //ZZ default: //ZZ break; //ZZ } //ZZ } //ZZ //ZZ /* --------- UNARY ops --------- */ //ZZ if (e->tag == Iex_Unop) { //ZZ switch (e->Iex.Unop.op) { //ZZ //ZZ /* ReinterpF64asI64 */ //ZZ case Iop_ReinterpF64asI64: { //ZZ HReg dstHi = newVRegI(env); //ZZ HReg dstLo = newVRegI(env); //ZZ HReg src = iselDblExpr(env, e->Iex.Unop.arg); //ZZ addInstr(env, ARMInstr_VXferD(False/*!toD*/, src, dstHi, dstLo)); //ZZ *rHi = dstHi; //ZZ *rLo = dstLo; //ZZ return; //ZZ } //ZZ //ZZ /* Left64(e) */ //ZZ case Iop_Left64: { //ZZ HReg yLo, yHi; //ZZ HReg tHi = newVRegI(env); //ZZ HReg tLo = newVRegI(env); //ZZ HReg zero = newVRegI(env); //ZZ /* yHi:yLo = arg */ //ZZ iselInt64Expr(&yHi, &yLo, env, e->Iex.Unop.arg); //ZZ /* zero = 0 */ //ZZ addInstr(env, ARMInstr_Imm32(zero, 0)); //ZZ /* tLo = 0 - yLo, and set carry */ //ZZ addInstr(env, ARMInstr_Alu(ARMalu_SUBS, //ZZ tLo, zero, ARMRI84_R(yLo))); //ZZ /* tHi = 0 - yHi - carry */ //ZZ addInstr(env, ARMInstr_Alu(ARMalu_SBC, //ZZ tHi, zero, ARMRI84_R(yHi))); //ZZ /* So now we have tHi:tLo = -arg. To finish off, or 'arg' //ZZ back in, so as to give the final result //ZZ tHi:tLo = arg | -arg. */ //ZZ addInstr(env, ARMInstr_Alu(ARMalu_OR, tHi, tHi, ARMRI84_R(yHi))); //ZZ addInstr(env, ARMInstr_Alu(ARMalu_OR, tLo, tLo, ARMRI84_R(yLo))); //ZZ *rHi = tHi; //ZZ *rLo = tLo; //ZZ return; //ZZ } //ZZ //ZZ /* CmpwNEZ64(e) */ //ZZ case Iop_CmpwNEZ64: { //ZZ HReg srcLo, srcHi; //ZZ HReg tmp1 = newVRegI(env); //ZZ HReg tmp2 = newVRegI(env); //ZZ /* srcHi:srcLo = arg */ //ZZ iselInt64Expr(&srcHi, &srcLo, env, e->Iex.Unop.arg); //ZZ /* tmp1 = srcHi | srcLo */ //ZZ addInstr(env, ARMInstr_Alu(ARMalu_OR, //ZZ tmp1, srcHi, ARMRI84_R(srcLo))); //ZZ /* tmp2 = (tmp1 | -tmp1) >>s 31 */ //ZZ addInstr(env, ARMInstr_Unary(ARMun_NEG, tmp2, tmp1)); //ZZ addInstr(env, ARMInstr_Alu(ARMalu_OR, //ZZ tmp2, tmp2, ARMRI84_R(tmp1))); //ZZ addInstr(env, ARMInstr_Shift(ARMsh_SAR, //ZZ tmp2, tmp2, ARMRI5_I5(31))); //ZZ *rHi = tmp2; //ZZ *rLo = tmp2; //ZZ return; //ZZ } //ZZ //ZZ case Iop_1Sto64: { //ZZ HReg dst = newVRegI(env); //ZZ ARMCondCode cond = iselCondCode(env, e->Iex.Unop.arg); //ZZ ARMRI5* amt = ARMRI5_I5(31); //ZZ /* This is really rough. 
We could do much better here; //ZZ perhaps mvn{cond} dst, #0 as the second insn? //ZZ (same applies to 1Sto32) */ //ZZ addInstr(env, ARMInstr_Mov(dst, ARMRI84_I84(0,0))); //ZZ addInstr(env, ARMInstr_CMov(cond, dst, ARMRI84_I84(1,0))); //ZZ addInstr(env, ARMInstr_Shift(ARMsh_SHL, dst, dst, amt)); //ZZ addInstr(env, ARMInstr_Shift(ARMsh_SAR, dst, dst, amt)); //ZZ *rHi = dst; //ZZ *rLo = dst; //ZZ return; //ZZ } //ZZ //ZZ default: //ZZ break; //ZZ } //ZZ } /* if (e->tag == Iex_Unop) */ //ZZ //ZZ /* --------- MULTIPLEX --------- */ //ZZ if (e->tag == Iex_ITE) { // VFD //ZZ IRType tyC; //ZZ HReg r1hi, r1lo, r0hi, r0lo, dstHi, dstLo; //ZZ ARMCondCode cc; //ZZ tyC = typeOfIRExpr(env->type_env,e->Iex.ITE.cond); //ZZ vassert(tyC == Ity_I1); //ZZ iselInt64Expr(&r1hi, &r1lo, env, e->Iex.ITE.iftrue); //ZZ iselInt64Expr(&r0hi, &r0lo, env, e->Iex.ITE.iffalse); //ZZ dstHi = newVRegI(env); //ZZ dstLo = newVRegI(env); //ZZ addInstr(env, mk_iMOVds_RR(dstHi, r1hi)); //ZZ addInstr(env, mk_iMOVds_RR(dstLo, r1lo)); //ZZ cc = iselCondCode(env, e->Iex.ITE.cond); //ZZ addInstr(env, ARMInstr_CMov(cc ^ 1, dstHi, ARMRI84_R(r0hi))); //ZZ addInstr(env, ARMInstr_CMov(cc ^ 1, dstLo, ARMRI84_R(r0lo))); //ZZ *rHi = dstHi; //ZZ *rLo = dstLo; //ZZ return; //ZZ } //ZZ //ZZ /* It is convenient sometimes to call iselInt64Expr even when we //ZZ have NEON support (e.g. in do_helper_call we need 64-bit //ZZ arguments as 2 x 32 regs). */ //ZZ if (env->hwcaps & VEX_HWCAPS_ARM_NEON) { //ZZ HReg tHi = newVRegI(env); //ZZ HReg tLo = newVRegI(env); //ZZ HReg tmp = iselNeon64Expr(env, e); //ZZ addInstr(env, ARMInstr_VXferD(False, tmp, tHi, tLo)); //ZZ *rHi = tHi; //ZZ *rLo = tLo; //ZZ return ; //ZZ } //ZZ //ZZ ppIRExpr(e); //ZZ vpanic("iselInt64Expr"); //ZZ } //ZZ //ZZ //ZZ /*---------------------------------------------------------*/ //ZZ /*--- ISEL: Vector (NEON) expressions (64 bit) ---*/ //ZZ /*---------------------------------------------------------*/ //ZZ //ZZ static HReg iselNeon64Expr ( ISelEnv* env, IRExpr* e ) //ZZ { //ZZ HReg r = iselNeon64Expr_wrk( env, e ); //ZZ vassert(hregClass(r) == HRcFlt64); //ZZ vassert(hregIsVirtual(r)); //ZZ return r; //ZZ } //ZZ //ZZ /* DO NOT CALL THIS DIRECTLY */ //ZZ static HReg iselNeon64Expr_wrk ( ISelEnv* env, IRExpr* e ) //ZZ { //ZZ IRType ty = typeOfIRExpr(env->type_env, e); //ZZ MatchInfo mi; //ZZ vassert(e); //ZZ vassert(ty == Ity_I64); //ZZ //ZZ if (e->tag == Iex_RdTmp) { //ZZ return lookupIRTemp(env, e->Iex.RdTmp.tmp); //ZZ } //ZZ //ZZ if (e->tag == Iex_Const) { //ZZ HReg rLo, rHi; //ZZ HReg res = newVRegD(env); //ZZ iselInt64Expr(&rHi, &rLo, env, e); //ZZ addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo)); //ZZ return res; //ZZ } //ZZ //ZZ /* 64-bit load */ //ZZ if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) { //ZZ HReg res = newVRegD(env); //ZZ ARMAModeN* am = iselIntExpr_AModeN(env, e->Iex.Load.addr); //ZZ vassert(ty == Ity_I64); //ZZ addInstr(env, ARMInstr_NLdStD(True, res, am)); //ZZ return res; //ZZ } //ZZ //ZZ /* 64-bit GET */ //ZZ if (e->tag == Iex_Get) { //ZZ HReg addr = newVRegI(env); //ZZ HReg res = newVRegD(env); //ZZ vassert(ty == Ity_I64); //ZZ addInstr(env, ARMInstr_Add32(addr, hregARM_R8(), e->Iex.Get.offset)); //ZZ addInstr(env, ARMInstr_NLdStD(True, res, mkARMAModeN_R(addr))); //ZZ return res; //ZZ } //ZZ //ZZ /* --------- BINARY ops --------- */ //ZZ if (e->tag == Iex_Binop) { //ZZ switch (e->Iex.Binop.op) { //ZZ //ZZ /* 32 x 32 -> 64 multiply */ //ZZ case Iop_MullS32: //ZZ case Iop_MullU32: { //ZZ HReg rLo, rHi; //ZZ HReg res = newVRegD(env); //ZZ 
iselInt64Expr(&rHi, &rLo, env, e); //ZZ addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo)); //ZZ return res; //ZZ } //ZZ //ZZ case Iop_And64: { //ZZ HReg res = newVRegD(env); //ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); //ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VAND, //ZZ res, argL, argR, 4, False)); //ZZ return res; //ZZ } //ZZ case Iop_Or64: { //ZZ HReg res = newVRegD(env); //ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); //ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VORR, //ZZ res, argL, argR, 4, False)); //ZZ return res; //ZZ } //ZZ case Iop_Xor64: { //ZZ HReg res = newVRegD(env); //ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); //ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VXOR, //ZZ res, argL, argR, 4, False)); //ZZ return res; //ZZ } //ZZ //ZZ /* 32HLto64(e1,e2) */ //ZZ case Iop_32HLto64: { //ZZ HReg rHi = iselIntExpr_R(env, e->Iex.Binop.arg1); //ZZ HReg rLo = iselIntExpr_R(env, e->Iex.Binop.arg2); //ZZ HReg res = newVRegD(env); //ZZ addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo)); //ZZ return res; //ZZ } //ZZ //ZZ case Iop_Add8x8: //ZZ case Iop_Add16x4: //ZZ case Iop_Add32x2: //ZZ case Iop_Add64: { //ZZ HReg res = newVRegD(env); //ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); //ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); //ZZ UInt size; //ZZ switch (e->Iex.Binop.op) { //ZZ case Iop_Add8x8: size = 0; break; //ZZ case Iop_Add16x4: size = 1; break; //ZZ case Iop_Add32x2: size = 2; break; //ZZ case Iop_Add64: size = 3; break; //ZZ default: vassert(0); //ZZ } //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VADD, //ZZ res, argL, argR, size, False)); //ZZ return res; //ZZ } //ZZ case Iop_Add32Fx2: { //ZZ HReg res = newVRegD(env); //ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); //ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); //ZZ UInt size = 0; //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VADDFP, //ZZ res, argL, argR, size, False)); //ZZ return res; //ZZ } //ZZ case Iop_Recps32Fx2: { //ZZ HReg res = newVRegD(env); //ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); //ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); //ZZ UInt size = 0; //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VRECPS, //ZZ res, argL, argR, size, False)); //ZZ return res; //ZZ } //ZZ case Iop_Rsqrts32Fx2: { //ZZ HReg res = newVRegD(env); //ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); //ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); //ZZ UInt size = 0; //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VRSQRTS, //ZZ res, argL, argR, size, False)); //ZZ return res; //ZZ } //ZZ //ZZ // These 6 verified 18 Apr 2013 //ZZ case Iop_InterleaveHI32x2: //ZZ case Iop_InterleaveLO32x2: //ZZ case Iop_InterleaveOddLanes8x8: //ZZ case Iop_InterleaveEvenLanes8x8: //ZZ case Iop_InterleaveOddLanes16x4: //ZZ case Iop_InterleaveEvenLanes16x4: { //ZZ HReg rD = newVRegD(env); //ZZ HReg rM = newVRegD(env); //ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); //ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); //ZZ UInt size; //ZZ Bool resRd; // is the result in rD or rM ? 
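//ZZ            // The NDual TRN below updates rD and rM together; depending //ZZ            // on the op, the wanted result lands in rD or in rM, and //ZZ            // resRd records which one to return.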
//ZZ switch (e->Iex.Binop.op) { //ZZ case Iop_InterleaveOddLanes8x8: resRd = False; size = 0; break; //ZZ case Iop_InterleaveEvenLanes8x8: resRd = True; size = 0; break; //ZZ case Iop_InterleaveOddLanes16x4: resRd = False; size = 1; break; //ZZ case Iop_InterleaveEvenLanes16x4: resRd = True; size = 1; break; //ZZ case Iop_InterleaveHI32x2: resRd = False; size = 2; break; //ZZ case Iop_InterleaveLO32x2: resRd = True; size = 2; break; //ZZ default: vassert(0); //ZZ } //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rM, argL, 4, False)); //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rD, argR, 4, False)); //ZZ addInstr(env, ARMInstr_NDual(ARMneon_TRN, rD, rM, size, False)); //ZZ return resRd ? rD : rM; //ZZ } //ZZ //ZZ // These 4 verified 18 Apr 2013 //ZZ case Iop_InterleaveHI8x8: //ZZ case Iop_InterleaveLO8x8: //ZZ case Iop_InterleaveHI16x4: //ZZ case Iop_InterleaveLO16x4: { //ZZ HReg rD = newVRegD(env); //ZZ HReg rM = newVRegD(env); //ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); //ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); //ZZ UInt size; //ZZ Bool resRd; // is the result in rD or rM ? //ZZ switch (e->Iex.Binop.op) { //ZZ case Iop_InterleaveHI8x8: resRd = False; size = 0; break; //ZZ case Iop_InterleaveLO8x8: resRd = True; size = 0; break; //ZZ case Iop_InterleaveHI16x4: resRd = False; size = 1; break; //ZZ case Iop_InterleaveLO16x4: resRd = True; size = 1; break; //ZZ default: vassert(0); //ZZ } //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rM, argL, 4, False)); //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rD, argR, 4, False)); //ZZ addInstr(env, ARMInstr_NDual(ARMneon_ZIP, rD, rM, size, False)); //ZZ return resRd ? rD : rM; //ZZ } //ZZ //ZZ // These 4 verified 18 Apr 2013 //ZZ case Iop_CatOddLanes8x8: //ZZ case Iop_CatEvenLanes8x8: //ZZ case Iop_CatOddLanes16x4: //ZZ case Iop_CatEvenLanes16x4: { //ZZ HReg rD = newVRegD(env); //ZZ HReg rM = newVRegD(env); //ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); //ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); //ZZ UInt size; //ZZ Bool resRd; // is the result in rD or rM ? //ZZ switch (e->Iex.Binop.op) { //ZZ case Iop_CatOddLanes8x8: resRd = False; size = 0; break; //ZZ case Iop_CatEvenLanes8x8: resRd = True; size = 0; break; //ZZ case Iop_CatOddLanes16x4: resRd = False; size = 1; break; //ZZ case Iop_CatEvenLanes16x4: resRd = True; size = 1; break; //ZZ default: vassert(0); //ZZ } //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rM, argL, 4, False)); //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rD, argR, 4, False)); //ZZ addInstr(env, ARMInstr_NDual(ARMneon_UZP, rD, rM, size, False)); //ZZ return resRd ? 
rD : rM; //ZZ } //ZZ //ZZ case Iop_QAdd8Ux8: //ZZ case Iop_QAdd16Ux4: //ZZ case Iop_QAdd32Ux2: //ZZ case Iop_QAdd64Ux1: { //ZZ HReg res = newVRegD(env); //ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); //ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); //ZZ UInt size; //ZZ switch (e->Iex.Binop.op) { //ZZ case Iop_QAdd8Ux8: size = 0; break; //ZZ case Iop_QAdd16Ux4: size = 1; break; //ZZ case Iop_QAdd32Ux2: size = 2; break; //ZZ case Iop_QAdd64Ux1: size = 3; break; //ZZ default: vassert(0); //ZZ } //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VQADDU, //ZZ res, argL, argR, size, False)); //ZZ return res; //ZZ } //ZZ case Iop_QAdd8Sx8: //ZZ case Iop_QAdd16Sx4: //ZZ case Iop_QAdd32Sx2: //ZZ case Iop_QAdd64Sx1: { //ZZ HReg res = newVRegD(env); //ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); //ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); //ZZ UInt size; //ZZ switch (e->Iex.Binop.op) { //ZZ case Iop_QAdd8Sx8: size = 0; break; //ZZ case Iop_QAdd16Sx4: size = 1; break; //ZZ case Iop_QAdd32Sx2: size = 2; break; //ZZ case Iop_QAdd64Sx1: size = 3; break; //ZZ default: vassert(0); //ZZ } //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VQADDS, //ZZ res, argL, argR, size, False)); //ZZ return res; //ZZ } //ZZ case Iop_Sub8x8: //ZZ case Iop_Sub16x4: //ZZ case Iop_Sub32x2: //ZZ case Iop_Sub64: { //ZZ HReg res = newVRegD(env); //ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); //ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); //ZZ UInt size; //ZZ switch (e->Iex.Binop.op) { //ZZ case Iop_Sub8x8: size = 0; break; //ZZ case Iop_Sub16x4: size = 1; break; //ZZ case Iop_Sub32x2: size = 2; break; //ZZ case Iop_Sub64: size = 3; break; //ZZ default: vassert(0); //ZZ } //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VSUB, //ZZ res, argL, argR, size, False)); //ZZ return res; //ZZ } //ZZ case Iop_Sub32Fx2: { //ZZ HReg res = newVRegD(env); //ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); //ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); //ZZ UInt size = 0; //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VSUBFP, //ZZ res, argL, argR, size, False)); //ZZ return res; //ZZ } //ZZ case Iop_QSub8Ux8: //ZZ case Iop_QSub16Ux4: //ZZ case Iop_QSub32Ux2: //ZZ case Iop_QSub64Ux1: { //ZZ HReg res = newVRegD(env); //ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); //ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); //ZZ UInt size; //ZZ switch (e->Iex.Binop.op) { //ZZ case Iop_QSub8Ux8: size = 0; break; //ZZ case Iop_QSub16Ux4: size = 1; break; //ZZ case Iop_QSub32Ux2: size = 2; break; //ZZ case Iop_QSub64Ux1: size = 3; break; //ZZ default: vassert(0); //ZZ } //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VQSUBU, //ZZ res, argL, argR, size, False)); //ZZ return res; //ZZ } //ZZ case Iop_QSub8Sx8: //ZZ case Iop_QSub16Sx4: //ZZ case Iop_QSub32Sx2: //ZZ case Iop_QSub64Sx1: { //ZZ HReg res = newVRegD(env); //ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); //ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); //ZZ UInt size; //ZZ switch (e->Iex.Binop.op) { //ZZ case Iop_QSub8Sx8: size = 0; break; //ZZ case Iop_QSub16Sx4: size = 1; break; //ZZ case Iop_QSub32Sx2: size = 2; break; //ZZ case Iop_QSub64Sx1: size = 3; break; //ZZ default: vassert(0); //ZZ } //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VQSUBS, //ZZ res, argL, argR, size, False)); //ZZ return res; //ZZ } //ZZ case Iop_Max8Ux8: //ZZ case Iop_Max16Ux4: //ZZ case Iop_Max32Ux2: { //ZZ HReg res = newVRegD(env); //ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); //ZZ HReg argR = iselNeon64Expr(env, 
e->Iex.Binop.arg2); //ZZ UInt size; //ZZ switch (e->Iex.Binop.op) { //ZZ case Iop_Max8Ux8: size = 0; break; //ZZ case Iop_Max16Ux4: size = 1; break; //ZZ case Iop_Max32Ux2: size = 2; break; //ZZ default: vassert(0); //ZZ } //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMAXU, //ZZ res, argL, argR, size, False)); //ZZ return res; //ZZ } //ZZ case Iop_Max8Sx8: //ZZ case Iop_Max16Sx4: //ZZ case Iop_Max32Sx2: { //ZZ HReg res = newVRegD(env); //ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); //ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); //ZZ UInt size; //ZZ switch (e->Iex.Binop.op) { //ZZ case Iop_Max8Sx8: size = 0; break; //ZZ case Iop_Max16Sx4: size = 1; break; //ZZ case Iop_Max32Sx2: size = 2; break; //ZZ default: vassert(0); //ZZ } //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMAXS, //ZZ res, argL, argR, size, False)); //ZZ return res; //ZZ } //ZZ case Iop_Min8Ux8: //ZZ case Iop_Min16Ux4: //ZZ case Iop_Min32Ux2: { //ZZ HReg res = newVRegD(env); //ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); //ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); //ZZ UInt size; //ZZ switch (e->Iex.Binop.op) { //ZZ case Iop_Min8Ux8: size = 0; break; //ZZ case Iop_Min16Ux4: size = 1; break; //ZZ case Iop_Min32Ux2: size = 2; break; //ZZ default: vassert(0); //ZZ } //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMINU, //ZZ res, argL, argR, size, False)); //ZZ return res; //ZZ } //ZZ case Iop_Min8Sx8: //ZZ case Iop_Min16Sx4: //ZZ case Iop_Min32Sx2: { //ZZ HReg res = newVRegD(env); //ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); //ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); //ZZ UInt size; //ZZ switch (e->Iex.Binop.op) { //ZZ case Iop_Min8Sx8: size = 0; break; //ZZ case Iop_Min16Sx4: size = 1; break; //ZZ case Iop_Min32Sx2: size = 2; break; //ZZ default: vassert(0); //ZZ } //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMINS, //ZZ res, argL, argR, size, False)); //ZZ return res; //ZZ } //ZZ case Iop_Sar8x8: //ZZ case Iop_Sar16x4: //ZZ case Iop_Sar32x2: { //ZZ HReg res = newVRegD(env); //ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); //ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); //ZZ HReg argR2 = newVRegD(env); //ZZ HReg zero = newVRegD(env); //ZZ UInt size; //ZZ switch (e->Iex.Binop.op) { //ZZ case Iop_Sar8x8: size = 0; break; //ZZ case Iop_Sar16x4: size = 1; break; //ZZ case Iop_Sar32x2: size = 2; break; //ZZ case Iop_Sar64: size = 3; break; //ZZ default: vassert(0); //ZZ } //ZZ addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0,0))); //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VSUB, //ZZ argR2, zero, argR, size, False)); //ZZ addInstr(env, ARMInstr_NShift(ARMneon_VSAL, //ZZ res, argL, argR2, size, False)); //ZZ return res; //ZZ } //ZZ case Iop_Sal8x8: //ZZ case Iop_Sal16x4: //ZZ case Iop_Sal32x2: //ZZ case Iop_Sal64x1: { //ZZ HReg res = newVRegD(env); //ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); //ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); //ZZ UInt size; //ZZ switch (e->Iex.Binop.op) { //ZZ case Iop_Sal8x8: size = 0; break; //ZZ case Iop_Sal16x4: size = 1; break; //ZZ case Iop_Sal32x2: size = 2; break; //ZZ case Iop_Sal64x1: size = 3; break; //ZZ default: vassert(0); //ZZ } //ZZ addInstr(env, ARMInstr_NShift(ARMneon_VSAL, //ZZ res, argL, argR, size, False)); //ZZ return res; //ZZ } //ZZ case Iop_Shr8x8: //ZZ case Iop_Shr16x4: //ZZ case Iop_Shr32x2: { //ZZ HReg res = newVRegD(env); //ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); //ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); //ZZ HReg argR2 = newVRegD(env); //ZZ 
HReg zero = newVRegD(env); //ZZ UInt size; //ZZ switch (e->Iex.Binop.op) { //ZZ case Iop_Shr8x8: size = 0; break; //ZZ case Iop_Shr16x4: size = 1; break; //ZZ case Iop_Shr32x2: size = 2; break; //ZZ default: vassert(0); //ZZ } //ZZ addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0,0))); //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VSUB, //ZZ argR2, zero, argR, size, False)); //ZZ addInstr(env, ARMInstr_NShift(ARMneon_VSHL, //ZZ res, argL, argR2, size, False)); //ZZ return res; //ZZ } //ZZ case Iop_Shl8x8: //ZZ case Iop_Shl16x4: //ZZ case Iop_Shl32x2: { //ZZ HReg res = newVRegD(env); //ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); //ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); //ZZ UInt size; //ZZ switch (e->Iex.Binop.op) { //ZZ case Iop_Shl8x8: size = 0; break; //ZZ case Iop_Shl16x4: size = 1; break; //ZZ case Iop_Shl32x2: size = 2; break; //ZZ default: vassert(0); //ZZ } //ZZ addInstr(env, ARMInstr_NShift(ARMneon_VSHL, //ZZ res, argL, argR, size, False)); //ZZ return res; //ZZ } //ZZ case Iop_QShl8x8: //ZZ case Iop_QShl16x4: //ZZ case Iop_QShl32x2: //ZZ case Iop_QShl64x1: { //ZZ HReg res = newVRegD(env); //ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); //ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); //ZZ UInt size; //ZZ switch (e->Iex.Binop.op) { //ZZ case Iop_QShl8x8: size = 0; break; //ZZ case Iop_QShl16x4: size = 1; break; //ZZ case Iop_QShl32x2: size = 2; break; //ZZ case Iop_QShl64x1: size = 3; break; //ZZ default: vassert(0); //ZZ } //ZZ addInstr(env, ARMInstr_NShift(ARMneon_VQSHL, //ZZ res, argL, argR, size, False)); //ZZ return res; //ZZ } //ZZ case Iop_QSal8x8: //ZZ case Iop_QSal16x4: //ZZ case Iop_QSal32x2: //ZZ case Iop_QSal64x1: { //ZZ HReg res = newVRegD(env); //ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); //ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); //ZZ UInt size; //ZZ switch (e->Iex.Binop.op) { //ZZ case Iop_QSal8x8: size = 0; break; //ZZ case Iop_QSal16x4: size = 1; break; //ZZ case Iop_QSal32x2: size = 2; break; //ZZ case Iop_QSal64x1: size = 3; break; //ZZ default: vassert(0); //ZZ } //ZZ addInstr(env, ARMInstr_NShift(ARMneon_VQSAL, //ZZ res, argL, argR, size, False)); //ZZ return res; //ZZ } //ZZ case Iop_QShlN8x8: //ZZ case Iop_QShlN16x4: //ZZ case Iop_QShlN32x2: //ZZ case Iop_QShlN64x1: { //ZZ HReg res = newVRegD(env); //ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); //ZZ UInt size, imm; //ZZ if (e->Iex.Binop.arg2->tag != Iex_Const || //ZZ typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) { //ZZ vpanic("ARM target supports Iop_QShlNAxB with constant " //ZZ "second argument only\n"); //ZZ } //ZZ imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8; //ZZ switch (e->Iex.Binop.op) { //ZZ case Iop_QShlN8x8: size = 8 | imm; break; //ZZ case Iop_QShlN16x4: size = 16 | imm; break; //ZZ case Iop_QShlN32x2: size = 32 | imm; break; //ZZ case Iop_QShlN64x1: size = 64 | imm; break; //ZZ default: vassert(0); //ZZ } //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNUU, //ZZ res, argL, size, False)); //ZZ return res; //ZZ } //ZZ case Iop_QShlN8Sx8: //ZZ case Iop_QShlN16Sx4: //ZZ case Iop_QShlN32Sx2: //ZZ case Iop_QShlN64Sx1: { //ZZ HReg res = newVRegD(env); //ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); //ZZ UInt size, imm; //ZZ if (e->Iex.Binop.arg2->tag != Iex_Const || //ZZ typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) { //ZZ vpanic("ARM target supports Iop_QShlNAxB with constant " //ZZ "second argument only\n"); //ZZ } //ZZ imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8; //ZZ switch
(e->Iex.Binop.op) { //ZZ case Iop_QShlN8Sx8: size = 8 | imm; break; //ZZ case Iop_QShlN16Sx4: size = 16 | imm; break; //ZZ case Iop_QShlN32Sx2: size = 32 | imm; break; //ZZ case Iop_QShlN64Sx1: size = 64 | imm; break; //ZZ default: vassert(0); //ZZ } //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNUS, //ZZ res, argL, size, False)); //ZZ return res; //ZZ } //ZZ case Iop_QSalN8x8: //ZZ case Iop_QSalN16x4: //ZZ case Iop_QSalN32x2: //ZZ case Iop_QSalN64x1: { //ZZ HReg res = newVRegD(env); //ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); //ZZ UInt size, imm; //ZZ if (e->Iex.Binop.arg2->tag != Iex_Const || //ZZ typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) { //ZZ vpanic("ARM target supports Iop_QSalNAxB with constant " //ZZ "second argument only\n"); //ZZ } //ZZ imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8; //ZZ switch (e->Iex.Binop.op) { //ZZ case Iop_QSalN8x8: size = 8 | imm; break; //ZZ case Iop_QSalN16x4: size = 16 | imm; break; //ZZ case Iop_QSalN32x2: size = 32 | imm; break; //ZZ case Iop_QSalN64x1: size = 64 | imm; break; //ZZ default: vassert(0); //ZZ } //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNSS, //ZZ res, argL, size, False)); //ZZ return res; //ZZ } //ZZ case Iop_ShrN8x8: //ZZ case Iop_ShrN16x4: //ZZ case Iop_ShrN32x2: //ZZ case Iop_Shr64: { //ZZ HReg res = newVRegD(env); //ZZ HReg tmp = newVRegD(env); //ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); //ZZ HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2); //ZZ HReg argR2 = newVRegI(env); //ZZ UInt size; //ZZ switch (e->Iex.Binop.op) { //ZZ case Iop_ShrN8x8: size = 0; break; //ZZ case Iop_ShrN16x4: size = 1; break; //ZZ case Iop_ShrN32x2: size = 2; break; //ZZ case Iop_Shr64: size = 3; break; //ZZ default: vassert(0); //ZZ } //ZZ addInstr(env, ARMInstr_Unary(ARMun_NEG, argR2, argR)); //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_DUP, tmp, argR2, 0, False)); //ZZ addInstr(env, ARMInstr_NShift(ARMneon_VSHL, //ZZ res, argL, tmp, size, False)); //ZZ return res; //ZZ } //ZZ case Iop_ShlN8x8: //ZZ case Iop_ShlN16x4: //ZZ case Iop_ShlN32x2: //ZZ case Iop_Shl64: { //ZZ HReg res = newVRegD(env); //ZZ HReg tmp = newVRegD(env); //ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); //ZZ /* special-case Shl64(x, imm8) since the Neon front //ZZ end produces a lot of those for V{LD,ST}{1,2,3,4}.
*/ //ZZ if (e->Iex.Binop.op == Iop_Shl64 //ZZ && e->Iex.Binop.arg2->tag == Iex_Const) { //ZZ vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8); //ZZ Int nshift = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8; //ZZ if (nshift >= 1 && nshift <= 63) { //ZZ addInstr(env, ARMInstr_NShl64(res, argL, nshift)); //ZZ return res; //ZZ } //ZZ /* else fall through to general case */ //ZZ } //ZZ HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2); //ZZ UInt size; //ZZ switch (e->Iex.Binop.op) { //ZZ case Iop_ShlN8x8: size = 0; break; //ZZ case Iop_ShlN16x4: size = 1; break; //ZZ case Iop_ShlN32x2: size = 2; break; //ZZ case Iop_Shl64: size = 3; break; //ZZ default: vassert(0); //ZZ } //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_DUP, //ZZ tmp, argR, 0, False)); //ZZ addInstr(env, ARMInstr_NShift(ARMneon_VSHL, //ZZ res, argL, tmp, size, False)); //ZZ return res; //ZZ } //ZZ case Iop_SarN8x8: //ZZ case Iop_SarN16x4: //ZZ case Iop_SarN32x2: //ZZ case Iop_Sar64: { //ZZ HReg res = newVRegD(env); //ZZ HReg tmp = newVRegD(env); //ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); //ZZ HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2); //ZZ HReg argR2 = newVRegI(env); //ZZ UInt size; //ZZ switch (e->Iex.Binop.op) { //ZZ case Iop_SarN8x8: size = 0; break; //ZZ case Iop_SarN16x4: size = 1; break; //ZZ case Iop_SarN32x2: size = 2; break; //ZZ case Iop_Sar64: size = 3; break; //ZZ default: vassert(0); //ZZ } //ZZ addInstr(env, ARMInstr_Unary(ARMun_NEG, argR2, argR)); //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_DUP, tmp, argR2, 0, False)); //ZZ addInstr(env, ARMInstr_NShift(ARMneon_VSAL, //ZZ res, argL, tmp, size, False)); //ZZ return res; //ZZ } //ZZ case Iop_CmpGT8Ux8: //ZZ case Iop_CmpGT16Ux4: //ZZ case Iop_CmpGT32Ux2: { //ZZ HReg res = newVRegD(env); //ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); //ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); //ZZ UInt size; //ZZ switch (e->Iex.Binop.op) { //ZZ case Iop_CmpGT8Ux8: size = 0; break; //ZZ case Iop_CmpGT16Ux4: size = 1; break; //ZZ case Iop_CmpGT32Ux2: size = 2; break; //ZZ default: vassert(0); //ZZ } //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGTU, //ZZ res, argL, argR, size, False)); //ZZ return res; //ZZ } //ZZ case Iop_CmpGT8Sx8: //ZZ case Iop_CmpGT16Sx4: //ZZ case Iop_CmpGT32Sx2: { //ZZ HReg res = newVRegD(env); //ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); //ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); //ZZ UInt size; //ZZ switch (e->Iex.Binop.op) { //ZZ case Iop_CmpGT8Sx8: size = 0; break; //ZZ case Iop_CmpGT16Sx4: size = 1; break; //ZZ case Iop_CmpGT32Sx2: size = 2; break; //ZZ default: vassert(0); //ZZ } //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGTS, //ZZ res, argL, argR, size, False)); //ZZ return res; //ZZ } //ZZ case Iop_CmpEQ8x8: //ZZ case Iop_CmpEQ16x4: //ZZ case Iop_CmpEQ32x2: { //ZZ HReg res = newVRegD(env); //ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); //ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); //ZZ UInt size; //ZZ switch (e->Iex.Binop.op) { //ZZ case Iop_CmpEQ8x8: size = 0; break; //ZZ case Iop_CmpEQ16x4: size = 1; break; //ZZ case Iop_CmpEQ32x2: size = 2; break; //ZZ default: vassert(0); //ZZ } //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCEQ, //ZZ res, argL, argR, size, False)); //ZZ return res; //ZZ } //ZZ case Iop_Mul8x8: //ZZ case Iop_Mul16x4: //ZZ case Iop_Mul32x2: { //ZZ HReg res = newVRegD(env); //ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); //ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); //ZZ UInt size = 0; //ZZ switch(e->Iex.Binop.op) { //ZZ case 
Iop_Mul8x8: size = 0; break; //ZZ case Iop_Mul16x4: size = 1; break; //ZZ case Iop_Mul32x2: size = 2; break; //ZZ default: vassert(0); //ZZ } //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMUL, //ZZ res, argL, argR, size, False)); //ZZ return res; //ZZ } //ZZ case Iop_Mul32Fx2: { //ZZ HReg res = newVRegD(env); //ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); //ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); //ZZ UInt size = 0; //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMULFP, //ZZ res, argL, argR, size, False)); //ZZ return res; //ZZ } //ZZ case Iop_QDMulHi16Sx4: //ZZ case Iop_QDMulHi32Sx2: { //ZZ HReg res = newVRegD(env); //ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); //ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); //ZZ UInt size = 0; //ZZ switch(e->Iex.Binop.op) { //ZZ case Iop_QDMulHi16Sx4: size = 1; break; //ZZ case Iop_QDMulHi32Sx2: size = 2; break; //ZZ default: vassert(0); //ZZ } //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VQDMULH, //ZZ res, argL, argR, size, False)); //ZZ return res; //ZZ } //ZZ //ZZ case Iop_QRDMulHi16Sx4: //ZZ case Iop_QRDMulHi32Sx2: { //ZZ HReg res = newVRegD(env); //ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); //ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); //ZZ UInt size = 0; //ZZ switch(e->Iex.Binop.op) { //ZZ case Iop_QRDMulHi16Sx4: size = 1; break; //ZZ case Iop_QRDMulHi32Sx2: size = 2; break; //ZZ default: vassert(0); //ZZ } //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VQRDMULH, //ZZ res, argL, argR, size, False)); //ZZ return res; //ZZ } //ZZ //ZZ case Iop_PwAdd8x8: //ZZ case Iop_PwAdd16x4: //ZZ case Iop_PwAdd32x2: { //ZZ HReg res = newVRegD(env); //ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); //ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); //ZZ UInt size = 0; //ZZ switch(e->Iex.Binop.op) { //ZZ case Iop_PwAdd8x8: size = 0; break; //ZZ case Iop_PwAdd16x4: size = 1; break; //ZZ case Iop_PwAdd32x2: size = 2; break; //ZZ default: vassert(0); //ZZ } //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VPADD, //ZZ res, argL, argR, size, False)); //ZZ return res; //ZZ } //ZZ case Iop_PwAdd32Fx2: { //ZZ HReg res = newVRegD(env); //ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); //ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); //ZZ UInt size = 0; //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VPADDFP, //ZZ res, argL, argR, size, False)); //ZZ return res; //ZZ } //ZZ case Iop_PwMin8Ux8: //ZZ case Iop_PwMin16Ux4: //ZZ case Iop_PwMin32Ux2: { //ZZ HReg res = newVRegD(env); //ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); //ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); //ZZ UInt size = 0; //ZZ switch(e->Iex.Binop.op) { //ZZ case Iop_PwMin8Ux8: size = 0; break; //ZZ case Iop_PwMin16Ux4: size = 1; break; //ZZ case Iop_PwMin32Ux2: size = 2; break; //ZZ default: vassert(0); //ZZ } //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VPMINU, //ZZ res, argL, argR, size, False)); //ZZ return res; //ZZ } //ZZ case Iop_PwMin8Sx8: //ZZ case Iop_PwMin16Sx4: //ZZ case Iop_PwMin32Sx2: { //ZZ HReg res = newVRegD(env); //ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); //ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); //ZZ UInt size = 0; //ZZ switch(e->Iex.Binop.op) { //ZZ case Iop_PwMin8Sx8: size = 0; break; //ZZ case Iop_PwMin16Sx4: size = 1; break; //ZZ case Iop_PwMin32Sx2: size = 2; break; //ZZ default: vassert(0); //ZZ } //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VPMINS, //ZZ res, argL, argR, size, False)); //ZZ return res; //ZZ } //ZZ case Iop_PwMax8Ux8: //ZZ case Iop_PwMax16Ux4: 
//ZZ case Iop_PwMax32Ux2: { //ZZ HReg res = newVRegD(env); //ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); //ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); //ZZ UInt size = 0; //ZZ switch(e->Iex.Binop.op) { //ZZ case Iop_PwMax8Ux8: size = 0; break; //ZZ case Iop_PwMax16Ux4: size = 1; break; //ZZ case Iop_PwMax32Ux2: size = 2; break; //ZZ default: vassert(0); //ZZ } //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VPMAXU, //ZZ res, argL, argR, size, False)); //ZZ return res; //ZZ } //ZZ case Iop_PwMax8Sx8: //ZZ case Iop_PwMax16Sx4: //ZZ case Iop_PwMax32Sx2: { //ZZ HReg res = newVRegD(env); //ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); //ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); //ZZ UInt size = 0; //ZZ switch(e->Iex.Binop.op) { //ZZ case Iop_PwMax8Sx8: size = 0; break; //ZZ case Iop_PwMax16Sx4: size = 1; break; //ZZ case Iop_PwMax32Sx2: size = 2; break; //ZZ default: vassert(0); //ZZ } //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VPMAXS, //ZZ res, argL, argR, size, False)); //ZZ return res; //ZZ } //ZZ case Iop_Perm8x8: { //ZZ HReg res = newVRegD(env); //ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); //ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VTBL, //ZZ res, argL, argR, 0, False)); //ZZ return res; //ZZ } //ZZ case Iop_PolynomialMul8x8: { //ZZ HReg res = newVRegD(env); //ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); //ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); //ZZ UInt size = 0; //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMULP, //ZZ res, argL, argR, size, False)); //ZZ return res; //ZZ } //ZZ case Iop_Max32Fx2: { //ZZ HReg res = newVRegD(env); //ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); //ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMAXF, //ZZ res, argL, argR, 2, False)); //ZZ return res; //ZZ } //ZZ case Iop_Min32Fx2: { //ZZ HReg res = newVRegD(env); //ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); //ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMINF, //ZZ res, argL, argR, 2, False)); //ZZ return res; //ZZ } //ZZ case Iop_PwMax32Fx2: { //ZZ HReg res = newVRegD(env); //ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); //ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VPMAXF, //ZZ res, argL, argR, 2, False)); //ZZ return res; //ZZ } //ZZ case Iop_PwMin32Fx2: { //ZZ HReg res = newVRegD(env); //ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); //ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VPMINF, //ZZ res, argL, argR, 2, False)); //ZZ return res; //ZZ } //ZZ case Iop_CmpGT32Fx2: { //ZZ HReg res = newVRegD(env); //ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); //ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGTF, //ZZ res, argL, argR, 2, False)); //ZZ return res; //ZZ } //ZZ case Iop_CmpGE32Fx2: { //ZZ HReg res = newVRegD(env); //ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); //ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGEF, //ZZ res, argL, argR, 2, False)); //ZZ return res; //ZZ } //ZZ case Iop_CmpEQ32Fx2: { //ZZ HReg res = newVRegD(env); //ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); //ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCEQF, //ZZ 
res, argL, argR, 2, False)); //ZZ return res; //ZZ } //ZZ case Iop_F32ToFixed32Ux2_RZ: //ZZ case Iop_F32ToFixed32Sx2_RZ: //ZZ case Iop_Fixed32UToF32x2_RN: //ZZ case Iop_Fixed32SToF32x2_RN: { //ZZ HReg res = newVRegD(env); //ZZ HReg arg = iselNeon64Expr(env, e->Iex.Binop.arg1); //ZZ ARMNeonUnOp op; //ZZ UInt imm6; //ZZ if (e->Iex.Binop.arg2->tag != Iex_Const || //ZZ typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) { //ZZ vpanic("ARM supports FP <-> Fixed conversion with constant " //ZZ "second argument less than 33 only\n"); //ZZ } //ZZ imm6 = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8; //ZZ vassert(imm6 <= 32 && imm6 > 0); //ZZ imm6 = 64 - imm6; //ZZ switch(e->Iex.Binop.op) { //ZZ case Iop_F32ToFixed32Ux2_RZ: op = ARMneon_VCVTFtoFixedU; break; //ZZ case Iop_F32ToFixed32Sx2_RZ: op = ARMneon_VCVTFtoFixedS; break; //ZZ case Iop_Fixed32UToF32x2_RN: op = ARMneon_VCVTFixedUtoF; break; //ZZ case Iop_Fixed32SToF32x2_RN: op = ARMneon_VCVTFixedStoF; break; //ZZ default: vassert(0); //ZZ } //ZZ addInstr(env, ARMInstr_NUnary(op, res, arg, imm6, False)); //ZZ return res; //ZZ } //ZZ /* //ZZ FIXME: is this here or not? //ZZ case Iop_VDup8x8: //ZZ case Iop_VDup16x4: //ZZ case Iop_VDup32x2: { //ZZ HReg res = newVRegD(env); //ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); //ZZ UInt index; //ZZ UInt imm4; //ZZ UInt size = 0; //ZZ if (e->Iex.Binop.arg2->tag != Iex_Const || //ZZ typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) { //ZZ vpanic("ARM supports Iop_VDup with constant " //ZZ "second argument less than 16 only\n"); //ZZ } //ZZ index = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8; //ZZ switch(e->Iex.Binop.op) { //ZZ case Iop_VDup8x8: imm4 = (index << 1) + 1; break; //ZZ case Iop_VDup16x4: imm4 = (index << 2) + 2; break; //ZZ case Iop_VDup32x2: imm4 = (index << 3) + 4; break; //ZZ default: vassert(0); //ZZ } //ZZ if (imm4 >= 16) { //ZZ vpanic("ARM supports Iop_VDup with constant " //ZZ "second argument less than 16 only\n"); //ZZ } //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VDUP, //ZZ res, argL, imm4, False)); //ZZ return res; //ZZ } //ZZ */ //ZZ default: //ZZ break; //ZZ } //ZZ } //ZZ //ZZ /* --------- UNARY ops --------- */ //ZZ if (e->tag == Iex_Unop) { //ZZ switch (e->Iex.Unop.op) { //ZZ //ZZ /* 32Uto64 */ //ZZ case Iop_32Uto64: { //ZZ HReg rLo = iselIntExpr_R(env, e->Iex.Unop.arg); //ZZ HReg rHi = newVRegI(env); //ZZ HReg res = newVRegD(env); //ZZ addInstr(env, ARMInstr_Imm32(rHi, 0)); //ZZ addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo)); //ZZ return res; //ZZ } //ZZ //ZZ /* 32Sto64 */ //ZZ case Iop_32Sto64: { //ZZ HReg rLo = iselIntExpr_R(env, e->Iex.Unop.arg); //ZZ HReg rHi = newVRegI(env); //ZZ addInstr(env, mk_iMOVds_RR(rHi, rLo)); //ZZ addInstr(env, ARMInstr_Shift(ARMsh_SAR, rHi, rHi, ARMRI5_I5(31))); //ZZ HReg res = newVRegD(env); //ZZ addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo)); //ZZ return res; //ZZ } //ZZ //ZZ /* The next 3 are pass-throughs */ //ZZ /* ReinterpF64asI64 */ //ZZ case Iop_ReinterpF64asI64: //ZZ /* Left64(e) */ //ZZ case Iop_Left64: //ZZ /* CmpwNEZ64(e) */ //ZZ case Iop_1Sto64: { //ZZ HReg rLo, rHi; //ZZ HReg res = newVRegD(env); //ZZ iselInt64Expr(&rHi, &rLo, env, e); //ZZ addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo)); //ZZ return res; //ZZ } //ZZ //ZZ case Iop_Not64: { //ZZ DECLARE_PATTERN(p_veqz_8x8); //ZZ DECLARE_PATTERN(p_veqz_16x4); //ZZ DECLARE_PATTERN(p_veqz_32x2); //ZZ DECLARE_PATTERN(p_vcge_8sx8); //ZZ DECLARE_PATTERN(p_vcge_16sx4); //ZZ DECLARE_PATTERN(p_vcge_32sx2); //ZZ DECLARE_PATTERN(p_vcge_8ux8); //ZZ 
DECLARE_PATTERN(p_vcge_16ux4); //ZZ DECLARE_PATTERN(p_vcge_32ux2); //ZZ DEFINE_PATTERN(p_veqz_8x8, //ZZ unop(Iop_Not64, unop(Iop_CmpNEZ8x8, bind(0)))); //ZZ DEFINE_PATTERN(p_veqz_16x4, //ZZ unop(Iop_Not64, unop(Iop_CmpNEZ16x4, bind(0)))); //ZZ DEFINE_PATTERN(p_veqz_32x2, //ZZ unop(Iop_Not64, unop(Iop_CmpNEZ32x2, bind(0)))); //ZZ DEFINE_PATTERN(p_vcge_8sx8, //ZZ unop(Iop_Not64, binop(Iop_CmpGT8Sx8, bind(1), bind(0)))); //ZZ DEFINE_PATTERN(p_vcge_16sx4, //ZZ unop(Iop_Not64, binop(Iop_CmpGT16Sx4, bind(1), bind(0)))); //ZZ DEFINE_PATTERN(p_vcge_32sx2, //ZZ unop(Iop_Not64, binop(Iop_CmpGT32Sx2, bind(1), bind(0)))); //ZZ DEFINE_PATTERN(p_vcge_8ux8, //ZZ unop(Iop_Not64, binop(Iop_CmpGT8Ux8, bind(1), bind(0)))); //ZZ DEFINE_PATTERN(p_vcge_16ux4, //ZZ unop(Iop_Not64, binop(Iop_CmpGT16Ux4, bind(1), bind(0)))); //ZZ DEFINE_PATTERN(p_vcge_32ux2, //ZZ unop(Iop_Not64, binop(Iop_CmpGT32Ux2, bind(1), bind(0)))); //ZZ if (matchIRExpr(&mi, p_veqz_8x8, e)) { //ZZ HReg res = newVRegD(env); //ZZ HReg arg = iselNeon64Expr(env, mi.bindee[0]); //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 0, False)); //ZZ return res; //ZZ } else if (matchIRExpr(&mi, p_veqz_16x4, e)) { //ZZ HReg res = newVRegD(env); //ZZ HReg arg = iselNeon64Expr(env, mi.bindee[0]); //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 1, False)); //ZZ return res; //ZZ } else if (matchIRExpr(&mi, p_veqz_32x2, e)) { //ZZ HReg res = newVRegD(env); //ZZ HReg arg = iselNeon64Expr(env, mi.bindee[0]); //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 2, False)); //ZZ return res; //ZZ } else if (matchIRExpr(&mi, p_vcge_8sx8, e)) { //ZZ HReg res = newVRegD(env); //ZZ HReg argL = iselNeon64Expr(env, mi.bindee[0]); //ZZ HReg argR = iselNeon64Expr(env, mi.bindee[1]); //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGES, //ZZ res, argL, argR, 0, False)); //ZZ return res; //ZZ } else if (matchIRExpr(&mi, p_vcge_16sx4, e)) { //ZZ HReg res = newVRegD(env); //ZZ HReg argL = iselNeon64Expr(env, mi.bindee[0]); //ZZ HReg argR = iselNeon64Expr(env, mi.bindee[1]); //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGES, //ZZ res, argL, argR, 1, False)); //ZZ return res; //ZZ } else if (matchIRExpr(&mi, p_vcge_32sx2, e)) { //ZZ HReg res = newVRegD(env); //ZZ HReg argL = iselNeon64Expr(env, mi.bindee[0]); //ZZ HReg argR = iselNeon64Expr(env, mi.bindee[1]); //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGES, //ZZ res, argL, argR, 2, False)); //ZZ return res; //ZZ } else if (matchIRExpr(&mi, p_vcge_8ux8, e)) { //ZZ HReg res = newVRegD(env); //ZZ HReg argL = iselNeon64Expr(env, mi.bindee[0]); //ZZ HReg argR = iselNeon64Expr(env, mi.bindee[1]); //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU, //ZZ res, argL, argR, 0, False)); //ZZ return res; //ZZ } else if (matchIRExpr(&mi, p_vcge_16ux4, e)) { //ZZ HReg res = newVRegD(env); //ZZ HReg argL = iselNeon64Expr(env, mi.bindee[0]); //ZZ HReg argR = iselNeon64Expr(env, mi.bindee[1]); //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU, //ZZ res, argL, argR, 1, False)); //ZZ return res; //ZZ } else if (matchIRExpr(&mi, p_vcge_32ux2, e)) { //ZZ HReg res = newVRegD(env); //ZZ HReg argL = iselNeon64Expr(env, mi.bindee[0]); //ZZ HReg argR = iselNeon64Expr(env, mi.bindee[1]); //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU, //ZZ res, argL, argR, 2, False)); //ZZ return res; //ZZ } else { //ZZ HReg res = newVRegD(env); //ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg); //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_NOT, res, arg, 4, False)); //ZZ return res; //ZZ } //ZZ } //ZZ case Iop_Dup8x8: //ZZ case Iop_Dup16x4: //ZZ 
case Iop_Dup32x2: { //ZZ HReg res, arg; //ZZ UInt size; //ZZ DECLARE_PATTERN(p_vdup_8x8); //ZZ DECLARE_PATTERN(p_vdup_16x4); //ZZ DECLARE_PATTERN(p_vdup_32x2); //ZZ DEFINE_PATTERN(p_vdup_8x8, //ZZ unop(Iop_Dup8x8, binop(Iop_GetElem8x8, bind(0), bind(1)))); //ZZ DEFINE_PATTERN(p_vdup_16x4, //ZZ unop(Iop_Dup16x4, binop(Iop_GetElem16x4, bind(0), bind(1)))); //ZZ DEFINE_PATTERN(p_vdup_32x2, //ZZ unop(Iop_Dup32x2, binop(Iop_GetElem32x2, bind(0), bind(1)))); //ZZ if (matchIRExpr(&mi, p_vdup_8x8, e)) { //ZZ UInt index; //ZZ UInt imm4; //ZZ if (mi.bindee[1]->tag == Iex_Const && //ZZ typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) { //ZZ index = mi.bindee[1]->Iex.Const.con->Ico.U8; //ZZ imm4 = (index << 1) + 1; //ZZ if (index < 8) { //ZZ res = newVRegD(env); //ZZ arg = iselNeon64Expr(env, mi.bindee[0]); //ZZ addInstr(env, ARMInstr_NUnaryS( //ZZ ARMneon_VDUP, //ZZ mkARMNRS(ARMNRS_Reg, res, 0), //ZZ mkARMNRS(ARMNRS_Scalar, arg, index), //ZZ imm4, False //ZZ )); //ZZ return res; //ZZ } //ZZ } //ZZ } else if (matchIRExpr(&mi, p_vdup_16x4, e)) { //ZZ UInt index; //ZZ UInt imm4; //ZZ if (mi.bindee[1]->tag == Iex_Const && //ZZ typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) { //ZZ index = mi.bindee[1]->Iex.Const.con->Ico.U8; //ZZ imm4 = (index << 2) + 2; //ZZ if (index < 4) { //ZZ res = newVRegD(env); //ZZ arg = iselNeon64Expr(env, mi.bindee[0]); //ZZ addInstr(env, ARMInstr_NUnaryS( //ZZ ARMneon_VDUP, //ZZ mkARMNRS(ARMNRS_Reg, res, 0), //ZZ mkARMNRS(ARMNRS_Scalar, arg, index), //ZZ imm4, False //ZZ )); //ZZ return res; //ZZ } //ZZ } //ZZ } else if (matchIRExpr(&mi, p_vdup_32x2, e)) { //ZZ UInt index; //ZZ UInt imm4; //ZZ if (mi.bindee[1]->tag == Iex_Const && //ZZ typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) { //ZZ index = mi.bindee[1]->Iex.Const.con->Ico.U8; //ZZ imm4 = (index << 3) + 4; //ZZ if (index < 2) { //ZZ res = newVRegD(env); //ZZ arg = iselNeon64Expr(env, mi.bindee[0]); //ZZ addInstr(env, ARMInstr_NUnaryS( //ZZ ARMneon_VDUP, //ZZ mkARMNRS(ARMNRS_Reg, res, 0), //ZZ mkARMNRS(ARMNRS_Scalar, arg, index), //ZZ imm4, False //ZZ )); //ZZ return res; //ZZ } //ZZ } //ZZ } //ZZ arg = iselIntExpr_R(env, e->Iex.Unop.arg); //ZZ res = newVRegD(env); //ZZ switch (e->Iex.Unop.op) { //ZZ case Iop_Dup8x8: size = 0; break; //ZZ case Iop_Dup16x4: size = 1; break; //ZZ case Iop_Dup32x2: size = 2; break; //ZZ default: vassert(0); //ZZ } //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_DUP, res, arg, size, False)); //ZZ return res; //ZZ } //ZZ case Iop_Abs8x8: //ZZ case Iop_Abs16x4: //ZZ case Iop_Abs32x2: { //ZZ HReg res = newVRegD(env); //ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg); //ZZ UInt size = 0; //ZZ switch(e->Iex.Binop.op) { //ZZ case Iop_Abs8x8: size = 0; break; //ZZ case Iop_Abs16x4: size = 1; break; //ZZ case Iop_Abs32x2: size = 2; break; //ZZ default: vassert(0); //ZZ } //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_ABS, res, arg, size, False)); //ZZ return res; //ZZ } //ZZ case Iop_Reverse64_8x8: //ZZ case Iop_Reverse64_16x4: //ZZ case Iop_Reverse64_32x2: { //ZZ HReg res = newVRegD(env); //ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg); //ZZ UInt size = 0; //ZZ switch(e->Iex.Binop.op) { //ZZ case Iop_Reverse64_8x8: size = 0; break; //ZZ case Iop_Reverse64_16x4: size = 1; break; //ZZ case Iop_Reverse64_32x2: size = 2; break; //ZZ default: vassert(0); //ZZ } //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_REV64, //ZZ res, arg, size, False)); //ZZ return res; //ZZ } //ZZ case Iop_Reverse32_8x8: //ZZ case Iop_Reverse32_16x4: { //ZZ HReg res = newVRegD(env); //ZZ HReg arg = iselNeon64Expr(env, 
e->Iex.Unop.arg); //ZZ UInt size = 0; //ZZ switch(e->Iex.Binop.op) { //ZZ case Iop_Reverse32_8x8: size = 0; break; //ZZ case Iop_Reverse32_16x4: size = 1; break; //ZZ default: vassert(0); //ZZ } //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_REV32, //ZZ res, arg, size, False)); //ZZ return res; //ZZ } //ZZ case Iop_Reverse16_8x8: { //ZZ HReg res = newVRegD(env); //ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg); //ZZ UInt size = 0; //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_REV16, //ZZ res, arg, size, False)); //ZZ return res; //ZZ } //ZZ case Iop_CmpwNEZ64: { //ZZ HReg x_lsh = newVRegD(env); //ZZ HReg x_rsh = newVRegD(env); //ZZ HReg lsh_amt = newVRegD(env); //ZZ HReg rsh_amt = newVRegD(env); //ZZ HReg zero = newVRegD(env); //ZZ HReg tmp = newVRegD(env); //ZZ HReg tmp2 = newVRegD(env); //ZZ HReg res = newVRegD(env); //ZZ HReg x = newVRegD(env); //ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg); //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, tmp2, arg, 2, False)); //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_NOT, x, tmp2, 4, False)); //ZZ addInstr(env, ARMInstr_NeonImm(lsh_amt, ARMNImm_TI(0, 32))); //ZZ addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0, 0))); //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VSUB, //ZZ rsh_amt, zero, lsh_amt, 2, False)); //ZZ addInstr(env, ARMInstr_NShift(ARMneon_VSHL, //ZZ x_lsh, x, lsh_amt, 3, False)); //ZZ addInstr(env, ARMInstr_NShift(ARMneon_VSHL, //ZZ x_rsh, x, rsh_amt, 3, False)); //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VORR, //ZZ tmp, x_lsh, x_rsh, 0, False)); //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VORR, //ZZ res, tmp, x, 0, False)); //ZZ return res; //ZZ } //ZZ case Iop_CmpNEZ8x8: //ZZ case Iop_CmpNEZ16x4: //ZZ case Iop_CmpNEZ32x2: { //ZZ HReg res = newVRegD(env); //ZZ HReg tmp = newVRegD(env); //ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg); //ZZ UInt size; //ZZ switch (e->Iex.Unop.op) { //ZZ case Iop_CmpNEZ8x8: size = 0; break; //ZZ case Iop_CmpNEZ16x4: size = 1; break; //ZZ case Iop_CmpNEZ32x2: size = 2; break; //ZZ default: vassert(0); //ZZ } //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, tmp, arg, size, False)); //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_NOT, res, tmp, 4, False)); //ZZ return res; //ZZ } //ZZ case Iop_NarrowUn16to8x8: //ZZ case Iop_NarrowUn32to16x4: //ZZ case Iop_NarrowUn64to32x2: { //ZZ HReg res = newVRegD(env); //ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg); //ZZ UInt size = 0; //ZZ switch(e->Iex.Binop.op) { //ZZ case Iop_NarrowUn16to8x8: size = 0; break; //ZZ case Iop_NarrowUn32to16x4: size = 1; break; //ZZ case Iop_NarrowUn64to32x2: size = 2; break; //ZZ default: vassert(0); //ZZ } //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPYN, //ZZ res, arg, size, False)); //ZZ return res; //ZZ } //ZZ case Iop_QNarrowUn16Sto8Sx8: //ZZ case Iop_QNarrowUn32Sto16Sx4: //ZZ case Iop_QNarrowUn64Sto32Sx2: { //ZZ HReg res = newVRegD(env); //ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg); //ZZ UInt size = 0; //ZZ switch(e->Iex.Binop.op) { //ZZ case Iop_QNarrowUn16Sto8Sx8: size = 0; break; //ZZ case Iop_QNarrowUn32Sto16Sx4: size = 1; break; //ZZ case Iop_QNarrowUn64Sto32Sx2: size = 2; break; //ZZ default: vassert(0); //ZZ } //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPYQNSS, //ZZ res, arg, size, False)); //ZZ return res; //ZZ } //ZZ case Iop_QNarrowUn16Sto8Ux8: //ZZ case Iop_QNarrowUn32Sto16Ux4: //ZZ case Iop_QNarrowUn64Sto32Ux2: { //ZZ HReg res = newVRegD(env); //ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg); //ZZ UInt size = 0; //ZZ switch(e->Iex.Binop.op) { //ZZ case Iop_QNarrowUn16Sto8Ux8: size = 0; break; //ZZ case 
Iop_QNarrowUn32Sto16Ux4: size = 1; break; //ZZ case Iop_QNarrowUn64Sto32Ux2: size = 2; break; //ZZ default: vassert(0); //ZZ } //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPYQNUS, //ZZ res, arg, size, False)); //ZZ return res; //ZZ } //ZZ case Iop_QNarrowUn16Uto8Ux8: //ZZ case Iop_QNarrowUn32Uto16Ux4: //ZZ case Iop_QNarrowUn64Uto32Ux2: { //ZZ HReg res = newVRegD(env); //ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg); //ZZ UInt size = 0; //ZZ switch(e->Iex.Unop.op) { //ZZ case Iop_QNarrowUn16Uto8Ux8: size = 0; break; //ZZ case Iop_QNarrowUn32Uto16Ux4: size = 1; break; //ZZ case Iop_QNarrowUn64Uto32Ux2: size = 2; break; //ZZ default: vassert(0); //ZZ } //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPYQNUU, //ZZ res, arg, size, False)); //ZZ return res; //ZZ } //ZZ case Iop_PwAddL8Sx8: //ZZ case Iop_PwAddL16Sx4: //ZZ case Iop_PwAddL32Sx2: { //ZZ HReg res = newVRegD(env); //ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg); //ZZ UInt size = 0; //ZZ switch(e->Iex.Unop.op) { //ZZ case Iop_PwAddL8Sx8: size = 0; break; //ZZ case Iop_PwAddL16Sx4: size = 1; break; //ZZ case Iop_PwAddL32Sx2: size = 2; break; //ZZ default: vassert(0); //ZZ } //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_PADDLS, //ZZ res, arg, size, False)); //ZZ return res; //ZZ } //ZZ case Iop_PwAddL8Ux8: //ZZ case Iop_PwAddL16Ux4: //ZZ case Iop_PwAddL32Ux2: { //ZZ HReg res = newVRegD(env); //ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg); //ZZ UInt size = 0; //ZZ switch(e->Iex.Unop.op) { //ZZ case Iop_PwAddL8Ux8: size = 0; break; //ZZ case Iop_PwAddL16Ux4: size = 1; break; //ZZ case Iop_PwAddL32Ux2: size = 2; break; //ZZ default: vassert(0); //ZZ } //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_PADDLU, //ZZ res, arg, size, False)); //ZZ return res; //ZZ } //ZZ case Iop_Cnt8x8: { //ZZ HReg res = newVRegD(env); //ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg); //ZZ UInt size = 0; //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_CNT, //ZZ res, arg, size, False)); //ZZ return res; //ZZ } //ZZ case Iop_Clz8Sx8: //ZZ case Iop_Clz16Sx4: //ZZ case Iop_Clz32Sx2: { //ZZ HReg res = newVRegD(env); //ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg); //ZZ UInt size = 0; //ZZ switch(e->Iex.Unop.op) { //ZZ case Iop_Clz8Sx8: size = 0; break; //ZZ case Iop_Clz16Sx4: size = 1; break; //ZZ case Iop_Clz32Sx2: size = 2; break; //ZZ default: vassert(0); //ZZ } //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_CLZ, //ZZ res, arg, size, False)); //ZZ return res; //ZZ } //ZZ case Iop_Cls8Sx8: //ZZ case Iop_Cls16Sx4: //ZZ case Iop_Cls32Sx2: { //ZZ HReg res = newVRegD(env); //ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg); //ZZ UInt size = 0; //ZZ switch(e->Iex.Unop.op) { //ZZ case Iop_Cls8Sx8: size = 0; break; //ZZ case Iop_Cls16Sx4: size = 1; break; //ZZ case Iop_Cls32Sx2: size = 2; break; //ZZ default: vassert(0); //ZZ } //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_CLS, //ZZ res, arg, size, False)); //ZZ return res; //ZZ } //ZZ case Iop_FtoI32Sx2_RZ: { //ZZ HReg res = newVRegD(env); //ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg); //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VCVTFtoS, //ZZ res, arg, 2, False)); //ZZ return res; //ZZ } //ZZ case Iop_FtoI32Ux2_RZ: { //ZZ HReg res = newVRegD(env); //ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg); //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VCVTFtoU, //ZZ res, arg, 2, False)); //ZZ return res; //ZZ } //ZZ case Iop_I32StoFx2: { //ZZ HReg res = newVRegD(env); //ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg); //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VCVTStoF, //ZZ res, arg, 2, False)); //ZZ return res; 
//ZZ } //ZZ case Iop_I32UtoFx2: { //ZZ HReg res = newVRegD(env); //ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg); //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VCVTUtoF, //ZZ res, arg, 2, False)); //ZZ return res; //ZZ } //ZZ case Iop_F32toF16x4: { //ZZ HReg res = newVRegD(env); //ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg); //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VCVTF32toF16, //ZZ res, arg, 2, False)); //ZZ return res; //ZZ } //ZZ case Iop_Recip32Fx2: { //ZZ HReg res = newVRegD(env); //ZZ HReg argL = iselNeon64Expr(env, e->Iex.Unop.arg); //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VRECIPF, //ZZ res, argL, 0, False)); //ZZ return res; //ZZ } //ZZ case Iop_Recip32x2: { //ZZ HReg res = newVRegD(env); //ZZ HReg argL = iselNeon64Expr(env, e->Iex.Unop.arg); //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VRECIP, //ZZ res, argL, 0, False)); //ZZ return res; //ZZ } //ZZ case Iop_Abs32Fx2: { //ZZ DECLARE_PATTERN(p_vabd_32fx2); //ZZ DEFINE_PATTERN(p_vabd_32fx2, //ZZ unop(Iop_Abs32Fx2, //ZZ binop(Iop_Sub32Fx2, //ZZ bind(0), //ZZ bind(1)))); //ZZ if (matchIRExpr(&mi, p_vabd_32fx2, e)) { //ZZ HReg res = newVRegD(env); //ZZ HReg argL = iselNeon64Expr(env, mi.bindee[0]); //ZZ HReg argR = iselNeon64Expr(env, mi.bindee[1]); //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VABDFP, //ZZ res, argL, argR, 0, False)); //ZZ return res; //ZZ } else { //ZZ HReg res = newVRegD(env); //ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg); //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VABSFP, //ZZ res, arg, 0, False)); //ZZ return res; //ZZ } //ZZ } //ZZ case Iop_Rsqrte32Fx2: { //ZZ HReg res = newVRegD(env); //ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg); //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VRSQRTEFP, //ZZ res, arg, 0, False)); //ZZ return res; //ZZ } //ZZ case Iop_Rsqrte32x2: { //ZZ HReg res = newVRegD(env); //ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg); //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VRSQRTE, //ZZ res, arg, 0, False)); //ZZ return res; //ZZ } //ZZ case Iop_Neg32Fx2: { //ZZ HReg res = newVRegD(env); //ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg); //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VNEGF, //ZZ res, arg, 0, False)); //ZZ return res; //ZZ } //ZZ default: //ZZ break; //ZZ } //ZZ } /* if (e->tag == Iex_Unop) */ //ZZ //ZZ if (e->tag == Iex_Triop) { //ZZ IRTriop *triop = e->Iex.Triop.details; //ZZ //ZZ switch (triop->op) { //ZZ case Iop_Extract64: { //ZZ HReg res = newVRegD(env); //ZZ HReg argL = iselNeon64Expr(env, triop->arg1); //ZZ HReg argR = iselNeon64Expr(env, triop->arg2); //ZZ UInt imm4; //ZZ if (triop->arg3->tag != Iex_Const || //ZZ typeOfIRExpr(env->type_env, triop->arg3) != Ity_I8) { //ZZ vpanic("ARM target supports Iop_Extract64 with constant " //ZZ "third argument less than 8 only\n"); //ZZ } //ZZ imm4 = triop->arg3->Iex.Const.con->Ico.U8; //ZZ if (imm4 >= 8) { //ZZ vpanic("ARM target supports Iop_Extract64 with constant " //ZZ "third argument less than 8 only\n"); //ZZ } //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VEXT, //ZZ res, argL, argR, imm4, False)); //ZZ return res; //ZZ } //ZZ case Iop_SetElem8x8: //ZZ case Iop_SetElem16x4: //ZZ case Iop_SetElem32x2: { //ZZ HReg res = newVRegD(env); //ZZ HReg dreg = iselNeon64Expr(env, triop->arg1); //ZZ HReg arg = iselIntExpr_R(env, triop->arg3); //ZZ UInt index, size; //ZZ if (triop->arg2->tag != Iex_Const || //ZZ typeOfIRExpr(env->type_env, triop->arg2) != Ity_I8) { //ZZ vpanic("ARM target supports SetElem with constant " //ZZ "second argument only\n"); //ZZ } //ZZ index = triop->arg2->Iex.Const.con->Ico.U8; //ZZ 
switch (triop->op) { //ZZ case Iop_SetElem8x8: vassert(index < 8); size = 0; break; //ZZ case Iop_SetElem16x4: vassert(index < 4); size = 1; break; //ZZ case Iop_SetElem32x2: vassert(index < 2); size = 2; break; //ZZ default: vassert(0); //ZZ } //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPY, res, dreg, 4, False)); //ZZ addInstr(env, ARMInstr_NUnaryS(ARMneon_SETELEM, //ZZ mkARMNRS(ARMNRS_Scalar, res, index), //ZZ mkARMNRS(ARMNRS_Reg, arg, 0), //ZZ size, False)); //ZZ return res; //ZZ } //ZZ default: //ZZ break; //ZZ } //ZZ } //ZZ //ZZ /* --------- MULTIPLEX --------- */ //ZZ if (e->tag == Iex_ITE) { // VFD //ZZ HReg rLo, rHi; //ZZ HReg res = newVRegD(env); //ZZ iselInt64Expr(&rHi, &rLo, env, e); //ZZ addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo)); //ZZ return res; //ZZ } //ZZ //ZZ ppIRExpr(e); //ZZ vpanic("iselNeon64Expr"); //ZZ } /*---------------------------------------------------------*/ /*--- ISEL: Vector (NEON) expressions (128 bit) ---*/ /*---------------------------------------------------------*/ static HReg iselV128Expr ( ISelEnv* env, IRExpr* e ) { HReg r = iselV128Expr_wrk( env, e ); vassert(hregClass(r) == HRcVec128); vassert(hregIsVirtual(r)); return r; } /* DO NOT CALL THIS DIRECTLY */ static HReg iselV128Expr_wrk ( ISelEnv* env, IRExpr* e ) { IRType ty = typeOfIRExpr(env->type_env, e); vassert(e); vassert(ty == Ity_V128); if (e->tag == Iex_RdTmp) { return lookupIRTemp(env, e->Iex.RdTmp.tmp); } if (e->tag == Iex_Const) { /* Only a very limited range of constants is handled. */ vassert(e->Iex.Const.con->tag == Ico_V128); UShort con = e->Iex.Const.con->Ico.V128; if (con == 0x0000) { HReg res = newVRegV(env); addInstr(env, ARM64Instr_VImmQ(res, con)); return res; } /* Unhandled */ goto v128_expr_bad; } if (e->tag == Iex_Load) { HReg res = newVRegV(env); HReg rN = iselIntExpr_R(env, e->Iex.Load.addr); vassert(ty == Ity_V128); addInstr(env, ARM64Instr_VLdStQ(True/*isLoad*/, res, rN)); return res; } if (e->tag == Iex_Get) { UInt offs = (UInt)e->Iex.Get.offset; if (offs < (1<<12)) { HReg addr = mk_baseblock_128bit_access_addr(env, offs); HReg res = newVRegV(env); vassert(ty == Ity_V128); addInstr(env, ARM64Instr_VLdStQ(True/*isLoad*/, res, addr)); return res; } goto v128_expr_bad; } if (e->tag == Iex_Unop) { /* Iop_ZeroHIXXofV128 cases */ UShort imm16 = 0; switch (e->Iex.Unop.op) { case Iop_ZeroHI64ofV128: imm16 = 0x00FF; break; case Iop_ZeroHI96ofV128: imm16 = 0x000F; break; case Iop_ZeroHI112ofV128: imm16 = 0x0003; break; case Iop_ZeroHI120ofV128: imm16 = 0x0001; break; default: break; } if (imm16 != 0) { HReg src = iselV128Expr(env, e->Iex.Unop.arg); HReg imm = newVRegV(env); HReg res = newVRegV(env); addInstr(env, ARM64Instr_VImmQ(imm, imm16)); addInstr(env, ARM64Instr_VBinV(ARM64vecb_AND, res, src, imm)); return res; } /* Other cases */ switch (e->Iex.Unop.op) { case Iop_Cnt8x16: case Iop_NotV128: case Iop_AddLV8Ux16: case Iop_AddLV16Ux8: case Iop_AddLV32Ux4: case Iop_AddLV8Sx16: case Iop_AddLV16Sx8: case Iop_AddLV32Sx4: case Iop_Abs64Fx2: case Iop_Abs32Fx4: case Iop_Neg64Fx2: case Iop_Neg32Fx4: { HReg res = newVRegV(env); HReg arg = iselV128Expr(env, e->Iex.Unop.arg); ARM64VecUnaryOp op = ARM64vecu_INVALID; switch (e->Iex.Unop.op) { case Iop_NotV128: op = ARM64vecu_NOT; break; case Iop_Cnt8x16: op = ARM64vecu_CNT; break; case Iop_Abs64Fx2: op = ARM64vecu_FABS64x2; break; case Iop_Abs32Fx4: op = ARM64vecu_FABS32x4; break; case Iop_Neg64Fx2: op = ARM64vecu_FNEG64x2; break; case Iop_Neg32Fx4: op = ARM64vecu_FNEG32x4; break; case Iop_AddLV8Ux16: op = 
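/* Note on the Iop_ZeroHIXXofV128 handling above: this relies on the
   (apparent) ARM64Instr_VImmQ encoding, in which each bit of the 16-bit
   immediate selects one byte lane of the 128-bit value, 0 -> 0x00 and
   1 -> 0xFF.  Hence 0x00FF builds an all-ones mask in the low 64 bits,
   and 0x000F/0x0003/0x0001 do the same in the low 32/16/8 bits, so the
   AND zeroes the high 64/96/112/120 bits respectively. */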
ARM64vecu_UADDLV8x16; break; case Iop_AddLV16Ux8: op = ARM64vecu_UADDLV16x8; break; case Iop_AddLV32Ux4: op = ARM64vecu_UADDLV32x4; break; case Iop_AddLV8Sx16: op = ARM64vecu_SADDLV8x16; break; case Iop_AddLV16Sx8: op = ARM64vecu_SADDLV16x8; break; case Iop_AddLV32Sx4: op = ARM64vecu_SADDLV32x4; break; default: vassert(0); } addInstr(env, ARM64Instr_VUnaryV(op, res, arg)); return res; } case Iop_CmpNEZ8x16: case Iop_CmpNEZ16x8: case Iop_CmpNEZ32x4: case Iop_CmpNEZ64x2: { HReg arg = iselV128Expr(env, e->Iex.Unop.arg); HReg zero = newVRegV(env); HReg res = newVRegV(env); ARM64VecBinOp cmp = ARM64vecb_INVALID; switch (e->Iex.Unop.op) { case Iop_CmpNEZ64x2: cmp = ARM64vecb_CMEQ64x2; break; case Iop_CmpNEZ32x4: cmp = ARM64vecb_CMEQ32x4; break; case Iop_CmpNEZ16x8: cmp = ARM64vecb_CMEQ16x8; break; case Iop_CmpNEZ8x16: cmp = ARM64vecb_CMEQ8x16; break; default: vassert(0); } // This is pretty feeble. Better: use CMP against zero // and avoid the extra instruction and extra register. addInstr(env, ARM64Instr_VImmQ(zero, 0x0000)); addInstr(env, ARM64Instr_VBinV(cmp, res, arg, zero)); addInstr(env, ARM64Instr_VUnaryV(ARM64vecu_NOT, res, res)); return res; } case Iop_Widen8Uto16x8: case Iop_Widen16Uto32x4: case Iop_Widen32Uto64x2: case Iop_Widen8Sto16x8: case Iop_Widen16Sto32x4: case Iop_Widen32Sto64x2: { HReg res = newVRegV(env); HReg arg = iselIntExpr_R(env, e->Iex.Unop.arg); ARM64VecUnaryOp wop = ARM64vecu_INVALID; switch (e->Iex.Unop.op) { case Iop_Widen8Uto16x8: wop = ARM64vecu_VMOVL8U; break; case Iop_Widen16Uto32x4: wop = ARM64vecu_VMOVL16U; break; case Iop_Widen32Uto64x2: wop = ARM64vecu_VMOVL32U; break; case Iop_Widen8Sto16x8: wop = ARM64vecu_VMOVL8S; break; case Iop_Widen16Sto32x4: wop = ARM64vecu_VMOVL16S; break; case Iop_Widen32Sto64x2: wop = ARM64vecu_VMOVL32S; break; default: vassert(0); } addInstr(env, ARM64Instr_VUnaryV(wop, res, arg)); return res; } //ZZ case Iop_NotV128: { //ZZ DECLARE_PATTERN(p_veqz_8x16); //ZZ DECLARE_PATTERN(p_veqz_16x8); //ZZ DECLARE_PATTERN(p_veqz_32x4); //ZZ DECLARE_PATTERN(p_vcge_8sx16); //ZZ DECLARE_PATTERN(p_vcge_16sx8); //ZZ DECLARE_PATTERN(p_vcge_32sx4); //ZZ DECLARE_PATTERN(p_vcge_8ux16); //ZZ DECLARE_PATTERN(p_vcge_16ux8); //ZZ DECLARE_PATTERN(p_vcge_32ux4); //ZZ DEFINE_PATTERN(p_veqz_8x16, //ZZ unop(Iop_NotV128, unop(Iop_CmpNEZ8x16, bind(0)))); //ZZ DEFINE_PATTERN(p_veqz_16x8, //ZZ unop(Iop_NotV128, unop(Iop_CmpNEZ16x8, bind(0)))); //ZZ DEFINE_PATTERN(p_veqz_32x4, //ZZ unop(Iop_NotV128, unop(Iop_CmpNEZ32x4, bind(0)))); //ZZ DEFINE_PATTERN(p_vcge_8sx16, //ZZ unop(Iop_NotV128, binop(Iop_CmpGT8Sx16, bind(1), bind(0)))); //ZZ DEFINE_PATTERN(p_vcge_16sx8, //ZZ unop(Iop_NotV128, binop(Iop_CmpGT16Sx8, bind(1), bind(0)))); //ZZ DEFINE_PATTERN(p_vcge_32sx4, //ZZ unop(Iop_NotV128, binop(Iop_CmpGT32Sx4, bind(1), bind(0)))); //ZZ DEFINE_PATTERN(p_vcge_8ux16, //ZZ unop(Iop_NotV128, binop(Iop_CmpGT8Ux16, bind(1), bind(0)))); //ZZ DEFINE_PATTERN(p_vcge_16ux8, //ZZ unop(Iop_NotV128, binop(Iop_CmpGT16Ux8, bind(1), bind(0)))); //ZZ DEFINE_PATTERN(p_vcge_32ux4, //ZZ unop(Iop_NotV128, binop(Iop_CmpGT32Ux4, bind(1), bind(0)))); //ZZ if (matchIRExpr(&mi, p_veqz_8x16, e)) { //ZZ HReg res = newVRegV(env); //ZZ HReg arg = iselNeonExpr(env, mi.bindee[0]); //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 0, True)); //ZZ return res; //ZZ } else if (matchIRExpr(&mi, p_veqz_16x8, e)) { //ZZ HReg res = newVRegV(env); //ZZ HReg arg = iselNeonExpr(env, mi.bindee[0]); //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 1, True)); //ZZ return res; //ZZ } else if 
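/* Re the CmpNEZ{8x16,16x8,32x4,64x2} sequence above ("pretty feeble"):
   the improvement it hints at would use the AArch64 compare-against-zero
   forms directly, via some hypothetical unary op (say
   ARM64vecu_CMEQZ32x4 -- not currently in host_arm64_defs.h), e.g.

      cmeq  vD.4s, vN.4s, #0     // D = (N == 0) ? ~0 : 0
      not   vD.16b, vD.16b       // D = (N != 0) ? ~0 : 0

   which saves the VImmQ zero constant and the register holding it. */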
(matchIRExpr(&mi, p_veqz_32x4, e)) { //ZZ HReg res = newVRegV(env); //ZZ HReg arg = iselNeonExpr(env, mi.bindee[0]); //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 2, True)); //ZZ return res; //ZZ } else if (matchIRExpr(&mi, p_vcge_8sx16, e)) { //ZZ HReg res = newVRegV(env); //ZZ HReg argL = iselNeonExpr(env, mi.bindee[0]); //ZZ HReg argR = iselNeonExpr(env, mi.bindee[1]); //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGES, //ZZ res, argL, argR, 0, True)); //ZZ return res; //ZZ } else if (matchIRExpr(&mi, p_vcge_16sx8, e)) { //ZZ HReg res = newVRegV(env); //ZZ HReg argL = iselNeonExpr(env, mi.bindee[0]); //ZZ HReg argR = iselNeonExpr(env, mi.bindee[1]); //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGES, //ZZ res, argL, argR, 1, True)); //ZZ return res; //ZZ } else if (matchIRExpr(&mi, p_vcge_32sx4, e)) { //ZZ HReg res = newVRegV(env); //ZZ HReg argL = iselNeonExpr(env, mi.bindee[0]); //ZZ HReg argR = iselNeonExpr(env, mi.bindee[1]); //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGES, //ZZ res, argL, argR, 2, True)); //ZZ return res; //ZZ } else if (matchIRExpr(&mi, p_vcge_8ux16, e)) { //ZZ HReg res = newVRegV(env); //ZZ HReg argL = iselNeonExpr(env, mi.bindee[0]); //ZZ HReg argR = iselNeonExpr(env, mi.bindee[1]); //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU, //ZZ res, argL, argR, 0, True)); //ZZ return res; //ZZ } else if (matchIRExpr(&mi, p_vcge_16ux8, e)) { //ZZ HReg res = newVRegV(env); //ZZ HReg argL = iselNeonExpr(env, mi.bindee[0]); //ZZ HReg argR = iselNeonExpr(env, mi.bindee[1]); //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU, //ZZ res, argL, argR, 1, True)); //ZZ return res; //ZZ } else if (matchIRExpr(&mi, p_vcge_32ux4, e)) { //ZZ HReg res = newVRegV(env); //ZZ HReg argL = iselNeonExpr(env, mi.bindee[0]); //ZZ HReg argR = iselNeonExpr(env, mi.bindee[1]); //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU, //ZZ res, argL, argR, 2, True)); //ZZ return res; //ZZ } else { //ZZ HReg res = newVRegV(env); //ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg); //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_NOT, res, arg, 4, True)); //ZZ return res; //ZZ } //ZZ } //ZZ case Iop_Dup8x16: //ZZ case Iop_Dup16x8: //ZZ case Iop_Dup32x4: { //ZZ HReg res, arg; //ZZ UInt size; //ZZ DECLARE_PATTERN(p_vdup_8x16); //ZZ DECLARE_PATTERN(p_vdup_16x8); //ZZ DECLARE_PATTERN(p_vdup_32x4); //ZZ DEFINE_PATTERN(p_vdup_8x16, //ZZ unop(Iop_Dup8x16, binop(Iop_GetElem8x8, bind(0), bind(1)))); //ZZ DEFINE_PATTERN(p_vdup_16x8, //ZZ unop(Iop_Dup16x8, binop(Iop_GetElem16x4, bind(0), bind(1)))); //ZZ DEFINE_PATTERN(p_vdup_32x4, //ZZ unop(Iop_Dup32x4, binop(Iop_GetElem32x2, bind(0), bind(1)))); //ZZ if (matchIRExpr(&mi, p_vdup_8x16, e)) { //ZZ UInt index; //ZZ UInt imm4; //ZZ if (mi.bindee[1]->tag == Iex_Const && //ZZ typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) { //ZZ index = mi.bindee[1]->Iex.Const.con->Ico.U8; //ZZ imm4 = (index << 1) + 1; //ZZ if (index < 8) { //ZZ res = newVRegV(env); //ZZ arg = iselNeon64Expr(env, mi.bindee[0]); //ZZ addInstr(env, ARMInstr_NUnaryS( //ZZ ARMneon_VDUP, //ZZ mkARMNRS(ARMNRS_Reg, res, 0), //ZZ mkARMNRS(ARMNRS_Scalar, arg, index), //ZZ imm4, True //ZZ )); //ZZ return res; //ZZ } //ZZ } //ZZ } else if (matchIRExpr(&mi, p_vdup_16x8, e)) { //ZZ UInt index; //ZZ UInt imm4; //ZZ if (mi.bindee[1]->tag == Iex_Const && //ZZ typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) { //ZZ index = mi.bindee[1]->Iex.Const.con->Ico.U8; //ZZ imm4 = (index << 2) + 2; //ZZ if (index < 4) { //ZZ res = newVRegV(env); //ZZ arg = iselNeon64Expr(env, mi.bindee[0]); //ZZ addInstr(env, 
ARMInstr_NUnaryS( //ZZ ARMneon_VDUP, //ZZ mkARMNRS(ARMNRS_Reg, res, 0), //ZZ mkARMNRS(ARMNRS_Scalar, arg, index), //ZZ imm4, True //ZZ )); //ZZ return res; //ZZ } //ZZ } //ZZ } else if (matchIRExpr(&mi, p_vdup_32x4, e)) { //ZZ UInt index; //ZZ UInt imm4; //ZZ if (mi.bindee[1]->tag == Iex_Const && //ZZ typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) { //ZZ index = mi.bindee[1]->Iex.Const.con->Ico.U8; //ZZ imm4 = (index << 3) + 4; //ZZ if (index < 2) { //ZZ res = newVRegV(env); //ZZ arg = iselNeon64Expr(env, mi.bindee[0]); //ZZ addInstr(env, ARMInstr_NUnaryS( //ZZ ARMneon_VDUP, //ZZ mkARMNRS(ARMNRS_Reg, res, 0), //ZZ mkARMNRS(ARMNRS_Scalar, arg, index), //ZZ imm4, True //ZZ )); //ZZ return res; //ZZ } //ZZ } //ZZ } //ZZ arg = iselIntExpr_R(env, e->Iex.Unop.arg); //ZZ res = newVRegV(env); //ZZ switch (e->Iex.Unop.op) { //ZZ case Iop_Dup8x16: size = 0; break; //ZZ case Iop_Dup16x8: size = 1; break; //ZZ case Iop_Dup32x4: size = 2; break; //ZZ default: vassert(0); //ZZ } //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_DUP, res, arg, size, True)); //ZZ return res; //ZZ } //ZZ case Iop_Abs8x16: //ZZ case Iop_Abs16x8: //ZZ case Iop_Abs32x4: { //ZZ HReg res = newVRegV(env); //ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg); //ZZ UInt size = 0; //ZZ switch(e->Iex.Binop.op) { //ZZ case Iop_Abs8x16: size = 0; break; //ZZ case Iop_Abs16x8: size = 1; break; //ZZ case Iop_Abs32x4: size = 2; break; //ZZ default: vassert(0); //ZZ } //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_ABS, res, arg, size, True)); //ZZ return res; //ZZ } //ZZ case Iop_Reverse64_8x16: //ZZ case Iop_Reverse64_16x8: //ZZ case Iop_Reverse64_32x4: { //ZZ HReg res = newVRegV(env); //ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg); //ZZ UInt size = 0; //ZZ switch(e->Iex.Binop.op) { //ZZ case Iop_Reverse64_8x16: size = 0; break; //ZZ case Iop_Reverse64_16x8: size = 1; break; //ZZ case Iop_Reverse64_32x4: size = 2; break; //ZZ default: vassert(0); //ZZ } //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_REV64, //ZZ res, arg, size, True)); //ZZ return res; //ZZ } //ZZ case Iop_Reverse32_8x16: //ZZ case Iop_Reverse32_16x8: { //ZZ HReg res = newVRegV(env); //ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg); //ZZ UInt size = 0; //ZZ switch(e->Iex.Binop.op) { //ZZ case Iop_Reverse32_8x16: size = 0; break; //ZZ case Iop_Reverse32_16x8: size = 1; break; //ZZ default: vassert(0); //ZZ } //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_REV32, //ZZ res, arg, size, True)); //ZZ return res; //ZZ } //ZZ case Iop_Reverse16_8x16: { //ZZ HReg res = newVRegV(env); //ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg); //ZZ UInt size = 0; //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_REV16, //ZZ res, arg, size, True)); //ZZ return res; //ZZ } //ZZ case Iop_CmpNEZ64x2: { //ZZ HReg x_lsh = newVRegV(env); //ZZ HReg x_rsh = newVRegV(env); //ZZ HReg lsh_amt = newVRegV(env); //ZZ HReg rsh_amt = newVRegV(env); //ZZ HReg zero = newVRegV(env); //ZZ HReg tmp = newVRegV(env); //ZZ HReg tmp2 = newVRegV(env); //ZZ HReg res = newVRegV(env); //ZZ HReg x = newVRegV(env); //ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg); //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, tmp2, arg, 2, True)); //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_NOT, x, tmp2, 4, True)); //ZZ addInstr(env, ARMInstr_NeonImm(lsh_amt, ARMNImm_TI(0, 32))); //ZZ addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0, 0))); //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VSUB, //ZZ rsh_amt, zero, lsh_amt, 2, True)); //ZZ addInstr(env, ARMInstr_NShift(ARMneon_VSHL, //ZZ x_lsh, x, lsh_amt, 3, True)); //ZZ addInstr(env, 
ARMInstr_NShift(ARMneon_VSHL, //ZZ x_rsh, x, rsh_amt, 3, True)); //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VORR, //ZZ tmp, x_lsh, x_rsh, 0, True)); //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VORR, //ZZ res, tmp, x, 0, True)); //ZZ return res; //ZZ } //ZZ case Iop_Widen8Sto16x8: //ZZ case Iop_Widen16Sto32x4: //ZZ case Iop_Widen32Sto64x2: { //ZZ HReg res = newVRegV(env); //ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg); //ZZ UInt size; //ZZ switch (e->Iex.Unop.op) { //ZZ case Iop_Widen8Sto16x8: size = 0; break; //ZZ case Iop_Widen16Sto32x4: size = 1; break; //ZZ case Iop_Widen32Sto64x2: size = 2; break; //ZZ default: vassert(0); //ZZ } //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPYLS, //ZZ res, arg, size, True)); //ZZ return res; //ZZ } //ZZ case Iop_PwAddL8Sx16: //ZZ case Iop_PwAddL16Sx8: //ZZ case Iop_PwAddL32Sx4: { //ZZ HReg res = newVRegV(env); //ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg); //ZZ UInt size = 0; //ZZ switch(e->Iex.Binop.op) { //ZZ case Iop_PwAddL8Sx16: size = 0; break; //ZZ case Iop_PwAddL16Sx8: size = 1; break; //ZZ case Iop_PwAddL32Sx4: size = 2; break; //ZZ default: vassert(0); //ZZ } //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_PADDLS, //ZZ res, arg, size, True)); //ZZ return res; //ZZ } //ZZ case Iop_PwAddL8Ux16: //ZZ case Iop_PwAddL16Ux8: //ZZ case Iop_PwAddL32Ux4: { //ZZ HReg res = newVRegV(env); //ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg); //ZZ UInt size = 0; //ZZ switch(e->Iex.Binop.op) { //ZZ case Iop_PwAddL8Ux16: size = 0; break; //ZZ case Iop_PwAddL16Ux8: size = 1; break; //ZZ case Iop_PwAddL32Ux4: size = 2; break; //ZZ default: vassert(0); //ZZ } //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_PADDLU, //ZZ res, arg, size, True)); //ZZ return res; //ZZ } //ZZ case Iop_Cnt8x16: { //ZZ HReg res = newVRegV(env); //ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg); //ZZ UInt size = 0; //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_CNT, res, arg, size, True)); //ZZ return res; //ZZ } //ZZ case Iop_Clz8Sx16: //ZZ case Iop_Clz16Sx8: //ZZ case Iop_Clz32Sx4: { //ZZ HReg res = newVRegV(env); //ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg); //ZZ UInt size = 0; //ZZ switch(e->Iex.Binop.op) { //ZZ case Iop_Clz8Sx16: size = 0; break; //ZZ case Iop_Clz16Sx8: size = 1; break; //ZZ case Iop_Clz32Sx4: size = 2; break; //ZZ default: vassert(0); //ZZ } //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_CLZ, res, arg, size, True)); //ZZ return res; //ZZ } //ZZ case Iop_Cls8Sx16: //ZZ case Iop_Cls16Sx8: //ZZ case Iop_Cls32Sx4: { //ZZ HReg res = newVRegV(env); //ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg); //ZZ UInt size = 0; //ZZ switch(e->Iex.Binop.op) { //ZZ case Iop_Cls8Sx16: size = 0; break; //ZZ case Iop_Cls16Sx8: size = 1; break; //ZZ case Iop_Cls32Sx4: size = 2; break; //ZZ default: vassert(0); //ZZ } //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_CLS, res, arg, size, True)); //ZZ return res; //ZZ } //ZZ case Iop_FtoI32Sx4_RZ: { //ZZ HReg res = newVRegV(env); //ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg); //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VCVTFtoS, //ZZ res, arg, 2, True)); //ZZ return res; //ZZ } //ZZ case Iop_FtoI32Ux4_RZ: { //ZZ HReg res = newVRegV(env); //ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg); //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VCVTFtoU, //ZZ res, arg, 2, True)); //ZZ return res; //ZZ } //ZZ case Iop_I32StoFx4: { //ZZ HReg res = newVRegV(env); //ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg); //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VCVTStoF, //ZZ res, arg, 2, True)); //ZZ return res; //ZZ } //ZZ case Iop_I32UtoFx4: { //ZZ HReg res 
= newVRegV(env); //ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg); //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VCVTUtoF, //ZZ res, arg, 2, True)); //ZZ return res; //ZZ } //ZZ case Iop_F16toF32x4: { //ZZ HReg res = newVRegV(env); //ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg); //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VCVTF16toF32, //ZZ res, arg, 2, True)); //ZZ return res; //ZZ } //ZZ case Iop_Recip32Fx4: { //ZZ HReg res = newVRegV(env); //ZZ HReg argL = iselNeonExpr(env, e->Iex.Unop.arg); //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VRECIPF, //ZZ res, argL, 0, True)); //ZZ return res; //ZZ } //ZZ case Iop_Recip32x4: { //ZZ HReg res = newVRegV(env); //ZZ HReg argL = iselNeonExpr(env, e->Iex.Unop.arg); //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VRECIP, //ZZ res, argL, 0, True)); //ZZ return res; //ZZ } //ZZ case Iop_Rsqrte32Fx4: { //ZZ HReg res = newVRegV(env); //ZZ HReg argL = iselNeonExpr(env, e->Iex.Unop.arg); //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VRSQRTEFP, //ZZ res, argL, 0, True)); //ZZ return res; //ZZ } //ZZ case Iop_Rsqrte32x4: { //ZZ HReg res = newVRegV(env); //ZZ HReg argL = iselNeonExpr(env, e->Iex.Unop.arg); //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VRSQRTE, //ZZ res, argL, 0, True)); //ZZ return res; //ZZ } /* ... */ default: break; } /* switch on the unop */ } /* if (e->tag == Iex_Unop) */ if (e->tag == Iex_Binop) { switch (e->Iex.Binop.op) { case Iop_64HLtoV128: { HReg res = newVRegV(env); HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1); HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2); addInstr(env, ARM64Instr_VQfromXX(res, argL, argR)); return res; } //ZZ case Iop_AndV128: { //ZZ HReg res = newVRegV(env); //ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); //ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VAND, //ZZ res, argL, argR, 4, True)); //ZZ return res; //ZZ } //ZZ case Iop_OrV128: { //ZZ HReg res = newVRegV(env); //ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); //ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VORR, //ZZ res, argL, argR, 4, True)); //ZZ return res; //ZZ } //ZZ case Iop_XorV128: { //ZZ HReg res = newVRegV(env); //ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); //ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VXOR, //ZZ res, argL, argR, 4, True)); //ZZ return res; //ZZ } //ZZ case Iop_Add8x16: //ZZ case Iop_Add16x8: //ZZ case Iop_Add32x4: case Iop_AndV128: case Iop_OrV128: case Iop_XorV128: case Iop_Max32Ux4: case Iop_Max16Ux8: case Iop_Max8Ux16: case Iop_Min32Ux4: case Iop_Min16Ux8: case Iop_Min8Ux16: case Iop_Max32Sx4: case Iop_Max16Sx8: case Iop_Max8Sx16: case Iop_Min32Sx4: case Iop_Min16Sx8: case Iop_Min8Sx16: case Iop_Add64x2: case Iop_Add32x4: case Iop_Add16x8: case Iop_Add8x16: case Iop_Sub64x2: case Iop_Sub32x4: case Iop_Sub16x8: case Iop_Sub8x16: case Iop_Mul32x4: case Iop_Mul16x8: case Iop_Mul8x16: case Iop_CmpEQ64x2: case Iop_CmpEQ32x4: case Iop_CmpEQ16x8: case Iop_CmpEQ8x16: case Iop_CmpGT64Ux2: case Iop_CmpGT32Ux4: case Iop_CmpGT16Ux8: case Iop_CmpGT8Ux16: case Iop_CmpGT64Sx2: case Iop_CmpGT32Sx4: case Iop_CmpGT16Sx8: case Iop_CmpGT8Sx16: case Iop_CmpEQ64Fx2: case Iop_CmpEQ32Fx4: case Iop_CmpLE64Fx2: case Iop_CmpLE32Fx4: case Iop_CmpLT64Fx2: case Iop_CmpLT32Fx4: case Iop_Perm8x16: { HReg res = newVRegV(env); HReg argL = iselV128Expr(env, e->Iex.Binop.arg1); HReg argR = iselV128Expr(env, e->Iex.Binop.arg2); Bool sw = False; ARM64VecBinOp op = ARM64vecb_INVALID; 
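/* AArch64 has no register-register FCMLE/FCMLT vector encodings (only
   FCMGE/FCMGT, plus the compare-against-zero forms), so Iop_CmpLE*Fx*
   and Iop_CmpLT*Fx* are selected below as FCMGE/FCMGT with the operands
   swapped: a <= b is computed as b >= a, and a < b as b > a.  That is
   what the |sw| flag arranges. */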
switch (e->Iex.Binop.op) { case Iop_AndV128: op = ARM64vecb_AND; break; case Iop_OrV128: op = ARM64vecb_ORR; break; case Iop_XorV128: op = ARM64vecb_XOR; break; case Iop_Max32Ux4: op = ARM64vecb_UMAX32x4; break; case Iop_Max16Ux8: op = ARM64vecb_UMAX16x8; break; case Iop_Max8Ux16: op = ARM64vecb_UMAX8x16; break; case Iop_Min32Ux4: op = ARM64vecb_UMIN32x4; break; case Iop_Min16Ux8: op = ARM64vecb_UMIN16x8; break; case Iop_Min8Ux16: op = ARM64vecb_UMIN8x16; break; case Iop_Max32Sx4: op = ARM64vecb_SMAX32x4; break; case Iop_Max16Sx8: op = ARM64vecb_SMAX16x8; break; case Iop_Max8Sx16: op = ARM64vecb_SMAX8x16; break; case Iop_Min32Sx4: op = ARM64vecb_SMIN32x4; break; case Iop_Min16Sx8: op = ARM64vecb_SMIN16x8; break; case Iop_Min8Sx16: op = ARM64vecb_SMIN8x16; break; case Iop_Add64x2: op = ARM64vecb_ADD64x2; break; case Iop_Add32x4: op = ARM64vecb_ADD32x4; break; case Iop_Add16x8: op = ARM64vecb_ADD16x8; break; case Iop_Add8x16: op = ARM64vecb_ADD8x16; break; case Iop_Sub64x2: op = ARM64vecb_SUB64x2; break; case Iop_Sub32x4: op = ARM64vecb_SUB32x4; break; case Iop_Sub16x8: op = ARM64vecb_SUB16x8; break; case Iop_Sub8x16: op = ARM64vecb_SUB8x16; break; case Iop_Mul32x4: op = ARM64vecb_MUL32x4; break; case Iop_Mul16x8: op = ARM64vecb_MUL16x8; break; case Iop_Mul8x16: op = ARM64vecb_MUL8x16; break; case Iop_CmpEQ64x2: op = ARM64vecb_CMEQ64x2; break; case Iop_CmpEQ32x4: op = ARM64vecb_CMEQ32x4; break; case Iop_CmpEQ16x8: op = ARM64vecb_CMEQ16x8; break; case Iop_CmpEQ8x16: op = ARM64vecb_CMEQ8x16; break; case Iop_CmpGT64Ux2: op = ARM64vecb_CMHI64x2; break; case Iop_CmpGT32Ux4: op = ARM64vecb_CMHI32x4; break; case Iop_CmpGT16Ux8: op = ARM64vecb_CMHI16x8; break; case Iop_CmpGT8Ux16: op = ARM64vecb_CMHI8x16; break; case Iop_CmpGT64Sx2: op = ARM64vecb_CMGT64x2; break; case Iop_CmpGT32Sx4: op = ARM64vecb_CMGT32x4; break; case Iop_CmpGT16Sx8: op = ARM64vecb_CMGT16x8; break; case Iop_CmpGT8Sx16: op = ARM64vecb_CMGT8x16; break; case Iop_CmpEQ64Fx2: op = ARM64vecb_FCMEQ64x2; break; case Iop_CmpEQ32Fx4: op = ARM64vecb_FCMEQ32x4; break; case Iop_CmpLE64Fx2: op = ARM64vecb_FCMGE64x2; sw = True; break; case Iop_CmpLE32Fx4: op = ARM64vecb_FCMGE32x4; sw = True; break; case Iop_CmpLT64Fx2: op = ARM64vecb_FCMGT64x2; sw = True; break; case Iop_CmpLT32Fx4: op = ARM64vecb_FCMGT32x4; sw = True; break; case Iop_Perm8x16: op = ARM64vecb_TBL1; break; default: vassert(0); } if (sw) { addInstr(env, ARM64Instr_VBinV(op, res, argR, argL)); } else { addInstr(env, ARM64Instr_VBinV(op, res, argL, argR)); } return res; } //ZZ case Iop_Add32Fx4: { //ZZ HReg res = newVRegV(env); //ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); //ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); //ZZ UInt size = 0; //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VADDFP, //ZZ res, argL, argR, size, True)); //ZZ return res; //ZZ } //ZZ case Iop_Recps32Fx4: { //ZZ HReg res = newVRegV(env); //ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); //ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); //ZZ UInt size = 0; //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VRECPS, //ZZ res, argL, argR, size, True)); //ZZ return res; //ZZ } //ZZ case Iop_Rsqrts32Fx4: { //ZZ HReg res = newVRegV(env); //ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); //ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); //ZZ UInt size = 0; //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VRSQRTS, //ZZ res, argL, argR, size, True)); //ZZ return res; //ZZ } //ZZ //ZZ // These 6 verified 18 Apr 2013 //ZZ case Iop_InterleaveEvenLanes8x16: //ZZ case Iop_InterleaveOddLanes8x16: //ZZ 
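/* Note on Iop_Perm8x16 -> ARM64vecb_TBL1 above: TBL interprets each
   byte of the second operand as an index into the bytes of the first,
   giving res[i] = argL[argR[i]].  That matches the IR semantics of
   Perm8x16 for index bytes in 0..15, the only values the IR definition
   allows; TBL itself yields zero for out-of-range indices. */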
case Iop_InterleaveEvenLanes16x8: //ZZ case Iop_InterleaveOddLanes16x8: //ZZ case Iop_InterleaveEvenLanes32x4: //ZZ case Iop_InterleaveOddLanes32x4: { //ZZ HReg rD = newVRegV(env); //ZZ HReg rM = newVRegV(env); //ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); //ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); //ZZ UInt size; //ZZ Bool resRd; // is the result in rD or rM ? //ZZ switch (e->Iex.Binop.op) { //ZZ case Iop_InterleaveOddLanes8x16: resRd = False; size = 0; break; //ZZ case Iop_InterleaveEvenLanes8x16: resRd = True; size = 0; break; //ZZ case Iop_InterleaveOddLanes16x8: resRd = False; size = 1; break; //ZZ case Iop_InterleaveEvenLanes16x8: resRd = True; size = 1; break; //ZZ case Iop_InterleaveOddLanes32x4: resRd = False; size = 2; break; //ZZ case Iop_InterleaveEvenLanes32x4: resRd = True; size = 2; break; //ZZ default: vassert(0); //ZZ } //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rM, argL, 4, True)); //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rD, argR, 4, True)); //ZZ addInstr(env, ARMInstr_NDual(ARMneon_TRN, rD, rM, size, True)); //ZZ return resRd ? rD : rM; //ZZ } //ZZ //ZZ // These 6 verified 18 Apr 2013 //ZZ case Iop_InterleaveHI8x16: //ZZ case Iop_InterleaveLO8x16: //ZZ case Iop_InterleaveHI16x8: //ZZ case Iop_InterleaveLO16x8: //ZZ case Iop_InterleaveHI32x4: //ZZ case Iop_InterleaveLO32x4: { //ZZ HReg rD = newVRegV(env); //ZZ HReg rM = newVRegV(env); //ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); //ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); //ZZ UInt size; //ZZ Bool resRd; // is the result in rD or rM ? //ZZ switch (e->Iex.Binop.op) { //ZZ case Iop_InterleaveHI8x16: resRd = False; size = 0; break; //ZZ case Iop_InterleaveLO8x16: resRd = True; size = 0; break; //ZZ case Iop_InterleaveHI16x8: resRd = False; size = 1; break; //ZZ case Iop_InterleaveLO16x8: resRd = True; size = 1; break; //ZZ case Iop_InterleaveHI32x4: resRd = False; size = 2; break; //ZZ case Iop_InterleaveLO32x4: resRd = True; size = 2; break; //ZZ default: vassert(0); //ZZ } //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rM, argL, 4, True)); //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rD, argR, 4, True)); //ZZ addInstr(env, ARMInstr_NDual(ARMneon_ZIP, rD, rM, size, True)); //ZZ return resRd ? rD : rM; //ZZ } //ZZ //ZZ // These 6 verified 18 Apr 2013 //ZZ case Iop_CatOddLanes8x16: //ZZ case Iop_CatEvenLanes8x16: //ZZ case Iop_CatOddLanes16x8: //ZZ case Iop_CatEvenLanes16x8: //ZZ case Iop_CatOddLanes32x4: //ZZ case Iop_CatEvenLanes32x4: { //ZZ HReg rD = newVRegV(env); //ZZ HReg rM = newVRegV(env); //ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); //ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); //ZZ UInt size; //ZZ Bool resRd; // is the result in rD or rM ? //ZZ switch (e->Iex.Binop.op) { //ZZ case Iop_CatOddLanes8x16: resRd = False; size = 0; break; //ZZ case Iop_CatEvenLanes8x16: resRd = True; size = 0; break; //ZZ case Iop_CatOddLanes16x8: resRd = False; size = 1; break; //ZZ case Iop_CatEvenLanes16x8: resRd = True; size = 1; break; //ZZ case Iop_CatOddLanes32x4: resRd = False; size = 2; break; //ZZ case Iop_CatEvenLanes32x4: resRd = True; size = 2; break; //ZZ default: vassert(0); //ZZ } //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rM, argL, 4, True)); //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rD, argR, 4, True)); //ZZ addInstr(env, ARMInstr_NDual(ARMneon_UZP, rD, rM, size, True)); //ZZ return resRd ? 
rD : rM; //ZZ } //ZZ //ZZ case Iop_QAdd8Ux16: //ZZ case Iop_QAdd16Ux8: //ZZ case Iop_QAdd32Ux4: //ZZ case Iop_QAdd64Ux2: { //ZZ HReg res = newVRegV(env); //ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); //ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); //ZZ UInt size; //ZZ switch (e->Iex.Binop.op) { //ZZ case Iop_QAdd8Ux16: size = 0; break; //ZZ case Iop_QAdd16Ux8: size = 1; break; //ZZ case Iop_QAdd32Ux4: size = 2; break; //ZZ case Iop_QAdd64Ux2: size = 3; break; //ZZ default: //ZZ ppIROp(e->Iex.Binop.op); //ZZ vpanic("Illegal element size in VQADDU"); //ZZ } //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VQADDU, //ZZ res, argL, argR, size, True)); //ZZ return res; //ZZ } //ZZ case Iop_QAdd8Sx16: //ZZ case Iop_QAdd16Sx8: //ZZ case Iop_QAdd32Sx4: //ZZ case Iop_QAdd64Sx2: { //ZZ HReg res = newVRegV(env); //ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); //ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); //ZZ UInt size; //ZZ switch (e->Iex.Binop.op) { //ZZ case Iop_QAdd8Sx16: size = 0; break; //ZZ case Iop_QAdd16Sx8: size = 1; break; //ZZ case Iop_QAdd32Sx4: size = 2; break; //ZZ case Iop_QAdd64Sx2: size = 3; break; //ZZ default: //ZZ ppIROp(e->Iex.Binop.op); //ZZ vpanic("Illegal element size in VQADDS"); //ZZ } //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VQADDS, //ZZ res, argL, argR, size, True)); //ZZ return res; //ZZ } //ZZ case Iop_Sub8x16: //ZZ case Iop_Sub16x8: //ZZ case Iop_Sub32x4: //ZZ case Iop_Sub64x2: { //ZZ HReg res = newVRegV(env); //ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); //ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); //ZZ UInt size; //ZZ switch (e->Iex.Binop.op) { //ZZ case Iop_Sub8x16: size = 0; break; //ZZ case Iop_Sub16x8: size = 1; break; //ZZ case Iop_Sub32x4: size = 2; break; //ZZ case Iop_Sub64x2: size = 3; break; //ZZ default: //ZZ ppIROp(e->Iex.Binop.op); //ZZ vpanic("Illegal element size in VSUB"); //ZZ } //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VSUB, //ZZ res, argL, argR, size, True)); //ZZ return res; //ZZ } //ZZ case Iop_Sub32Fx4: { //ZZ HReg res = newVRegV(env); //ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); //ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); //ZZ UInt size = 0; //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VSUBFP, //ZZ res, argL, argR, size, True)); //ZZ return res; //ZZ } //ZZ case Iop_QSub8Ux16: //ZZ case Iop_QSub16Ux8: //ZZ case Iop_QSub32Ux4: //ZZ case Iop_QSub64Ux2: { //ZZ HReg res = newVRegV(env); //ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); //ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); //ZZ UInt size; //ZZ switch (e->Iex.Binop.op) { //ZZ case Iop_QSub8Ux16: size = 0; break; //ZZ case Iop_QSub16Ux8: size = 1; break; //ZZ case Iop_QSub32Ux4: size = 2; break; //ZZ case Iop_QSub64Ux2: size = 3; break; //ZZ default: //ZZ ppIROp(e->Iex.Binop.op); //ZZ vpanic("Illegal element size in VQSUBU"); //ZZ } //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VQSUBU, //ZZ res, argL, argR, size, True)); //ZZ return res; //ZZ } //ZZ case Iop_QSub8Sx16: //ZZ case Iop_QSub16Sx8: //ZZ case Iop_QSub32Sx4: //ZZ case Iop_QSub64Sx2: { //ZZ HReg res = newVRegV(env); //ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); //ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); //ZZ UInt size; //ZZ switch (e->Iex.Binop.op) { //ZZ case Iop_QSub8Sx16: size = 0; break; //ZZ case Iop_QSub16Sx8: size = 1; break; //ZZ case Iop_QSub32Sx4: size = 2; break; //ZZ case Iop_QSub64Sx2: size = 3; break; //ZZ default: //ZZ ppIROp(e->Iex.Binop.op); //ZZ vpanic("Illegal element size in VQSUBS"); //ZZ } //ZZ addInstr(env, 
ARMInstr_NBinary(ARMneon_VQSUBS, //ZZ res, argL, argR, size, True)); //ZZ return res; //ZZ } //ZZ case Iop_Max8Ux16: //ZZ case Iop_Max16Ux8: //ZZ case Iop_Max32Ux4: { //ZZ HReg res = newVRegV(env); //ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); //ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); //ZZ UInt size; //ZZ switch (e->Iex.Binop.op) { //ZZ case Iop_Max8Ux16: size = 0; break; //ZZ case Iop_Max16Ux8: size = 1; break; //ZZ case Iop_Max32Ux4: size = 2; break; //ZZ default: vpanic("Illegal element size in VMAXU"); //ZZ } //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMAXU, //ZZ res, argL, argR, size, True)); //ZZ return res; //ZZ } //ZZ case Iop_Max8Sx16: //ZZ case Iop_Max16Sx8: //ZZ case Iop_Max32Sx4: { //ZZ HReg res = newVRegV(env); //ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); //ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); //ZZ UInt size; //ZZ switch (e->Iex.Binop.op) { //ZZ case Iop_Max8Sx16: size = 0; break; //ZZ case Iop_Max16Sx8: size = 1; break; //ZZ case Iop_Max32Sx4: size = 2; break; //ZZ default: vpanic("Illegal element size in VMAXU"); //ZZ } //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMAXS, //ZZ res, argL, argR, size, True)); //ZZ return res; //ZZ } //ZZ case Iop_Min8Ux16: //ZZ case Iop_Min16Ux8: //ZZ case Iop_Min32Ux4: { //ZZ HReg res = newVRegV(env); //ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); //ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); //ZZ UInt size; //ZZ switch (e->Iex.Binop.op) { //ZZ case Iop_Min8Ux16: size = 0; break; //ZZ case Iop_Min16Ux8: size = 1; break; //ZZ case Iop_Min32Ux4: size = 2; break; //ZZ default: vpanic("Illegal element size in VMAXU"); //ZZ } //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMINU, //ZZ res, argL, argR, size, True)); //ZZ return res; //ZZ } //ZZ case Iop_Min8Sx16: //ZZ case Iop_Min16Sx8: //ZZ case Iop_Min32Sx4: { //ZZ HReg res = newVRegV(env); //ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); //ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); //ZZ UInt size; //ZZ switch (e->Iex.Binop.op) { //ZZ case Iop_Min8Sx16: size = 0; break; //ZZ case Iop_Min16Sx8: size = 1; break; //ZZ case Iop_Min32Sx4: size = 2; break; //ZZ default: vpanic("Illegal element size in VMAXU"); //ZZ } //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMINS, //ZZ res, argL, argR, size, True)); //ZZ return res; //ZZ } //ZZ case Iop_Sar8x16: //ZZ case Iop_Sar16x8: //ZZ case Iop_Sar32x4: //ZZ case Iop_Sar64x2: { //ZZ HReg res = newVRegV(env); //ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); //ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); //ZZ HReg argR2 = newVRegV(env); //ZZ HReg zero = newVRegV(env); //ZZ UInt size; //ZZ switch (e->Iex.Binop.op) { //ZZ case Iop_Sar8x16: size = 0; break; //ZZ case Iop_Sar16x8: size = 1; break; //ZZ case Iop_Sar32x4: size = 2; break; //ZZ case Iop_Sar64x2: size = 3; break; //ZZ default: vassert(0); //ZZ } //ZZ addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0,0))); //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VSUB, //ZZ argR2, zero, argR, size, True)); //ZZ addInstr(env, ARMInstr_NShift(ARMneon_VSAL, //ZZ res, argL, argR2, size, True)); //ZZ return res; //ZZ } //ZZ case Iop_Sal8x16: //ZZ case Iop_Sal16x8: //ZZ case Iop_Sal32x4: //ZZ case Iop_Sal64x2: { //ZZ HReg res = newVRegV(env); //ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); //ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); //ZZ UInt size; //ZZ switch (e->Iex.Binop.op) { //ZZ case Iop_Sal8x16: size = 0; break; //ZZ case Iop_Sal16x8: size = 1; break; //ZZ case Iop_Sal32x4: size = 2; break; //ZZ case Iop_Sal64x2: 
size = 3; break; //ZZ default: vassert(0); //ZZ } //ZZ addInstr(env, ARMInstr_NShift(ARMneon_VSAL, //ZZ res, argL, argR, size, True)); //ZZ return res; //ZZ } //ZZ case Iop_Shr8x16: //ZZ case Iop_Shr16x8: //ZZ case Iop_Shr32x4: //ZZ case Iop_Shr64x2: { //ZZ HReg res = newVRegV(env); //ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); //ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); //ZZ HReg argR2 = newVRegV(env); //ZZ HReg zero = newVRegV(env); //ZZ UInt size; //ZZ switch (e->Iex.Binop.op) { //ZZ case Iop_Shr8x16: size = 0; break; //ZZ case Iop_Shr16x8: size = 1; break; //ZZ case Iop_Shr32x4: size = 2; break; //ZZ case Iop_Shr64x2: size = 3; break; //ZZ default: vassert(0); //ZZ } //ZZ addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0,0))); //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VSUB, //ZZ argR2, zero, argR, size, True)); //ZZ addInstr(env, ARMInstr_NShift(ARMneon_VSHL, //ZZ res, argL, argR2, size, True)); //ZZ return res; //ZZ } //ZZ case Iop_Shl8x16: //ZZ case Iop_Shl16x8: //ZZ case Iop_Shl32x4: //ZZ case Iop_Shl64x2: { //ZZ HReg res = newVRegV(env); //ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); //ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); //ZZ UInt size; //ZZ switch (e->Iex.Binop.op) { //ZZ case Iop_Shl8x16: size = 0; break; //ZZ case Iop_Shl16x8: size = 1; break; //ZZ case Iop_Shl32x4: size = 2; break; //ZZ case Iop_Shl64x2: size = 3; break; //ZZ default: vassert(0); //ZZ } //ZZ addInstr(env, ARMInstr_NShift(ARMneon_VSHL, //ZZ res, argL, argR, size, True)); //ZZ return res; //ZZ } //ZZ case Iop_QShl8x16: //ZZ case Iop_QShl16x8: //ZZ case Iop_QShl32x4: //ZZ case Iop_QShl64x2: { //ZZ HReg res = newVRegV(env); //ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); //ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); //ZZ UInt size; //ZZ switch (e->Iex.Binop.op) { //ZZ case Iop_QShl8x16: size = 0; break; //ZZ case Iop_QShl16x8: size = 1; break; //ZZ case Iop_QShl32x4: size = 2; break; //ZZ case Iop_QShl64x2: size = 3; break; //ZZ default: vassert(0); //ZZ } //ZZ addInstr(env, ARMInstr_NShift(ARMneon_VQSHL, //ZZ res, argL, argR, size, True)); //ZZ return res; //ZZ } //ZZ case Iop_QSal8x16: //ZZ case Iop_QSal16x8: //ZZ case Iop_QSal32x4: //ZZ case Iop_QSal64x2: { //ZZ HReg res = newVRegV(env); //ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); //ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); //ZZ UInt size; //ZZ switch (e->Iex.Binop.op) { //ZZ case Iop_QSal8x16: size = 0; break; //ZZ case Iop_QSal16x8: size = 1; break; //ZZ case Iop_QSal32x4: size = 2; break; //ZZ case Iop_QSal64x2: size = 3; break; //ZZ default: vassert(0); //ZZ } //ZZ addInstr(env, ARMInstr_NShift(ARMneon_VQSAL, //ZZ res, argL, argR, size, True)); //ZZ return res; //ZZ } //ZZ case Iop_QShlN8x16: //ZZ case Iop_QShlN16x8: //ZZ case Iop_QShlN32x4: //ZZ case Iop_QShlN64x2: { //ZZ HReg res = newVRegV(env); //ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); //ZZ UInt size, imm; //ZZ if (e->Iex.Binop.arg2->tag != Iex_Const || //ZZ typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) { //ZZ vpanic("ARM taget supports Iop_QShlNAxB with constant " //ZZ "second argument only\n"); //ZZ } //ZZ imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8; //ZZ switch (e->Iex.Binop.op) { //ZZ case Iop_QShlN8x16: size = 8 | imm; break; //ZZ case Iop_QShlN16x8: size = 16 | imm; break; //ZZ case Iop_QShlN32x4: size = 32 | imm; break; //ZZ case Iop_QShlN64x2: size = 64 | imm; break; //ZZ default: vassert(0); //ZZ } //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNUU, //ZZ res, argL, size, True)); //ZZ 
return res; //ZZ } //ZZ case Iop_QShlN8Sx16: //ZZ case Iop_QShlN16Sx8: //ZZ case Iop_QShlN32Sx4: //ZZ case Iop_QShlN64Sx2: { //ZZ HReg res = newVRegV(env); //ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); //ZZ UInt size, imm; //ZZ if (e->Iex.Binop.arg2->tag != Iex_Const || //ZZ typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) { //ZZ vpanic("ARM target supports Iop_QShlNASxB with constant " //ZZ "second argument only\n"); //ZZ } //ZZ imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8; //ZZ switch (e->Iex.Binop.op) { //ZZ case Iop_QShlN8Sx16: size = 8 | imm; break; //ZZ case Iop_QShlN16Sx8: size = 16 | imm; break; //ZZ case Iop_QShlN32Sx4: size = 32 | imm; break; //ZZ case Iop_QShlN64Sx2: size = 64 | imm; break; //ZZ default: vassert(0); //ZZ } //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNUS, //ZZ res, argL, size, True)); //ZZ return res; //ZZ } //ZZ case Iop_QSalN8x16: //ZZ case Iop_QSalN16x8: //ZZ case Iop_QSalN32x4: //ZZ case Iop_QSalN64x2: { //ZZ HReg res = newVRegV(env); //ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); //ZZ UInt size, imm; //ZZ if (e->Iex.Binop.arg2->tag != Iex_Const || //ZZ typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) { //ZZ vpanic("ARM target supports Iop_QSalNAxB with constant " //ZZ "second argument only\n"); //ZZ } //ZZ imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8; //ZZ switch (e->Iex.Binop.op) { //ZZ case Iop_QSalN8x16: size = 8 | imm; break; //ZZ case Iop_QSalN16x8: size = 16 | imm; break; //ZZ case Iop_QSalN32x4: size = 32 | imm; break; //ZZ case Iop_QSalN64x2: size = 64 | imm; break; //ZZ default: vassert(0); //ZZ } //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNSS, //ZZ res, argL, size, True)); //ZZ return res; //ZZ } case Iop_ShrN64x2: case Iop_ShrN32x4: case Iop_ShrN16x8: case Iop_ShrN8x16: case Iop_SarN64x2: case Iop_SarN32x4: case Iop_SarN16x8: case Iop_SarN8x16: case Iop_ShlN64x2: case Iop_ShlN32x4: case Iop_ShlN16x8: case Iop_ShlN8x16: { IRExpr* argL = e->Iex.Binop.arg1; IRExpr* argR = e->Iex.Binop.arg2; if (argR->tag == Iex_Const && argR->Iex.Const.con->tag == Ico_U8) { UInt amt = argR->Iex.Const.con->Ico.U8; UInt limit = 0; ARM64VecShiftOp op = ARM64vecsh_INVALID; switch (e->Iex.Binop.op) { case Iop_ShrN64x2: op = ARM64vecsh_USHR64x2; limit = 63; break; case Iop_ShrN32x4: op = ARM64vecsh_USHR32x4; limit = 31; break; case Iop_ShrN16x8: op = ARM64vecsh_USHR16x8; limit = 15; break; case Iop_ShrN8x16: op = ARM64vecsh_USHR8x16; limit = 7; break; case Iop_SarN64x2: op = ARM64vecsh_SSHR64x2; limit = 63; break; case Iop_SarN32x4: op = ARM64vecsh_SSHR32x4; limit = 31; break; case Iop_SarN16x8: op = ARM64vecsh_SSHR16x8; limit = 15; break; case Iop_SarN8x16: op = ARM64vecsh_SSHR8x16; limit = 7; break; case Iop_ShlN64x2: op = ARM64vecsh_SHL64x2; limit = 63; break; case Iop_ShlN32x4: op = ARM64vecsh_SHL32x4; limit = 31; break; case Iop_ShlN16x8: op = ARM64vecsh_SHL16x8; limit = 15; break; case Iop_ShlN8x16: op = ARM64vecsh_SHL8x16; limit = 7; break; default: vassert(0); } if (op != ARM64vecsh_INVALID && amt <= limit) { HReg src = iselV128Expr(env, argL); HReg dst = newVRegV(env); if (amt > 0) { addInstr(env, ARM64Instr_VShiftImmV(op, dst, src, amt)); } else { dst = src; } return dst; } } /* else fall out; this is unhandled */ break; } //ZZ case Iop_CmpGT8Ux16: //ZZ case Iop_CmpGT16Ux8: //ZZ case Iop_CmpGT32Ux4: { //ZZ HReg res = newVRegV(env); //ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); //ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); //ZZ UInt size; //ZZ switch (e->Iex.Binop.op) { //ZZ case 
Iop_CmpGT8Ux16: size = 0; break; //ZZ case Iop_CmpGT16Ux8: size = 1; break; //ZZ case Iop_CmpGT32Ux4: size = 2; break; //ZZ default: vassert(0); //ZZ } //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGTU, //ZZ res, argL, argR, size, True)); //ZZ return res; //ZZ } //ZZ case Iop_CmpGT8Sx16: //ZZ case Iop_CmpGT16Sx8: //ZZ case Iop_CmpGT32Sx4: { //ZZ HReg res = newVRegV(env); //ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); //ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); //ZZ UInt size; //ZZ switch (e->Iex.Binop.op) { //ZZ case Iop_CmpGT8Sx16: size = 0; break; //ZZ case Iop_CmpGT16Sx8: size = 1; break; //ZZ case Iop_CmpGT32Sx4: size = 2; break; //ZZ default: vassert(0); //ZZ } //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGTS, //ZZ res, argL, argR, size, True)); //ZZ return res; //ZZ } //ZZ case Iop_CmpEQ8x16: //ZZ case Iop_CmpEQ16x8: //ZZ case Iop_CmpEQ32x4: { //ZZ HReg res = newVRegV(env); //ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); //ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); //ZZ UInt size; //ZZ switch (e->Iex.Binop.op) { //ZZ case Iop_CmpEQ8x16: size = 0; break; //ZZ case Iop_CmpEQ16x8: size = 1; break; //ZZ case Iop_CmpEQ32x4: size = 2; break; //ZZ default: vassert(0); //ZZ } //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCEQ, //ZZ res, argL, argR, size, True)); //ZZ return res; //ZZ } //ZZ case Iop_Mul8x16: //ZZ case Iop_Mul16x8: //ZZ case Iop_Mul32x4: { //ZZ HReg res = newVRegV(env); //ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); //ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); //ZZ UInt size = 0; //ZZ switch(e->Iex.Binop.op) { //ZZ case Iop_Mul8x16: size = 0; break; //ZZ case Iop_Mul16x8: size = 1; break; //ZZ case Iop_Mul32x4: size = 2; break; //ZZ default: vassert(0); //ZZ } //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMUL, //ZZ res, argL, argR, size, True)); //ZZ return res; //ZZ } //ZZ case Iop_Mul32Fx4: { //ZZ HReg res = newVRegV(env); //ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); //ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); //ZZ UInt size = 0; //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMULFP, //ZZ res, argL, argR, size, True)); //ZZ return res; //ZZ } case Iop_Mull8Ux8: case Iop_Mull16Ux4: case Iop_Mull32Ux2: { HReg res = newVRegV(env); HReg argL = iselDblExpr(env, e->Iex.Binop.arg1); HReg argR = iselDblExpr(env, e->Iex.Binop.arg2); UInt size = 0; ARM64VecBinOp op = ARM64vecb_INVALID; switch(e->Iex.Binop.op) { case Iop_Mull8Ux8: op = ARM64vecb_UMULL8x8; break; case Iop_Mull16Ux4: op = ARM64vecb_UMULL16x4; break; case Iop_Mull32Ux2: op = ARM64vecb_UMULL32x2; break; default: vassert(0); } addInstr(env, ARM64Instr_VBinV(op, res, argL, argR)); return res; } //ZZ //ZZ case Iop_Mull8Sx8: //ZZ case Iop_Mull16Sx4: //ZZ case Iop_Mull32Sx2: { //ZZ HReg res = newVRegV(env); //ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); //ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); //ZZ UInt size = 0; //ZZ switch(e->Iex.Binop.op) { //ZZ case Iop_Mull8Sx8: size = 0; break; //ZZ case Iop_Mull16Sx4: size = 1; break; //ZZ case Iop_Mull32Sx2: size = 2; break; //ZZ default: vassert(0); //ZZ } //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMULLS, //ZZ res, argL, argR, size, True)); //ZZ return res; //ZZ } //ZZ //ZZ case Iop_QDMulHi16Sx8: //ZZ case Iop_QDMulHi32Sx4: { //ZZ HReg res = newVRegV(env); //ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); //ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); //ZZ UInt size = 0; //ZZ switch(e->Iex.Binop.op) { //ZZ case Iop_QDMulHi16Sx8: size = 1; break; //ZZ case Iop_QDMulHi32Sx4: 
size = 2; break; //ZZ default: vassert(0); //ZZ } //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VQDMULH, //ZZ res, argL, argR, size, True)); //ZZ return res; //ZZ } //ZZ //ZZ case Iop_QRDMulHi16Sx8: //ZZ case Iop_QRDMulHi32Sx4: { //ZZ HReg res = newVRegV(env); //ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); //ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); //ZZ UInt size = 0; //ZZ switch(e->Iex.Binop.op) { //ZZ case Iop_QRDMulHi16Sx8: size = 1; break; //ZZ case Iop_QRDMulHi32Sx4: size = 2; break; //ZZ default: vassert(0); //ZZ } //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VQRDMULH, //ZZ res, argL, argR, size, True)); //ZZ return res; //ZZ } //ZZ //ZZ case Iop_QDMulLong16Sx4: //ZZ case Iop_QDMulLong32Sx2: { //ZZ HReg res = newVRegV(env); //ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); //ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2); //ZZ UInt size = 0; //ZZ switch(e->Iex.Binop.op) { //ZZ case Iop_QDMulLong16Sx4: size = 1; break; //ZZ case Iop_QDMulLong32Sx2: size = 2; break; //ZZ default: vassert(0); //ZZ } //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VQDMULL, //ZZ res, argL, argR, size, True)); //ZZ return res; //ZZ } //ZZ case Iop_PolynomialMul8x16: { //ZZ HReg res = newVRegV(env); //ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); //ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); //ZZ UInt size = 0; //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMULP, //ZZ res, argL, argR, size, True)); //ZZ return res; //ZZ } //ZZ case Iop_Max32Fx4: { //ZZ HReg res = newVRegV(env); //ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); //ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMAXF, //ZZ res, argL, argR, 2, True)); //ZZ return res; //ZZ } //ZZ case Iop_Min32Fx4: { //ZZ HReg res = newVRegV(env); //ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); //ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMINF, //ZZ res, argL, argR, 2, True)); //ZZ return res; //ZZ } //ZZ case Iop_PwMax32Fx4: { //ZZ HReg res = newVRegV(env); //ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); //ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VPMAXF, //ZZ res, argL, argR, 2, True)); //ZZ return res; //ZZ } //ZZ case Iop_PwMin32Fx4: { //ZZ HReg res = newVRegV(env); //ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); //ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VPMINF, //ZZ res, argL, argR, 2, True)); //ZZ return res; //ZZ } //ZZ case Iop_CmpGT32Fx4: { //ZZ HReg res = newVRegV(env); //ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); //ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGTF, //ZZ res, argL, argR, 2, True)); //ZZ return res; //ZZ } //ZZ case Iop_CmpGE32Fx4: { //ZZ HReg res = newVRegV(env); //ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); //ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGEF, //ZZ res, argL, argR, 2, True)); //ZZ return res; //ZZ } //ZZ case Iop_CmpEQ32Fx4: { //ZZ HReg res = newVRegV(env); //ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); //ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCEQF, //ZZ res, argL, argR, 2, True)); //ZZ return res; //ZZ } //ZZ //ZZ case Iop_PolynomialMull8x8: { //ZZ HReg res = newVRegV(env); //ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); //ZZ HReg argR = 
iselNeon64Expr(env, e->Iex.Binop.arg2); //ZZ UInt size = 0; //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMULLP, //ZZ res, argL, argR, size, True)); //ZZ return res; //ZZ } //ZZ case Iop_F32ToFixed32Ux4_RZ: //ZZ case Iop_F32ToFixed32Sx4_RZ: //ZZ case Iop_Fixed32UToF32x4_RN: //ZZ case Iop_Fixed32SToF32x4_RN: { //ZZ HReg res = newVRegV(env); //ZZ HReg arg = iselNeonExpr(env, e->Iex.Binop.arg1); //ZZ ARMNeonUnOp op; //ZZ UInt imm6; //ZZ if (e->Iex.Binop.arg2->tag != Iex_Const || //ZZ typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) { //ZZ vpanic("ARM supports FP <-> Fixed conversion with constant " //ZZ "second argument less than 33 only\n"); //ZZ } //ZZ imm6 = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8; //ZZ vassert(imm6 <= 32 && imm6 > 0); //ZZ imm6 = 64 - imm6; //ZZ switch(e->Iex.Binop.op) { //ZZ case Iop_F32ToFixed32Ux4_RZ: op = ARMneon_VCVTFtoFixedU; break; //ZZ case Iop_F32ToFixed32Sx4_RZ: op = ARMneon_VCVTFtoFixedS; break; //ZZ case Iop_Fixed32UToF32x4_RN: op = ARMneon_VCVTFixedUtoF; break; //ZZ case Iop_Fixed32SToF32x4_RN: op = ARMneon_VCVTFixedStoF; break; //ZZ default: vassert(0); //ZZ } //ZZ addInstr(env, ARMInstr_NUnary(op, res, arg, imm6, True)); //ZZ return res; //ZZ } //ZZ /* //ZZ FIXME remove if not used //ZZ case Iop_VDup8x16: //ZZ case Iop_VDup16x8: //ZZ case Iop_VDup32x4: { //ZZ HReg res = newVRegV(env); //ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); //ZZ UInt imm4; //ZZ UInt index; //ZZ if (e->Iex.Binop.arg2->tag != Iex_Const || //ZZ typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) { //ZZ vpanic("ARM supports Iop_VDup with constant " //ZZ "second argument less than 16 only\n"); //ZZ } //ZZ index = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8; //ZZ switch(e->Iex.Binop.op) { //ZZ case Iop_VDup8x16: imm4 = (index << 1) + 1; break; //ZZ case Iop_VDup16x8: imm4 = (index << 2) + 2; break; //ZZ case Iop_VDup32x4: imm4 = (index << 3) + 4; break; //ZZ default: vassert(0); //ZZ } //ZZ if (imm4 >= 16) { //ZZ vpanic("ARM supports Iop_VDup with constant " //ZZ "second argument less than 16 only\n"); //ZZ } //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VDUP, //ZZ res, argL, imm4, True)); //ZZ return res; //ZZ } //ZZ */ //ZZ case Iop_PwAdd8x16: //ZZ case Iop_PwAdd16x8: //ZZ case Iop_PwAdd32x4: { //ZZ HReg res = newVRegV(env); //ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1); //ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2); //ZZ UInt size = 0; //ZZ switch(e->Iex.Binop.op) { //ZZ case Iop_PwAdd8x16: size = 0; break; //ZZ case Iop_PwAdd16x8: size = 1; break; //ZZ case Iop_PwAdd32x4: size = 2; break; //ZZ default: vassert(0); //ZZ } //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VPADD, //ZZ res, argL, argR, size, True)); //ZZ return res; //ZZ } /* ... 
*/ default: break; } /* switch on the binop */ } /* if (e->tag == Iex_Binop) */ if (e->tag == Iex_Triop) { IRTriop* triop = e->Iex.Triop.details; ARM64VecBinOp vecbop = ARM64vecb_INVALID; switch (triop->op) { case Iop_Add64Fx2: vecbop = ARM64vecb_FADD64x2; break; case Iop_Sub64Fx2: vecbop = ARM64vecb_FSUB64x2; break; case Iop_Mul64Fx2: vecbop = ARM64vecb_FMUL64x2; break; case Iop_Div64Fx2: vecbop = ARM64vecb_FDIV64x2; break; case Iop_Add32Fx4: vecbop = ARM64vecb_FADD32x4; break; case Iop_Sub32Fx4: vecbop = ARM64vecb_FSUB32x4; break; case Iop_Mul32Fx4: vecbop = ARM64vecb_FMUL32x4; break; case Iop_Div32Fx4: vecbop = ARM64vecb_FDIV32x4; break; default: break; } if (vecbop != ARM64vecb_INVALID) { HReg argL = iselV128Expr(env, triop->arg2); HReg argR = iselV128Expr(env, triop->arg3); HReg dst = newVRegV(env); set_FPCR_rounding_mode(env, triop->arg1); addInstr(env, ARM64Instr_VBinV(vecbop, dst, argL, argR)); return dst; } //ZZ switch (triop->op) { //ZZ case Iop_ExtractV128: { //ZZ HReg res = newVRegV(env); //ZZ HReg argL = iselNeonExpr(env, triop->arg1); //ZZ HReg argR = iselNeonExpr(env, triop->arg2); //ZZ UInt imm4; //ZZ if (triop->arg3->tag != Iex_Const || //ZZ typeOfIRExpr(env->type_env, triop->arg3) != Ity_I8) { //ZZ vpanic("ARM target supports Iop_ExtractV128 with constant " //ZZ "third argument less than 16 only\n"); //ZZ } //ZZ imm4 = triop->arg3->Iex.Const.con->Ico.U8; //ZZ if (imm4 >= 16) { //ZZ vpanic("ARM target supports Iop_ExtractV128 with constant " //ZZ "third argument less than 16 only\n"); //ZZ } //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VEXT, //ZZ res, argL, argR, imm4, True)); //ZZ return res; //ZZ } //ZZ default: //ZZ break; //ZZ } } //ZZ if (e->tag == Iex_ITE) { // VFD //ZZ ARMCondCode cc; //ZZ HReg r1 = iselNeonExpr(env, e->Iex.ITE.iftrue); //ZZ HReg r0 = iselNeonExpr(env, e->Iex.ITE.iffalse); //ZZ HReg dst = newVRegV(env); //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPY, dst, r1, 4, True)); //ZZ cc = iselCondCode(env, e->Iex.ITE.cond); //ZZ addInstr(env, ARMInstr_NCMovQ(cc ^ 1, dst, r0)); //ZZ return dst; //ZZ } v128_expr_bad: ppIRExpr(e); vpanic("iselV128Expr_wrk"); } /*---------------------------------------------------------*/ /*--- ISEL: Floating point expressions (64 bit) ---*/ /*---------------------------------------------------------*/ /* Compute a 64-bit floating point value into a register, the identity of which is returned. As with iselIntExpr_R, the reg may be either real or virtual; in any case it must not be changed by subsequent code emitted by the caller. 
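To illustrate the contract, here is a minimal sketch of the usage pattern seen throughout this file ('e' stands for some Ity_F64-typed IRExpr; the particular instruction is just an example): HReg src = iselDblExpr(env, e); // vreg holding the F64 value -- HReg dst = newVRegD(env); // fresh vreg for the result -- addInstr(env, ARM64Instr_VUnaryD(ARM64fpu_NEG, dst, src)); Writing the result into a freshly allocated vreg, never into |src|, is what keeps the returned register unmodified as required. 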
*/ static HReg iselDblExpr ( ISelEnv* env, IRExpr* e ) { HReg r = iselDblExpr_wrk( env, e ); # if 0 vex_printf("\n"); ppIRExpr(e); vex_printf("\n"); # endif vassert(hregClass(r) == HRcFlt64); vassert(hregIsVirtual(r)); return r; } /* DO NOT CALL THIS DIRECTLY */ static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e ) { IRType ty = typeOfIRExpr(env->type_env,e); vassert(e); vassert(ty == Ity_F64 || ty == Ity_I64); if (e->tag == Iex_RdTmp) { return lookupIRTemp(env, e->Iex.RdTmp.tmp); } if (e->tag == Iex_Const) { IRConst* con = e->Iex.Const.con; if (con->tag == Ico_F64i) { HReg src = newVRegI(env); HReg dst = newVRegD(env); addInstr(env, ARM64Instr_Imm64(src, con->Ico.F64i)); addInstr(env, ARM64Instr_VDfromX(dst, src)); return dst; } } if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) { vassert(e->Iex.Load.ty == Ity_F64 || e->Iex.Load.ty == Ity_I64); HReg addr = iselIntExpr_R(env, e->Iex.Load.addr); HReg res = newVRegD(env); addInstr(env, ARM64Instr_VLdStD(True/*isLoad*/, res, addr, 0)); return res; } if (e->tag == Iex_Get) { Int offs = e->Iex.Get.offset; if (offs >= 0 && offs < 32768 && 0 == (offs & 7)) { HReg rD = newVRegD(env); HReg rN = get_baseblock_register(); addInstr(env, ARM64Instr_VLdStD(True/*isLoad*/, rD, rN, offs)); return rD; } } if (e->tag == Iex_Unop) { switch (e->Iex.Unop.op) { //ZZ case Iop_ReinterpI64asF64: { //ZZ if (env->hwcaps & VEX_HWCAPS_ARM_NEON) { //ZZ return iselNeon64Expr(env, e->Iex.Unop.arg); //ZZ } else { //ZZ HReg srcHi, srcLo; //ZZ HReg dst = newVRegD(env); //ZZ iselInt64Expr(&srcHi, &srcLo, env, e->Iex.Unop.arg); //ZZ addInstr(env, ARMInstr_VXferD(True/*toD*/, dst, srcHi, srcLo)); //ZZ return dst; //ZZ } //ZZ } case Iop_NegF64: { HReg src = iselDblExpr(env, e->Iex.Unop.arg); HReg dst = newVRegD(env); addInstr(env, ARM64Instr_VUnaryD(ARM64fpu_NEG, dst, src)); return dst; } case Iop_AbsF64: { HReg src = iselDblExpr(env, e->Iex.Unop.arg); HReg dst = newVRegD(env); addInstr(env, ARM64Instr_VUnaryD(ARM64fpu_ABS, dst, src)); return dst; } case Iop_F32toF64: { HReg src = iselFltExpr(env, e->Iex.Unop.arg); HReg dst = newVRegD(env); addInstr(env, ARM64Instr_VCvtSD(True/*sToD*/, dst, src)); return dst; } case Iop_I32UtoF64: case Iop_I32StoF64: { /* Rounding mode is not involved here, since the conversion can always be done without loss of precision. */ HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); HReg dst = newVRegD(env); Bool syned = e->Iex.Unop.op == Iop_I32StoF64; ARM64CvtOp cvt_op = syned ? ARM64cvt_F64_I32S : ARM64cvt_F64_I32U; addInstr(env, ARM64Instr_VCvtI2F(cvt_op, dst, src)); return dst; } default: break; } } if (e->tag == Iex_Binop) { switch (e->Iex.Binop.op) { case Iop_RoundF64toInt: { HReg src = iselDblExpr(env, e->Iex.Binop.arg2); HReg dst = newVRegD(env); set_FPCR_rounding_mode(env, e->Iex.Binop.arg1); addInstr(env, ARM64Instr_VUnaryD(ARM64fpu_RINT, dst, src)); return dst; } case Iop_SqrtF64: { HReg src = iselDblExpr(env, e->Iex.Binop.arg2); HReg dst = newVRegD(env); set_FPCR_rounding_mode(env, e->Iex.Binop.arg1); addInstr(env, ARM64Instr_VUnaryD(ARM64fpu_SQRT, dst, src)); return dst; } case Iop_I64StoF64: case Iop_I64UtoF64: { ARM64CvtOp cvt_op = e->Iex.Binop.op == Iop_I64StoF64 ? 
ARM64cvt_F64_I64S : ARM64cvt_F64_I64U; HReg srcI = iselIntExpr_R(env, e->Iex.Binop.arg2); set_FPCR_rounding_mode(env, e->Iex.Binop.arg1); HReg dstS = newVRegD(env); addInstr(env, ARM64Instr_VCvtI2F(cvt_op, dstS, srcI)); return dstS; } default: break; } } if (e->tag == Iex_Triop) { IRTriop* triop = e->Iex.Triop.details; ARM64FpBinOp dblop = ARM64fpb_INVALID; switch (triop->op) { case Iop_DivF64: dblop = ARM64fpb_DIV; break; case Iop_MulF64: dblop = ARM64fpb_MUL; break; case Iop_SubF64: dblop = ARM64fpb_SUB; break; case Iop_AddF64: dblop = ARM64fpb_ADD; break; default: break; } if (dblop != ARM64fpb_INVALID) { HReg argL = iselDblExpr(env, triop->arg2); HReg argR = iselDblExpr(env, triop->arg3); HReg dst = newVRegD(env); set_FPCR_rounding_mode(env, triop->arg1); addInstr(env, ARM64Instr_VBinD(dblop, dst, argL, argR)); return dst; } } //ZZ if (e->tag == Iex_ITE) { // VFD //ZZ if (ty == Ity_F64 //ZZ && typeOfIRExpr(env->type_env,e->Iex.ITE.cond) == Ity_I1) { //ZZ HReg r1 = iselDblExpr(env, e->Iex.ITE.iftrue); //ZZ HReg r0 = iselDblExpr(env, e->Iex.ITE.iffalse); //ZZ HReg dst = newVRegD(env); //ZZ addInstr(env, ARMInstr_VUnaryD(ARMvfpu_COPY, dst, r1)); //ZZ ARMCondCode cc = iselCondCode(env, e->Iex.ITE.cond); //ZZ addInstr(env, ARMInstr_VCMovD(cc ^ 1, dst, r0)); //ZZ return dst; //ZZ } //ZZ } ppIRExpr(e); vpanic("iselDblExpr_wrk"); } /*---------------------------------------------------------*/ /*--- ISEL: Floating point expressions (32 bit) ---*/ /*---------------------------------------------------------*/ /* Compute a 32-bit floating point value into a register, the identity of which is returned. As with iselIntExpr_R, the reg may be either real or virtual; in any case it must not be changed by subsequent code emitted by the caller. Values are generated into HRcFlt64 registers despite the values themselves being Ity_F32s. */ static HReg iselFltExpr ( ISelEnv* env, IRExpr* e ) { HReg r = iselFltExpr_wrk( env, e ); # if 0 vex_printf("\n"); ppIRExpr(e); vex_printf("\n"); # endif vassert(hregClass(r) == HRcFlt64); vassert(hregIsVirtual(r)); return r; } /* DO NOT CALL THIS DIRECTLY */ static HReg iselFltExpr_wrk ( ISelEnv* env, IRExpr* e ) { IRType ty = typeOfIRExpr(env->type_env,e); vassert(e); vassert(ty == Ity_F32); if (e->tag == Iex_RdTmp) { return lookupIRTemp(env, e->Iex.RdTmp.tmp); } if (e->tag == Iex_Const) { /* This is something of a kludge. Since a 32 bit floating point zero is just .. all zeroes, just create a 64 bit zero word and transfer it. This avoids having to create a SfromW instruction for this specific case. 
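In effect, the code below emits ARM64Instr_Imm64(src, 0) followed by ARM64Instr_VDfromX(dst, src): zero is materialised in an integer vreg and the 64 zero bits are then copied across to an FP/SIMD vreg. Since +0.0 is the all-zeroes bit pattern in both F32 and F64, the result is a correct 32 bit zero. 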
*/ IRConst* con = e->Iex.Const.con; if (con->tag == Ico_F32i && con->Ico.F32i == 0) { HReg src = newVRegI(env); HReg dst = newVRegD(env); addInstr(env, ARM64Instr_Imm64(src, 0)); addInstr(env, ARM64Instr_VDfromX(dst, src)); return dst; } } //ZZ if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) { //ZZ ARMAModeV* am; //ZZ HReg res = newVRegF(env); //ZZ vassert(e->Iex.Load.ty == Ity_F32); //ZZ am = iselIntExpr_AModeV(env, e->Iex.Load.addr); //ZZ addInstr(env, ARMInstr_VLdStS(True/*isLoad*/, res, am)); //ZZ return res; //ZZ } if (e->tag == Iex_Get) { Int offs = e->Iex.Get.offset; if (offs >= 0 && offs < 16384 && 0 == (offs & 3)) { HReg rD = newVRegD(env); HReg rN = get_baseblock_register(); addInstr(env, ARM64Instr_VLdStS(True/*isLoad*/, rD, rN, offs)); return rD; } } if (e->tag == Iex_Unop) { switch (e->Iex.Unop.op) { //ZZ case Iop_ReinterpI32asF32: { //ZZ HReg dst = newVRegF(env); //ZZ HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); //ZZ addInstr(env, ARMInstr_VXferS(True/*toS*/, dst, src)); //ZZ return dst; //ZZ } case Iop_NegF32: { HReg src = iselFltExpr(env, e->Iex.Unop.arg); HReg dst = newVRegD(env); addInstr(env, ARM64Instr_VUnaryS(ARM64fpu_NEG, dst, src)); return dst; } case Iop_AbsF32: { HReg src = iselFltExpr(env, e->Iex.Unop.arg); HReg dst = newVRegD(env); addInstr(env, ARM64Instr_VUnaryS(ARM64fpu_ABS, dst, src)); return dst; } default: break; } } if (e->tag == Iex_Binop) { switch (e->Iex.Binop.op) { case Iop_RoundF32toInt: { HReg src = iselFltExpr(env, e->Iex.Binop.arg2); HReg dst = newVRegD(env); set_FPCR_rounding_mode(env, e->Iex.Binop.arg1); addInstr(env, ARM64Instr_VUnaryS(ARM64fpu_RINT, dst, src)); return dst; } case Iop_SqrtF32: { HReg src = iselFltExpr(env, e->Iex.Binop.arg2); HReg dst = newVRegD(env); set_FPCR_rounding_mode(env, e->Iex.Binop.arg1); addInstr(env, ARM64Instr_VUnaryS(ARM64fpu_SQRT, dst, src)); return dst; } case Iop_F64toF32: { HReg srcD = iselDblExpr(env, e->Iex.Binop.arg2); set_FPCR_rounding_mode(env, e->Iex.Binop.arg1); HReg dstS = newVRegD(env); addInstr(env, ARM64Instr_VCvtSD(False/*dToS*/, dstS, srcD)); return dstS; } case Iop_I32UtoF32: case Iop_I32StoF32: case Iop_I64UtoF32: case Iop_I64StoF32: { ARM64CvtOp cvt_op = ARM64cvt_INVALID; switch (e->Iex.Binop.op) { case Iop_I32UtoF32: cvt_op = ARM64cvt_F32_I32U; break; case Iop_I32StoF32: cvt_op = ARM64cvt_F32_I32S; break; case Iop_I64UtoF32: cvt_op = ARM64cvt_F32_I64U; break; case Iop_I64StoF32: cvt_op = ARM64cvt_F32_I64S; break; default: vassert(0); } HReg srcI = iselIntExpr_R(env, e->Iex.Binop.arg2); set_FPCR_rounding_mode(env, e->Iex.Binop.arg1); HReg dstS = newVRegD(env); addInstr(env, ARM64Instr_VCvtI2F(cvt_op, dstS, srcI)); return dstS; } default: break; } } if (e->tag == Iex_Triop) { IRTriop* triop = e->Iex.Triop.details; ARM64FpBinOp sglop = ARM64fpb_INVALID; switch (triop->op) { case Iop_DivF32: sglop = ARM64fpb_DIV; break; case Iop_MulF32: sglop = ARM64fpb_MUL; break; case Iop_SubF32: sglop = ARM64fpb_SUB; break; case Iop_AddF32: sglop = ARM64fpb_ADD; break; default: break; } if (sglop != ARM64fpb_INVALID) { HReg argL = iselFltExpr(env, triop->arg2); HReg argR = iselFltExpr(env, triop->arg3); HReg dst = newVRegD(env); set_FPCR_rounding_mode(env, triop->arg1); addInstr(env, ARM64Instr_VBinS(sglop, dst, argL, argR)); return dst; } } //ZZ //ZZ if (e->tag == Iex_ITE) { // VFD //ZZ if (ty == Ity_F32 //ZZ && typeOfIRExpr(env->type_env,e->Iex.ITE.cond) == Ity_I1) { //ZZ ARMCondCode cc; //ZZ HReg r1 = iselFltExpr(env, e->Iex.ITE.iftrue); //ZZ HReg r0 = iselFltExpr(env, e->Iex.ITE.iffalse); //ZZ HReg 
dst = newVRegF(env); //ZZ addInstr(env, ARMInstr_VUnaryS(ARMvfpu_COPY, dst, r1)); //ZZ cc = iselCondCode(env, e->Iex.ITE.cond); //ZZ addInstr(env, ARMInstr_VCMovS(cc ^ 1, dst, r0)); //ZZ return dst; //ZZ } //ZZ } ppIRExpr(e); vpanic("iselFltExpr_wrk"); } /*---------------------------------------------------------*/ /*--- ISEL: Statements ---*/ /*---------------------------------------------------------*/ static void iselStmt ( ISelEnv* env, IRStmt* stmt ) { if (vex_traceflags & VEX_TRACE_VCODE) { vex_printf("\n-- "); ppIRStmt(stmt); vex_printf("\n"); } switch (stmt->tag) { /* --------- STORE --------- */ /* little-endian write to memory */ case Ist_Store: { IRType tya = typeOfIRExpr(env->type_env, stmt->Ist.Store.addr); IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.Store.data); IREndness end = stmt->Ist.Store.end; if (tya != Ity_I64 || end != Iend_LE) goto stmt_fail; if (tyd == Ity_I64) { HReg rD = iselIntExpr_R(env, stmt->Ist.Store.data); ARM64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr, tyd); addInstr(env, ARM64Instr_LdSt64(False/*!isLoad*/, rD, am)); return; } if (tyd == Ity_I32) { HReg rD = iselIntExpr_R(env, stmt->Ist.Store.data); ARM64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr, tyd); addInstr(env, ARM64Instr_LdSt32(False/*!isLoad*/, rD, am)); return; } if (tyd == Ity_I16) { HReg rD = iselIntExpr_R(env, stmt->Ist.Store.data); ARM64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr, tyd); addInstr(env, ARM64Instr_LdSt16(False/*!isLoad*/, rD, am)); return; } if (tyd == Ity_I8) { HReg rD = iselIntExpr_R(env, stmt->Ist.Store.data); ARM64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr, tyd); addInstr(env, ARM64Instr_LdSt8(False/*!isLoad*/, rD, am)); return; } if (tyd == Ity_V128) { HReg qD = iselV128Expr(env, stmt->Ist.Store.data); HReg addr = iselIntExpr_R(env, stmt->Ist.Store.addr); addInstr(env, ARM64Instr_VLdStQ(False/*!isLoad*/, qD, addr)); return; } if (tyd == Ity_F64) { HReg dD = iselDblExpr(env, stmt->Ist.Store.data); HReg addr = iselIntExpr_R(env, stmt->Ist.Store.addr); addInstr(env, ARM64Instr_VLdStD(False/*!isLoad*/, dD, addr, 0)); return; } if (tyd == Ity_F32) { HReg sD = iselFltExpr(env, stmt->Ist.Store.data); HReg addr = iselIntExpr_R(env, stmt->Ist.Store.addr); addInstr(env, ARM64Instr_VLdStS(False/*!isLoad*/, sD, addr, 0)); return; } //ZZ if (tyd == Ity_I16) { //ZZ HReg rD = iselIntExpr_R(env, stmt->Ist.Store.data); //ZZ ARMAMode2* am = iselIntExpr_AMode2(env, stmt->Ist.Store.addr); //ZZ addInstr(env, ARMInstr_LdSt16(ARMcc_AL, //ZZ False/*!isLoad*/, //ZZ False/*!isSignedLoad*/, rD, am)); //ZZ return; //ZZ } //ZZ if (tyd == Ity_I8) { //ZZ HReg rD = iselIntExpr_R(env, stmt->Ist.Store.data); //ZZ ARMAMode1* am = iselIntExpr_AMode1(env, stmt->Ist.Store.addr); //ZZ addInstr(env, ARMInstr_LdSt8U(ARMcc_AL, False/*!isLoad*/, rD, am)); //ZZ return; //ZZ } //ZZ if (tyd == Ity_I64) { //ZZ if (env->hwcaps & VEX_HWCAPS_ARM_NEON) { //ZZ HReg dD = iselNeon64Expr(env, stmt->Ist.Store.data); //ZZ ARMAModeN* am = iselIntExpr_AModeN(env, stmt->Ist.Store.addr); //ZZ addInstr(env, ARMInstr_NLdStD(False, dD, am)); //ZZ } else { //ZZ HReg rDhi, rDlo, rA; //ZZ iselInt64Expr(&rDhi, &rDlo, env, stmt->Ist.Store.data); //ZZ rA = iselIntExpr_R(env, stmt->Ist.Store.addr); //ZZ addInstr(env, ARMInstr_LdSt32(ARMcc_AL, False/*!load*/, rDhi, //ZZ ARMAMode1_RI(rA,4))); //ZZ addInstr(env, ARMInstr_LdSt32(ARMcc_AL, False/*!load*/, rDlo, //ZZ ARMAMode1_RI(rA,0))); //ZZ } //ZZ return; //ZZ } //ZZ if (tyd == Ity_F64) { //ZZ HReg dD = iselDblExpr(env, 
stmt->Ist.Store.data); //ZZ ARMAModeV* am = iselIntExpr_AModeV(env, stmt->Ist.Store.addr); //ZZ addInstr(env, ARMInstr_VLdStD(False/*!isLoad*/, dD, am)); //ZZ return; //ZZ } //ZZ if (tyd == Ity_F32) { //ZZ HReg fD = iselFltExpr(env, stmt->Ist.Store.data); //ZZ ARMAModeV* am = iselIntExpr_AModeV(env, stmt->Ist.Store.addr); //ZZ addInstr(env, ARMInstr_VLdStS(False/*!isLoad*/, fD, am)); //ZZ return; //ZZ } //ZZ if (tyd == Ity_V128) { //ZZ HReg qD = iselNeonExpr(env, stmt->Ist.Store.data); //ZZ ARMAModeN* am = iselIntExpr_AModeN(env, stmt->Ist.Store.addr); //ZZ addInstr(env, ARMInstr_NLdStQ(False, qD, am)); //ZZ return; //ZZ } break; } //ZZ /* --------- CONDITIONAL STORE --------- */ //ZZ /* conditional little-endian write to memory */ //ZZ case Ist_StoreG: { //ZZ IRStoreG* sg = stmt->Ist.StoreG.details; //ZZ IRType tya = typeOfIRExpr(env->type_env, sg->addr); //ZZ IRType tyd = typeOfIRExpr(env->type_env, sg->data); //ZZ IREndness end = sg->end; //ZZ //ZZ if (tya != Ity_I32 || end != Iend_LE) //ZZ goto stmt_fail; //ZZ //ZZ switch (tyd) { //ZZ case Ity_I8: //ZZ case Ity_I32: { //ZZ HReg rD = iselIntExpr_R(env, sg->data); //ZZ ARMAMode1* am = iselIntExpr_AMode1(env, sg->addr); //ZZ ARMCondCode cc = iselCondCode(env, sg->guard); //ZZ addInstr(env, (tyd == Ity_I32 ? ARMInstr_LdSt32 : ARMInstr_LdSt8U) //ZZ (cc, False/*!isLoad*/, rD, am)); //ZZ return; //ZZ } //ZZ case Ity_I16: { //ZZ HReg rD = iselIntExpr_R(env, sg->data); //ZZ ARMAMode2* am = iselIntExpr_AMode2(env, sg->addr); //ZZ ARMCondCode cc = iselCondCode(env, sg->guard); //ZZ addInstr(env, ARMInstr_LdSt16(cc, //ZZ False/*!isLoad*/, //ZZ False/*!isSignedLoad*/, rD, am)); //ZZ return; //ZZ } //ZZ default: //ZZ break; //ZZ } //ZZ break; //ZZ } //ZZ //ZZ /* --------- CONDITIONAL LOAD --------- */ //ZZ /* conditional little-endian load from memory */ //ZZ case Ist_LoadG: { //ZZ IRLoadG* lg = stmt->Ist.LoadG.details; //ZZ IRType tya = typeOfIRExpr(env->type_env, lg->addr); //ZZ IREndness end = lg->end; //ZZ //ZZ if (tya != Ity_I32 || end != Iend_LE) //ZZ goto stmt_fail; //ZZ //ZZ switch (lg->cvt) { //ZZ case ILGop_8Uto32: //ZZ case ILGop_Ident32: { //ZZ HReg rAlt = iselIntExpr_R(env, lg->alt); //ZZ ARMAMode1* am = iselIntExpr_AMode1(env, lg->addr); //ZZ HReg rD = lookupIRTemp(env, lg->dst); //ZZ addInstr(env, mk_iMOVds_RR(rD, rAlt)); //ZZ ARMCondCode cc = iselCondCode(env, lg->guard); //ZZ addInstr(env, (lg->cvt == ILGop_Ident32 ? 
ARMInstr_LdSt32 //ZZ : ARMInstr_LdSt8U) //ZZ (cc, True/*isLoad*/, rD, am)); //ZZ return; //ZZ } //ZZ case ILGop_16Sto32: //ZZ case ILGop_16Uto32: //ZZ case ILGop_8Sto32: { //ZZ HReg rAlt = iselIntExpr_R(env, lg->alt); //ZZ ARMAMode2* am = iselIntExpr_AMode2(env, lg->addr); //ZZ HReg rD = lookupIRTemp(env, lg->dst); //ZZ addInstr(env, mk_iMOVds_RR(rD, rAlt)); //ZZ ARMCondCode cc = iselCondCode(env, lg->guard); //ZZ if (lg->cvt == ILGop_8Sto32) { //ZZ addInstr(env, ARMInstr_Ld8S(cc, rD, am)); //ZZ } else { //ZZ vassert(lg->cvt == ILGop_16Sto32 || lg->cvt == ILGop_16Uto32); //ZZ Bool sx = lg->cvt == ILGop_16Sto32; //ZZ addInstr(env, ARMInstr_LdSt16(cc, True/*isLoad*/, sx, rD, am)); //ZZ } //ZZ return; //ZZ } //ZZ default: //ZZ break; //ZZ } //ZZ break; //ZZ } /* --------- PUT --------- */ /* write guest state, fixed offset */ case Ist_Put: { IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.Put.data); UInt offs = (UInt)stmt->Ist.Put.offset; if (tyd == Ity_I64 && 0 == (offs & 7) && offs < (8<<12)) { HReg rD = iselIntExpr_R(env, stmt->Ist.Put.data); ARM64AMode* am = mk_baseblock_64bit_access_amode(offs); addInstr(env, ARM64Instr_LdSt64(False/*!isLoad*/, rD, am)); return; } if (tyd == Ity_I32 && 0 == (offs & 3) && offs < (4<<12)) { HReg rD = iselIntExpr_R(env, stmt->Ist.Put.data); ARM64AMode* am = mk_baseblock_32bit_access_amode(offs); addInstr(env, ARM64Instr_LdSt32(False/*!isLoad*/, rD, am)); return; } if (tyd == Ity_I16 && 0 == (offs & 1) && offs < (2<<12)) { HReg rD = iselIntExpr_R(env, stmt->Ist.Put.data); ARM64AMode* am = mk_baseblock_16bit_access_amode(offs); addInstr(env, ARM64Instr_LdSt16(False/*!isLoad*/, rD, am)); return; } if (tyd == Ity_I8 && offs < (1<<12)) { HReg rD = iselIntExpr_R(env, stmt->Ist.Put.data); ARM64AMode* am = mk_baseblock_8bit_access_amode(offs); addInstr(env, ARM64Instr_LdSt8(False/*!isLoad*/, rD, am)); return; } if (tyd == Ity_V128 && offs < (1<<12)) { HReg qD = iselV128Expr(env, stmt->Ist.Put.data); HReg addr = mk_baseblock_128bit_access_addr(env, offs); addInstr(env, ARM64Instr_VLdStQ(False/*!isLoad*/, qD, addr)); return; } if (tyd == Ity_F64 && 0 == (offs & 7) && offs < (8<<12)) { HReg dD = iselDblExpr(env, stmt->Ist.Put.data); HReg bbp = get_baseblock_register(); addInstr(env, ARM64Instr_VLdStD(False/*!isLoad*/, dD, bbp, offs)); return; } if (tyd == Ity_F32 && 0 == (offs & 3) && offs < (4<<12)) { HReg dD = iselFltExpr(env, stmt->Ist.Put.data); HReg bbp = get_baseblock_register(); addInstr(env, ARM64Instr_VLdStS(False/*!isLoad*/, dD, bbp, offs)); return; } //ZZ if (tyd == Ity_I64) { //ZZ if (env->hwcaps & VEX_HWCAPS_ARM_NEON) { //ZZ HReg addr = newVRegI(env); //ZZ HReg qD = iselNeon64Expr(env, stmt->Ist.Put.data); //ZZ addInstr(env, ARMInstr_Add32(addr, hregARM_R8(), //ZZ stmt->Ist.Put.offset)); //ZZ addInstr(env, ARMInstr_NLdStD(False, qD, mkARMAModeN_R(addr))); //ZZ } else { //ZZ HReg rDhi, rDlo; //ZZ ARMAMode1* am0 = ARMAMode1_RI(hregARM_R8(), //ZZ stmt->Ist.Put.offset + 0); //ZZ ARMAMode1* am4 = ARMAMode1_RI(hregARM_R8(), //ZZ stmt->Ist.Put.offset + 4); //ZZ iselInt64Expr(&rDhi, &rDlo, env, stmt->Ist.Put.data); //ZZ addInstr(env, ARMInstr_LdSt32(ARMcc_AL, False/*!isLoad*/, //ZZ rDhi, am4)); //ZZ addInstr(env, ARMInstr_LdSt32(ARMcc_AL, False/*!isLoad*/, //ZZ rDlo, am0)); //ZZ } //ZZ return; //ZZ } //ZZ if (tyd == Ity_F64) { //ZZ // XXX This won't work if offset > 1020 or is not 0 % 4. //ZZ // In which case we'll have to generate more longwinded code. 
//ZZ ARMAModeV* am = mkARMAModeV(hregARM_R8(), stmt->Ist.Put.offset); //ZZ HReg rD = iselDblExpr(env, stmt->Ist.Put.data); //ZZ addInstr(env, ARMInstr_VLdStD(False/*!isLoad*/, rD, am)); //ZZ return; //ZZ } //ZZ if (tyd == Ity_F32) { //ZZ // XXX This won't work if offset > 1020 or is not 0 % 4. //ZZ // In which case we'll have to generate more longwinded code. //ZZ ARMAModeV* am = mkARMAModeV(hregARM_R8(), stmt->Ist.Put.offset); //ZZ HReg rD = iselFltExpr(env, stmt->Ist.Put.data); //ZZ addInstr(env, ARMInstr_VLdStS(False/*!isLoad*/, rD, am)); //ZZ return; //ZZ } break; } /* --------- TMP --------- */ /* assign value to temporary */ case Ist_WrTmp: { IRTemp tmp = stmt->Ist.WrTmp.tmp; IRType ty = typeOfIRTemp(env->type_env, tmp); if (ty == Ity_I64 || ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8) { /* We could do a lot better here. But for the time being: */ HReg dst = lookupIRTemp(env, tmp); HReg rD = iselIntExpr_R(env, stmt->Ist.WrTmp.data); addInstr(env, ARM64Instr_MovI(dst, rD)); return; } if (ty == Ity_I1) { /* Here, we are generating an I1 value into a 64 bit register. Make sure the value in the register is only zero or one, and nothing else. This allows optimisation of the 1Uto64(tmp:I1) case, by making it simply a copy of the register holding 'tmp'. The point being that the value in the register holding 'tmp' can only have been created here. LATER: that seems dangerous; safer to do 'tmp & 1' in that case. Also, could do this just with a single CINC insn. */ /* CLONE-01 */ HReg zero = newVRegI(env); HReg one = newVRegI(env); HReg dst = lookupIRTemp(env, tmp); addInstr(env, ARM64Instr_Imm64(zero, 0)); addInstr(env, ARM64Instr_Imm64(one, 1)); ARM64CondCode cc = iselCondCode(env, stmt->Ist.WrTmp.data); addInstr(env, ARM64Instr_CSel(dst, one, zero, cc)); return; } if (ty == Ity_F64) { HReg src = iselDblExpr(env, stmt->Ist.WrTmp.data); HReg dst = lookupIRTemp(env, tmp); addInstr(env, ARM64Instr_VMov(8, dst, src)); return; } if (ty == Ity_F32) { HReg src = iselFltExpr(env, stmt->Ist.WrTmp.data); HReg dst = lookupIRTemp(env, tmp); addInstr(env, ARM64Instr_VMov(8/*yes, really*/, dst, src)); return; } if (ty == Ity_V128) { HReg src = iselV128Expr(env, stmt->Ist.WrTmp.data); HReg dst = lookupIRTemp(env, tmp); addInstr(env, ARM64Instr_VMov(16, dst, src)); return; } break; } /* --------- Call to DIRTY helper --------- */ /* call complex ("dirty") helper function */ case Ist_Dirty: { IRDirty* d = stmt->Ist.Dirty.details; /* Figure out the return type, if any. */ IRType retty = Ity_INVALID; if (d->tmp != IRTemp_INVALID) retty = typeOfIRTemp(env->type_env, d->tmp); Bool retty_ok = False; switch (retty) { case Ity_INVALID: /* function doesn't return anything */ case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8: case Ity_V128: retty_ok = True; break; default: break; } if (!retty_ok) break; /* will go to stmt_fail: */ /* Marshal args, do the call, and set the return value to 0x555..555 if this is a conditional call that returns a value and the call is skipped. */ UInt addToSp = 0; RetLoc rloc = mk_RetLoc_INVALID(); doHelperCall( &addToSp, &rloc, env, d->guard, d->cee, retty, d->args ); vassert(is_sane_RetLoc(rloc)); /* Now figure out what to do with the returned value, if any. */ switch (retty) { case Ity_INVALID: { /* No return value. Nothing to do. */ vassert(d->tmp == IRTemp_INVALID); vassert(rloc.pri == RLPri_None); vassert(addToSp == 0); return; } case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8: { vassert(rloc.pri == RLPri_Int); vassert(addToSp == 0); /* The returned value is in x0. 
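(x0 being the integer result register specified by the AAPCS64 procedure call standard.) 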
Park it in the register associated with tmp. */ HReg dst = lookupIRTemp(env, d->tmp); addInstr(env, ARM64Instr_MovI(dst, hregARM64_X0()) ); return; } case Ity_V128: { /* The returned value is on the stack, and *retloc tells us where. Fish it off the stack and then move the stack pointer upwards to clear it, as directed by doHelperCall. */ vassert(rloc.pri == RLPri_V128SpRel); vassert(rloc.spOff < 256); // stay sane vassert(addToSp >= 16); // ditto vassert(addToSp < 256); // ditto HReg dst = lookupIRTemp(env, d->tmp); HReg tmp = newVRegI(env); // the address of the returned value addInstr(env, ARM64Instr_FromSP(tmp)); // tmp = SP addInstr(env, ARM64Instr_Arith(tmp, tmp, ARM64RIA_I12((UShort)rloc.spOff, 0), True/*isAdd*/ )); addInstr(env, ARM64Instr_VLdStQ(True/*isLoad*/, dst, tmp)); addInstr(env, ARM64Instr_AddToSP(addToSp)); return; } default: /*NOTREACHED*/ vassert(0); } break; } /* --------- Load Linked and Store Conditional --------- */ case Ist_LLSC: { if (stmt->Ist.LLSC.storedata == NULL) { /* LL */ IRTemp res = stmt->Ist.LLSC.result; IRType ty = typeOfIRTemp(env->type_env, res); if (ty == Ity_I64 || ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8) { Int szB = 0; HReg r_dst = lookupIRTemp(env, res); HReg raddr = iselIntExpr_R(env, stmt->Ist.LLSC.addr); switch (ty) { case Ity_I8: szB = 1; break; case Ity_I16: szB = 2; break; case Ity_I32: szB = 4; break; case Ity_I64: szB = 8; break; default: vassert(0); } addInstr(env, ARM64Instr_MovI(hregARM64_X4(), raddr)); addInstr(env, ARM64Instr_LdrEX(szB)); addInstr(env, ARM64Instr_MovI(r_dst, hregARM64_X2())); return; } goto stmt_fail; } else { /* SC */ IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.LLSC.storedata); if (tyd == Ity_I64 || tyd == Ity_I32 || tyd == Ity_I16 || tyd == Ity_I8) { Int szB = 0; HReg rD = iselIntExpr_R(env, stmt->Ist.LLSC.storedata); HReg rA = iselIntExpr_R(env, stmt->Ist.LLSC.addr); switch (tyd) { case Ity_I8: szB = 1; break; case Ity_I16: szB = 2; break; case Ity_I32: szB = 4; break; case Ity_I64: szB = 8; break; default: vassert(0); } addInstr(env, ARM64Instr_MovI(hregARM64_X2(), rD)); addInstr(env, ARM64Instr_MovI(hregARM64_X4(), rA)); addInstr(env, ARM64Instr_StrEX(szB)); } else { goto stmt_fail; } /* now x0 is 1 if failed, 0 if success. Change to IR conventions (0 is fail, 1 is success). Also transfer result to r_res. */ IRTemp res = stmt->Ist.LLSC.result; IRType ty = typeOfIRTemp(env->type_env, res); HReg r_res = lookupIRTemp(env, res); ARM64RIL* one = mb_mkARM64RIL_I(1); vassert(ty == Ity_I1); vassert(one); addInstr(env, ARM64Instr_Logic(r_res, hregARM64_X0(), one, ARM64lo_XOR)); /* And be conservative -- mask off all but the lowest bit. */ addInstr(env, ARM64Instr_Logic(r_res, r_res, one, ARM64lo_AND)); return; } break; } /* --------- MEM FENCE --------- */ case Ist_MBE: switch (stmt->Ist.MBE.event) { case Imbe_Fence: addInstr(env, ARM64Instr_MFence()); return; //ZZ case Imbe_CancelReservation: //ZZ addInstr(env, ARMInstr_CLREX()); //ZZ return; default: break; } break; /* --------- INSTR MARK --------- */ /* Doesn't generate any executable code ... 
*/ case Ist_IMark: return; /* --------- NO-OP --------- */ case Ist_NoOp: return; /* --------- EXIT --------- */ case Ist_Exit: { if (stmt->Ist.Exit.dst->tag != Ico_U64) vpanic("isel_arm: Ist_Exit: dst is not a 64-bit value"); ARM64CondCode cc = iselCondCode(env, stmt->Ist.Exit.guard); ARM64AMode* amPC = mk_baseblock_64bit_access_amode(stmt->Ist.Exit.offsIP); /* Case: boring transfer to known address */ if (stmt->Ist.Exit.jk == Ijk_Boring /*ATC || stmt->Ist.Exit.jk == Ijk_Call */ /*ATC || stmt->Ist.Exit.jk == Ijk_Ret */ ) { if (env->chainingAllowed) { /* .. almost always true .. */ /* Skip the event check at the dst if this is a forwards edge. */ Bool toFastEP = ((Addr64)stmt->Ist.Exit.dst->Ico.U64) > env->max_ga; if (0) vex_printf("%s", toFastEP ? "Y" : ","); addInstr(env, ARM64Instr_XDirect(stmt->Ist.Exit.dst->Ico.U64, amPC, cc, toFastEP)); } else { /* .. very occasionally .. */ /* We can't use chaining, so ask for an assisted transfer, as that's the only alternative that is allowable. */ HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst)); addInstr(env, ARM64Instr_XAssisted(r, amPC, cc, Ijk_Boring)); } return; } //ZZ /* Case: assisted transfer to arbitrary address */ //ZZ switch (stmt->Ist.Exit.jk) { //ZZ /* Keep this list in sync with that in iselNext below */ //ZZ case Ijk_ClientReq: //ZZ case Ijk_NoDecode: //ZZ case Ijk_NoRedir: //ZZ case Ijk_Sys_syscall: //ZZ case Ijk_InvalICache: //ZZ case Ijk_Yield: //ZZ { //ZZ HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst)); //ZZ addInstr(env, ARMInstr_XAssisted(r, amR15T, cc, //ZZ stmt->Ist.Exit.jk)); //ZZ return; //ZZ } //ZZ default: //ZZ break; //ZZ } /* Do we ever expect to see any other kind? */ goto stmt_fail; } default: break; } stmt_fail: ppIRStmt(stmt); vpanic("iselStmt"); } /*---------------------------------------------------------*/ /*--- ISEL: Basic block terminators (Nexts) ---*/ /*---------------------------------------------------------*/ static void iselNext ( ISelEnv* env, IRExpr* next, IRJumpKind jk, Int offsIP ) { if (vex_traceflags & VEX_TRACE_VCODE) { vex_printf( "\n-- PUT(%d) = ", offsIP); ppIRExpr( next ); vex_printf( "; exit-"); ppIRJumpKind(jk); vex_printf( "\n"); } /* Case: boring transfer to known address */ if (next->tag == Iex_Const) { IRConst* cdst = next->Iex.Const.con; vassert(cdst->tag == Ico_U64); if (jk == Ijk_Boring || jk == Ijk_Call) { /* Boring transfer to known address */ ARM64AMode* amPC = mk_baseblock_64bit_access_amode(offsIP); if (env->chainingAllowed) { /* .. almost always true .. */ /* Skip the event check at the dst if this is a forwards edge. */ Bool toFastEP = ((Addr64)cdst->Ico.U64) > env->max_ga; if (0) vex_printf("%s", toFastEP ? "X" : "."); addInstr(env, ARM64Instr_XDirect(cdst->Ico.U64, amPC, ARM64cc_AL, toFastEP)); } else { /* .. very occasionally .. */ /* We can't use chaining, so ask for an assisted transfer, as that's the only alternative that is allowable. 
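(A sketch of the trade-off: the ARM64Instr_XDirect emitted in the chaining branch above can later be patched to jump straight to the translated destination, whereas ARM64Instr_XAssisted always materialises the target into a register and returns control to the dispatcher, passing Ijk_Boring as the reason -- slower, but needing no later patching.) 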
*/ HReg r = iselIntExpr_R(env, next); addInstr(env, ARM64Instr_XAssisted(r, amPC, ARM64cc_AL, Ijk_Boring)); } return; } } /* Case: call/return (==boring) transfer to any address */ switch (jk) { case Ijk_Boring: case Ijk_Ret: case Ijk_Call: { HReg r = iselIntExpr_R(env, next); ARM64AMode* amPC = mk_baseblock_64bit_access_amode(offsIP); if (env->chainingAllowed) { addInstr(env, ARM64Instr_XIndir(r, amPC, ARM64cc_AL)); } else { addInstr(env, ARM64Instr_XAssisted(r, amPC, ARM64cc_AL, Ijk_Boring)); } return; } default: break; } /* Case: assisted transfer to arbitrary address */ switch (jk) { /* Keep this list in sync with that for Ist_Exit above */ case Ijk_ClientReq: case Ijk_NoDecode: case Ijk_NoRedir: case Ijk_Sys_syscall: case Ijk_InvalICache: case Ijk_FlushDCache: //ZZ case Ijk_Yield: { HReg r = iselIntExpr_R(env, next); ARM64AMode* amPC = mk_baseblock_64bit_access_amode(offsIP); addInstr(env, ARM64Instr_XAssisted(r, amPC, ARM64cc_AL, jk)); return; } default: break; } vex_printf( "\n-- PUT(%d) = ", offsIP); ppIRExpr( next ); vex_printf( "; exit-"); ppIRJumpKind(jk); vex_printf( "\n"); vassert(0); // are we expecting any other kind? } /*---------------------------------------------------------*/ /*--- Insn selector top-level ---*/ /*---------------------------------------------------------*/ /* Translate an entire SB to arm64 code. */ HInstrArray* iselSB_ARM64 ( IRSB* bb, VexArch arch_host, VexArchInfo* archinfo_host, VexAbiInfo* vbi/*UNUSED*/, Int offs_Host_EvC_Counter, Int offs_Host_EvC_FailAddr, Bool chainingAllowed, Bool addProfInc, Addr64 max_ga ) { Int i, j; HReg hreg, hregHI; ISelEnv* env; UInt hwcaps_host = archinfo_host->hwcaps; ARM64AMode *amCounter, *amFailAddr; /* sanity ... */ vassert(arch_host == VexArchARM64); /* guard against unexpected space regressions */ vassert(sizeof(ARM64Instr) <= 32); /* Make up an initial environment to use. */ env = LibVEX_Alloc(sizeof(ISelEnv)); env->vreg_ctr = 0; /* Set up output code array. */ env->code = newHInstrArray(); /* Copy BB's type env. */ env->type_env = bb->tyenv; /* Make up an IRTemp -> virtual HReg mapping. This doesn't change as we go along. */ env->n_vregmap = bb->tyenv->types_used; env->vregmap = LibVEX_Alloc(env->n_vregmap * sizeof(HReg)); env->vregmapHI = LibVEX_Alloc(env->n_vregmap * sizeof(HReg)); /* and finally ... */ env->chainingAllowed = chainingAllowed; env->hwcaps = hwcaps_host; env->previous_rm = NULL; env->max_ga = max_ga; /* For each IR temporary, allocate a suitably-kinded virtual register. */ j = 0; for (i = 0; i < env->n_vregmap; i++) { hregHI = hreg = INVALID_HREG; switch (bb->tyenv->types[i]) { case Ity_I1: case Ity_I8: case Ity_I16: case Ity_I32: case Ity_I64: hreg = mkHReg(j++, HRcInt64, True); break; case Ity_I128: hreg = mkHReg(j++, HRcInt64, True); hregHI = mkHReg(j++, HRcInt64, True); break; case Ity_F32: // we'll use HRcFlt64 regs for F32 too case Ity_F64: hreg = mkHReg(j++, HRcFlt64, True); break; case Ity_V128: hreg = mkHReg(j++, HRcVec128, True); break; default: ppIRType(bb->tyenv->types[i]); vpanic("iselBB(arm64): IRTemp type"); } env->vregmap[i] = hreg; env->vregmapHI[i] = hregHI; } env->vreg_ctr = j; /* The very first instruction must be an event check. */ amCounter = ARM64AMode_RI9(hregARM64_X21(), offs_Host_EvC_Counter); amFailAddr = ARM64AMode_RI9(hregARM64_X21(), offs_Host_EvC_FailAddr); addInstr(env, ARM64Instr_EvCheck(amCounter, amFailAddr)); /* Possibly a block counter increment (for profiling). At this point we don't know the address of the counter, so just pretend it is zero. 
It will have to be patched later, but before this translation is used, by a call to LibVEX_patchProfCtr. */ if (addProfInc) { vassert(0); //addInstr(env, ARM64Instr_ProfInc()); } /* Ok, finally we can iterate over the statements. */ for (i = 0; i < bb->stmts_used; i++) iselStmt(env, bb->stmts[i]); iselNext(env, bb->next, bb->jumpkind, bb->offsIP); /* record the number of vregs we used. */ env->code->n_vregs = env->vreg_ctr; return env->code; } /*---------------------------------------------------------------*/ /*--- end host_arm64_isel.c ---*/ /*---------------------------------------------------------------*/