//===- subzero/src/IceTargetLoweringARM32.h - ARM32 lowering ----*- C++ -*-===//
//
//                        The Subzero Code Generator
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// \brief Declares the TargetLoweringARM32 class, which implements the
/// TargetLowering interface for the ARM 32-bit architecture.
///
//===----------------------------------------------------------------------===//

#ifndef SUBZERO_SRC_ICETARGETLOWERINGARM32_H
#define SUBZERO_SRC_ICETARGETLOWERINGARM32_H

#include "IceAssemblerARM32.h"
#include "IceDefs.h"
#include "IceInstARM32.h"
#include "IceRegistersARM32.h"
#include "IceTargetLowering.h"

#include <utility>

namespace Ice {
namespace ARM32 {

// Class encapsulating ARM cpu features / instruction set.
class TargetARM32Features {
  TargetARM32Features() = delete;
  TargetARM32Features(const TargetARM32Features &) = delete;
  TargetARM32Features &operator=(const TargetARM32Features &) = delete;

public:
  explicit TargetARM32Features(const ClFlags &Flags);

  enum ARM32InstructionSet {
    Begin,
    // Neon is the PNaCl baseline instruction set.
    Neon = Begin,
    HWDivArm, // HW divide in ARM mode (not just Thumb mode).
    End
  };

  bool hasFeature(ARM32InstructionSet I) const { return I <= InstructionSet; }
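  // Note that hasFeature() relies on the ordering of ARM32InstructionSet: for
  // example, if InstructionSet == HWDivArm, then hasFeature(Neon) and
  // hasFeature(HWDivArm) both return true, i.e., every feature at or below
  // the configured level is reported as available.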

private:
  ARM32InstructionSet InstructionSet = ARM32InstructionSet::Begin;
};

// The target lowering logic for ARM32.
class TargetARM32 : public TargetLowering {
  TargetARM32() = delete;
  TargetARM32(const TargetARM32 &) = delete;
  TargetARM32 &operator=(const TargetARM32 &) = delete;

public:
  static void staticInit(GlobalContext *Ctx);

  static bool shouldBePooled(const Constant *C) {
    if (auto *ConstDouble = llvm::dyn_cast<ConstantDouble>(C)) {
      return !Utils::isPositiveZero(ConstDouble->getValue());
    }
    if (llvm::isa<ConstantFloat>(C))
      return true;
    return false;
  }

  static ::Ice::Type getPointerType() { return ::Ice::IceType_i32; }

  // TODO(jvoung): return a unique_ptr.
  static std::unique_ptr<::Ice::TargetLowering> create(Cfg *Func) {
    return makeUnique<TargetARM32>(Func);
  }

  std::unique_ptr<::Ice::Assembler> createAssembler() const override {
    const bool IsNonsfi = SandboxingType == ST_Nonsfi;
    return makeUnique<ARM32::AssemblerARM32>(IsNonsfi);
  }

  void initNodeForLowering(CfgNode *Node) override {
    Computations.forgetProducers();
    Computations.recordProducers(Node);
    Computations.dump(Func);
  }

  void translateOm1() override;
  void translateO2() override;
  bool doBranchOpt(Inst *I, const CfgNode *NextNode) override;

  SizeT getNumRegisters() const override { return RegARM32::Reg_NUM; }
  Variable *getPhysicalRegister(RegNumT RegNum,
                                Type Ty = IceType_void) override;
  const char *getRegName(RegNumT RegNum, Type Ty) const override;
  SmallBitVector getRegisterSet(RegSetMask Include,
                                RegSetMask Exclude) const override;
  const SmallBitVector &
  getRegistersForVariable(const Variable *Var) const override {
    RegClass RC = Var->getRegClass();
    switch (RC) {
    default:
      assert(RC < RC_Target);
      return TypeToRegisterSet[RC];
    case RegARM32::RCARM32_QtoS:
      return TypeToRegisterSet[RC];
    }
  }
  const SmallBitVector &
  getAllRegistersForVariable(const Variable *Var) const override {
    RegClass RC = Var->getRegClass();
    assert((RegARM32::RegClassARM32)RC < RegARM32::RCARM32_NUM);
    return TypeToRegisterSetUnfiltered[RC];
  }
  const SmallBitVector &getAliasesForRegister(RegNumT Reg) const override {
    return RegisterAliases[Reg];
  }
  bool hasFramePointer() const override { return UsesFramePointer; }
  void setHasFramePointer() override { UsesFramePointer = true; }
  RegNumT getStackReg() const override { return RegARM32::Reg_sp; }
  RegNumT getFrameReg() const override { return RegARM32::Reg_fp; }
  RegNumT getFrameOrStackReg() const override {
    return UsesFramePointer ? getFrameReg() : getStackReg();
  }
  RegNumT getReservedTmpReg() const { return RegARM32::Reg_ip; }

  size_t typeWidthInBytesOnStack(Type Ty) const override {
    // Round up to the next multiple of 4 bytes. In particular, i1, i8, and i16
    // are rounded up to 4 bytes.
    return (typeWidthInBytes(Ty) + 3) & ~3;
  }
  uint32_t getStackAlignment() const override;
  void reserveFixedAllocaArea(size_t Size, size_t Align) override {
    FixedAllocaSizeBytes = Size;
    assert(llvm::isPowerOf2_32(Align));
    FixedAllocaAlignBytes = Align;
    PrologEmitsFixedAllocas = true;
  }
  int32_t getFrameFixedAllocaOffset() const override {
    return FixedAllocaSizeBytes - (SpillAreaSizeBytes - MaxOutArgsSizeBytes);
  }
  uint32_t maxOutArgsSizeBytes() const override { return MaxOutArgsSizeBytes; }

  bool shouldSplitToVariable64On32(Type Ty) const override {
    return Ty == IceType_i64;
  }

  // TODO(ascull): what size is best for ARM?
  SizeT getMinJumpTableSize() const override { return 3; }
  void emitJumpTable(const Cfg *Func,
                     const InstJumpTable *JumpTable) const override;

  void emitVariable(const Variable *Var) const override;

  void emit(const ConstantUndef *C) const final;
  void emit(const ConstantInteger32 *C) const final;
  void emit(const ConstantInteger64 *C) const final;
  void emit(const ConstantFloat *C) const final;
  void emit(const ConstantDouble *C) const final;
  void emit(const ConstantRelocatable *C) const final;

  void lowerArguments() override;
  void addProlog(CfgNode *Node) override;
  void addEpilog(CfgNode *Node) override;

  Operand *loOperand(Operand *Operand);
  Operand *hiOperand(Operand *Operand);
  void finishArgumentLowering(Variable *Arg, Variable *FramePtr,
                              size_t BasicFrameOffset, size_t *InArgsSizeBytes);

  bool hasCPUFeature(TargetARM32Features::ARM32InstructionSet I) const {
    return CPUFeatures.hasFeature(I);
  }

  enum OperandLegalization {
    Legal_Reg = 1 << 0,  /// physical register, not stack location
    Legal_Flex = 1 << 1, /// A flexible operand2, which can hold rotated small
                         /// immediates, shifted registers, or modified fp imm.
    Legal_Mem = 1 << 2,  /// includes [r0, r1 lsl #2] as well as [sp, #12]
    Legal_Rematerializable = 1 << 3,
    Legal_Default = ~Legal_Rematerializable,
  };

  using LegalMask = uint32_t;
  Operand *legalizeUndef(Operand *From, RegNumT RegNum = RegNumT());
  Operand *legalize(Operand *From, LegalMask Allowed = Legal_Default,
                    RegNumT RegNum = RegNumT());
  Variable *legalizeToReg(Operand *From, RegNumT RegNum = RegNumT());
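
  // Illustrative (hypothetical) use of the legalization helpers during
  // lowering: force Src0 into a register, and allow Src1 to be either a
  // register or a flexible operand2, before emitting an add:
  //
  //   Variable *Src0R = legalizeToReg(Instr->getSrc(0));
  //   Operand *Src1RF = legalize(Instr->getSrc(1), Legal_Reg | Legal_Flex);
  //   _add(Dest, Src0R, Src1RF);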

  OperandARM32ShAmtImm *shAmtImm(uint32_t ShAmtImm) const {
    assert(ShAmtImm < 32);
    return OperandARM32ShAmtImm::create(
        Func,
        llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(ShAmtImm & 0x1F)));
  }

  GlobalContext *getCtx() const { return Ctx; }

protected:
  explicit TargetARM32(Cfg *Func);

  void postLower() override;

  enum SafeBoolChain {
    SBC_No,
    SBC_Yes,
  };

  void lowerAlloca(const InstAlloca *Instr) override;
  SafeBoolChain lowerInt1Arithmetic(const InstArithmetic *Instr);
  void lowerInt64Arithmetic(InstArithmetic::OpKind Op, Variable *Dest,
                            Operand *Src0, Operand *Src1);
  void lowerArithmetic(const InstArithmetic *Instr) override;
  void lowerAssign(const InstAssign *Instr) override;
  void lowerBr(const InstBr *Instr) override;
  void lowerCall(const InstCall *Instr) override;
  void lowerCast(const InstCast *Instr) override;
  void lowerExtractElement(const InstExtractElement *Instr) override;

  /// CondWhenTrue is a helper type returned by every method in the lowering
  /// that emits code to set the condition codes.
  class CondWhenTrue {
  public:
    explicit CondWhenTrue(CondARM32::Cond T0,
                          CondARM32::Cond T1 = CondARM32::kNone)
        : WhenTrue0(T0), WhenTrue1(T1) {
      assert(T1 == CondARM32::kNone || T0 != CondARM32::kNone);
      assert(T1 != T0 || T0 == CondARM32::kNone);
    }
    CondARM32::Cond WhenTrue0;
    CondARM32::Cond WhenTrue1;

    /// invert returns a new object with WhenTrue0 and WhenTrue1 inverted.
    CondWhenTrue invert() const {
      switch (WhenTrue0) {
      default:
        if (WhenTrue1 == CondARM32::kNone)
          return CondWhenTrue(InstARM32::getOppositeCondition(WhenTrue0));
        return CondWhenTrue(InstARM32::getOppositeCondition(WhenTrue0),
                            InstARM32::getOppositeCondition(WhenTrue1));
      case CondARM32::AL:
        return CondWhenTrue(CondARM32::kNone);
      case CondARM32::kNone:
        return CondWhenTrue(CondARM32::AL);
      }
    }
  };

  CondWhenTrue lowerFcmpCond(const InstFcmp *Instr);
  void lowerFcmp(const InstFcmp *Instr) override;
  CondWhenTrue lowerInt8AndInt16IcmpCond(InstIcmp::ICond Condition,
                                         Operand *Src0, Operand *Src1);
  CondWhenTrue lowerInt32IcmpCond(InstIcmp::ICond Condition, Operand *Src0,
                                  Operand *Src1);
  CondWhenTrue lowerInt64IcmpCond(InstIcmp::ICond Condition, Operand *Src0,
                                  Operand *Src1);
  CondWhenTrue lowerIcmpCond(InstIcmp::ICond Condition, Operand *Src0,
                             Operand *Src1);
  CondWhenTrue lowerIcmpCond(const InstIcmp *Instr);
  void lowerIcmp(const InstIcmp *Instr) override;
  /// Emits the basic sequence for load-linked/store-exclusive loops:
  ///
  /// retry:
  ///        ldrex tmp, [Addr]
  ///        StoreValue = Operation(tmp)
  ///        strexCond success, StoreValue, [Addr]
  ///        cmpCond success, #0
  ///        bne retry
  ///
  /// Operation needs to return the value to be strex'd back into Addr; it
  /// must not change the flags if Cond is not AL, and must not emit any
  /// instructions that could end up writing to memory. Operation also needs
  /// to handle fake-defing when handling i64 values.
  void
  lowerLoadLinkedStoreExclusive(Type Ty, Operand *Addr,
                                std::function<Variable *(Variable *)> Operation,
                                CondARM32::Cond Cond = CondARM32::AL);
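
  // Illustrative (hypothetical) use of lowerLoadLinkedStoreExclusive: an
  // atomic fetch-and-add on a 32-bit location could pass an Operation
  // callback that adds Val to the loaded temporary and returns the value to
  // be store-exclusive'd back:
  //
  //   lowerLoadLinkedStoreExclusive(IceType_i32, Addr,
  //                                 [this, Val](Variable *Tmp) {
  //                                   Variable *Result = makeReg(IceType_i32);
  //                                   _add(Result, Tmp, Val);
  //                                   return Result;
  //                                 });
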
  void lowerInt64AtomicRMW(Variable *Dest, uint32_t Operation, Operand *Ptr,
                           Operand *Val);
  void lowerAtomicRMW(Variable *Dest, uint32_t Operation, Operand *Ptr,
                      Operand *Val);
  void lowerBreakpoint(const InstBreakpoint *Instr) override;
  void lowerIntrinsicCall(const InstIntrinsicCall *Instr) override;
  void lowerInsertElement(const InstInsertElement *Instr) override;
  void lowerLoad(const InstLoad *Instr) override;
  void lowerPhi(const InstPhi *Instr) override;
  void lowerRet(const InstRet *Instr) override;
  void lowerSelect(const InstSelect *Instr) override;
  void lowerShuffleVector(const InstShuffleVector *Instr) override;
  void lowerStore(const InstStore *Instr) override;
  void lowerSwitch(const InstSwitch *Instr) override;
  void lowerUnreachable(const InstUnreachable *Instr) override;
  void prelowerPhis() override;
  uint32_t getCallStackArgumentsSizeBytes(const InstCall *Instr) override;
  void genTargetHelperCallFor(Inst *Instr) override;
  void doAddressOptLoad() override;
  void doAddressOptStore() override;
  void randomlyInsertNop(float Probability,
                         RandomNumberGenerator &RNG) override;

  OperandARM32Mem *formMemoryOperand(Operand *Ptr, Type Ty);

  Variable64On32 *makeI64RegPair();
  Variable *makeReg(Type Ty, RegNumT RegNum = RegNumT());
  static Type stackSlotType();
  Variable *copyToReg(Operand *Src, RegNumT RegNum = RegNumT());
  void alignRegisterPow2(Variable *Reg, uint32_t Align,
                         RegNumT TmpRegNum = RegNumT());

  /// Returns a vector in a register with the given constant entries.
  Variable *makeVectorOfZeros(Type Ty, RegNumT RegNum = RegNumT());

  void
  makeRandomRegisterPermutation(llvm::SmallVectorImpl<RegNumT> &Permutation,
                                const SmallBitVector &ExcludeRegisters,
                                uint64_t Salt) const override;

  // If a divide-by-zero check is needed, inserts a: test; branch .LSKIP;
  // trap; .LSKIP: <continuation>. If no check is needed nothing is inserted.
  void div0Check(Type Ty, Operand *SrcLo, Operand *SrcHi);
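
  // A plausible shape for the sequence inserted by div0Check() (a sketch, not
  // necessarily the exact emitted code; for i64 both SrcLo and SrcHi must be
  // covered by the test):
  //
  //       tst SrcLo, SrcLo
  //       bne .LSKIP
  //       .trap
  //   .LSKIP:
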
  using ExtInstr = void (TargetARM32::*)(Variable *, Variable *,
                                         CondARM32::Cond);
  using DivInstr = void (TargetARM32::*)(Variable *, Variable *, Variable *,
                                         CondARM32::Cond);
  void lowerIDivRem(Variable *Dest, Variable *T, Variable *Src0R,
                    Operand *Src1, ExtInstr ExtFunc, DivInstr DivFunc,
                    bool IsRemainder);

  void lowerCLZ(Variable *Dest, Variable *ValLo, Variable *ValHi);

  // The following are helpers that insert lowered ARM32 instructions with
  // minimal syntactic overhead, so that the lowering code can look as close
  // to assembly as practical.
  void _add(Variable *Dest, Variable *Src0, Operand *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Add>(Dest, Src0, Src1, Pred);
  }
  void _adds(Variable *Dest, Variable *Src0, Operand *Src1,
             CondARM32::Cond Pred = CondARM32::AL) {
    constexpr bool SetFlags = true;
    Context.insert<InstARM32Add>(Dest, Src0, Src1, Pred, SetFlags);
    if (SetFlags) {
      Context.insert<InstFakeUse>(Dest);
    }
  }
  void _adc(Variable *Dest, Variable *Src0, Operand *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Adc>(Dest, Src0, Src1, Pred);
  }
  void _and(Variable *Dest, Variable *Src0, Operand *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32And>(Dest, Src0, Src1, Pred);
  }
  void _asr(Variable *Dest, Variable *Src0, Operand *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Asr>(Dest, Src0, Src1, Pred);
  }
  void _bic(Variable *Dest, Variable *Src0, Operand *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Bic>(Dest, Src0, Src1, Pred);
  }
  void _br(CfgNode *TargetTrue, CfgNode *TargetFalse,
           CondARM32::Cond Condition) {
    Context.insert<InstARM32Br>(TargetTrue, TargetFalse, Condition);
  }
  void _br(CfgNode *Target) { Context.insert<InstARM32Br>(Target); }
  void _br(CfgNode *Target, CondARM32::Cond Condition) {
    Context.insert<InstARM32Br>(Target, Condition);
  }
  void _br(InstARM32Label *Label, CondARM32::Cond Condition) {
    Context.insert<InstARM32Br>(Label, Condition);
  }
  void _cmn(Variable *Src0, Operand *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Cmn>(Src0, Src1, Pred);
  }
  void _cmp(Variable *Src0, Operand *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Cmp>(Src0, Src1, Pred);
  }
  void _clz(Variable *Dest, Variable *Src0,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Clz>(Dest, Src0, Pred);
  }
  void _dmb() { Context.insert<InstARM32Dmb>(); }
  void _eor(Variable *Dest, Variable *Src0, Operand *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Eor>(Dest, Src0, Src1, Pred);
  }
  /// _ldr, for all your memory to Variable data moves. It handles all types
  /// (integer, floating point, and vectors.) Addr needs to be valid for
  /// Dest's type (e.g., no immediates for vector loads, and no index
  /// registers for fp loads.)
  void _ldr(Variable *Dest, OperandARM32Mem *Addr,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Ldr>(Dest, Addr, Pred);
  }
  InstARM32Ldrex *_ldrex(Variable *Dest, OperandARM32Mem *Addr,
                         CondARM32::Cond Pred = CondARM32::AL) {
    auto *Ldrex = Context.insert<InstARM32Ldrex>(Dest, Addr, Pred);
    if (auto *Dest64 = llvm::dyn_cast<Variable64On32>(Dest)) {
      Context.insert<InstFakeDef>(Dest64->getLo(), Dest);
      Context.insert<InstFakeDef>(Dest64->getHi(), Dest);
    }
    return Ldrex;
  }
  void _lsl(Variable *Dest, Variable *Src0, Operand *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Lsl>(Dest, Src0, Src1, Pred);
  }
  void _lsls(Variable *Dest, Variable *Src0, Operand *Src1,
             CondARM32::Cond Pred = CondARM32::AL) {
    constexpr bool SetFlags = true;
    Context.insert<InstARM32Lsl>(Dest, Src0, Src1, Pred, SetFlags);
    if (SetFlags) {
      Context.insert<InstFakeUse>(Dest);
    }
  }
  void _lsr(Variable *Dest, Variable *Src0, Operand *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Lsr>(Dest, Src0, Src1, Pred);
  }
  void _mla(Variable *Dest, Variable *Src0, Variable *Src1, Variable *Acc,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Mla>(Dest, Src0, Src1, Acc, Pred);
  }
  void _mls(Variable *Dest, Variable *Src0, Variable *Src1, Variable *Acc,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Mls>(Dest, Src0, Src1, Acc, Pred);
  }
  /// _mov, for all your Variable to Variable data movement needs. It handles
  /// all types (integer, floating point, and vectors), as well as moves
  /// between Core and VFP registers. This is not a panacea: you must obey the
  /// (weird, confusing, non-uniform) rules for data moves in ARM.
  void _mov(Variable *Dest, Operand *Src0,
            CondARM32::Cond Pred = CondARM32::AL) {
    // _mov used to be unique in the sense that it would create a temporary
    // automagically if Dest was nullptr. It won't do that anymore, so we keep
    // an assert around just in case there is some untested code path where
    // Dest is nullptr.
    assert(Dest != nullptr);
    assert(!llvm::isa<OperandARM32Mem>(Src0));
    auto *Instr = Context.insert<InstARM32Mov>(Dest, Src0, Pred);

    if (Instr->isMultiDest()) {
      // If Instr is multi-dest, then Dest must be a Variable64On32. We add a
      // fake-def for Instr.DestHi here.
      assert(llvm::isa<Variable64On32>(Dest));
      Context.insert<InstFakeDef>(Instr->getDestHi());
    }
  }

  void _mov_redefined(Variable *Dest, Operand *Src0,
                      CondARM32::Cond Pred = CondARM32::AL) {
    auto *Instr = Context.insert<InstARM32Mov>(Dest, Src0, Pred);
    Instr->setDestRedefined();
    if (Instr->isMultiDest()) {
      // If Instr is multi-dest, then Dest must be a Variable64On32. We add a
      // fake-def for Instr.DestHi here.
      assert(llvm::isa<Variable64On32>(Dest));
      Context.insert<InstFakeDef>(Instr->getDestHi());
    }
  }

  void _nop() { Context.insert<InstARM32Nop>(); }

  // Generates a vmov instruction to extract the given index from a vector
  // register.
  void _extractelement(Variable *Dest, Variable *Src0, uint32_t Index,
                       CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Extract>(Dest, Src0, Index, Pred);
  }

  // Generates a vmov instruction to insert a value into the given index of a
  // vector register.
  void _insertelement(Variable *Dest, Variable *Src0, uint32_t Index,
                      CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Insert>(Dest, Src0, Index, Pred);
  }

  // --------------------------------------------------------------------------
  // Begin bool folding machinery.
  //
  // There are three types of boolean lowerings handled by this target:
  //
  // 1) Boolean expressions leading to a boolean Variable definition
  // ---------------------------------------------------------------
  //
  // Whenever an i1 Variable is live out (i.e., its live range extends beyond
  // the defining basic block) we do not fold the operation. We instead
  // materialize (i.e., compute) the variable normally, so that it can be used
  // when needed. We also materialize i1 values that are not single use to
  // avoid code duplication. These expressions are not short circuited.
  //
  // 2) Boolean expressions leading to a select
  // ------------------------------------------
  //
  // These include boolean chains leading to a select instruction, as well as
  // i1 Sexts. These boolean expressions are lowered to:
  //
  //   mov T, <false value>
  //   CC <- eval(Boolean Expression)
  //   movCC T, <true value>
  //
  // For Sexts, <false value> is 0, and <true value> is -1.
  //
  // 3) Boolean expressions leading to a br i1
  // -----------------------------------------
  //
  // These are the boolean chains leading to a branch. These chains are
  // short-circuited, i.e.:
  //
  //   A = or i1 B, C
  //   br i1 A, label %T, label %F
  //
  // becomes
  //
  //   tst B
  //   jne %T
  //   tst C
  //   jne %T
  //   j %F
  //
  // and
  //
  //   A = and i1 B, C
  //   br i1 A, label %T, label %F
  //
  // becomes
  //
  //   tst B
  //   jeq %F
  //   tst C
  //   jeq %F
  //   j %T
  //
  // Arbitrarily long chains are short circuited, e.g.
  //
  //   A = or i1 B, C
  //   D = and i1 A, E
  //   F = and i1 G, H
  //   I = or i1 D, F
  //   br i1 I, label %True, label %False
  //
  // becomes
  //
  // Label[A]:
  //   tst B, 1
  //   bne Label[D]
  //   tst C, 1
  //   beq Label[I]
  // Label[D]:
  //   tst E, 1
  //   bne %True
  // Label[I]:
  //   tst G, 1
  //   beq %False
  //   tst H, 1
  //   beq %False (bne %True)

  /// lowerInt1 materializes Boolean to a Variable.
  SafeBoolChain lowerInt1(Variable *Dest, Operand *Boolean);

  /// lowerInt1ForSelect generates the following instruction sequence:
  ///
  ///   mov T, FalseValue
  ///   CC <- eval(Boolean)
  ///   movCC T, TrueValue
  ///   mov Dest, T
  ///
  /// It is used for lowering select i1, as well as i1 Sext.
  void lowerInt1ForSelect(Variable *Dest, Operand *Boolean, Operand *TrueValue,
                          Operand *FalseValue);

  /// LowerInt1BranchTarget is used by lowerInt1ForBranch. It wraps a CfgNode,
  /// or an InstARM32Label (but never both) so that, during br i1 lowering, we
  /// can create auxiliary labels for short circuiting the condition
  /// evaluation.
  class LowerInt1BranchTarget {
  public:
    explicit LowerInt1BranchTarget(CfgNode *const Target)
        : NodeTarget(Target) {}
    explicit LowerInt1BranchTarget(InstARM32Label *const Target)
        : LabelTarget(Target) {}

    /// createForLabelOrDuplicate will return a new LowerInt1BranchTarget that
    /// is the exact copy of this if Label is nullptr; otherwise, the returned
    /// object will wrap Label instead.
    LowerInt1BranchTarget
    createForLabelOrDuplicate(InstARM32Label *Label) const {
      if (Label != nullptr)
        return LowerInt1BranchTarget(Label);
      if (NodeTarget)
        return LowerInt1BranchTarget(NodeTarget);
      return LowerInt1BranchTarget(LabelTarget);
    }

    CfgNode *const NodeTarget = nullptr;
    InstARM32Label *const LabelTarget = nullptr;
  };

  /// LowerInt1AllowShortCircuit is a helper type used by lowerInt1ForBranch
  /// for determining which type of arithmetic is allowed to be short
  /// circuited. This is useful for lowering
  ///
  ///   t1 = and i1 A, B
  ///   t2 = and i1 t1, C
  ///   br i1 t2, label %False, label %True
  ///
  /// to
  ///
  ///   tst A, 1
  ///   beq %False
  ///   tst B, 1
  ///   beq %False
  ///   tst C, 1
  ///   bne %True
  ///   b %False
  ///
  /// Without this information, short circuiting would only be able to short
  /// circuit a single high level instruction. For example:
  ///
  ///   t1 = or i1 A, B
  ///   t2 = and i1 t1, C
  ///   br i1 t2, label %False, label %True
  ///
  /// cannot be lowered to
  ///
  ///   tst A, 1
  ///   bne %True
  ///   tst B, 1
  ///   bne %True
  ///   tst C, 1
  ///   beq %True
  ///   b %False
  ///
  /// It needs to be lowered to
  ///
  ///   tst A, 1
  ///   bne Aux
  ///   tst B, 1
  ///   beq %False
  /// Aux:
  ///   tst C, 1
  ///   bne %True
  ///   b %False
  ///
  /// TODO(jpp): evaluate if this kind of short circuiting hurts performance
  /// (it might.)
  enum LowerInt1AllowShortCircuit {
    SC_And = 1,
    SC_Or = 2,
    SC_All = SC_And | SC_Or,
  };

  /// ShortCircuitCondAndLabel wraps the condition codes that should be used
  /// after a lowerInt1ForBranch returns to branch to the
  /// TrueTarget/FalseTarget. If ShortCircuitLabel is not nullptr, then the
  /// called lowerInt1ForBranch created an internal (i.e., short-circuit)
  /// label used for short circuiting.
  class ShortCircuitCondAndLabel {
  public:
    explicit ShortCircuitCondAndLabel(CondWhenTrue &&C,
                                      InstARM32Label *L = nullptr)
        : Cond(std::move(C)), ShortCircuitTarget(L) {}
    const CondWhenTrue Cond;
    InstARM32Label *const ShortCircuitTarget;

    CondWhenTrue assertNoLabelAndReturnCond() const {
      assert(ShortCircuitTarget == nullptr);
      return Cond;
    }
  };

  /// lowerInt1ForBranch expands Boolean, and returns the condition codes that
  /// are to be used for branching to the branch's TrueTarget. It may return a
  /// label that the expansion of Boolean used to short circuit the chain's
  /// evaluation.
  ShortCircuitCondAndLabel
  lowerInt1ForBranch(Operand *Boolean, const LowerInt1BranchTarget &TargetTrue,
                     const LowerInt1BranchTarget &TargetFalse,
                     uint32_t ShortCircuitable);

  // _br is a convenience wrapper that emits br instructions to Target.
  void _br(const LowerInt1BranchTarget &BrTarget,
           CondARM32::Cond Cond = CondARM32::AL) {
    assert((BrTarget.NodeTarget == nullptr) !=
           (BrTarget.LabelTarget == nullptr));
    if (BrTarget.NodeTarget != nullptr)
      _br(BrTarget.NodeTarget, Cond);
    else
      _br(BrTarget.LabelTarget, Cond);
  }

  // _br_short_circuit is used when lowering InstArithmetic::And and
  // InstArithmetic::Or and a short circuit branch is needed.
  void _br_short_circuit(const LowerInt1BranchTarget &Target,
                         const CondWhenTrue &Cond) {
    if (Cond.WhenTrue1 != CondARM32::kNone) {
      _br(Target, Cond.WhenTrue1);
    }
    if (Cond.WhenTrue0 != CondARM32::kNone) {
      _br(Target, Cond.WhenTrue0);
    }
  }
  // End of bool folding machinery
  // --------------------------------------------------------------------------

  /// The Operand can only be a 16-bit immediate or a ConstantRelocatable
  /// (with an upper16 relocation).
  void _movt(Variable *Dest, Operand *Src0,
             CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Movt>(Dest, Src0, Pred);
  }
  void _movw(Variable *Dest, Operand *Src0,
             CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Movw>(Dest, Src0, Pred);
  }
  void _mul(Variable *Dest, Variable *Src0, Variable *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Mul>(Dest, Src0, Src1, Pred);
  }
  void _mvn(Variable *Dest, Operand *Src0,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Mvn>(Dest, Src0, Pred);
  }
  void _orr(Variable *Dest, Variable *Src0, Operand *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Orr>(Dest, Src0, Src1, Pred);
  }
  void _orrs(Variable *Dest, Variable *Src0, Operand *Src1,
             CondARM32::Cond Pred = CondARM32::AL) {
    constexpr bool SetFlags = true;
    Context.insert<InstARM32Orr>(Dest, Src0, Src1, Pred, SetFlags);
    if (SetFlags) {
      Context.insert<InstFakeUse>(Dest);
    }
  }
  void _push(const VarList &Sources) { Context.insert<InstARM32Push>(Sources); }
  void _pop(const VarList &Dests) {
    Context.insert<InstARM32Pop>(Dests);
    // Mark dests as modified.
    for (Variable *Dest : Dests)
      Context.insert<InstFakeDef>(Dest);
  }
  void _rbit(Variable *Dest, Variable *Src0,
             CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Rbit>(Dest, Src0, Pred);
  }
  void _rev(Variable *Dest, Variable *Src0,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Rev>(Dest, Src0, Pred);
  }
  void _ret(Variable *LR, Variable *Src0 = nullptr) {
    Context.insert<InstARM32Ret>(LR, Src0);
  }
  void _rscs(Variable *Dest, Variable *Src0, Operand *Src1,
             CondARM32::Cond Pred = CondARM32::AL) {
    constexpr bool SetFlags = true;
    Context.insert<InstARM32Rsc>(Dest, Src0, Src1, Pred, SetFlags);
    if (SetFlags) {
      Context.insert<InstFakeUse>(Dest);
    }
  }
  void _rsc(Variable *Dest, Variable *Src0, Operand *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Rsc>(Dest, Src0, Src1, Pred);
  }
  void _rsbs(Variable *Dest, Variable *Src0, Operand *Src1,
             CondARM32::Cond Pred = CondARM32::AL) {
    constexpr bool SetFlags = true;
    Context.insert<InstARM32Rsb>(Dest, Src0, Src1, Pred, SetFlags);
    if (SetFlags) {
      Context.insert<InstFakeUse>(Dest);
    }
  }
  void _rsb(Variable *Dest, Variable *Src0, Operand *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Rsb>(Dest, Src0, Src1, Pred);
  }
  void _sbc(Variable *Dest, Variable *Src0, Operand *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Sbc>(Dest, Src0, Src1, Pred);
  }
  void _sbcs(Variable *Dest, Variable *Src0, Operand *Src1,
             CondARM32::Cond Pred = CondARM32::AL) {
    constexpr bool SetFlags = true;
    Context.insert<InstARM32Sbc>(Dest, Src0, Src1, Pred, SetFlags);
    if (SetFlags) {
      Context.insert<InstFakeUse>(Dest);
    }
  }
  void _sdiv(Variable *Dest, Variable *Src0, Variable *Src1,
             CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Sdiv>(Dest, Src0, Src1, Pred);
  }
  /// _str, for all your Variable to memory transfers. Addr has the same
  /// restrictions that it does in _ldr.
  void _str(Variable *Value, OperandARM32Mem *Addr,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Str>(Value, Addr, Pred);
  }
  InstARM32Strex *_strex(Variable *Dest, Variable *Value,
                         OperandARM32Mem *Addr,
                         CondARM32::Cond Pred = CondARM32::AL) {
    if (auto *Value64 = llvm::dyn_cast<Variable64On32>(Value)) {
      Context.insert<InstFakeUse>(Value64->getLo());
      Context.insert<InstFakeUse>(Value64->getHi());
    }
    return Context.insert<InstARM32Strex>(Dest, Value, Addr, Pred);
  }
  void _sub(Variable *Dest, Variable *Src0, Operand *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Sub>(Dest, Src0, Src1, Pred);
  }
  void _subs(Variable *Dest, Variable *Src0, Operand *Src1,
             CondARM32::Cond Pred = CondARM32::AL) {
    constexpr bool SetFlags = true;
    Context.insert<InstARM32Sub>(Dest, Src0, Src1, Pred, SetFlags);
    if (SetFlags) {
      Context.insert<InstFakeUse>(Dest);
    }
  }
  void _sxt(Variable *Dest, Variable *Src0,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Sxt>(Dest, Src0, Pred);
  }
  void _tst(Variable *Src0, Operand *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Tst>(Src0, Src1, Pred);
  }
  void _trap() { Context.insert<InstARM32Trap>(); }
  void _udiv(Variable *Dest, Variable *Src0, Variable *Src1,
             CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Udiv>(Dest, Src0, Src1, Pred);
  }
  void _umull(Variable *DestLo, Variable *DestHi, Variable *Src0,
              Variable *Src1, CondARM32::Cond Pred = CondARM32::AL) {
    // umull requires DestLo and DestHi to be assigned to different GPRs. The
    // following lines create overlapping liveness ranges for both variables.
    // If either one of them is live, then they are both going to be live, and
    // thus assigned to different registers; if they are both dead, then DCE
    // will kick in and delete the following three instructions.
    Context.insert<InstFakeDef>(DestHi);
    Context.insert<InstARM32Umull>(DestLo, DestHi, Src0, Src1, Pred);
    Context.insert<InstFakeDef>(DestHi, DestLo)->setDestRedefined();
    Context.insert<InstFakeUse>(DestHi);
  }
  void _uxt(Variable *Dest, Variable *Src0,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Uxt>(Dest, Src0, Pred);
  }
  void _vabs(Variable *Dest, Variable *Src,
             CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Vabs>(Dest, Src, Pred);
  }
  void _vadd(Variable *Dest, Variable *Src0, Variable *Src1) {
    Context.insert<InstARM32Vadd>(Dest, Src0, Src1);
  }
  void _vand(Variable *Dest, Variable *Src0, Variable *Src1) {
    Context.insert<InstARM32Vand>(Dest, Src0, Src1);
  }
  InstARM32Vbsl *_vbsl(Variable *Dest, Variable *Src0, Variable *Src1) {
    return Context.insert<InstARM32Vbsl>(Dest, Src0, Src1);
  }
  void _vceq(Variable *Dest, Variable *Src0, Variable *Src1) {
    Context.insert<InstARM32Vceq>(Dest, Src0, Src1);
  }
  InstARM32Vcge *_vcge(Variable *Dest, Variable *Src0, Variable *Src1) {
    return Context.insert<InstARM32Vcge>(Dest, Src0, Src1);
  }
  InstARM32Vcgt *_vcgt(Variable *Dest, Variable *Src0, Variable *Src1) {
    return Context.insert<InstARM32Vcgt>(Dest, Src0, Src1);
  }
  void _vcvt(Variable *Dest, Variable *Src, InstARM32Vcvt::VcvtVariant Variant,
             CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Vcvt>(Dest, Src, Variant, Pred);
  }
  void _vdiv(Variable *Dest, Variable *Src0, Variable *Src1) {
    Context.insert<InstARM32Vdiv>(Dest, Src0, Src1);
  }
  void _vcmp(Variable *Src0, Variable *Src1,
             CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Vcmp>(Src0, Src1, Pred);
  }
  void _vcmp(Variable *Src0, OperandARM32FlexFpZero *FpZero,
             CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Vcmp>(Src0, FpZero, Pred);
  }
  void _vdup(Variable *Dest, Variable *Src, int Idx) {
    Context.insert<InstARM32Vdup>(Dest, Src, Idx);
  }
  void _veor(Variable *Dest, Variable *Src0, Variable *Src1) {
    Context.insert<InstARM32Veor>(Dest, Src0, Src1);
  }
  void _vldr1d(Variable *Dest, OperandARM32Mem *Addr,
               CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Vldr1d>(Dest, Addr, Pred);
  }
  void _vldr1q(Variable *Dest, OperandARM32Mem *Addr,
               CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Vldr1q>(Dest, Addr, Pred);
  }
  void _vmrs(CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Vmrs>(Pred);
  }
  void _vmla(Variable *Dest, Variable *Src0, Variable *Src1) {
    Context.insert<InstARM32Vmla>(Dest, Src0, Src1);
  }
  void _vmlap(Variable *Dest, Variable *Src0, Variable *Src1) {
    Context.insert<InstARM32Vmlap>(Dest, Src0, Src1);
  }
  void _vmls(Variable *Dest, Variable *Src0, Variable *Src1) {
    Context.insert<InstARM32Vmls>(Dest, Src0, Src1);
  }
  void _vmovl(Variable *Dest, Variable *Src0, Variable *Src1) {
    Context.insert<InstARM32Vmovl>(Dest, Src0, Src1);
  }
  void _vmovh(Variable *Dest, Variable *Src0, Variable *Src1) {
    Context.insert<InstARM32Vmovh>(Dest, Src0, Src1);
  }
  void _vmovhl(Variable *Dest, Variable *Src0, Variable *Src1) {
    Context.insert<InstARM32Vmovhl>(Dest, Src0, Src1);
  }
  void _vmovlh(Variable *Dest, Variable *Src0, Variable *Src1) {
    Context.insert<InstARM32Vmovlh>(Dest, Src0, Src1);
  }
  void _vmul(Variable *Dest, Variable *Src0, Variable *Src1) {
    Context.insert<InstARM32Vmul>(Dest, Src0, Src1);
  }
  void _vmulh(Variable *Dest, Variable *Src0, Variable *Src1, bool Unsigned) {
    Context.insert<InstARM32Vmulh>(Dest, Src0, Src1)
        ->setSignType(Unsigned ? InstARM32::FS_Unsigned : InstARM32::FS_Signed);
  }
  void _vmvn(Variable *Dest, Variable *Src0) {
    Context.insert<InstARM32Vmvn>(Dest, Src0, CondARM32::AL);
  }
  void _vneg(Variable *Dest, Variable *Src0) {
    Context.insert<InstARM32Vneg>(Dest, Src0, CondARM32::AL)
        ->setSignType(InstARM32::FS_Signed);
  }
  void _vorr(Variable *Dest, Variable *Src0, Variable *Src1) {
    Context.insert<InstARM32Vorr>(Dest, Src0, Src1);
  }
  void _vqadd(Variable *Dest, Variable *Src0, Variable *Src1, bool Unsigned) {
    Context.insert<InstARM32Vqadd>(Dest, Src0, Src1)
        ->setSignType(Unsigned ? InstARM32::FS_Unsigned : InstARM32::FS_Signed);
  }
  void _vqmovn2(Variable *Dest, Variable *Src0, Variable *Src1, bool Unsigned,
                bool Saturating) {
    Context.insert<InstARM32Vqmovn2>(Dest, Src0, Src1)
        ->setSignType(Saturating ? (Unsigned ? InstARM32::FS_Unsigned
                                             : InstARM32::FS_Signed)
                                 : InstARM32::FS_None);
  }
  void _vqsub(Variable *Dest, Variable *Src0, Variable *Src1, bool Unsigned) {
    Context.insert<InstARM32Vqsub>(Dest, Src0, Src1)
        ->setSignType(Unsigned ? InstARM32::FS_Unsigned : InstARM32::FS_Signed);
  }
  InstARM32Vshl *_vshl(Variable *Dest, Variable *Src0, Variable *Src1) {
    return Context.insert<InstARM32Vshl>(Dest, Src0, Src1);
  }
  void _vshl(Variable *Dest, Variable *Src0, ConstantInteger32 *Src1) {
    Context.insert<InstARM32Vshl>(Dest, Src0, Src1)
        ->setSignType(InstARM32::FS_Unsigned);
  }
  InstARM32Vshr *_vshr(Variable *Dest, Variable *Src0,
                       ConstantInteger32 *Src1) {
    return Context.insert<InstARM32Vshr>(Dest, Src0, Src1);
  }
  void _vsqrt(Variable *Dest, Variable *Src,
              CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Vsqrt>(Dest, Src, Pred);
  }
  void _vstr1d(Variable *Value, OperandARM32Mem *Addr,
               CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Vstr1>(Value, Addr, Pred, 32);
  }
  void _vstr1q(Variable *Value, OperandARM32Mem *Addr,
               CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Vstr1>(Value, Addr, Pred, 64);
  }
  void _vsub(Variable *Dest, Variable *Src0, Variable *Src1) {
    Context.insert<InstARM32Vsub>(Dest, Src0, Src1);
  }
  void _vzip(Variable *Dest, Variable *Src0, Variable *Src1) {
    Context.insert<InstARM32Vzip>(Dest, Src0, Src1);
  }

  // Iterates over the CFG and determines the maximum size in bytes of the
  // outgoing stack arguments. This information is later used during
  // addProlog() to pre-allocate the outargs area.
  // TODO(jpp): This could live in the Parser, if we provided a
  // Target-specific method that the Parser could call.
  void findMaxStackOutArgsSize();

  /// Returns true if the given Offset can be represented in a Load/Store Mem
  /// Operand.
  bool isLegalMemOffset(Type Ty, int32_t Offset) const;
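
  // For reference, the representable range depends on Ty because of the A32
  // encodings (an aside, not derived from this file): plain ldr/str take a
  // +/-4095 immediate, ldrh/ldrsb/ldrd a +/-255 immediate, and vldr/vstr a
  // +/-1020 immediate in multiples of 4.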

  void postLowerLegalization();

  /// Manages the GotPtr variable, which is used for Nonsfi sandboxing.
  /// @{
  void createGotPtr();
  void insertGotPtrInitPlaceholder();
  VariableDeclaration *createGotRelocation(RelocOffset *AddPcReloc);
  void materializeGotAddr(CfgNode *Node);
  Variable *GotPtr = nullptr;
  // TODO(jpp): use CfgLocalAllocator.
  /// @}

  /// Manages the Gotoff relocations created during the function lowering. A
  /// single Gotoff relocation is created for each global variable used by the
  /// function being lowered.
  /// @{
  // TODO(jpp): if the same global G is used in different functions, then this
  // method will emit one G(gotoff) relocation per function.
  GlobalString createGotoffRelocation(const ConstantRelocatable *CR);
  CfgUnorderedSet<GlobalString> KnownGotoffs;
  /// @}

  /// Loads the constant relocatable Name into Register, then invokes Finish
  /// to finish the relocatable lowering. Finish **must** use PC in its first
  /// emitted instruction, or the relocatable in Register will contain the
  /// wrong value.
  //
  // Lowered sequence:
  //
  // Movw:
  //     movw Register, #:lower16:Name - (End - Movw) - 8 .
  // Movt:
  //     movt Register, #:upper16:Name - (End - Movt) - 8 .
  //     PC = fake-def
  // End:
  //     Finish(PC)
  //
  // The -8 in movw/movt above is to account for the PC value that the first
  // instruction emitted by Finish(PC) will read.
  void
  loadNamedConstantRelocatablePIC(GlobalString Name, Variable *Register,
                                  std::function<void(Variable *PC)> Finish);

  /// Sandboxer defines methods for ensuring that "dangerous" operations are
  /// masked during sandboxed code emission. For regular, non-sandboxed code
  /// emission, its methods are simple pass-through methods.
  ///
  /// The Sandboxer also emits BundleLock/BundleUnlock pseudo-instructions in
  /// the constructor/destructor during sandboxed code emission. Therefore, it
  /// is a bad idea to create an object of this type and "keep it around." The
  /// recommended usage is:
  ///
  ///   Sandboxer(this).<<operation>>(...);
  ///
  /// This usage ensures that no other instructions are inadvertently added to
  /// the bundle.
  class Sandboxer {
    Sandboxer() = delete;
    Sandboxer(const Sandboxer &) = delete;
    Sandboxer &operator=(const Sandboxer &) = delete;

  public:
    explicit Sandboxer(
        TargetARM32 *Target,
        InstBundleLock::Option BundleOption = InstBundleLock::Opt_None);
    ~Sandboxer();

    /// Increments sp:
    ///
    ///   add sp, sp, AddAmount
    ///   bic sp, sp, 0xc0000000
    ///
    /// (for the rationale, see the ARM 32-bit Sandbox Specification.)
    void add_sp(Operand *AddAmount);

    /// Emits code to align sp to the specified alignment:
    ///
    ///   bic/and sp, sp, Alignment
    ///   bic sp, sp, 0xc0000000
    void align_sp(size_t Alignment);

    /// Emits a call instruction. If CallTarget is a Variable, it emits
    ///
    ///   bic CallTarget, CallTarget, 0xc000000f
    ///   bl CallTarget
    ///
    /// Otherwise, it emits
    ///
    ///   bl CallTarget
    ///
    /// Note: in sandboxed code calls are always emitted at addresses 12 mod
    /// 16.
    InstARM32Call *bl(Variable *ReturnReg, Operand *CallTarget);

    /// Emits a load:
    ///
    ///   bic rBase, rBase, 0xc0000000
    ///   ldr rDest, [rBase, #Offset]
    ///
    /// Exception: if rBase is r9 or sp, then the load is emitted as:
    ///
    ///   ldr rDest, [rBase, #Offset]
    ///
    /// because the NaCl ARM 32-bit Sandbox Specification guarantees they are
    /// always valid.
    void ldr(Variable *Dest, OperandARM32Mem *Mem, CondARM32::Cond Pred);

    /// Emits a load exclusive:
    ///
    ///   bic rBase, rBase, 0xc0000000
    ///   ldrex rDest, [rBase]
    ///
    /// Exception: if rBase is r9 or sp, then the load is emitted as:
    ///
    ///   ldrex rDest, [rBase]
    ///
    /// because the NaCl ARM 32-bit Sandbox Specification guarantees they are
    /// always valid.
    void ldrex(Variable *Dest, OperandARM32Mem *Mem, CondARM32::Cond Pred);

    /// Resets sp to Src:
    ///
    ///   mov sp, Src
    ///   bic sp, sp, 0xc0000000
    void reset_sp(Variable *Src);

    /// Emits code to return from a function:
    ///
    ///   bic lr, lr, 0xc000000f
    ///   bx lr
    void ret(Variable *RetAddr, Variable *RetValue);

    /// Emits a store:
    ///
    ///   bic rBase, rBase, 0xc0000000
    ///   str rSrc, [rBase, #Offset]
    ///
    /// Exception: if rBase is r9 or sp, then the store is emitted as:
    ///
    ///   str rSrc, [rBase, #Offset]
    ///
    /// because the NaCl ARM 32-bit Sandbox Specification guarantees they are
    /// always valid.
    void str(Variable *Src, OperandARM32Mem *Mem, CondARM32::Cond Pred);

    /// Emits a store exclusive:
    ///
    ///   bic rBase, rBase, 0xc0000000
    ///   strex rDest, rSrc, [rBase]
    ///
    /// Exception: if rBase is r9 or sp, then the store is emitted as:
    ///
    ///   strex rDest, rSrc, [rBase]
    ///
    /// because the NaCl ARM 32-bit Sandbox Specification guarantees they are
    /// always valid.
    void strex(Variable *Dest, Variable *Src, OperandARM32Mem *Mem,
               CondARM32::Cond Pred);

    /// Decrements sp:
    ///
    ///   sub sp, sp, SubAmount
    ///   bic sp, sp, 0xc0000000
    void sub_sp(Operand *SubAmount);

  private:
    TargetARM32 *const Target;
    const InstBundleLock::Option BundleOption;
    std::unique_ptr<AutoBundle> Bundler;

    void createAutoBundle();
  };
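
  // Illustrative (hypothetical) use when lowering a stack adjustment:
  //
  //   Sandboxer(this).sub_sp(SubAmount);
  //
  // In sandboxed mode this emits the bundle-locked sub/bic pair described
  // above; in non-sandboxed mode it reduces to a plain sub.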

  class PostLoweringLegalizer {
    PostLoweringLegalizer() = delete;
    PostLoweringLegalizer(const PostLoweringLegalizer &) = delete;
    PostLoweringLegalizer &operator=(const PostLoweringLegalizer &) = delete;

  public:
    explicit PostLoweringLegalizer(TargetARM32 *Target)
        : Target(Target), StackOrFrameReg(Target->getPhysicalRegister(
                              Target->getFrameOrStackReg())) {}

    void resetTempBaseIfClobberedBy(const Inst *Instr);

    // Ensures that the TempBase register held by this legalizer (if any) is
    // assigned to IP.
    void assertNoTempOrAssignedToIP() const {
      assert(TempBaseReg == nullptr ||
             TempBaseReg->getRegNum() == Target->getReservedTmpReg());
    }

    // Legalizes Mem. If Mem.Base is a Rematerializable variable, Mem.Offset
    // is fixed up.
    OperandARM32Mem *legalizeMemOperand(OperandARM32Mem *Mem,
                                        bool AllowOffsets = true);

    /// Legalizes Mov if its Source (or Destination) is a spilled Variable, or
    /// if its Source is a Rematerializable variable (this form is used in
    /// lieu of lea, which is not available in ARM.)
    ///
    /// Moves to memory become store instructions, and moves from memory,
    /// loads.
    void legalizeMov(InstARM32Mov *Mov);

  private:
    /// Creates a new Base register centered around [Base, +/- Offset].
    Variable *newBaseRegister(Variable *Base, int32_t Offset,
                              RegNumT ScratchRegNum);

    /// Creates a new, legal OperandARM32Mem for accessing Base + Offset.
    /// The returned mem operand is a legal operand for accessing memory that
    /// is of type Ty.
    ///
    /// If [Base, #Offset] is encodable, then the method returns a Mem operand
    /// expressing it. Otherwise,
    ///
    /// if [TempBaseReg, #Offset-TempBaseOffset] is a valid memory operand,
    /// the method will return that. Otherwise,
    ///
    /// a new base register ip=Base+Offset is created, and the method returns
    /// a memory operand expressing [ip, #0].
    OperandARM32Mem *createMemOperand(Type Ty, Variable *Base, int32_t Offset,
                                      bool AllowOffsets = true);
    TargetARM32 *const Target;
    Variable *const StackOrFrameReg;
    Variable *TempBaseReg = nullptr;
    int32_t TempBaseOffset = 0;
  };

  const bool NeedSandboxing;
  TargetARM32Features CPUFeatures;
  bool UsesFramePointer = false;
  bool NeedsStackAlignment = false;
  bool MaybeLeafFunc = true;
  size_t SpillAreaSizeBytes = 0;
  size_t FixedAllocaSizeBytes = 0;
  size_t FixedAllocaAlignBytes = 0;
  bool PrologEmitsFixedAllocas = false;
  uint32_t MaxOutArgsSizeBytes = 0;
  // TODO(jpp): std::array instead of array.
  static SmallBitVector TypeToRegisterSet[RegARM32::RCARM32_NUM];
  static SmallBitVector TypeToRegisterSetUnfiltered[RegARM32::RCARM32_NUM];
  static SmallBitVector RegisterAliases[RegARM32::Reg_NUM];
  SmallBitVector RegsUsed;
  VarList PhysicalRegisters[IceType_NUM];
  VarList PreservedGPRs;
  VarList PreservedSRegs;

  /// Helper class that understands the Calling Convention and register
  /// assignments. The first few integer type parameters can use r0-r3,
  /// regardless of their position relative to the floating-point/vector
  /// arguments in the argument list. Floating-point and vector arguments
  /// can use q0-q3 (aka d0-d7, s0-s15). For more information on the topic,
  /// see the ARM Architecture Procedure Calling Standards (AAPCS).
  ///
  /// Technically, arguments that can start with registers but extend beyond
  /// the available registers can be split between the registers and the
  /// stack. However, this is typically for passing GPR structs by value, and
  /// PNaCl transforms expand this out.
  ///
  /// At (public) function entry, the stack must be 8-byte aligned.
  class CallingConv {
    CallingConv(const CallingConv &) = delete;
    CallingConv &operator=(const CallingConv &) = delete;

  public:
    CallingConv();
    ~CallingConv() = default;

    /// argInGPR returns true if there is a GPR available for the requested
    /// type, and false otherwise. If it returns true, Reg is set to the
    /// appropriate register number. Note that, when Ty == IceType_i64, Reg
    /// will be an I64 register pair.
    bool argInGPR(Type Ty, RegNumT *Reg);

    /// argInVFP is to floating-point/vector types what argInGPR is for
    /// integer types.
    bool argInVFP(Type Ty, RegNumT *Reg);
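
    // Illustrative (hypothetical) use when assigning argument locations:
    //
    //   CallingConv CC;
    //   RegNumT Reg;
    //   const bool InReg = isScalarFloatingType(Ty) || isVectorType(Ty)
    //                          ? CC.argInVFP(Ty, &Reg)
    //                          : CC.argInGPR(Ty, &Reg);
    //   // If InReg, the argument lives in Reg; otherwise it is passed on
    //   // the stack.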

  private:
    void discardUnavailableGPRsAndTheirAliases(CfgVector<RegNumT> *Regs);
    SmallBitVector GPRegsUsed;
    CfgVector<RegNumT> GPRArgs;
    CfgVector<RegNumT> I64Args;

    void discardUnavailableVFPRegs(CfgVector<RegNumT> *Regs);
    SmallBitVector VFPRegsUsed;
    CfgVector<RegNumT> FP32Args;
    CfgVector<RegNumT> FP64Args;
    CfgVector<RegNumT> Vec128Args;
  };

private:
  ENABLE_MAKE_UNIQUE;

  OperandARM32Mem *formAddressingMode(Type Ty, Cfg *Func, const Inst *LdSt,
                                      Operand *Base);

  void postambleCtpop64(const InstCall *Instr);
  void preambleDivRem(const InstCall *Instr);
  CfgUnorderedMap<Operand *, void (TargetARM32::*)(const InstCall *Instr)>
      ARM32HelpersPreamble;
  CfgUnorderedMap<Operand *, void (TargetARM32::*)(const InstCall *Instr)>
      ARM32HelpersPostamble;

  class ComputationTracker {
  public:
    ComputationTracker() = default;
    ~ComputationTracker() = default;

    void forgetProducers() { KnownComputations.clear(); }
    void recordProducers(CfgNode *Node);

    const Inst *getProducerOf(const Operand *Opnd) const {
      auto *Var = llvm::dyn_cast<Variable>(Opnd);
      if (Var == nullptr) {
        return nullptr;
      }

      auto Iter = KnownComputations.find(Var->getIndex());
      if (Iter == KnownComputations.end()) {
        return nullptr;
      }

      return Iter->second.Instr;
    }

    void dump(const Cfg *Func) const {
      if (!BuildDefs::dump() || !Func->isVerbose(IceV_Folding))
        return;
      OstreamLocker L(Func->getContext());
      Ostream &Str = Func->getContext()->getStrDump();
      Str << "foldable producer:\n";
      for (const auto &Computation : KnownComputations) {
        Str << "  ";
        Computation.second.Instr->dump(Func);
        Str << "\n";
      }
      Str << "\n";
    }

  private:
    class ComputationEntry {
    public:
      ComputationEntry(Inst *I, Type Ty) : Instr(I), ComputationType(Ty) {}
      Inst *const Instr;
      // Boolean folding is disabled for variables whose live range is multi
      // block. We conservatively initialize IsLiveOut to true, and set it to
      // false once we find the end of the live range for the variable
      // defined by this instruction. If liveness analysis is not performed
      // (e.g., in Om1 mode) IsLiveOut will never be set to false, and folding
      // will be disabled.
      bool IsLiveOut = true;
      int32_t NumUses = 0;
      Type ComputationType;
    };

    // ComputationMap maps a Variable number to a payload identifying which
    // instruction defined it.
    using ComputationMap = CfgUnorderedMap<SizeT, ComputationEntry>;
    ComputationMap KnownComputations;
  };

  ComputationTracker Computations;

  // AllowTemporaryWithNoReg indicates if TargetARM32::makeReg() can be
  // invoked without specifying a physical register. This is needed for
  // creating unbound temporaries during Ice -> ARM lowering, but before
  // register allocation. This is a safeguard ensuring that no unbound
  // temporaries are created during the legalization post-passes.
  bool AllowTemporaryWithNoReg = true;
  // ForbidTemporaryWithoutReg is a RAII class that manages
  // AllowTemporaryWithNoReg.
  class ForbidTemporaryWithoutReg {
    ForbidTemporaryWithoutReg() = delete;
    ForbidTemporaryWithoutReg(const ForbidTemporaryWithoutReg &) = delete;
    ForbidTemporaryWithoutReg &
    operator=(const ForbidTemporaryWithoutReg &) = delete;

  public:
    explicit ForbidTemporaryWithoutReg(TargetARM32 *Target) : Target(Target) {
      Target->AllowTemporaryWithNoReg = false;
    }
    ~ForbidTemporaryWithoutReg() { Target->AllowTemporaryWithNoReg = true; }

  private:
    TargetARM32 *const Target;
  };
};

class TargetDataARM32 final : public TargetDataLowering {
  TargetDataARM32() = delete;
  TargetDataARM32(const TargetDataARM32 &) = delete;
  TargetDataARM32 &operator=(const TargetDataARM32 &) = delete;

public:
  static std::unique_ptr<TargetDataLowering> create(GlobalContext *Ctx) {
    return std::unique_ptr<TargetDataLowering>(new TargetDataARM32(Ctx));
  }

  void lowerGlobals(const VariableDeclarationList &Vars,
                    const std::string &SectionSuffix) override;
  void lowerConstants() override;
  void lowerJumpTables() override;

protected:
  explicit TargetDataARM32(GlobalContext *Ctx);

private:
  ~TargetDataARM32() override = default;
};

class TargetHeaderARM32 final : public TargetHeaderLowering {
  TargetHeaderARM32() = delete;
  TargetHeaderARM32(const TargetHeaderARM32 &) = delete;
  TargetHeaderARM32 &operator=(const TargetHeaderARM32 &) = delete;

public:
  static std::unique_ptr<TargetHeaderLowering> create(GlobalContext *Ctx) {
    return std::unique_ptr<TargetHeaderLowering>(new TargetHeaderARM32(Ctx));
  }

  void lower() override;

protected:
  explicit TargetHeaderARM32(GlobalContext *Ctx);

private:
  ~TargetHeaderARM32() = default;

  TargetARM32Features CPUFeatures;
};

} // end of namespace ARM32
} // end of namespace Ice

#endif // SUBZERO_SRC_ICETARGETLOWERINGARM32_H