//===- subzero/src/IceTargetLoweringARM32.h - ARM32 lowering ----*- C++ -*-===//
//
//                        The Subzero Code Generator
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// \brief Declares the TargetLoweringARM32 class, which implements the
/// TargetLowering interface for the ARM 32-bit architecture.
///
//===----------------------------------------------------------------------===//

#ifndef SUBZERO_SRC_ICETARGETLOWERINGARM32_H
#define SUBZERO_SRC_ICETARGETLOWERINGARM32_H

#include "IceAssemblerARM32.h"
#include "IceDefs.h"
#include "IceInstARM32.h"
#include "IceRegistersARM32.h"
#include "IceTargetLowering.h"

#include <utility>

namespace Ice {
namespace ARM32 {

// Class encapsulating ARM cpu features / instruction set.
class TargetARM32Features {
  TargetARM32Features() = delete;
  TargetARM32Features(const TargetARM32Features &) = delete;
  TargetARM32Features &operator=(const TargetARM32Features &) = delete;

public:
  explicit TargetARM32Features(const ClFlags &Flags);

  enum ARM32InstructionSet {
    Begin,
    // Neon is the PNaCl baseline instruction set.
    Neon = Begin,
    HWDivArm, // HW divide in ARM mode (not just Thumb mode).
    End
  };

  bool hasFeature(ARM32InstructionSet I) const { return I <= InstructionSet; }
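  // The instruction-set values are cumulative and ordered, so the comparison
  // above means that, for example, a target configured with HWDivArm also
  // reports hasFeature(Neon) as true.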

private:
  ARM32InstructionSet InstructionSet = ARM32InstructionSet::Begin;
};

// The target lowering logic for ARM32.
class TargetARM32 : public TargetLowering {
  TargetARM32() = delete;
  TargetARM32(const TargetARM32 &) = delete;
  TargetARM32 &operator=(const TargetARM32 &) = delete;

public:
  static void staticInit(GlobalContext *Ctx);

  static bool shouldBePooled(const Constant *C) {
    if (auto *ConstDouble = llvm::dyn_cast<ConstantDouble>(C)) {
      return !Utils::isPositiveZero(ConstDouble->getValue());
    }
    if (llvm::isa<ConstantFloat>(C))
      return true;
    return false;
  }

  static ::Ice::Type getPointerType() { return ::Ice::IceType_i32; }

  // TODO(jvoung): return a unique_ptr.
  static std::unique_ptr<::Ice::TargetLowering> create(Cfg *Func) {
    return makeUnique<TargetARM32>(Func);
  }

  std::unique_ptr<::Ice::Assembler> createAssembler() const override {
    const bool IsNonsfi = SandboxingType == ST_Nonsfi;
    return makeUnique<ARM32::AssemblerARM32>(IsNonsfi);
  }

  void initNodeForLowering(CfgNode *Node) override {
    Computations.forgetProducers();
    Computations.recordProducers(Node);
    Computations.dump(Func);
  }

  void translateOm1() override;
  void translateO2() override;
  bool doBranchOpt(Inst *I, const CfgNode *NextNode) override;

  SizeT getNumRegisters() const override { return RegARM32::Reg_NUM; }
  Variable *getPhysicalRegister(RegNumT RegNum,
                                Type Ty = IceType_void) override;
  const char *getRegName(RegNumT RegNum, Type Ty) const override;
  SmallBitVector getRegisterSet(RegSetMask Include,
                                RegSetMask Exclude) const override;
  const SmallBitVector &
  getRegistersForVariable(const Variable *Var) const override {
    RegClass RC = Var->getRegClass();
    switch (RC) {
    default:
      assert(RC < RC_Target);
      return TypeToRegisterSet[RC];
    case RegARM32::RCARM32_QtoS:
      return TypeToRegisterSet[RC];
    }
  }
  const SmallBitVector &
  getAllRegistersForVariable(const Variable *Var) const override {
    RegClass RC = Var->getRegClass();
    assert((RegARM32::RegClassARM32)RC < RegARM32::RCARM32_NUM);
    return TypeToRegisterSetUnfiltered[RC];
  }
  const SmallBitVector &getAliasesForRegister(RegNumT Reg) const override {
    return RegisterAliases[Reg];
  }
  bool hasFramePointer() const override { return UsesFramePointer; }
  void setHasFramePointer() override { UsesFramePointer = true; }
  RegNumT getStackReg() const override { return RegARM32::Reg_sp; }
  RegNumT getFrameReg() const override { return RegARM32::Reg_fp; }
  RegNumT getFrameOrStackReg() const override {
    return UsesFramePointer ? getFrameReg() : getStackReg();
  }
  RegNumT getReservedTmpReg() const { return RegARM32::Reg_ip; }

  size_t typeWidthInBytesOnStack(Type Ty) const override {
    // Round up to the next multiple of 4 bytes. In particular, i1, i8, and i16
    // are rounded up to 4 bytes.
    return (typeWidthInBytes(Ty) + 3) & ~3;
  }
  uint32_t getStackAlignment() const override;
  void reserveFixedAllocaArea(size_t Size, size_t Align) override {
    FixedAllocaSizeBytes = Size;
    assert(llvm::isPowerOf2_32(Align));
    FixedAllocaAlignBytes = Align;
    PrologEmitsFixedAllocas = true;
  }
  int32_t getFrameFixedAllocaOffset() const override {
    return FixedAllocaSizeBytes - (SpillAreaSizeBytes - MaxOutArgsSizeBytes);
  }
  uint32_t maxOutArgsSizeBytes() const override { return MaxOutArgsSizeBytes; }

  bool shouldSplitToVariable64On32(Type Ty) const override {
    return Ty == IceType_i64;
  }
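  // Because i64 is split, 64-bit operations are lowered on pairs of 32-bit
  // halves; loOperand()/hiOperand() below extract the low and high halves of a
  // 64-bit operand.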

  // TODO(ascull): what size is best for ARM?
  SizeT getMinJumpTableSize() const override { return 3; }
  void emitJumpTable(const Cfg *Func,
                     const InstJumpTable *JumpTable) const override;

  void emitVariable(const Variable *Var) const override;

  void emit(const ConstantUndef *C) const final;
  void emit(const ConstantInteger32 *C) const final;
  void emit(const ConstantInteger64 *C) const final;
  void emit(const ConstantFloat *C) const final;
  void emit(const ConstantDouble *C) const final;
  void emit(const ConstantRelocatable *C) const final;

  void lowerArguments() override;
  void addProlog(CfgNode *Node) override;
  void addEpilog(CfgNode *Node) override;

  Operand *loOperand(Operand *Operand);
  Operand *hiOperand(Operand *Operand);
  void finishArgumentLowering(Variable *Arg, Variable *FramePtr,
                              size_t BasicFrameOffset, size_t *InArgsSizeBytes);

  bool hasCPUFeature(TargetARM32Features::ARM32InstructionSet I) const {
    return CPUFeatures.hasFeature(I);
  }

  enum OperandLegalization {
    Legal_Reg = 1 << 0,  /// physical register, not stack location
    Legal_Flex = 1 << 1, /// A flexible operand2, which can hold rotated small
                         /// immediates, shifted registers, or modified fp imm.
    Legal_Mem = 1 << 2,  /// includes [r0, r1 lsl #2] as well as [sp, #12]
    Legal_Rematerializable = 1 << 3,
    Legal_Default = ~Legal_Rematerializable,
  };

  using LegalMask = uint32_t;
  Operand *legalizeUndef(Operand *From, RegNumT RegNum = RegNumT());
  Operand *legalize(Operand *From, LegalMask Allowed = Legal_Default,
                    RegNumT RegNum = RegNumT());
  Variable *legalizeToReg(Operand *From, RegNumT RegNum = RegNumT());
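  // A typical lowering uses these roughly as follows (an illustrative sketch,
  // not a quote from the lowering code): the first source of a data-processing
  // instruction is legalized to a register, while the second may also be a
  // flexible operand2, e.g.
  //
  //   Variable *Src0R = legalizeToReg(Src0);
  //   Operand *Src1RF = legalize(Src1, Legal_Reg | Legal_Flex);
  //   Variable *T = makeReg(Dest->getType());
  //   _add(T, Src0R, Src1RF);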

  OperandARM32ShAmtImm *shAmtImm(uint32_t ShAmtImm) const {
    assert(ShAmtImm < 32);
    return OperandARM32ShAmtImm::create(
        Func,
        llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(ShAmtImm & 0x1F)));
  }

  GlobalContext *getCtx() const { return Ctx; }

protected:
  explicit TargetARM32(Cfg *Func);

  void postLower() override;

  enum SafeBoolChain {
    SBC_No,
    SBC_Yes,
  };

  void lowerAlloca(const InstAlloca *Instr) override;
  SafeBoolChain lowerInt1Arithmetic(const InstArithmetic *Instr);
  void lowerInt64Arithmetic(InstArithmetic::OpKind Op, Variable *Dest,
                            Operand *Src0, Operand *Src1);
  void lowerArithmetic(const InstArithmetic *Instr) override;
  void lowerAssign(const InstAssign *Instr) override;
  void lowerBr(const InstBr *Instr) override;
  void lowerCall(const InstCall *Instr) override;
  void lowerCast(const InstCast *Instr) override;
  void lowerExtractElement(const InstExtractElement *Instr) override;

  /// CondWhenTrue is a helper type returned by every method in the lowering
  /// that emits code to set the condition codes.
  class CondWhenTrue {
  public:
    explicit CondWhenTrue(CondARM32::Cond T0,
                          CondARM32::Cond T1 = CondARM32::kNone)
        : WhenTrue0(T0), WhenTrue1(T1) {
      assert(T1 == CondARM32::kNone || T0 != CondARM32::kNone);
      assert(T1 != T0 || T0 == CondARM32::kNone);
    }
    CondARM32::Cond WhenTrue0;
    CondARM32::Cond WhenTrue1;

    /// invert returns a new object with WhenTrue0 and WhenTrue1 inverted.
    CondWhenTrue invert() const {
      switch (WhenTrue0) {
      default:
        if (WhenTrue1 == CondARM32::kNone)
          return CondWhenTrue(InstARM32::getOppositeCondition(WhenTrue0));
        return CondWhenTrue(InstARM32::getOppositeCondition(WhenTrue0),
                            InstARM32::getOppositeCondition(WhenTrue1));
      case CondARM32::AL:
        return CondWhenTrue(CondARM32::kNone);
      case CondARM32::kNone:
        return CondWhenTrue(CondARM32::AL);
      }
    }
  };

  CondWhenTrue lowerFcmpCond(const InstFcmp *Instr);
  void lowerFcmp(const InstFcmp *Instr) override;
  CondWhenTrue lowerInt8AndInt16IcmpCond(InstIcmp::ICond Condition,
                                         Operand *Src0, Operand *Src1);
  CondWhenTrue lowerInt32IcmpCond(InstIcmp::ICond Condition, Operand *Src0,
                                  Operand *Src1);
  CondWhenTrue lowerInt64IcmpCond(InstIcmp::ICond Condition, Operand *Src0,
                                  Operand *Src1);
  CondWhenTrue lowerIcmpCond(InstIcmp::ICond Condition, Operand *Src0,
                             Operand *Src1);
  CondWhenTrue lowerIcmpCond(const InstIcmp *Instr);
  void lowerIcmp(const InstIcmp *Instr) override;
  /// Emits the basic sequence for load-linked/store-exclusive loops:
  ///
  /// retry:
  ///        ldrex tmp, [Addr]
  ///        StoreValue = Operation(tmp)
  ///        strexCond success, StoreValue, [Addr]
  ///        cmpCond success, #0
  ///        bne retry
  ///
  /// Operation needs to return the value to strex into Addr, it must not
  /// change the flags if Cond is not AL, and must not emit any instructions
  /// that could end up writing to memory. Operation also needs to handle
  /// fake-defing for i64 handling.
  void
  lowerLoadLinkedStoreExclusive(Type Ty, Operand *Addr,
                                std::function<Variable *(Variable *)> Operation,
                                CondARM32::Cond Cond = CondARM32::AL);
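  /// For instance, an atomic fetch-and-add could be expressed along these
  /// lines (a hypothetical sketch, not the actual lowering code):
  ///
  ///   lowerLoadLinkedStoreExclusive(
  ///       IceType_i32, Addr, [this, Val](Variable *Tmp) {
  ///         Variable *StoreValue = makeReg(IceType_i32);
  ///         _add(StoreValue, Tmp, Val);
  ///         return StoreValue;
  ///       });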
  void lowerInt64AtomicRMW(Variable *Dest, uint32_t Operation, Operand *Ptr,
                           Operand *Val);
  void lowerAtomicRMW(Variable *Dest, uint32_t Operation, Operand *Ptr,
                      Operand *Val);
  void lowerBreakpoint(const InstBreakpoint *Instr) override;
  void lowerIntrinsic(const InstIntrinsic *Instr) override;
  void lowerInsertElement(const InstInsertElement *Instr) override;
  void lowerLoad(const InstLoad *Instr) override;
  void lowerPhi(const InstPhi *Instr) override;
  void lowerRet(const InstRet *Instr) override;
  void lowerSelect(const InstSelect *Instr) override;
  void lowerShuffleVector(const InstShuffleVector *Instr) override;
  void lowerStore(const InstStore *Instr) override;
  void lowerSwitch(const InstSwitch *Instr) override;
  void lowerUnreachable(const InstUnreachable *Instr) override;
  void prelowerPhis() override;
  uint32_t getCallStackArgumentsSizeBytes(const InstCall *Instr) override;
  void genTargetHelperCallFor(Inst *Instr) override;
  void doAddressOptLoad() override;
  void doAddressOptStore() override;

  OperandARM32Mem *formMemoryOperand(Operand *Ptr, Type Ty);

  Variable64On32 *makeI64RegPair();
  Variable *makeReg(Type Ty, RegNumT RegNum = RegNumT());
  static Type stackSlotType();
  Variable *copyToReg(Operand *Src, RegNumT RegNum = RegNumT());
  void alignRegisterPow2(Variable *Reg, uint32_t Align,
                         RegNumT TmpRegNum = RegNumT());

  /// Returns a vector in a register with the given constant entries.
  Variable *makeVectorOfZeros(Type Ty, RegNumT RegNum = RegNumT());

  // If a divide-by-zero check is needed, inserts a: test; branch .LSKIP; trap;
  // .LSKIP: <continuation>. If no check is needed nothing is inserted.
  void div0Check(Type Ty, Operand *SrcLo, Operand *SrcHi);
  using ExtInstr = void (TargetARM32::*)(Variable *, Variable *,
                                         CondARM32::Cond);
  using DivInstr = void (TargetARM32::*)(Variable *, Variable *, Variable *,
                                         CondARM32::Cond);
  void lowerIDivRem(Variable *Dest, Variable *T, Variable *Src0R,
                    Operand *Src1, ExtInstr ExtFunc, DivInstr DivFunc,
                    bool IsRemainder);

  void lowerCLZ(Variable *Dest, Variable *ValLo, Variable *ValHi);

  // The following are helpers that insert lowered ARM32 instructions with
  // minimal syntactic overhead, so that the lowering code can look as close to
  // assembly as practical.
  void _add(Variable *Dest, Variable *Src0, Operand *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Add>(Dest, Src0, Src1, Pred);
  }
  void _adds(Variable *Dest, Variable *Src0, Operand *Src1,
             CondARM32::Cond Pred = CondARM32::AL) {
    constexpr bool SetFlags = true;
    Context.insert<InstARM32Add>(Dest, Src0, Src1, Pred, SetFlags);
    if (SetFlags) {
      Context.insert<InstFakeUse>(Dest);
    }
  }
  void _adc(Variable *Dest, Variable *Src0, Operand *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Adc>(Dest, Src0, Src1, Pred);
  }
  void _and(Variable *Dest, Variable *Src0, Operand *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32And>(Dest, Src0, Src1, Pred);
  }
  void _asr(Variable *Dest, Variable *Src0, Operand *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Asr>(Dest, Src0, Src1, Pred);
  }
  void _bic(Variable *Dest, Variable *Src0, Operand *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Bic>(Dest, Src0, Src1, Pred);
  }
  void _br(CfgNode *TargetTrue, CfgNode *TargetFalse,
           CondARM32::Cond Condition) {
    Context.insert<InstARM32Br>(TargetTrue, TargetFalse, Condition);
  }
  void _br(CfgNode *Target) { Context.insert<InstARM32Br>(Target); }
  void _br(CfgNode *Target, CondARM32::Cond Condition) {
    Context.insert<InstARM32Br>(Target, Condition);
  }
  void _br(InstARM32Label *Label, CondARM32::Cond Condition) {
    Context.insert<InstARM32Br>(Label, Condition);
  }
  void _cmn(Variable *Src0, Operand *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Cmn>(Src0, Src1, Pred);
  }
  void _cmp(Variable *Src0, Operand *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Cmp>(Src0, Src1, Pred);
  }
  void _clz(Variable *Dest, Variable *Src0,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Clz>(Dest, Src0, Pred);
  }
  void _dmb() { Context.insert<InstARM32Dmb>(); }
  void _eor(Variable *Dest, Variable *Src0, Operand *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Eor>(Dest, Src0, Src1, Pred);
  }
  /// _ldr, for all your memory to Variable data moves. It handles all types
  /// (integer, floating point, and vectors.) Addr needs to be valid for Dest's
  /// type (e.g., no immediates for vector loads, and no index registers for fp
  /// loads.)
  void _ldr(Variable *Dest, OperandARM32Mem *Addr,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Ldr>(Dest, Addr, Pred);
  }
  InstARM32Ldrex *_ldrex(Variable *Dest, OperandARM32Mem *Addr,
                         CondARM32::Cond Pred = CondARM32::AL) {
    auto *Ldrex = Context.insert<InstARM32Ldrex>(Dest, Addr, Pred);
    if (auto *Dest64 = llvm::dyn_cast<Variable64On32>(Dest)) {
      Context.insert<InstFakeDef>(Dest64->getLo(), Dest);
      Context.insert<InstFakeDef>(Dest64->getHi(), Dest);
    }
    return Ldrex;
  }
  void _lsl(Variable *Dest, Variable *Src0, Operand *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Lsl>(Dest, Src0, Src1, Pred);
  }
  void _lsls(Variable *Dest, Variable *Src0, Operand *Src1,
             CondARM32::Cond Pred = CondARM32::AL) {
    constexpr bool SetFlags = true;
    Context.insert<InstARM32Lsl>(Dest, Src0, Src1, Pred, SetFlags);
    if (SetFlags) {
      Context.insert<InstFakeUse>(Dest);
    }
  }
  void _lsr(Variable *Dest, Variable *Src0, Operand *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Lsr>(Dest, Src0, Src1, Pred);
  }
  void _mla(Variable *Dest, Variable *Src0, Variable *Src1, Variable *Acc,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Mla>(Dest, Src0, Src1, Acc, Pred);
  }
  void _mls(Variable *Dest, Variable *Src0, Variable *Src1, Variable *Acc,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Mls>(Dest, Src0, Src1, Acc, Pred);
  }
  /// _mov, for all your Variable to Variable data movement needs. It handles
  /// all types (integer, floating point, and vectors), as well as moves
  /// between Core and VFP registers. This is not a panacea: you must obey the
  /// (weird, confusing, non-uniform) rules for data moves in ARM.
  void _mov(Variable *Dest, Operand *Src0,
            CondARM32::Cond Pred = CondARM32::AL) {
    // _mov used to be unique in the sense that it would create a temporary
    // automagically if Dest was nullptr. It won't do that anymore, so we keep
    // an assert around just in case there is some untested code path where
    // Dest is nullptr.
    assert(Dest != nullptr);
    assert(!llvm::isa<OperandARM32Mem>(Src0));
    auto *Instr = Context.insert<InstARM32Mov>(Dest, Src0, Pred);

    if (Instr->isMultiDest()) {
      // If Instr is multi-dest, then Dest must be a Variable64On32. We add a
      // fake-def for Instr.DestHi here.
      assert(llvm::isa<Variable64On32>(Dest));
      Context.insert<InstFakeDef>(Instr->getDestHi());
    }
  }

  void _mov_redefined(Variable *Dest, Operand *Src0,
                      CondARM32::Cond Pred = CondARM32::AL) {
    auto *Instr = Context.insert<InstARM32Mov>(Dest, Src0, Pred);
    Instr->setDestRedefined();
    if (Instr->isMultiDest()) {
      // If Instr is multi-dest, then Dest must be a Variable64On32. We add a
      // fake-def for Instr.DestHi here.
      assert(llvm::isa<Variable64On32>(Dest));
      Context.insert<InstFakeDef>(Instr->getDestHi());
    }
  }

  void _nop() { Context.insert<InstARM32Nop>(); }

  // Generates a vmov instruction to extract the given index from a vector
  // register.
  void _extractelement(Variable *Dest, Variable *Src0, uint32_t Index,
                       CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Extract>(Dest, Src0, Index, Pred);
  }

  // Generates a vmov instruction to insert a value into the given index of a
  // vector register.
  void _insertelement(Variable *Dest, Variable *Src0, uint32_t Index,
                      CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Insert>(Dest, Src0, Index, Pred);
  }

  // --------------------------------------------------------------------------
  // Begin bool folding machinery.
  //
  // There are three types of boolean lowerings handled by this target:
  //
  // 1) Boolean expressions leading to a boolean Variable definition
  // ---------------------------------------------------------------
  //
  // Whenever an i1 Variable is live out (i.e., its live range extends beyond
  // the defining basic block) we do not fold the operation. We instead
  // materialize (i.e., compute) the variable normally, so that it can be used
  // when needed. We also materialize i1 values that are not single use to
  // avoid code duplication. These expressions are not short circuited.
  //
  // 2) Boolean expressions leading to a select
  // ------------------------------------------
  //
  // These include boolean chains leading to a select instruction, as well as
  // i1 Sexts. These boolean expressions are lowered to:
  //
  //   mov T, <false value>
  //   CC <- eval(Boolean Expression)
  //   movCC T, <true value>
  //
  // For Sexts, <false value> is 0, and <true value> is -1.
  //
  // 3) Boolean expressions leading to a br i1
  // -----------------------------------------
  //
  // These are the boolean chains leading to a branch. These chains are
  // short-circuited, i.e.:
  //
  //   A = or i1 B, C
  //   br i1 A, label %T, label %F
  //
  // becomes
  //
  //   tst B
  //   jne %T
  //   tst C
  //   jne %T
  //   j %F
  //
  // and
  //
  //   A = and i1 B, C
  //   br i1 A, label %T, label %F
  //
  // becomes
  //
  //   tst B
  //   jeq %F
  //   tst C
  //   jeq %F
  //   j %T
  //
  // Arbitrarily long chains are short circuited, e.g.,
  //
  //   A = or i1 B, C
  //   D = and i1 A, E
  //   F = and i1 G, H
  //   I = or i1 D, F
  //   br i1 I, label %True, label %False
  //
  // becomes
  //
  // Label[A]:
  //   tst B, 1
  //   bne Label[D]
  //   tst C, 1
  //   beq Label[I]
  // Label[D]:
  //   tst E, 1
  //   bne %True
  // Label[I]:
  //   tst G, 1
  //   beq %False
  //   tst H, 1
  //   beq %False (bne %True)

  /// lowerInt1 materializes Boolean to a Variable.
  SafeBoolChain lowerInt1(Variable *Dest, Operand *Boolean);

  /// lowerInt1ForSelect generates the following instruction sequence:
  ///
  ///   mov T, FalseValue
  ///   CC <- eval(Boolean)
  ///   movCC T, TrueValue
  ///   mov Dest, T
  ///
  /// It is used for lowering select i1, as well as i1 Sext.
  void lowerInt1ForSelect(Variable *Dest, Operand *Boolean, Operand *TrueValue,
                          Operand *FalseValue);

  /// LowerInt1BranchTarget is used by lowerInt1ForBranch. It wraps a CfgNode,
  /// or an InstARM32Label (but never both) so that, during br i1 lowering, we
  /// can create auxiliary labels for short circuiting the condition
  /// evaluation.
  class LowerInt1BranchTarget {
  public:
    explicit LowerInt1BranchTarget(CfgNode *const Target)
        : NodeTarget(Target) {}
    explicit LowerInt1BranchTarget(InstARM32Label *const Target)
        : LabelTarget(Target) {}

    /// createForLabelOrDuplicate will return a new LowerInt1BranchTarget that
    /// is the exact copy of this if Label is nullptr; otherwise, the returned
    /// object will wrap Label instead.
    LowerInt1BranchTarget
    createForLabelOrDuplicate(InstARM32Label *Label) const {
      if (Label != nullptr)
        return LowerInt1BranchTarget(Label);
      if (NodeTarget)
        return LowerInt1BranchTarget(NodeTarget);
      return LowerInt1BranchTarget(LabelTarget);
    }

    CfgNode *const NodeTarget = nullptr;
    InstARM32Label *const LabelTarget = nullptr;
  };

  /// LowerInt1AllowShortCircuit is a helper type used by lowerInt1ForBranch
  /// for determining which type of arithmetic is allowed to be short
  /// circuited. This is useful for lowering
  ///
  ///   t1 = and i1 A, B
  ///   t2 = and i1 t1, C
  ///   br i1 t2, label %True, label %False
  ///
  /// to
  ///
  ///   tst A, 1
  ///   beq %False
  ///   tst B, 1
  ///   beq %False
  ///   tst C, 1
  ///   bne %True
  ///   b %False
  ///
  /// Without this information, short circuiting would only be able to short
  /// circuit a single high level instruction. For example:
  ///
  ///   t1 = or i1 A, B
  ///   t2 = and i1 t1, C
  ///   br i1 t2, label %True, label %False
  ///
  /// cannot be lowered to
  ///
  ///   tst A, 1
  ///   bne %True
  ///   tst B, 1
  ///   bne %True
  ///   tst C, 1
  ///   beq %True
  ///   b %False
  ///
  /// It needs to be lowered to
  ///
  ///   tst A, 1
  ///   bne Aux
  ///   tst B, 1
  ///   beq %False
  /// Aux:
  ///   tst C, 1
  ///   bne %True
  ///   b %False
  ///
  /// TODO(jpp): evaluate if this kind of short circuiting hurts performance
  /// (it might.)
  enum LowerInt1AllowShortCircuit {
    SC_And = 1,
    SC_Or = 2,
    SC_All = SC_And | SC_Or,
  };

  /// ShortCircuitCondAndLabel wraps the condition codes that should be used
  /// after a lowerInt1ForBranch returns to branch to the
  /// TrueTarget/FalseTarget. If ShortCircuitLabel is not nullptr, then the
  /// called lowerInt1ForBranch created an internal (i.e., short-circuit) label
  /// used for short circuiting.
  class ShortCircuitCondAndLabel {
  public:
    explicit ShortCircuitCondAndLabel(CondWhenTrue &&C,
                                      InstARM32Label *L = nullptr)
        : Cond(std::move(C)), ShortCircuitTarget(L) {}
    const CondWhenTrue Cond;
    InstARM32Label *const ShortCircuitTarget;

    CondWhenTrue assertNoLabelAndReturnCond() const {
      assert(ShortCircuitTarget == nullptr);
      return Cond;
    }
  };

  /// lowerInt1ForBranch expands Boolean, and returns the condition codes that
  /// are to be used for branching to the branch's TrueTarget. It may return a
  /// label that the expansion of Boolean used to short circuit the chain's
  /// evaluation.
  ShortCircuitCondAndLabel
  lowerInt1ForBranch(Operand *Boolean, const LowerInt1BranchTarget &TargetTrue,
                     const LowerInt1BranchTarget &TargetFalse,
                     uint32_t ShortCircuitable);

  // _br is a convenience wrapper that emits br instructions to Target.
  void _br(const LowerInt1BranchTarget &BrTarget,
           CondARM32::Cond Cond = CondARM32::AL) {
    assert((BrTarget.NodeTarget == nullptr) !=
           (BrTarget.LabelTarget == nullptr));
    if (BrTarget.NodeTarget != nullptr)
      _br(BrTarget.NodeTarget, Cond);
    else
      _br(BrTarget.LabelTarget, Cond);
  }

  // _br_short_circuit is used when lowering InstArithmetic::And and
  // InstArithmetic::Or and a short circuit branch is needed.
  void _br_short_circuit(const LowerInt1BranchTarget &Target,
                         const CondWhenTrue &Cond) {
    if (Cond.WhenTrue1 != CondARM32::kNone) {
      _br(Target, Cond.WhenTrue1);
    }
    if (Cond.WhenTrue0 != CondARM32::kNone) {
      _br(Target, Cond.WhenTrue0);
    }
  }
  // End of bool folding machinery
  // --------------------------------------------------------------------------

  /// The Operand can only be a 16-bit immediate or a ConstantRelocatable (with
  /// an upper16 relocation).
  void _movt(Variable *Dest, Operand *Src0,
             CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Movt>(Dest, Src0, Pred);
  }
  void _movw(Variable *Dest, Operand *Src0,
             CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Movw>(Dest, Src0, Pred);
  }
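  // A full 32-bit constant (or symbol address) is typically materialized with
  // a movw/movt pair, roughly (illustrative sketch; the operand names are
  // placeholders):
  //
  //   _movw(Reg, Lower16BitsOrLower16Reloc);
  //   _movt(Reg, Upper16BitsOrUpper16Reloc);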
  void _mul(Variable *Dest, Variable *Src0, Variable *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Mul>(Dest, Src0, Src1, Pred);
  }
  void _mvn(Variable *Dest, Operand *Src0,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Mvn>(Dest, Src0, Pred);
  }
  void _orr(Variable *Dest, Variable *Src0, Operand *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Orr>(Dest, Src0, Src1, Pred);
  }
  void _orrs(Variable *Dest, Variable *Src0, Operand *Src1,
             CondARM32::Cond Pred = CondARM32::AL) {
    constexpr bool SetFlags = true;
    Context.insert<InstARM32Orr>(Dest, Src0, Src1, Pred, SetFlags);
    if (SetFlags) {
      Context.insert<InstFakeUse>(Dest);
    }
  }
  void _push(const VarList &Sources) { Context.insert<InstARM32Push>(Sources); }
  void _pop(const VarList &Dests) {
    Context.insert<InstARM32Pop>(Dests);
    // Mark dests as modified.
    for (Variable *Dest : Dests)
      Context.insert<InstFakeDef>(Dest);
  }
  void _rbit(Variable *Dest, Variable *Src0,
             CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Rbit>(Dest, Src0, Pred);
  }
  void _rev(Variable *Dest, Variable *Src0,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Rev>(Dest, Src0, Pred);
  }
  void _ret(Variable *LR, Variable *Src0 = nullptr) {
    Context.insert<InstARM32Ret>(LR, Src0);
  }
  void _rscs(Variable *Dest, Variable *Src0, Operand *Src1,
             CondARM32::Cond Pred = CondARM32::AL) {
    constexpr bool SetFlags = true;
    Context.insert<InstARM32Rsc>(Dest, Src0, Src1, Pred, SetFlags);
    if (SetFlags) {
      Context.insert<InstFakeUse>(Dest);
    }
  }
  void _rsc(Variable *Dest, Variable *Src0, Operand *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Rsc>(Dest, Src0, Src1, Pred);
  }
  void _rsbs(Variable *Dest, Variable *Src0, Operand *Src1,
             CondARM32::Cond Pred = CondARM32::AL) {
    constexpr bool SetFlags = true;
    Context.insert<InstARM32Rsb>(Dest, Src0, Src1, Pred, SetFlags);
    if (SetFlags) {
      Context.insert<InstFakeUse>(Dest);
    }
  }
  void _rsb(Variable *Dest, Variable *Src0, Operand *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Rsb>(Dest, Src0, Src1, Pred);
  }
  void _sbc(Variable *Dest, Variable *Src0, Operand *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Sbc>(Dest, Src0, Src1, Pred);
  }
  void _sbcs(Variable *Dest, Variable *Src0, Operand *Src1,
             CondARM32::Cond Pred = CondARM32::AL) {
    constexpr bool SetFlags = true;
    Context.insert<InstARM32Sbc>(Dest, Src0, Src1, Pred, SetFlags);
    if (SetFlags) {
      Context.insert<InstFakeUse>(Dest);
    }
  }
  void _sdiv(Variable *Dest, Variable *Src0, Variable *Src1,
             CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Sdiv>(Dest, Src0, Src1, Pred);
  }
  /// _str, for all your Variable to memory transfers. Addr has the same
  /// restrictions that it does in _ldr.
  void _str(Variable *Value, OperandARM32Mem *Addr,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Str>(Value, Addr, Pred);
  }
  InstARM32Strex *_strex(Variable *Dest, Variable *Value,
                         OperandARM32Mem *Addr,
                         CondARM32::Cond Pred = CondARM32::AL) {
    if (auto *Value64 = llvm::dyn_cast<Variable64On32>(Value)) {
      Context.insert<InstFakeUse>(Value64->getLo());
      Context.insert<InstFakeUse>(Value64->getHi());
    }
    return Context.insert<InstARM32Strex>(Dest, Value, Addr, Pred);
  }
  void _sub(Variable *Dest, Variable *Src0, Operand *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Sub>(Dest, Src0, Src1, Pred);
  }
  void _subs(Variable *Dest, Variable *Src0, Operand *Src1,
             CondARM32::Cond Pred = CondARM32::AL) {
    constexpr bool SetFlags = true;
    Context.insert<InstARM32Sub>(Dest, Src0, Src1, Pred, SetFlags);
    if (SetFlags) {
      Context.insert<InstFakeUse>(Dest);
    }
  }
  void _sxt(Variable *Dest, Variable *Src0,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Sxt>(Dest, Src0, Pred);
  }
  void _tst(Variable *Src0, Operand *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Tst>(Src0, Src1, Pred);
  }
  void _trap() { Context.insert<InstARM32Trap>(); }
  void _udiv(Variable *Dest, Variable *Src0, Variable *Src1,
             CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Udiv>(Dest, Src0, Src1, Pred);
  }
  void _umull(Variable *DestLo, Variable *DestHi, Variable *Src0,
              Variable *Src1, CondARM32::Cond Pred = CondARM32::AL) {
    // umull requires DestLo and DestHi to be assigned to different GPRs. The
    // following lines create overlapping liveness ranges for both variables.
    // If either one of them is live, then they are both going to be live, and
    // thus assigned to different registers; if they are both dead, then DCE
    // will kick in and delete the following three instructions.
    Context.insert<InstFakeDef>(DestHi);
    Context.insert<InstARM32Umull>(DestLo, DestHi, Src0, Src1, Pred);
    Context.insert<InstFakeDef>(DestHi, DestLo)->setDestRedefined();
    Context.insert<InstFakeUse>(DestHi);
  }
  void _uxt(Variable *Dest, Variable *Src0,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Uxt>(Dest, Src0, Pred);
  }
  void _vabs(Variable *Dest, Variable *Src,
             CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Vabs>(Dest, Src, Pred);
  }
  void _vadd(Variable *Dest, Variable *Src0, Variable *Src1) {
    Context.insert<InstARM32Vadd>(Dest, Src0, Src1);
  }
  void _vand(Variable *Dest, Variable *Src0, Variable *Src1) {
    Context.insert<InstARM32Vand>(Dest, Src0, Src1);
  }
  InstARM32Vbsl *_vbsl(Variable *Dest, Variable *Src0, Variable *Src1) {
    return Context.insert<InstARM32Vbsl>(Dest, Src0, Src1);
  }
  void _vceq(Variable *Dest, Variable *Src0, Variable *Src1) {
    Context.insert<InstARM32Vceq>(Dest, Src0, Src1);
  }
  InstARM32Vcge *_vcge(Variable *Dest, Variable *Src0, Variable *Src1) {
    return Context.insert<InstARM32Vcge>(Dest, Src0, Src1);
  }
  InstARM32Vcgt *_vcgt(Variable *Dest, Variable *Src0, Variable *Src1) {
    return Context.insert<InstARM32Vcgt>(Dest, Src0, Src1);
  }
  void _vcvt(Variable *Dest, Variable *Src, InstARM32Vcvt::VcvtVariant Variant,
             CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Vcvt>(Dest, Src, Variant, Pred);
  }
  void _vdiv(Variable *Dest, Variable *Src0, Variable *Src1) {
    Context.insert<InstARM32Vdiv>(Dest, Src0, Src1);
  }
  void _vcmp(Variable *Src0, Variable *Src1,
             CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Vcmp>(Src0, Src1, Pred);
  }
  void _vcmp(Variable *Src0, OperandARM32FlexFpZero *FpZero,
             CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Vcmp>(Src0, FpZero, Pred);
  }
  void _vdup(Variable *Dest, Variable *Src, int Idx) {
    Context.insert<InstARM32Vdup>(Dest, Src, Idx);
  }
  void _veor(Variable *Dest, Variable *Src0, Variable *Src1) {
    Context.insert<InstARM32Veor>(Dest, Src0, Src1);
  }
  void _vldr1d(Variable *Dest, OperandARM32Mem *Addr,
               CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Vldr1d>(Dest, Addr, Pred);
  }
  void _vldr1q(Variable *Dest, OperandARM32Mem *Addr,
               CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Vldr1q>(Dest, Addr, Pred);
  }
  void _vmrs(CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Vmrs>(Pred);
  }
  void _vmla(Variable *Dest, Variable *Src0, Variable *Src1) {
    Context.insert<InstARM32Vmla>(Dest, Src0, Src1);
  }
  void _vmlap(Variable *Dest, Variable *Src0, Variable *Src1) {
    Context.insert<InstARM32Vmlap>(Dest, Src0, Src1);
  }
  void _vmls(Variable *Dest, Variable *Src0, Variable *Src1) {
    Context.insert<InstARM32Vmls>(Dest, Src0, Src1);
  }
  void _vmovl(Variable *Dest, Variable *Src0, Variable *Src1) {
    Context.insert<InstARM32Vmovl>(Dest, Src0, Src1);
  }
  void _vmovh(Variable *Dest, Variable *Src0, Variable *Src1) {
    Context.insert<InstARM32Vmovh>(Dest, Src0, Src1);
  }
  void _vmovhl(Variable *Dest, Variable *Src0, Variable *Src1) {
    Context.insert<InstARM32Vmovhl>(Dest, Src0, Src1);
  }
  void _vmovlh(Variable *Dest, Variable *Src0, Variable *Src1) {
    Context.insert<InstARM32Vmovlh>(Dest, Src0, Src1);
  }
  void _vmul(Variable *Dest, Variable *Src0, Variable *Src1) {
    Context.insert<InstARM32Vmul>(Dest, Src0, Src1);
  }
  void _vmulh(Variable *Dest, Variable *Src0, Variable *Src1, bool Unsigned) {
    Context.insert<InstARM32Vmulh>(Dest, Src0, Src1)
        ->setSignType(Unsigned ? InstARM32::FS_Unsigned : InstARM32::FS_Signed);
  }
  void _vmvn(Variable *Dest, Variable *Src0) {
    Context.insert<InstARM32Vmvn>(Dest, Src0, CondARM32::AL);
  }
  void _vneg(Variable *Dest, Variable *Src0) {
    Context.insert<InstARM32Vneg>(Dest, Src0, CondARM32::AL)
        ->setSignType(InstARM32::FS_Signed);
  }
  void _vorr(Variable *Dest, Variable *Src0, Variable *Src1) {
    Context.insert<InstARM32Vorr>(Dest, Src0, Src1);
  }
  void _vqadd(Variable *Dest, Variable *Src0, Variable *Src1, bool Unsigned) {
    Context.insert<InstARM32Vqadd>(Dest, Src0, Src1)
        ->setSignType(Unsigned ? InstARM32::FS_Unsigned : InstARM32::FS_Signed);
  }
  void _vqmovn2(Variable *Dest, Variable *Src0, Variable *Src1, bool Unsigned,
                bool Saturating) {
    Context.insert<InstARM32Vqmovn2>(Dest, Src0, Src1)
        ->setSignType(Saturating ? (Unsigned ? InstARM32::FS_Unsigned
                                             : InstARM32::FS_Signed)
                                 : InstARM32::FS_None);
  }
  void _vqsub(Variable *Dest, Variable *Src0, Variable *Src1, bool Unsigned) {
    Context.insert<InstARM32Vqsub>(Dest, Src0, Src1)
        ->setSignType(Unsigned ? InstARM32::FS_Unsigned : InstARM32::FS_Signed);
  }
  InstARM32Vshl *_vshl(Variable *Dest, Variable *Src0, Variable *Src1) {
    return Context.insert<InstARM32Vshl>(Dest, Src0, Src1);
  }
  void _vshl(Variable *Dest, Variable *Src0, ConstantInteger32 *Src1) {
    Context.insert<InstARM32Vshl>(Dest, Src0, Src1)
        ->setSignType(InstARM32::FS_Unsigned);
  }
  InstARM32Vshr *_vshr(Variable *Dest, Variable *Src0,
                       ConstantInteger32 *Src1) {
    return Context.insert<InstARM32Vshr>(Dest, Src0, Src1);
  }
  void _vsqrt(Variable *Dest, Variable *Src,
              CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Vsqrt>(Dest, Src, Pred);
  }
  void _vstr1d(Variable *Value, OperandARM32Mem *Addr,
               CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Vstr1>(Value, Addr, Pred, 32);
  }
  void _vstr1q(Variable *Value, OperandARM32Mem *Addr,
               CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Vstr1>(Value, Addr, Pred, 64);
  }
  void _vsub(Variable *Dest, Variable *Src0, Variable *Src1) {
    Context.insert<InstARM32Vsub>(Dest, Src0, Src1);
  }
  void _vzip(Variable *Dest, Variable *Src0, Variable *Src1) {
    Context.insert<InstARM32Vzip>(Dest, Src0, Src1);
  }

  // Iterates over the CFG and determines the maximum size in bytes of outgoing
  // stack arguments. This information is later used during addProlog() to
  // pre-allocate the outargs area.
  // TODO(jpp): This could live in the Parser, if we provided a Target-specific
  // method that the Parser could call.
  void findMaxStackOutArgsSize();

  /// Returns true if the given Offset can be represented in a Load/Store Mem
  /// Operand.
  bool isLegalMemOffset(Type Ty, int32_t Offset) const;

  void postLowerLegalization();

  /// Manages the GotPtr variable, which is used for Nonsfi sandboxing.
  /// @{
  void createGotPtr();
  void insertGotPtrInitPlaceholder();
  VariableDeclaration *createGotRelocation(RelocOffset *AddPcReloc);
  void materializeGotAddr(CfgNode *Node);
  Variable *GotPtr = nullptr;
  // TODO(jpp): use CfgLocalAllocator.
  /// @}

  /// Manages the Gotoff relocations created during the function lowering. A
  /// single Gotoff relocation is created for each global variable used by the
  /// function being lowered.
  /// @{
  // TODO(jpp): if the same global G is used in different functions, then this
  // method will emit one G(gotoff) relocation per function.
  GlobalString createGotoffRelocation(const ConstantRelocatable *CR);
  CfgUnorderedSet<GlobalString> KnownGotoffs;
  /// @}

  /// Loads the constant relocatable Name to Register. Then invoke Finish to
  /// finish the relocatable lowering. Finish **must** use PC in its first
  /// emitted instruction, or the relocatable in Register will contain the
  /// wrong value.
  //
  // Lowered sequence:
  //
  // Movw:
  //   movw Register, #:lower16:Name - (End - Movw) - 8 .
  // Movt:
  //   movt Register, #:upper16:Name - (End - Movt) - 8 .
  //   PC = fake-def
  // End:
  //   Finish(PC)
  //
  // The -8 in movw/movt above is to account for the PC value that the first
  // instruction emitted by Finish(PC) will read.
  void
  loadNamedConstantRelocatablePIC(GlobalString Name, Variable *Register,
                                  std::function<void(Variable *PC)> Finish);

  /// Sandboxer defines methods for ensuring that "dangerous" operations are
  /// masked during sandboxed code emission. For regular, non-sandboxed code
  /// emission, its methods are simple pass-through methods.
  ///
  /// The Sandboxer also emits BundleLock/BundleUnlock pseudo-instructions
  /// in the constructor/destructor during sandboxed code emission. Therefore,
  /// it is a bad idea to create an object of this type and "keep it around."
  /// The recommended usage is:
  ///
  ///   Sandboxer(this).<<operation>>(...);
  ///
  /// This usage ensures that no other instructions are inadvertently added to
  /// the bundle.
  class Sandboxer {
    Sandboxer() = delete;
    Sandboxer(const Sandboxer &) = delete;
    Sandboxer &operator=(const Sandboxer &) = delete;

  public:
    explicit Sandboxer(
        TargetARM32 *Target,
        InstBundleLock::Option BundleOption = InstBundleLock::Opt_None);
    ~Sandboxer();

    /// Increments sp:
    ///
    ///   add sp, sp, AddAmount
    ///   bic sp, sp, 0xc0000000
    ///
    /// (for the rationale, see the ARM 32-bit Sandbox Specification.)
    void add_sp(Operand *AddAmount);

    /// Emits code to align sp to the specified alignment:
    ///
    ///   bic/and sp, sp, Alignment
    ///   bic sp, sp, 0xc0000000
    void align_sp(size_t Alignment);

    /// Emits a call instruction. If CallTarget is a Variable, it emits
    ///
    ///   bic CallTarget, CallTarget, 0xc000000f
    ///   bl CallTarget
    ///
    /// Otherwise, it emits
    ///
    ///   bl CallTarget
    ///
    /// Note: in sandboxed code calls are always emitted at addresses that are
    /// 12 mod 16.
    InstARM32Call *bl(Variable *ReturnReg, Operand *CallTarget);

    /// Emits a load:
    ///
    ///   bic rBase, rBase, 0xc0000000
    ///   ldr rDest, [rBase, #Offset]
    ///
    /// Exception: if rBase is r9 or sp, then the load is emitted as:
    ///
    ///   ldr rDest, [rBase, #Offset]
    ///
    /// because the NaCl ARM 32-bit Sandbox Specification guarantees they are
    /// always valid.
    void ldr(Variable *Dest, OperandARM32Mem *Mem, CondARM32::Cond Pred);

    /// Emits a load exclusive:
    ///
    ///   bic rBase, rBase, 0xc0000000
    ///   ldrex rDest, [rBase]
    ///
    /// Exception: if rBase is r9 or sp, then the load is emitted as:
    ///
    ///   ldrex rDest, [rBase]
    ///
    /// because the NaCl ARM 32-bit Sandbox Specification guarantees they are
    /// always valid.
    void ldrex(Variable *Dest, OperandARM32Mem *Mem, CondARM32::Cond Pred);

    /// Resets sp to Src:
    ///
    ///   mov sp, Src
    ///   bic sp, sp, 0xc0000000
    void reset_sp(Variable *Src);

    /// Emits code to return from a function:
    ///
    ///   bic lr, lr, 0xc000000f
    ///   bx lr
    void ret(Variable *RetAddr, Variable *RetValue);

    /// Emits a store:
    ///
    ///   bic rBase, rBase, 0xc0000000
    ///   str rSrc, [rBase, #Offset]
    ///
    /// Exception: if rBase is r9 or sp, then the store is emitted as:
    ///
    ///   str rSrc, [rBase, #Offset]
    ///
    /// because the NaCl ARM 32-bit Sandbox Specification guarantees they are
    /// always valid.
    void str(Variable *Src, OperandARM32Mem *Mem, CondARM32::Cond Pred);

    /// Emits a store exclusive:
    ///
    ///   bic rBase, rBase, 0xc0000000
    ///   strex rDest, rSrc, [rBase]
    ///
    /// Exception: if rBase is r9 or sp, then the store is emitted as:
    ///
    ///   strex rDest, rSrc, [rBase]
    ///
    /// because the NaCl ARM 32-bit Sandbox Specification guarantees they are
    /// always valid.
    void strex(Variable *Dest, Variable *Src, OperandARM32Mem *Mem,
               CondARM32::Cond Pred);

    /// Decrements sp:
    ///
    ///   sub sp, sp, SubAmount
    ///   bic sp, sp, 0xc0000000
    void sub_sp(Operand *SubAmount);

  private:
    TargetARM32 *const Target;
    const InstBundleLock::Option BundleOption;
    std::unique_ptr<AutoBundle> Bundler;

    void createAutoBundle();
  };

  class PostLoweringLegalizer {
    PostLoweringLegalizer() = delete;
    PostLoweringLegalizer(const PostLoweringLegalizer &) = delete;
    PostLoweringLegalizer &operator=(const PostLoweringLegalizer &) = delete;

  public:
    explicit PostLoweringLegalizer(TargetARM32 *Target)
        : Target(Target), StackOrFrameReg(Target->getPhysicalRegister(
                              Target->getFrameOrStackReg())) {}

    void resetTempBaseIfClobberedBy(const Inst *Instr);

    // Ensures that the TempBase register held by this legalizer (if any) is
    // assigned to IP.
    void assertNoTempOrAssignedToIP() const {
      assert(TempBaseReg == nullptr ||
             TempBaseReg->getRegNum() == Target->getReservedTmpReg());
    }

    // Legalizes Mem. If Mem.Base is a rematerializable variable, Mem.Offset is
    // fixed up.
    OperandARM32Mem *legalizeMemOperand(OperandARM32Mem *Mem,
                                        bool AllowOffsets = true);

    /// Legalizes Mov if its Source (or Destination) is a spilled Variable, or
    /// if its Source is a Rematerializable variable (this form is used in lieu
    /// of lea, which is not available in ARM.)
    ///
    /// Moves to memory become store instructions, and moves from memory,
    /// loads.
    void legalizeMov(InstARM32Mov *Mov);

  private:
    /// Creates a new Base register centered around [Base, +/- Offset].
    Variable *newBaseRegister(Variable *Base, int32_t Offset,
                              RegNumT ScratchRegNum);

    /// Creates a new, legal OperandARM32Mem for accessing Base + Offset.
    /// The returned mem operand is a legal operand for accessing memory that
    /// is of type Ty.
    ///
    /// If [Base, #Offset] is encodable, then the method returns a Mem operand
    /// expressing it. Otherwise,
    ///
    /// if [TempBaseReg, #Offset-TempBaseOffset] is a valid memory operand, the
    /// method will return that. Otherwise,
    ///
    /// a new base register ip=Base+Offset is created, and the method returns a
    /// memory operand expressing [ip, #0].
    OperandARM32Mem *createMemOperand(Type Ty, Variable *Base, int32_t Offset,
                                      bool AllowOffsets = true);
    TargetARM32 *const Target;
    Variable *const StackOrFrameReg;
    Variable *TempBaseReg = nullptr;
    int32_t TempBaseOffset = 0;
  };

  const bool NeedSandboxing;
  TargetARM32Features CPUFeatures;
  bool UsesFramePointer = false;
  bool NeedsStackAlignment = false;
  bool MaybeLeafFunc = true;
  size_t SpillAreaSizeBytes = 0;
  size_t FixedAllocaSizeBytes = 0;
  size_t FixedAllocaAlignBytes = 0;
  bool PrologEmitsFixedAllocas = false;
  uint32_t MaxOutArgsSizeBytes = 0;
  // TODO(jpp): std::array instead of array.
  static SmallBitVector TypeToRegisterSet[RegARM32::RCARM32_NUM];
  static SmallBitVector TypeToRegisterSetUnfiltered[RegARM32::RCARM32_NUM];
  static SmallBitVector RegisterAliases[RegARM32::Reg_NUM];
  SmallBitVector RegsUsed;
  VarList PhysicalRegisters[IceType_NUM];
  VarList PreservedGPRs;
  VarList PreservedSRegs;

  /// Helper class that understands the Calling Convention and register
  /// assignments. The first few integer type parameters can use r0-r3,
  /// regardless of their position relative to the floating-point/vector
  /// arguments in the argument list. Floating-point and vector arguments
  /// can use q0-q3 (aka d0-d7, s0-s15). For more information on the topic,
  /// see the ARM Architecture Procedure Calling Standards (AAPCS).
  ///
  /// Technically, arguments that start in registers but extend beyond the
  /// available registers can be split between the registers and the stack.
  /// However, this is typically for passing GPR structs by value, and PNaCl
  /// transforms expand this out.
  ///
  /// At (public) function entry, the stack must be 8-byte aligned.
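  ///
  /// For illustration only (a sketch of standard AAPCS-VFP assignment, not a
  /// statement about this class's exact behavior): for a call
  /// f(i32 a, float b, i64 c, double d), a would be passed in r0, b in s0,
  /// c in the even-aligned pair r2:r3, and d in d1 (d0 is skipped because it
  /// aliases s0).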
  class CallingConv {
    CallingConv(const CallingConv &) = delete;
    CallingConv &operator=(const CallingConv &) = delete;

  public:
    CallingConv();
    ~CallingConv() = default;

    /// argInGPR returns true if there is a GPR available for the requested
    /// type, and false otherwise. If it returns true, Reg is set to the
    /// appropriate register number. Note that, when Ty == IceType_i64, Reg
    /// will be an I64 register pair.
    bool argInGPR(Type Ty, RegNumT *Reg);

    /// argInVFP is to floating-point/vector types what argInGPR is for integer
    /// types.
    bool argInVFP(Type Ty, RegNumT *Reg);

  private:
    void discardUnavailableGPRsAndTheirAliases(CfgVector<RegNumT> *Regs);
    SmallBitVector GPRegsUsed;
    CfgVector<RegNumT> GPRArgs;
    CfgVector<RegNumT> I64Args;

    void discardUnavailableVFPRegs(CfgVector<RegNumT> *Regs);
    SmallBitVector VFPRegsUsed;
    CfgVector<RegNumT> FP32Args;
    CfgVector<RegNumT> FP64Args;
    CfgVector<RegNumT> Vec128Args;
  };

private:
  ENABLE_MAKE_UNIQUE;

  OperandARM32Mem *formAddressingMode(Type Ty, Cfg *Func, const Inst *LdSt,
                                      Operand *Base);

  void postambleCtpop64(const InstCall *Instr);
  void preambleDivRem(const InstCall *Instr);
  CfgUnorderedMap<Operand *, void (TargetARM32::*)(const InstCall *Instr)>
      ARM32HelpersPreamble;
  CfgUnorderedMap<Operand *, void (TargetARM32::*)(const InstCall *Instr)>
      ARM32HelpersPostamble;

  class ComputationTracker {
  public:
    ComputationTracker() = default;
    ~ComputationTracker() = default;

    void forgetProducers() { KnownComputations.clear(); }
    void recordProducers(CfgNode *Node);

    const Inst *getProducerOf(const Operand *Opnd) const {
      auto *Var = llvm::dyn_cast<Variable>(Opnd);
      if (Var == nullptr) {
        return nullptr;
      }

      auto Iter = KnownComputations.find(Var->getIndex());
      if (Iter == KnownComputations.end()) {
        return nullptr;
      }

      return Iter->second.Instr;
    }

    void dump(const Cfg *Func) const {
      if (!BuildDefs::dump() || !Func->isVerbose(IceV_Folding))
        return;
      OstreamLocker L(Func->getContext());
      Ostream &Str = Func->getContext()->getStrDump();
      Str << "foldable producer:\n";
      for (const auto &Computation : KnownComputations) {
        Str << " ";
        Computation.second.Instr->dump(Func);
        Str << "\n";
      }
      Str << "\n";
    }

  private:
    class ComputationEntry {
    public:
      ComputationEntry(Inst *I, Type Ty) : Instr(I), ComputationType(Ty) {}
      Inst *const Instr;
      // Boolean folding is disabled for variables whose live range spans
      // multiple blocks. We conservatively initialize IsLiveOut to true, and
      // set it to false once we find the end of the live range for the
      // variable defined by this instruction. If liveness analysis is not
      // performed (e.g., in Om1 mode) IsLiveOut will never be set to false,
      // and folding will be disabled.
      bool IsLiveOut = true;
      int32_t NumUses = 0;
      Type ComputationType;
    };

    // ComputationMap maps a Variable number to a payload identifying which
    // instruction defined it.
    using ComputationMap = CfgUnorderedMap<SizeT, ComputationEntry>;
    ComputationMap KnownComputations;
  };

  ComputationTracker Computations;

  // AllowTemporaryWithNoReg indicates if TargetARM32::makeReg() can be invoked
  // without specifying a physical register. This is needed for creating
  // unbound temporaries during Ice -> ARM lowering, but before register
  // allocation. This is a safeguard ensuring that no unbound temporaries are
  // created during the legalization post-passes.
  bool AllowTemporaryWithNoReg = true;
  // ForbidTemporaryWithoutReg is a RAII class that manages
  // AllowTemporaryWithNoReg.
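  // Typical usage is to instantiate it on the stack for the duration of a
  // post-lowering pass (illustrative sketch):
  //
  //   ForbidTemporaryWithoutReg NoTmpReg(this);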
  class ForbidTemporaryWithoutReg {
    ForbidTemporaryWithoutReg() = delete;
    ForbidTemporaryWithoutReg(const ForbidTemporaryWithoutReg &) = delete;
    ForbidTemporaryWithoutReg &
    operator=(const ForbidTemporaryWithoutReg &) = delete;

  public:
    explicit ForbidTemporaryWithoutReg(TargetARM32 *Target) : Target(Target) {
      Target->AllowTemporaryWithNoReg = false;
    }
    ~ForbidTemporaryWithoutReg() { Target->AllowTemporaryWithNoReg = true; }

  private:
    TargetARM32 *const Target;
  };
};

class TargetDataARM32 final : public TargetDataLowering {
  TargetDataARM32() = delete;
  TargetDataARM32(const TargetDataARM32 &) = delete;
  TargetDataARM32 &operator=(const TargetDataARM32 &) = delete;

public:
  static std::unique_ptr<TargetDataLowering> create(GlobalContext *Ctx) {
    return std::unique_ptr<TargetDataLowering>(new TargetDataARM32(Ctx));
  }

  void lowerGlobals(const VariableDeclarationList &Vars,
                    const std::string &SectionSuffix) override;
  void lowerConstants() override;
  void lowerJumpTables() override;

protected:
  explicit TargetDataARM32(GlobalContext *Ctx);

private:
  ~TargetDataARM32() override = default;
};

class TargetHeaderARM32 final : public TargetHeaderLowering {
  TargetHeaderARM32() = delete;
  TargetHeaderARM32(const TargetHeaderARM32 &) = delete;
  TargetHeaderARM32 &operator=(const TargetHeaderARM32 &) = delete;

public:
  static std::unique_ptr<TargetHeaderLowering> create(GlobalContext *Ctx) {
    return std::unique_ptr<TargetHeaderLowering>(new TargetHeaderARM32(Ctx));
  }

  void lower() override;

protected:
  explicit TargetHeaderARM32(GlobalContext *Ctx);

private:
  ~TargetHeaderARM32() = default;

  TargetARM32Features CPUFeatures;
};

} // end of namespace ARM32
} // end of namespace Ice

#endif // SUBZERO_SRC_ICETARGETLOWERINGARM32_H