//===- subzero/src/IceTargetLowering.h - Lowering interface ----*- C++ -*-===//
//
//                        The Subzero Code Generator
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// \brief Declares the TargetLowering, LoweringContext, and TargetDataLowering
/// classes.
///
/// TargetLowering is an abstract class used to drive the translation/lowering
/// process. LoweringContext maintains a context for lowering each instruction,
/// offering conveniences such as iterating over non-deleted instructions.
/// TargetDataLowering is an abstract class used to drive the lowering/emission
/// of global initializers, external global declarations, and internal constant
/// pools.
///
//===----------------------------------------------------------------------===//

#ifndef SUBZERO_SRC_ICETARGETLOWERING_H
#define SUBZERO_SRC_ICETARGETLOWERING_H

#include "IceBitVector.h"
#include "IceCfgNode.h"
#include "IceDefs.h"
#include "IceInst.h" // for the names of the Inst subtypes
#include "IceOperand.h"
#include "IceRegAlloc.h"
#include "IceTypes.h"

#include <utility>

namespace Ice {

// UnimplementedError is defined as a macro so that we can get actual line
// numbers.
#define UnimplementedError(Flags)                                              \
  do {                                                                         \
    if (!static_cast<const ClFlags &>(Flags).getSkipUnimplemented()) {         \
      /* Use llvm_unreachable instead of report_fatal_error, which gives       \
         better stack traces. */                                               \
      llvm_unreachable("Not yet implemented");                                 \
      abort();                                                                 \
    }                                                                          \
  } while (0)

// UnimplementedLoweringError is similar in style to UnimplementedError. Given
// a TargetLowering object pointer and an Inst pointer, it adds appropriate
// FakeDef and FakeUse instructions to try to maintain liveness consistency.
#define UnimplementedLoweringError(Target, Instr)                              \
  do {                                                                         \
    if (getFlags().getSkipUnimplemented()) {                                   \
      (Target)->addFakeDefUses(Instr);                                         \
    } else {                                                                   \
      /* Use llvm_unreachable instead of report_fatal_error, which gives       \
         better stack traces. */                                               \
      llvm_unreachable(                                                        \
          (std::string("Not yet implemented: ") + Instr->getInstName())        \
              .c_str());                                                       \
      abort();                                                                 \
    }                                                                          \
  } while (0)
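
// A minimal usage sketch (the target class and method below are hypothetical,
// not declared in this file): a backend that has not yet implemented a
// particular lowering typically writes
//
//   void TargetFoo::lowerShuffleVector(const InstShuffleVector *Instr) {
//     UnimplementedLoweringError(this, Instr);
//   }
//
// so that translation either aborts with a useful message or, when the
// SkipUnimplemented flag is set, keeps liveness analysis consistent via fake
// defs/uses.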

/// LoweringContext makes it easy to iterate through non-deleted instructions
/// in a node, and insert new (lowered) instructions at the current point.
/// Along with the instruction list container and associated iterators, it
/// holds the current node, which is needed when inserting new instructions in
/// order to track whether variables are used as single-block or multi-block.
class LoweringContext {
  LoweringContext(const LoweringContext &) = delete;
  LoweringContext &operator=(const LoweringContext &) = delete;

public:
  LoweringContext() = default;
  ~LoweringContext() = default;
  void init(CfgNode *Node);
  Inst *getNextInst() const {
    if (Next == End)
      return nullptr;
    return iteratorToInst(Next);
  }
  Inst *getNextInst(InstList::iterator &Iter) const {
    advanceForward(Iter);
    if (Iter == End)
      return nullptr;
    return iteratorToInst(Iter);
  }
  CfgNode *getNode() const { return Node; }
  bool atEnd() const { return Cur == End; }
  InstList::iterator getCur() const { return Cur; }
  InstList::iterator getNext() const { return Next; }
  InstList::iterator getEnd() const { return End; }
  void insert(Inst *Instr);
  template <typename Inst, typename... Args> Inst *insert(Args &&... A) {
    auto *New = Inst::create(Node->getCfg(), std::forward<Args>(A)...);
    insert(New);
    return New;
  }
  Inst *getLastInserted() const;
  void advanceCur() { Cur = Next; }
  void advanceNext() { advanceForward(Next); }
  void setCur(InstList::iterator C) { Cur = C; }
  void setNext(InstList::iterator N) { Next = N; }
  void rewind();
  void setInsertPoint(const InstList::iterator &Position) { Next = Position; }
  void availabilityReset();
  void availabilityUpdate();
  Variable *availabilityGet(Operand *Src) const;

private:
  /// Node is the argument to Inst::updateVars().
  CfgNode *Node = nullptr;
  Inst *LastInserted = nullptr;
  /// Cur points to the current instruction being considered. It is guaranteed
  /// to point to a non-deleted instruction, or to be End.
  InstList::iterator Cur;
  /// Next doubles as a pointer to the next valid instruction (if any), and the
  /// new-instruction insertion point. It is also updated for the caller in
  /// case the lowering consumes more than one high-level instruction. It is
  /// guaranteed to point to a non-deleted instruction after Cur, or to be End.
  // TODO: Consider separating the notion of "next valid instruction" and "new
  // instruction insertion point", to avoid confusion when previously-deleted
  // instructions come between the two points.
  InstList::iterator Next;
  /// Begin is a copy of Insts.begin(), used if iterators are moved backward.
  InstList::iterator Begin;
  /// End is a copy of Insts.end(), used if Next needs to be advanced.
  InstList::iterator End;
  /// LastDest and LastSrc capture the parameters of the last "Dest=Src" simple
  /// assignment inserted (provided Src is a variable). This is used for simple
  /// availability analysis.
  Variable *LastDest = nullptr;
  Variable *LastSrc = nullptr;

  void skipDeleted(InstList::iterator &I) const;
  void advanceForward(InstList::iterator &I) const;
};
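
// A minimal usage sketch (hypothetical target code, not part of this
// interface): lowering methods create new low-level instructions at the
// current insertion point through insert<>(), e.g.
//
//   Variable *T = Func->makeVariable(IceType_i32);
//   Context.insert<InstFakeDef>(T);
//
// and a pass that walks a node's instructions typically calls getNextInst(),
// does its work, and then advanceCur()/advanceNext() before the next
// iteration.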

/// A helper class to advance the LoweringContext at each loop iteration.
class PostIncrLoweringContext {
  PostIncrLoweringContext() = delete;
  PostIncrLoweringContext(const PostIncrLoweringContext &) = delete;
  PostIncrLoweringContext &operator=(const PostIncrLoweringContext &) = delete;

public:
  explicit PostIncrLoweringContext(LoweringContext &Context)
      : Context(Context) {}
  ~PostIncrLoweringContext() {
    Context.advanceCur();
    Context.advanceNext();
  }

private:
  LoweringContext &Context;
};
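
// A minimal sketch (hypothetical loop, not part of this interface) of how a
// per-instruction pass typically uses PostIncrLoweringContext: the destructor
// advances Cur/Next at the end of every iteration, even if the loop body
// bails out early.
//
//   Context.init(Node);
//   while (!Context.atEnd()) {
//     PostIncrLoweringContext PostIncrement(Context);
//     Inst *Instr = iteratorToInst(Context.getCur());
//     // ... examine or rewrite Instr ...
//   }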

/// TargetLowering is the base class for all backends in Subzero. In addition
/// to implementing the abstract methods in this class, each concrete target
/// must also implement a named constructor in its own namespace. For instance,
/// for X8632 we have:
///
///   namespace X8632 {
///     void createTargetLowering(Cfg *Func);
///   }
class TargetLowering {
  TargetLowering() = delete;
  TargetLowering(const TargetLowering &) = delete;
  TargetLowering &operator=(const TargetLowering &) = delete;

public:
  static void staticInit(GlobalContext *Ctx);
  // Each target must define a public static method:
  //   static void staticInit(GlobalContext *Ctx);
  static bool shouldBePooled(const class Constant *C);
  static Type getPointerType();

  static std::unique_ptr<TargetLowering> createLowering(TargetArch Target,
                                                        Cfg *Func);

  virtual std::unique_ptr<Assembler> createAssembler() const = 0;

  void translate() {
    switch (Func->getOptLevel()) {
    case Opt_m1:
      translateOm1();
      break;
    case Opt_0:
      translateO0();
      break;
    case Opt_1:
      translateO1();
      break;
    case Opt_2:
      translateO2();
      break;
    }
  }
  virtual void translateOm1() {
    Func->setError("Target doesn't specify Om1 lowering steps.");
  }
  virtual void translateO0() {
    Func->setError("Target doesn't specify O0 lowering steps.");
  }
  virtual void translateO1() {
    Func->setError("Target doesn't specify O1 lowering steps.");
  }
  virtual void translateO2() {
    Func->setError("Target doesn't specify O2 lowering steps.");
  }

  /// Generates calls to intrinsics for operations the Target can't handle.
  void genTargetHelperCalls();
  /// Tries to do address mode optimization on a single instruction.
  void doAddressOpt();
  /// Randomly inserts NOPs.
  void doNopInsertion(RandomNumberGenerator &RNG);
  /// Lowers a single non-Phi instruction.
  void lower();
  /// Inserts and lowers a single high-level instruction at a specific
  /// insertion point.
  void lowerInst(CfgNode *Node, InstList::iterator Next, InstHighLevel *Instr);
  /// Does preliminary lowering of the set of Phi instructions in the current
  /// node. The main intention is to do what's needed to keep the unlowered Phi
  /// instructions consistent with the lowered non-Phi instructions, e.g. to
  /// lower 64-bit operands on a 32-bit target.
  virtual void prelowerPhis() {}
  /// Tries to do branch optimization on a single instruction. Returns true if
  /// some optimization was done.
  virtual bool doBranchOpt(Inst * /*I*/, const CfgNode * /*NextNode*/) {
    return false;
  }

  virtual SizeT getNumRegisters() const = 0;
  /// Returns a variable pre-colored to the specified physical register. This
  /// is generally used to get very direct access to the register, such as in
  /// the prolog or epilog, or for marking scratch registers as killed by a
  /// call. If a Type is not provided, a target-specific default type is used.
  virtual Variable *getPhysicalRegister(RegNumT RegNum,
                                        Type Ty = IceType_void) = 0;
  /// Returns a printable name for the register.
  virtual const char *getRegName(RegNumT RegNum, Type Ty) const = 0;

  virtual bool hasFramePointer() const { return false; }
  virtual void setHasFramePointer() = 0;
  virtual RegNumT getStackReg() const = 0;
  virtual RegNumT getFrameReg() const = 0;
  virtual RegNumT getFrameOrStackReg() const = 0;
  virtual size_t typeWidthInBytesOnStack(Type Ty) const = 0;
  virtual uint32_t getStackAlignment() const = 0;
  virtual bool needsStackPointerAlignment() const { return false; }
  virtual void reserveFixedAllocaArea(size_t Size, size_t Align) = 0;
  virtual int32_t getFrameFixedAllocaOffset() const = 0;
  virtual uint32_t maxOutArgsSizeBytes() const { return 0; }
  // Addressing relative to the frame pointer differs in MIPS compared to
  // X86/ARM, since MIPS decrements its stack pointer prior to saving it in the
  // frame pointer register.
  virtual uint32_t getFramePointerOffset(uint32_t CurrentOffset,
                                         uint32_t Size) const {
    return -(CurrentOffset + Size);
  }
  /// Return whether a 64-bit Variable should be split into a Variable64On32.
  virtual bool shouldSplitToVariable64On32(Type Ty) const = 0;

  /// Return whether a Vector Variable should be split into a VariableVecOn32.
  virtual bool shouldSplitToVariableVecOn32(Type Ty) const {
    (void)Ty;
    return false;
  }

  bool hasComputedFrame() const { return HasComputedFrame; }
  /// Returns true if this function calls a function that has the "returns
  /// twice" attribute.
  bool callsReturnsTwice() const { return CallsReturnsTwice; }
  void setCallsReturnsTwice(bool RetTwice) { CallsReturnsTwice = RetTwice; }
  SizeT makeNextLabelNumber() { return NextLabelNumber++; }
  SizeT makeNextJumpTableNumber() { return NextJumpTableNumber++; }
  LoweringContext &getContext() { return Context; }
  Cfg *getFunc() const { return Func; }
  GlobalContext *getGlobalContext() const { return Ctx; }

  enum RegSet {
    RegSet_None = 0,
    RegSet_CallerSave = 1 << 0,
    RegSet_CalleeSave = 1 << 1,
    RegSet_StackPointer = 1 << 2,
    RegSet_FramePointer = 1 << 3,
    RegSet_All = ~RegSet_None
  };
  using RegSetMask = uint32_t;
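
  // Illustrative only (not an API defined here): callers compose RegSet flags
  // into the Include/Exclude masks of getRegisterSet(), declared below, e.g.
  // to request every register except the stack and frame pointers:
  //
  //   SmallBitVector Usable =
  //       getRegisterSet(RegSet_All,
  //                      RegSet_StackPointer | RegSet_FramePointer);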

  virtual SmallBitVector getRegisterSet(RegSetMask Include,
                                        RegSetMask Exclude) const = 0;
  /// Get the set of physical registers available for the specified Variable's
  /// register class, applying register restrictions from the command line.
  virtual const SmallBitVector &
  getRegistersForVariable(const Variable *Var) const = 0;
  /// Get the set of *all* physical registers available for the specified
  /// Variable's register class, *not* applying register restrictions from the
  /// command line.
  virtual const SmallBitVector &
  getAllRegistersForVariable(const Variable *Var) const = 0;
  virtual const SmallBitVector &getAliasesForRegister(RegNumT) const = 0;

  void regAlloc(RegAllocKind Kind);
  void postRegallocSplitting(const SmallBitVector &RegMask);

  virtual void
  makeRandomRegisterPermutation(llvm::SmallVectorImpl<RegNumT> &Permutation,
                                const SmallBitVector &ExcludeRegisters,
                                uint64_t Salt) const = 0;

  /// Get the minimum number of clusters required for a jump table to be
  /// considered.
  virtual SizeT getMinJumpTableSize() const = 0;
  virtual void emitJumpTable(const Cfg *Func,
                             const InstJumpTable *JumpTable) const = 0;

  virtual void emitVariable(const Variable *Var) const = 0;

  void emitWithoutPrefix(const ConstantRelocatable *CR,
                         const char *Suffix = "") const;

  virtual void emit(const ConstantInteger32 *C) const = 0;
  virtual void emit(const ConstantInteger64 *C) const = 0;
  virtual void emit(const ConstantFloat *C) const = 0;
  virtual void emit(const ConstantDouble *C) const = 0;
  virtual void emit(const ConstantUndef *C) const = 0;
  virtual void emit(const ConstantRelocatable *CR) const = 0;

  /// Performs target-specific argument lowering.
  virtual void lowerArguments() = 0;

  virtual void initNodeForLowering(CfgNode *) {}
  virtual void addProlog(CfgNode *Node) = 0;
  virtual void addEpilog(CfgNode *Node) = 0;

  /// Create a properly-typed "mov" instruction. This is primarily for local
  /// variable splitting.
  virtual Inst *createLoweredMove(Variable *Dest, Variable *SrcVar) {
    // TODO(stichnot): make pure virtual by implementing for all targets
    (void)Dest;
    (void)SrcVar;
    llvm::report_fatal_error("createLoweredMove() unimplemented");
    return nullptr;
  }

  virtual ~TargetLowering() = default;

private:
  // This control variable is used by AutoBundle (RAII-style bundle
  // locking/unlocking) to prevent nested bundles.
  bool AutoBundling = false;

  /// This indicates whether we are in the genTargetHelperCalls phase, and
  /// therefore can do things like scalarization.
  bool GeneratingTargetHelpers = false;

  // _bundle_lock() and _bundle_unlock() were made private to force subtargets
  // to use the AutoBundle helper.
  void
  _bundle_lock(InstBundleLock::Option BundleOption = InstBundleLock::Opt_None) {
    Context.insert<InstBundleLock>(BundleOption);
  }
  void _bundle_unlock() { Context.insert<InstBundleUnlock>(); }

protected:
  /// AutoBundle provides RAII-style bundling. Sub-targets are expected to use
  /// it when emitting NaCl bundles, to ensure proper bundle unlocking and to
  /// prevent nested bundles.
  ///
  /// AutoBundle objects will emit a _bundle_lock during construction (but only
  /// if sandboxed code generation was requested), and a _bundle_unlock during
  /// destruction. By carefully scoping objects of this type, Subtargets can
  /// ensure proper bundle emission.
  class AutoBundle {
    AutoBundle() = delete;
    AutoBundle(const AutoBundle &) = delete;
    AutoBundle &operator=(const AutoBundle &) = delete;

  public:
    explicit AutoBundle(TargetLowering *Target,
                        InstBundleLock::Option Option = InstBundleLock::Opt_None);
    ~AutoBundle();

  private:
    TargetLowering *const Target;
    const bool NeedSandboxing;
  };
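
  // A minimal sketch (hypothetical target code, not part of this interface):
  // when a group of instructions must land in a single NaCl bundle, a
  // sub-target scopes an AutoBundle around the insertions; the lock/unlock
  // pair is only emitted if sandboxing was requested.
  //
  //   {
  //     AutoBundle Bundler(this);
  //     // ... insert the instructions that must stay within one bundle ...
  //   }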

  explicit TargetLowering(Cfg *Func);
  // Applies command line filters to TypeToRegisterSet array.
  static void filterTypeToRegisterSet(
      GlobalContext *Ctx, int32_t NumRegs, SmallBitVector TypeToRegisterSet[],
      size_t TypeToRegisterSetSize,
      std::function<std::string(RegNumT)> getRegName,
      std::function<const char *(RegClass)> getRegClassName);
  virtual void lowerAlloca(const InstAlloca *Instr) = 0;
  virtual void lowerArithmetic(const InstArithmetic *Instr) = 0;
  virtual void lowerAssign(const InstAssign *Instr) = 0;
  virtual void lowerBr(const InstBr *Instr) = 0;
  virtual void lowerBreakpoint(const InstBreakpoint *Instr) = 0;
  virtual void lowerCall(const InstCall *Instr) = 0;
  virtual void lowerCast(const InstCast *Instr) = 0;
  virtual void lowerFcmp(const InstFcmp *Instr) = 0;
  virtual void lowerExtractElement(const InstExtractElement *Instr) = 0;
  virtual void lowerIcmp(const InstIcmp *Instr) = 0;
  virtual void lowerInsertElement(const InstInsertElement *Instr) = 0;
  virtual void lowerIntrinsicCall(const InstIntrinsicCall *Instr) = 0;
  virtual void lowerLoad(const InstLoad *Instr) = 0;
  virtual void lowerPhi(const InstPhi *Instr) = 0;
  virtual void lowerRet(const InstRet *Instr) = 0;
  virtual void lowerSelect(const InstSelect *Instr) = 0;
  virtual void lowerShuffleVector(const InstShuffleVector *Instr) = 0;
  virtual void lowerStore(const InstStore *Instr) = 0;
  virtual void lowerSwitch(const InstSwitch *Instr) = 0;
  virtual void lowerUnreachable(const InstUnreachable *Instr) = 0;
  virtual void lowerOther(const Inst *Instr);

  virtual void genTargetHelperCallFor(Inst *Instr) = 0;
  virtual uint32_t getCallStackArgumentsSizeBytes(const InstCall *Instr) = 0;

  /// Opportunity to modify other instructions to help Address Optimization.
  virtual void doAddressOptOther() {}
  virtual void doAddressOptLoad() {}
  virtual void doAddressOptStore() {}
  virtual void doAddressOptLoadSubVector() {}
  virtual void doAddressOptStoreSubVector() {}
  virtual void doMockBoundsCheck(Operand *) {}
  virtual void randomlyInsertNop(float Probability,
                                 RandomNumberGenerator &RNG) = 0;
  /// This gives the target an opportunity to post-process the lowered
  /// expansion before returning.
  virtual void postLower() {}

  /// When the SkipUnimplemented flag is set, addFakeDefUses() gets invoked by
  /// the UnimplementedLoweringError macro to insert fake uses of all the
  /// instruction variables and a fake def of the instruction dest, in order to
  /// preserve integrity of liveness analysis.
  void addFakeDefUses(const Inst *Instr);

  /// Find (non-SSA) instructions where the Dest variable appears in some
  /// source operand, and set the IsDestRedefined flag. This keeps liveness
  /// analysis consistent.
  void markRedefinitions();

  /// Make a pass over the Cfg to determine which variables need stack slots
  /// and place them in a sorted list (SortedSpilledVariables). Among those
  /// vars, classify the spill variables as local to the basic block vs global
  /// (multi-block) in order to compute the parameters GlobalsSize and
  /// SpillAreaSizeBytes (which represents locals, or general vars if the
  /// coalescing of locals is disallowed), along with the alignments required
  /// for variables in each area. We rely on accurate VMetadata in order to
  /// classify a variable as global vs local (otherwise the variable is
  /// conservatively global). The in-args should be initialized to 0.
  ///
  /// This is only a pre-pass; the actual stack slot assignment is handled
  /// separately.
  ///
  /// There may be target-specific Variable types, which will be handled by
  /// TargetVarHook. If the TargetVarHook returns true, then the variable is
  /// skipped and not considered with the rest of the spilled variables.
  void getVarStackSlotParams(VarList &SortedSpilledVariables,
                             SmallBitVector &RegsUsed, size_t *GlobalsSize,
                             size_t *SpillAreaSizeBytes,
                             uint32_t *SpillAreaAlignmentBytes,
                             uint32_t *LocalsSlotsAlignmentBytes,
                             std::function<bool(Variable *)> TargetVarHook);

  /// Calculate the amount of padding needed to align the local and global
  /// areas to the required alignment. This assumes the globals/locals layout
  /// used by getVarStackSlotParams and assignVarStackSlots.
  void alignStackSpillAreas(uint32_t SpillAreaStartOffset,
                            uint32_t SpillAreaAlignmentBytes,
                            size_t GlobalsSize,
                            uint32_t LocalsSlotsAlignmentBytes,
                            uint32_t *SpillAreaPaddingBytes,
                            uint32_t *LocalsSlotsPaddingBytes);

  /// Make a pass through the SortedSpilledVariables and actually assign stack
  /// slots. SpillAreaPaddingBytes takes into account stack alignment padding.
  /// The SpillArea starts after that amount of padding. This matches the
  /// scheme in getVarStackSlotParams, where there may be a separate
  /// multi-block global var spill area and a local var spill area.
  void assignVarStackSlots(VarList &SortedSpilledVariables,
                           size_t SpillAreaPaddingBytes,
                           size_t SpillAreaSizeBytes,
                           size_t GlobalsAndSubsequentPaddingSize,
                           bool UsesFramePointer);

  /// Sort the variables in Source based on required alignment. The variables
  /// with the largest alignment need are placed in the front of the Dest list.
  void sortVarsByAlignment(VarList &Dest, const VarList &Source) const;

  InstCall *makeHelperCall(RuntimeHelper FuncID, Variable *Dest, SizeT MaxSrcs);

  void _set_dest_redefined() { Context.getLastInserted()->setDestRedefined(); }
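
  // A minimal sketch (hypothetical caller; the helper id and operand names are
  // illustrative assumptions, not defined in this file): a target that cannot
  // lower an operation inline typically builds a runtime helper call with
  // makeHelperCall(), adds its arguments, and lowers it like any other call:
  //
  //   InstCall *Call = makeHelperCall(RuntimeHelper::H_call_memcpy,
  //                                   /*Dest=*/nullptr, /*MaxSrcs=*/3);
  //   Call->addArg(DestPtr);
  //   Call->addArg(SrcPtr);
  //   Call->addArg(NumBytes);
  //   lowerCall(Call);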

  bool shouldOptimizeMemIntrins();

  void scalarizeArithmetic(InstArithmetic::OpKind K, Variable *Dest,
                           Operand *Src0, Operand *Src1);

  /// Generalizes scalarizeArithmetic to support other instruction types.
  ///
  /// insertScalarInstruction is a function-like object with signature
  /// (Variable *Dest, Variable *Src0, Variable *Src1) -> Inst *.
  template <typename... Operands,
            typename F = std::function<Inst *(Variable *, Operands *...)>>
  void scalarizeInstruction(Variable *Dest, F insertScalarInstruction,
                            Operands *... Srcs) {
    assert(GeneratingTargetHelpers &&
           "scalarizeInstruction called during incorrect phase");
    const Type DestTy = Dest->getType();
    assert(isVectorType(DestTy));
    const Type DestElementTy = typeElementType(DestTy);
    const SizeT NumElements = typeNumElements(DestTy);

    Variable *T = Func->makeVariable(DestTy);
    if (auto *VarVecOn32 = llvm::dyn_cast<VariableVecOn32>(T)) {
      VarVecOn32->initVecElement(Func);
      auto *Undef = ConstantUndef::create(Ctx, DestTy);
      Context.insert<InstAssign>(T, Undef);
    } else {
      Context.insert<InstFakeDef>(T);
    }

    for (SizeT I = 0; I < NumElements; ++I) {
      auto *Index = Ctx->getConstantInt32(I);

      auto makeExtractThunk = [this, Index, NumElements](Operand *Src) {
        return [this, Index, NumElements, Src]() {
          (void)NumElements;
          assert(typeNumElements(Src->getType()) == NumElements);

          const auto ElementTy = typeElementType(Src->getType());
          auto *Op = Func->makeVariable(ElementTy);
          Context.insert<InstExtractElement>(Op, Src, Index);
          return Op;
        };
      };

      // Perform the operation as a scalar operation.
      auto *Res = Func->makeVariable(DestElementTy);
      auto *Arith = applyToThunkedArgs(insertScalarInstruction, Res,
                                       makeExtractThunk(Srcs)...);
      genTargetHelperCallFor(Arith);

      Variable *DestT = Func->makeVariable(DestTy);
      Context.insert<InstInsertElement>(DestT, T, Res, Index);
      T = DestT;
    }
    Context.insert<InstAssign>(Dest, T);
  }
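
  // A minimal sketch (hypothetical call site; the instruction kind and source
  // operands are illustrative): a target with no native vector divide could
  // scalarize it by passing a lambda that emits the scalar instruction for
  // each element:
  //
  //   scalarizeInstruction(
  //       Dest,
  //       [this](Variable *Res, Variable *Src0, Variable *Src1) {
  //         return Context.insert<InstArithmetic>(InstArithmetic::Fdiv, Res,
  //                                               Src0, Src1);
  //       },
  //       Instr->getSrc(0), Instr->getSrc(1));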

  // applyToThunkedArgs is used by scalarizeInstruction. Ideally, we would just
  // call insertScalarInstruction(Res, Srcs...), but C++ does not specify
  // evaluation order, which would lead to an unpredictable final output.
  // Instead, we wrap each of the Srcs in a thunk, and these applyToThunkedArgs
  // functions apply the thunks in a well-defined order so we still get
  // well-defined output.
  Inst *applyToThunkedArgs(
      std::function<Inst *(Variable *, Variable *)> insertScalarInstruction,
      Variable *Res, std::function<Variable *()> thunk0) {
    auto *Src0 = thunk0();
    return insertScalarInstruction(Res, Src0);
  }

  Inst *
  applyToThunkedArgs(std::function<Inst *(Variable *, Variable *, Variable *)>
                         insertScalarInstruction,
                     Variable *Res, std::function<Variable *()> thunk0,
                     std::function<Variable *()> thunk1) {
    auto *Src0 = thunk0();
    auto *Src1 = thunk1();
    return insertScalarInstruction(Res, Src0, Src1);
  }

  Inst *applyToThunkedArgs(
      std::function<Inst *(Variable *, Variable *, Variable *, Variable *)>
          insertScalarInstruction,
      Variable *Res, std::function<Variable *()> thunk0,
      std::function<Variable *()> thunk1, std::function<Variable *()> thunk2) {
    auto *Src0 = thunk0();
    auto *Src1 = thunk1();
    auto *Src2 = thunk2();
    return insertScalarInstruction(Res, Src0, Src1, Src2);
  }

  /// SandboxType enumerates all possible sandboxing strategies that Subzero
  /// supports.
  enum SandboxType {
    ST_None,
    ST_NaCl,
    ST_Nonsfi,
  };

  static SandboxType determineSandboxTypeFromFlags(const ClFlags &Flags);

  Cfg *Func;
  GlobalContext *Ctx;
  bool HasComputedFrame = false;
  bool CallsReturnsTwice = false;
  SizeT NextLabelNumber = 0;
  SizeT NextJumpTableNumber = 0;
  LoweringContext Context;
  const SandboxType SandboxingType = ST_None;

  const static constexpr char *H_getIP_prefix = "__Sz_getIP_";
};

/// TargetDataLowering is used for "lowering" data including initializers for
/// global variables, and the internal constant pools. It is separated out from
/// TargetLowering because it does not require a Cfg.
class TargetDataLowering {
  TargetDataLowering() = delete;
  TargetDataLowering(const TargetDataLowering &) = delete;
  TargetDataLowering &operator=(const TargetDataLowering &) = delete;

public:
  static std::unique_ptr<TargetDataLowering>
  createLowering(GlobalContext *Ctx);
  virtual ~TargetDataLowering();

  virtual void lowerGlobals(const VariableDeclarationList &Vars,
                            const std::string &SectionSuffix) = 0;
  virtual void lowerConstants() = 0;
  virtual void lowerJumpTables() = 0;
  virtual void emitTargetRODataSections() {}

protected:
  void emitGlobal(const VariableDeclaration &Var,
                  const std::string &SectionSuffix);

  /// For now, we assume .long is the right directive for emitting 4-byte
  /// global relocations. However, LLVM MIPS usually uses .4byte instead.
  /// Perhaps there is some difference when the location is unaligned.
  static const char *getEmit32Directive() { return ".long"; }

  explicit TargetDataLowering(GlobalContext *Ctx) : Ctx(Ctx) {}
  GlobalContext *Ctx;
};

/// TargetHeaderLowering is used to "lower" the header of an output file. It
/// writes out the target-specific header attributes. E.g., for ARM this writes
/// out the build attributes (float ABI, etc.).
class TargetHeaderLowering {
  TargetHeaderLowering() = delete;
  TargetHeaderLowering(const TargetHeaderLowering &) = delete;
  TargetHeaderLowering &operator=(const TargetHeaderLowering &) = delete;

public:
  static std::unique_ptr<TargetHeaderLowering>
  createLowering(GlobalContext *Ctx);
  virtual ~TargetHeaderLowering();

  virtual void lower() {}

protected:
  explicit TargetHeaderLowering(GlobalContext *Ctx) : Ctx(Ctx) {}
  GlobalContext *Ctx;
};

} // end of namespace Ice

#endif // SUBZERO_SRC_ICETARGETLOWERING_H