//===- subzero/src/IceTargetLoweringARM32.h - ARM32 lowering ----*- C++ -*-===//
//
//                        The Subzero Code Generator
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// \brief Declares the TargetARM32 class, which implements the
/// TargetLowering interface for the ARM 32-bit architecture.
///
//===----------------------------------------------------------------------===//

#ifndef SUBZERO_SRC_ICETARGETLOWERINGARM32_H
#define SUBZERO_SRC_ICETARGETLOWERINGARM32_H

#include "IceAssemblerARM32.h"
#include "IceDefs.h"
#include "IceInstARM32.h"
#include "IceRegistersARM32.h"
#include "IceTargetLowering.h"

#include <utility>

namespace Ice {
namespace ARM32 {

// Class encapsulating ARM CPU features / instruction set.
class TargetARM32Features {
  TargetARM32Features() = delete;
  TargetARM32Features(const TargetARM32Features &) = delete;
  TargetARM32Features &operator=(const TargetARM32Features &) = delete;

public:
  explicit TargetARM32Features(const ClFlags &Flags);

  enum ARM32InstructionSet {
    Begin,
    // Neon is the PNaCl baseline instruction set.
    Neon = Begin,
    HWDivArm, // HW divide in ARM mode (not just Thumb mode).
    End
  };

  bool hasFeature(ARM32InstructionSet I) const { return I <= InstructionSet; }

private:
  ARM32InstructionSet InstructionSet = ARM32InstructionSet::Begin;
};

// The target lowering logic for ARM32.
class TargetARM32 : public TargetLowering {
  TargetARM32() = delete;
  TargetARM32(const TargetARM32 &) = delete;
  TargetARM32 &operator=(const TargetARM32 &) = delete;

public:
  static void staticInit(GlobalContext *Ctx);

  static bool shouldBePooled(const Constant *C) {
    if (auto *ConstDouble = llvm::dyn_cast<ConstantDouble>(C)) {
      return !Utils::isPositiveZero(ConstDouble->getValue());
    }
    if (llvm::isa<ConstantFloat>(C))
      return true;
    return false;
  }
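  // In other words: every float constant is pooled, and every double constant
  // except +0.0 (presumably because +0.0 is cheap to materialize inline).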

  static ::Ice::Type getPointerType() { return ::Ice::IceType_i32; }

  // TODO(jvoung): return a unique_ptr.
  static std::unique_ptr<::Ice::TargetLowering> create(Cfg *Func) {
    return makeUnique<TargetARM32>(Func);
  }

  std::unique_ptr<::Ice::Assembler> createAssembler() const override {
    const bool IsNonsfi = SandboxingType == ST_Nonsfi;
    return makeUnique<ARM32::AssemblerARM32>(IsNonsfi);
  }

  void initNodeForLowering(CfgNode *Node) override {
    Computations.forgetProducers();
    Computations.recordProducers(Node);
    Computations.dump(Func);
  }

  void translateOm1() override;
  void translateO2() override;
  bool doBranchOpt(Inst *I, const CfgNode *NextNode) override;

  SizeT getNumRegisters() const override { return RegARM32::Reg_NUM; }
  Variable *getPhysicalRegister(RegNumT RegNum,
                                Type Ty = IceType_void) override;
  const char *getRegName(RegNumT RegNum, Type Ty) const override;
  SmallBitVector getRegisterSet(RegSetMask Include,
                                RegSetMask Exclude) const override;
  const SmallBitVector &
  getRegistersForVariable(const Variable *Var) const override {
    RegClass RC = Var->getRegClass();
    switch (RC) {
    default:
      assert(RC < RC_Target);
      return TypeToRegisterSet[RC];
    case RegARM32::RCARM32_QtoS:
      return TypeToRegisterSet[RC];
    }
  }
  const SmallBitVector &
  getAllRegistersForVariable(const Variable *Var) const override {
    RegClass RC = Var->getRegClass();
    assert((RegARM32::RegClassARM32)RC < RegARM32::RCARM32_NUM);
    return TypeToRegisterSetUnfiltered[RC];
  }
  const SmallBitVector &getAliasesForRegister(RegNumT Reg) const override {
    return RegisterAliases[Reg];
  }
  bool hasFramePointer() const override { return UsesFramePointer; }
  void setHasFramePointer() override { UsesFramePointer = true; }
  RegNumT getStackReg() const override { return RegARM32::Reg_sp; }
  RegNumT getFrameReg() const override { return RegARM32::Reg_fp; }
  RegNumT getFrameOrStackReg() const override {
    return UsesFramePointer ? getFrameReg() : getStackReg();
  }
  RegNumT getReservedTmpReg() const { return RegARM32::Reg_ip; }

  size_t typeWidthInBytesOnStack(Type Ty) const override {
    // Round up to the next multiple of 4 bytes. In particular, i1, i8, and i16
    // are rounded up to 4 bytes.
    return (typeWidthInBytes(Ty) + 3) & ~3;
  }
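  // Illustration: typeWidthInBytesOnStack(IceType_i8) == (1 + 3) & ~3 == 4,
  // while an 8-byte i64 is already a multiple of 4 and stays at 8.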
  uint32_t getStackAlignment() const override;
  void reserveFixedAllocaArea(size_t Size, size_t Align) override {
    FixedAllocaSizeBytes = Size;
    assert(llvm::isPowerOf2_32(Align));
    FixedAllocaAlignBytes = Align;
    PrologEmitsFixedAllocas = true;
  }
  int32_t getFrameFixedAllocaOffset() const override {
    return FixedAllocaSizeBytes - (SpillAreaSizeBytes - MaxOutArgsSizeBytes);
  }
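  // Worked example for getFrameFixedAllocaOffset, with hypothetical sizes:
  // FixedAllocaSizeBytes = 16, SpillAreaSizeBytes = 32, and
  // MaxOutArgsSizeBytes = 8 yield an offset of 16 - (32 - 8) = -8.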
  uint32_t maxOutArgsSizeBytes() const override { return MaxOutArgsSizeBytes; }

  bool shouldSplitToVariable64On32(Type Ty) const override {
    return Ty == IceType_i64;
  }

  // TODO(ascull): what size is best for ARM?
  SizeT getMinJumpTableSize() const override { return 3; }
  void emitJumpTable(const Cfg *Func,
                     const InstJumpTable *JumpTable) const override;

  void emitVariable(const Variable *Var) const override;

  void emit(const ConstantUndef *C) const final;
  void emit(const ConstantInteger32 *C) const final;
  void emit(const ConstantInteger64 *C) const final;
  void emit(const ConstantFloat *C) const final;
  void emit(const ConstantDouble *C) const final;
  void emit(const ConstantRelocatable *C) const final;

  void lowerArguments() override;
  void addProlog(CfgNode *Node) override;
  void addEpilog(CfgNode *Node) override;

  Operand *loOperand(Operand *Operand);
  Operand *hiOperand(Operand *Operand);
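  // loOperand/hiOperand return the low and high 32-bit halves of an i64
  // Operand (e.g., the Lo/Hi halves of a Variable64On32 created because
  // shouldSplitToVariable64On32() returned true).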
  void finishArgumentLowering(Variable *Arg, Variable *FramePtr,
                              size_t BasicFrameOffset, size_t *InArgsSizeBytes);

  bool hasCPUFeature(TargetARM32Features::ARM32InstructionSet I) const {
    return CPUFeatures.hasFeature(I);
  }

  enum OperandLegalization {
    Legal_Reg = 1 << 0,  /// physical register, not stack location
    Legal_Flex = 1 << 1, /// A flexible operand2, which can hold rotated small
                         /// immediates, shifted registers, or modified fp imm.
    Legal_Mem = 1 << 2,  /// includes [r0, r1 lsl #2] as well as [sp, #12]
    Legal_Rematerializable = 1 << 3,
    Legal_Default = ~Legal_Rematerializable,
  };

  using LegalMask = uint32_t;
  Operand *legalizeUndef(Operand *From, RegNumT RegNum = RegNumT());
  Operand *legalize(Operand *From, LegalMask Allowed = Legal_Default,
                    RegNumT RegNum = RegNumT());
  Variable *legalizeToReg(Operand *From, RegNumT RegNum = RegNumT());
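  // A typical call site for legalize() (a minimal sketch): force an operand
  // into either a register or an ARM flexible Operand2 before emitting a
  // data-processing instruction:
  //
  //   Operand *Src1RF = legalize(Src1, Legal_Reg | Legal_Flex);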

  OperandARM32ShAmtImm *shAmtImm(uint32_t ShAmtImm) const {
    assert(ShAmtImm < 32);
    return OperandARM32ShAmtImm::create(
        Func,
        llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(ShAmtImm & 0x1F)));
  }

  GlobalContext *getCtx() const { return Ctx; }

protected:
  explicit TargetARM32(Cfg *Func);

  void postLower() override;

  enum SafeBoolChain {
    SBC_No,
    SBC_Yes,
  };

  void lowerAlloca(const InstAlloca *Instr) override;
  SafeBoolChain lowerInt1Arithmetic(const InstArithmetic *Instr);
  void lowerInt64Arithmetic(InstArithmetic::OpKind Op, Variable *Dest,
                            Operand *Src0, Operand *Src1);
  void lowerArithmetic(const InstArithmetic *Instr) override;
  void lowerAssign(const InstAssign *Instr) override;
  void lowerBr(const InstBr *Instr) override;
  void lowerCall(const InstCall *Instr) override;
  void lowerCast(const InstCast *Instr) override;
  void lowerExtractElement(const InstExtractElement *Instr) override;

  /// CondWhenTrue is a helper type returned by every method in the lowering
  /// that emits code to set the condition codes.
  class CondWhenTrue {
  public:
    explicit CondWhenTrue(CondARM32::Cond T0,
                          CondARM32::Cond T1 = CondARM32::kNone)
        : WhenTrue0(T0), WhenTrue1(T1) {
      assert(T1 == CondARM32::kNone || T0 != CondARM32::kNone);
      assert(T1 != T0 || T0 == CondARM32::kNone);
    }
    CondARM32::Cond WhenTrue0;
    CondARM32::Cond WhenTrue1;

    /// invert returns a new object with WhenTrue0 and WhenTrue1 inverted.
    CondWhenTrue invert() const {
      switch (WhenTrue0) {
      default:
        if (WhenTrue1 == CondARM32::kNone)
          return CondWhenTrue(InstARM32::getOppositeCondition(WhenTrue0));
        return CondWhenTrue(InstARM32::getOppositeCondition(WhenTrue0),
                            InstARM32::getOppositeCondition(WhenTrue1));
      case CondARM32::AL:
        return CondWhenTrue(CondARM32::kNone);
      case CondARM32::kNone:
        return CondWhenTrue(CondARM32::AL);
      }
    }
  };

  CondWhenTrue lowerFcmpCond(const InstFcmp *Instr);
  void lowerFcmp(const InstFcmp *Instr) override;
  CondWhenTrue lowerInt8AndInt16IcmpCond(InstIcmp::ICond Condition,
                                         Operand *Src0, Operand *Src1);
  CondWhenTrue lowerInt32IcmpCond(InstIcmp::ICond Condition, Operand *Src0,
                                  Operand *Src1);
  CondWhenTrue lowerInt64IcmpCond(InstIcmp::ICond Condition, Operand *Src0,
                                  Operand *Src1);
  CondWhenTrue lowerIcmpCond(InstIcmp::ICond Condition, Operand *Src0,
                             Operand *Src1);
  CondWhenTrue lowerIcmpCond(const InstIcmp *Instr);
  void lowerIcmp(const InstIcmp *Instr) override;
  /// Emits the basic sequence for load-linked/store-exclusive loops:
  ///
  /// retry:
  ///        ldrex tmp, [Addr]
  ///        StoreValue = Operation(tmp)
  ///        strexCond success, StoreValue, [Addr]
  ///        cmpCond success, #0
  ///        bne retry
  ///
  /// Operation needs to return the value to be strex'd to Addr; it must not
  /// change the flags if Cond is not AL, and must not emit any instructions
  /// that could end up writing to memory. Operation is also responsible for
  /// the fake-defs needed for i64 handling.
  void
  lowerLoadLinkedStoreExclusive(Type Ty, Operand *Addr,
                                std::function<Variable *(Variable *)> Operation,
                                CondARM32::Cond Cond = CondARM32::AL);
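  // A minimal usage sketch for lowerLoadLinkedStoreExclusive (a hypothetical
  // atomic i32 add; the callback computes and returns the value to be
  // strex'd):
  //
  //   lowerLoadLinkedStoreExclusive(
  //       IceType_i32, Addr, [this, Val](Variable *Tmp) {
  //         Variable *Result = makeReg(IceType_i32);
  //         _add(Result, Tmp, Val);
  //         return Result;
  //       });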
  void lowerInt64AtomicRMW(Variable *Dest, uint32_t Operation, Operand *Ptr,
                           Operand *Val);
  void lowerAtomicRMW(Variable *Dest, uint32_t Operation, Operand *Ptr,
                      Operand *Val);
  void lowerBreakpoint(const InstBreakpoint *Instr) override;
  void lowerIntrinsicCall(const InstIntrinsicCall *Instr) override;
  void lowerInsertElement(const InstInsertElement *Instr) override;
  void lowerLoad(const InstLoad *Instr) override;
  void lowerPhi(const InstPhi *Instr) override;
  void lowerRet(const InstRet *Instr) override;
  void lowerSelect(const InstSelect *Instr) override;
  void lowerShuffleVector(const InstShuffleVector *Instr) override;
  void lowerStore(const InstStore *Instr) override;
  void lowerSwitch(const InstSwitch *Instr) override;
  void lowerUnreachable(const InstUnreachable *Instr) override;
  void prelowerPhis() override;
  uint32_t getCallStackArgumentsSizeBytes(const InstCall *Instr) override;
  void genTargetHelperCallFor(Inst *Instr) override;
  void doAddressOptLoad() override;
  void doAddressOptStore() override;
  void randomlyInsertNop(float Probability,
                         RandomNumberGenerator &RNG) override;

  OperandARM32Mem *formMemoryOperand(Operand *Ptr, Type Ty);

  Variable64On32 *makeI64RegPair();
  Variable *makeReg(Type Ty, RegNumT RegNum = RegNumT());
  static Type stackSlotType();
  Variable *copyToReg(Operand *Src, RegNumT RegNum = RegNumT());
  void alignRegisterPow2(Variable *Reg, uint32_t Align,
                         RegNumT TmpRegNum = RegNumT());

  /// Returns a register containing a vector of all zeros.
  Variable *makeVectorOfZeros(Type Ty, RegNumT RegNum = RegNumT());

  void
  makeRandomRegisterPermutation(llvm::SmallVectorImpl<RegNumT> &Permutation,
                                const SmallBitVector &ExcludeRegisters,
                                uint64_t Salt) const override;

  // If a divide-by-zero check is needed, inserts: test; branch .LSKIP; trap;
  // .LSKIP: <continuation>. If no check is needed, nothing is inserted.
  void div0Check(Type Ty, Operand *SrcLo, Operand *SrcHi);
  using ExtInstr = void (TargetARM32::*)(Variable *, Variable *,
                                         CondARM32::Cond);
  using DivInstr = void (TargetARM32::*)(Variable *, Variable *, Variable *,
                                         CondARM32::Cond);
  void lowerIDivRem(Variable *Dest, Variable *T, Variable *Src0R, Operand *Src1,
                    ExtInstr ExtFunc, DivInstr DivFunc, bool IsRemainder);

  void lowerCLZ(Variable *Dest, Variable *ValLo, Variable *ValHi);

  // The following are helpers that insert lowered ARM32 instructions with
  // minimal syntactic overhead, so that the lowering code can look as close to
  // assembly as practical.
  void _add(Variable *Dest, Variable *Src0, Operand *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Add>(Dest, Src0, Src1, Pred);
  }
  void _adds(Variable *Dest, Variable *Src0, Operand *Src1,
             CondARM32::Cond Pred = CondARM32::AL) {
    constexpr bool SetFlags = true;
    Context.insert<InstARM32Add>(Dest, Src0, Src1, Pred, SetFlags);
    if (SetFlags) {
      Context.insert<InstFakeUse>(Dest);
    }
  }
  void _adc(Variable *Dest, Variable *Src0, Operand *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Adc>(Dest, Src0, Src1, Pred);
  }
  void _and(Variable *Dest, Variable *Src0, Operand *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32And>(Dest, Src0, Src1, Pred);
  }
  void _asr(Variable *Dest, Variable *Src0, Operand *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Asr>(Dest, Src0, Src1, Pred);
  }
  void _bic(Variable *Dest, Variable *Src0, Operand *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Bic>(Dest, Src0, Src1, Pred);
  }
  void _br(CfgNode *TargetTrue, CfgNode *TargetFalse,
           CondARM32::Cond Condition) {
    Context.insert<InstARM32Br>(TargetTrue, TargetFalse, Condition);
  }
  void _br(CfgNode *Target) { Context.insert<InstARM32Br>(Target); }
  void _br(CfgNode *Target, CondARM32::Cond Condition) {
    Context.insert<InstARM32Br>(Target, Condition);
  }
  void _br(InstARM32Label *Label, CondARM32::Cond Condition) {
    Context.insert<InstARM32Br>(Label, Condition);
  }
  void _cmn(Variable *Src0, Operand *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Cmn>(Src0, Src1, Pred);
  }
  void _cmp(Variable *Src0, Operand *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Cmp>(Src0, Src1, Pred);
  }
  void _clz(Variable *Dest, Variable *Src0,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Clz>(Dest, Src0, Pred);
  }
  void _dmb() { Context.insert<InstARM32Dmb>(); }
  void _eor(Variable *Dest, Variable *Src0, Operand *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Eor>(Dest, Src0, Src1, Pred);
  }
  /// _ldr, for all your memory-to-Variable data moves. It handles all types
  /// (integer, floating point, and vector). Addr needs to be valid for Dest's
  /// type (e.g., no immediates for vector loads, and no index registers for fp
  /// loads.)
  void _ldr(Variable *Dest, OperandARM32Mem *Addr,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Ldr>(Dest, Addr, Pred);
  }
  InstARM32Ldrex *_ldrex(Variable *Dest, OperandARM32Mem *Addr,
                         CondARM32::Cond Pred = CondARM32::AL) {
    auto *Ldrex = Context.insert<InstARM32Ldrex>(Dest, Addr, Pred);
    if (auto *Dest64 = llvm::dyn_cast<Variable64On32>(Dest)) {
      Context.insert<InstFakeDef>(Dest64->getLo(), Dest);
      Context.insert<InstFakeDef>(Dest64->getHi(), Dest);
    }
    return Ldrex;
  }
  void _lsl(Variable *Dest, Variable *Src0, Operand *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Lsl>(Dest, Src0, Src1, Pred);
  }
  void _lsls(Variable *Dest, Variable *Src0, Operand *Src1,
             CondARM32::Cond Pred = CondARM32::AL) {
    constexpr bool SetFlags = true;
    Context.insert<InstARM32Lsl>(Dest, Src0, Src1, Pred, SetFlags);
    if (SetFlags) {
      Context.insert<InstFakeUse>(Dest);
    }
  }
  void _lsr(Variable *Dest, Variable *Src0, Operand *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Lsr>(Dest, Src0, Src1, Pred);
  }
  void _mla(Variable *Dest, Variable *Src0, Variable *Src1, Variable *Acc,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Mla>(Dest, Src0, Src1, Acc, Pred);
  }
  void _mls(Variable *Dest, Variable *Src0, Variable *Src1, Variable *Acc,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Mls>(Dest, Src0, Src1, Acc, Pred);
  }
  /// _mov, for all your Variable-to-Variable data movement needs. It handles
  /// all types (integer, floating point, and vector), as well as moves between
  /// Core and VFP registers. This is not a panacea: you must obey the (weird,
  /// confusing, non-uniform) rules for data moves in ARM.
  void _mov(Variable *Dest, Operand *Src0,
            CondARM32::Cond Pred = CondARM32::AL) {
    // _mov used to be unique in the sense that it would create a temporary
    // automagically if Dest was nullptr. It won't do that anymore, so we keep
    // an assert around just in case there is some untested code path where Dest
    // is nullptr.
    assert(Dest != nullptr);
    assert(!llvm::isa<OperandARM32Mem>(Src0));
    auto *Instr = Context.insert<InstARM32Mov>(Dest, Src0, Pred);

    if (Instr->isMultiDest()) {
      // If Instr is multi-dest, then Dest must be a Variable64On32. We add a
      // fake-def for Instr.DestHi here.
      assert(llvm::isa<Variable64On32>(Dest));
      Context.insert<InstFakeDef>(Instr->getDestHi());
    }
  }

  void _mov_redefined(Variable *Dest, Operand *Src0,
                      CondARM32::Cond Pred = CondARM32::AL) {
    auto *Instr = Context.insert<InstARM32Mov>(Dest, Src0, Pred);
    Instr->setDestRedefined();
    if (Instr->isMultiDest()) {
      // If Instr is multi-dest, then Dest must be a Variable64On32. We add a
      // fake-def for Instr.DestHi here.
      assert(llvm::isa<Variable64On32>(Dest));
      Context.insert<InstFakeDef>(Instr->getDestHi());
    }
  }

  void _nop() { Context.insert<InstARM32Nop>(); }

  // Generates a vmov instruction to extract the element at the given index
  // from a vector register.
  void _extractelement(Variable *Dest, Variable *Src0, uint32_t Index,
                       CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Extract>(Dest, Src0, Index, Pred);
  }

  // Generates a vmov instruction to insert a value into the given index of a
  // vector register.
  void _insertelement(Variable *Dest, Variable *Src0, uint32_t Index,
                      CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Insert>(Dest, Src0, Index, Pred);
  }

  // --------------------------------------------------------------------------
  // Begin bool folding machinery.
  //
  // There are three types of boolean lowerings handled by this target:
  //
  // 1) Boolean expressions leading to a boolean Variable definition
  // ---------------------------------------------------------------
  //
  // Whenever an i1 Variable is live out (i.e., its live range extends beyond
  // the defining basic block) we do not fold the operation. We instead
  // materialize (i.e., compute) the variable normally, so that it can be used
  // when needed. We also materialize i1 values that are not single use to
  // avoid code duplication. These expressions are not short circuited.
  //
  // 2) Boolean expressions leading to a select
  // ------------------------------------------
  //
  // These include boolean chains leading to a select instruction, as well as
  // i1 Sexts. These boolean expressions are lowered to:
  //
  // mov T, <false value>
  // CC <- eval(Boolean Expression)
  // movCC T, <true value>
  //
  // For Sexts, <false value> is 0, and <true value> is -1.
  //
  // 3) Boolean expressions leading to a br i1
  // -----------------------------------------
  //
  // These are the boolean chains leading to a branch. These chains are
  // short-circuited, i.e.:
  //
  //   A = or i1 B, C
  //   br i1 A, label %T, label %F
  //
  // becomes
  //
  //   tst B
  //   bne %T
  //   tst C
  //   bne %T
  //   b %F
  //
  // and
  //
  //   A = and i1 B, C
  //   br i1 A, label %T, label %F
  //
  // becomes
  //
  //   tst B
  //   beq %F
  //   tst C
  //   beq %F
  //   b %T
  //
  // Arbitrarily long chains are short circuited, e.g.,
  //
  //   A = or  i1 B, C
  //   D = and i1 A, E
  //   F = and i1 G, H
  //   I = or i1 D, F
  //   br i1 I, label %True, label %False
  //
  // becomes
  //
  // Label[A]:
  //   tst B, 1
  //   bne Label[D]
  //   tst C, 1
  //   beq Label[I]
  // Label[D]:
  //   tst E, 1
  //   bne %True
  // Label[I]:
  //   tst G, 1
  //   beq %False
  //   tst H, 1
  //   beq %False (bne %True)

  /// lowerInt1 materializes Boolean to a Variable.
  SafeBoolChain lowerInt1(Variable *Dest, Operand *Boolean);

  /// lowerInt1ForSelect generates the following instruction sequence:
  ///
  ///   mov T, FalseValue
  ///   CC <- eval(Boolean)
  ///   movCC T, TrueValue
  ///   mov Dest, T
  ///
  /// It is used for lowering select i1, as well as i1 Sext.
  void lowerInt1ForSelect(Variable *Dest, Operand *Boolean, Operand *TrueValue,
                          Operand *FalseValue);

  /// LowerInt1BranchTarget is used by lowerInt1ForBranch. It wraps a CfgNode,
  /// or an InstARM32Label (but never both) so that, during br i1 lowering, we
  /// can create auxiliary labels for short circuiting the condition
  /// evaluation.
  class LowerInt1BranchTarget {
  public:
    explicit LowerInt1BranchTarget(CfgNode *const Target)
        : NodeTarget(Target) {}
    explicit LowerInt1BranchTarget(InstARM32Label *const Target)
        : LabelTarget(Target) {}

    /// createForLabelOrDuplicate will return a new LowerInt1BranchTarget that
    /// is the exact copy of this if Label is nullptr; otherwise, the returned
    /// object will wrap Label instead.
    LowerInt1BranchTarget
    createForLabelOrDuplicate(InstARM32Label *Label) const {
      if (Label != nullptr)
        return LowerInt1BranchTarget(Label);
      if (NodeTarget)
        return LowerInt1BranchTarget(NodeTarget);
      return LowerInt1BranchTarget(LabelTarget);
    }

    CfgNode *const NodeTarget = nullptr;
    InstARM32Label *const LabelTarget = nullptr;
  };

  /// LowerInt1AllowShortCircuit is a helper type used by lowerInt1ForBranch
  /// for determining which kinds of arithmetic are allowed to be short
  /// circuited. This is useful for lowering
  ///
  ///   t1 = and i1 A, B
  ///   t2 = and i1 t1, C
  ///   br i1 t2, label %True, label %False
  ///
  /// to
  ///
  ///   tst A, 1
  ///   beq %False
  ///   tst B, 1
  ///   beq %False
  ///   tst C, 1
  ///   bne %True
  ///   b %False
  ///
  /// Without this information, short circuiting could only be applied to a
  /// single high level instruction. For example:
  ///
  ///   t1 = or i1 A, B
  ///   t2 = and i1 t1, C
  ///   br i1 t2, label %True, label %False
  ///
  /// cannot be lowered to
  ///
  ///   tst A, 1
  ///   bne %True
  ///   tst B, 1
  ///   bne %True
  ///   tst C, 1
  ///   beq %False
  ///   b %True
  ///
  /// It needs to be lowered to
  ///
  ///   tst A, 1
  ///   bne Aux
  ///   tst B, 1
  ///   beq %False
  /// Aux:
  ///   tst C, 1
  ///   bne %True
  ///   b %False
  ///
  /// TODO(jpp): evaluate if this kind of short circuiting hurts performance
  /// (it might.)
  enum LowerInt1AllowShortCircuit {
    SC_And = 1,
    SC_Or = 2,
    SC_All = SC_And | SC_Or,
  };

  /// ShortCircuitCondAndLabel wraps the condition codes that should be used
  /// after a lowerInt1ForBranch returns to branch to the
  /// TrueTarget/FalseTarget. If ShortCircuitTarget is not nullptr, then the
  /// called lowerInt1ForBranch created an internal (i.e., short-circuit) label
  /// used for short circuiting.
  class ShortCircuitCondAndLabel {
  public:
    explicit ShortCircuitCondAndLabel(CondWhenTrue &&C,
                                      InstARM32Label *L = nullptr)
        : Cond(std::move(C)), ShortCircuitTarget(L) {}
    const CondWhenTrue Cond;
    InstARM32Label *const ShortCircuitTarget;

    CondWhenTrue assertNoLabelAndReturnCond() const {
      assert(ShortCircuitTarget == nullptr);
      return Cond;
    }
  };

  /// lowerInt1ForBranch expands Boolean, and returns the condition codes that
  /// are to be used for branching to the branch's TrueTarget. It may return a
  /// label that the expansion of Boolean used to short circuit the chain's
  /// evaluation.
  ShortCircuitCondAndLabel
  lowerInt1ForBranch(Operand *Boolean, const LowerInt1BranchTarget &TargetTrue,
                     const LowerInt1BranchTarget &TargetFalse,
                     uint32_t ShortCircuitable);

  // _br is a convenience wrapper that emits br instructions to Target.
  void _br(const LowerInt1BranchTarget &BrTarget,
           CondARM32::Cond Cond = CondARM32::AL) {
    assert((BrTarget.NodeTarget == nullptr) !=
           (BrTarget.LabelTarget == nullptr));
    if (BrTarget.NodeTarget != nullptr)
      _br(BrTarget.NodeTarget, Cond);
    else
      _br(BrTarget.LabelTarget, Cond);
  }

  // _br_short_circuit is used when lowering InstArithmetic::And and
  // InstArithmetic::Or and a short circuit branch is needed.
  void _br_short_circuit(const LowerInt1BranchTarget &Target,
                         const CondWhenTrue &Cond) {
    if (Cond.WhenTrue1 != CondARM32::kNone) {
      _br(Target, Cond.WhenTrue1);
    }
    if (Cond.WhenTrue0 != CondARM32::kNone) {
      _br(Target, Cond.WhenTrue0);
    }
  }
  // End of bool folding machinery
  // --------------------------------------------------------------------------

  /// The Operand can only be a 16-bit immediate or a ConstantRelocatable (with
  /// an upper16 relocation).
  void _movt(Variable *Dest, Operand *Src0,
             CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Movt>(Dest, Src0, Pred);
  }
  void _movw(Variable *Dest, Operand *Src0,
             CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Movw>(Dest, Src0, Pred);
  }
  void _mul(Variable *Dest, Variable *Src0, Variable *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Mul>(Dest, Src0, Src1, Pred);
  }
  void _mvn(Variable *Dest, Operand *Src0,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Mvn>(Dest, Src0, Pred);
  }
  void _orr(Variable *Dest, Variable *Src0, Operand *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Orr>(Dest, Src0, Src1, Pred);
  }
  void _orrs(Variable *Dest, Variable *Src0, Operand *Src1,
             CondARM32::Cond Pred = CondARM32::AL) {
    constexpr bool SetFlags = true;
    Context.insert<InstARM32Orr>(Dest, Src0, Src1, Pred, SetFlags);
    if (SetFlags) {
      Context.insert<InstFakeUse>(Dest);
    }
  }
  void _push(const VarList &Sources) { Context.insert<InstARM32Push>(Sources); }
  void _pop(const VarList &Dests) {
    Context.insert<InstARM32Pop>(Dests);
    // Mark dests as modified.
    for (Variable *Dest : Dests)
      Context.insert<InstFakeDef>(Dest);
  }
  void _rbit(Variable *Dest, Variable *Src0,
             CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Rbit>(Dest, Src0, Pred);
  }
  void _rev(Variable *Dest, Variable *Src0,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Rev>(Dest, Src0, Pred);
  }
  void _ret(Variable *LR, Variable *Src0 = nullptr) {
    Context.insert<InstARM32Ret>(LR, Src0);
  }
  void _rscs(Variable *Dest, Variable *Src0, Operand *Src1,
             CondARM32::Cond Pred = CondARM32::AL) {
    constexpr bool SetFlags = true;
    Context.insert<InstARM32Rsc>(Dest, Src0, Src1, Pred, SetFlags);
    if (SetFlags) {
      Context.insert<InstFakeUse>(Dest);
    }
  }
  void _rsc(Variable *Dest, Variable *Src0, Operand *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Rsc>(Dest, Src0, Src1, Pred);
  }
  void _rsbs(Variable *Dest, Variable *Src0, Operand *Src1,
             CondARM32::Cond Pred = CondARM32::AL) {
    constexpr bool SetFlags = true;
    Context.insert<InstARM32Rsb>(Dest, Src0, Src1, Pred, SetFlags);
    if (SetFlags) {
      Context.insert<InstFakeUse>(Dest);
    }
  }
  void _rsb(Variable *Dest, Variable *Src0, Operand *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Rsb>(Dest, Src0, Src1, Pred);
  }
  void _sbc(Variable *Dest, Variable *Src0, Operand *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Sbc>(Dest, Src0, Src1, Pred);
  }
  void _sbcs(Variable *Dest, Variable *Src0, Operand *Src1,
             CondARM32::Cond Pred = CondARM32::AL) {
    constexpr bool SetFlags = true;
    Context.insert<InstARM32Sbc>(Dest, Src0, Src1, Pred, SetFlags);
    if (SetFlags) {
      Context.insert<InstFakeUse>(Dest);
    }
  }
  void _sdiv(Variable *Dest, Variable *Src0, Variable *Src1,
             CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Sdiv>(Dest, Src0, Src1, Pred);
  }
  /// _str, for all your Variable to memory transfers. Addr has the same
  /// restrictions that it does in _ldr.
  void _str(Variable *Value, OperandARM32Mem *Addr,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Str>(Value, Addr, Pred);
  }
  InstARM32Strex *_strex(Variable *Dest, Variable *Value, OperandARM32Mem *Addr,
                         CondARM32::Cond Pred = CondARM32::AL) {
    if (auto *Value64 = llvm::dyn_cast<Variable64On32>(Value)) {
      Context.insert<InstFakeUse>(Value64->getLo());
      Context.insert<InstFakeUse>(Value64->getHi());
    }
    return Context.insert<InstARM32Strex>(Dest, Value, Addr, Pred);
  }
  void _sub(Variable *Dest, Variable *Src0, Operand *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Sub>(Dest, Src0, Src1, Pred);
  }
  void _subs(Variable *Dest, Variable *Src0, Operand *Src1,
             CondARM32::Cond Pred = CondARM32::AL) {
    constexpr bool SetFlags = true;
    Context.insert<InstARM32Sub>(Dest, Src0, Src1, Pred, SetFlags);
    if (SetFlags) {
      Context.insert<InstFakeUse>(Dest);
    }
  }
  void _sxt(Variable *Dest, Variable *Src0,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Sxt>(Dest, Src0, Pred);
  }
  void _tst(Variable *Src0, Operand *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Tst>(Src0, Src1, Pred);
  }
  void _trap() { Context.insert<InstARM32Trap>(); }
  void _udiv(Variable *Dest, Variable *Src0, Variable *Src1,
             CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Udiv>(Dest, Src0, Src1, Pred);
  }
  void _umull(Variable *DestLo, Variable *DestHi, Variable *Src0,
              Variable *Src1, CondARM32::Cond Pred = CondARM32::AL) {
    // umull requires DestLo and DestHi to be assigned to different GPRs. The
    // following lines create overlapping live ranges for both variables. If
    // either one of them is live, then they are both going to be live, and thus
    // assigned to different registers; if they are both dead, then DCE will
    // kick in and delete the whole sequence.
    Context.insert<InstFakeDef>(DestHi);
    Context.insert<InstARM32Umull>(DestLo, DestHi, Src0, Src1, Pred);
    Context.insert<InstFakeDef>(DestHi, DestLo)->setDestRedefined();
    Context.insert<InstFakeUse>(DestHi);
  }
  void _uxt(Variable *Dest, Variable *Src0,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Uxt>(Dest, Src0, Pred);
  }
  void _vabs(Variable *Dest, Variable *Src,
             CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Vabs>(Dest, Src, Pred);
  }
  void _vadd(Variable *Dest, Variable *Src0, Variable *Src1) {
    Context.insert<InstARM32Vadd>(Dest, Src0, Src1);
  }
  void _vand(Variable *Dest, Variable *Src0, Variable *Src1) {
    Context.insert<InstARM32Vand>(Dest, Src0, Src1);
  }
  InstARM32Vbsl *_vbsl(Variable *Dest, Variable *Src0, Variable *Src1) {
    return Context.insert<InstARM32Vbsl>(Dest, Src0, Src1);
  }
  void _vceq(Variable *Dest, Variable *Src0, Variable *Src1) {
    Context.insert<InstARM32Vceq>(Dest, Src0, Src1);
  }
  InstARM32Vcge *_vcge(Variable *Dest, Variable *Src0, Variable *Src1) {
    return Context.insert<InstARM32Vcge>(Dest, Src0, Src1);
  }
  InstARM32Vcgt *_vcgt(Variable *Dest, Variable *Src0, Variable *Src1) {
    return Context.insert<InstARM32Vcgt>(Dest, Src0, Src1);
  }
  void _vcvt(Variable *Dest, Variable *Src, InstARM32Vcvt::VcvtVariant Variant,
             CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Vcvt>(Dest, Src, Variant, Pred);
  }
  void _vdiv(Variable *Dest, Variable *Src0, Variable *Src1) {
    Context.insert<InstARM32Vdiv>(Dest, Src0, Src1);
  }
  void _vcmp(Variable *Src0, Variable *Src1,
             CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Vcmp>(Src0, Src1, Pred);
  }
  void _vcmp(Variable *Src0, OperandARM32FlexFpZero *FpZero,
             CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Vcmp>(Src0, FpZero, Pred);
  }
  void _veor(Variable *Dest, Variable *Src0, Variable *Src1) {
    Context.insert<InstARM32Veor>(Dest, Src0, Src1);
  }
  void _vmrs(CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Vmrs>(Pred);
  }
  void _vmla(Variable *Dest, Variable *Src0, Variable *Src1) {
    Context.insert<InstARM32Vmla>(Dest, Src0, Src1);
  }
  void _vmls(Variable *Dest, Variable *Src0, Variable *Src1) {
    Context.insert<InstARM32Vmls>(Dest, Src0, Src1);
  }
  void _vmul(Variable *Dest, Variable *Src0, Variable *Src1) {
    Context.insert<InstARM32Vmul>(Dest, Src0, Src1);
  }
  void _vmvn(Variable *Dest, Variable *Src0) {
    Context.insert<InstARM32Vmvn>(Dest, Src0, CondARM32::AL);
  }
  void _vneg(Variable *Dest, Variable *Src0) {
    Context.insert<InstARM32Vneg>(Dest, Src0, CondARM32::AL)
        ->setSignType(InstARM32::FS_Signed);
  }
  void _vorr(Variable *Dest, Variable *Src0, Variable *Src1) {
    Context.insert<InstARM32Vorr>(Dest, Src0, Src1);
  }
  InstARM32Vshl *_vshl(Variable *Dest, Variable *Src0, Variable *Src1) {
    return Context.insert<InstARM32Vshl>(Dest, Src0, Src1);
  }
  void _vshl(Variable *Dest, Variable *Src0, ConstantInteger32 *Src1) {
    Context.insert<InstARM32Vshl>(Dest, Src0, Src1)
        ->setSignType(InstARM32::FS_Unsigned);
  }
  InstARM32Vshr *_vshr(Variable *Dest, Variable *Src0,
                       ConstantInteger32 *Src1) {
    return Context.insert<InstARM32Vshr>(Dest, Src0, Src1);
  }
  void _vsqrt(Variable *Dest, Variable *Src,
              CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Vsqrt>(Dest, Src, Pred);
  }
  void _vsub(Variable *Dest, Variable *Src0, Variable *Src1) {
    Context.insert<InstARM32Vsub>(Dest, Src0, Src1);
  }

  // Iterates over the CFG and determines the maximum number of bytes of
  // outgoing stack arguments. This information is later used during
  // addProlog() to pre-allocate the outargs area.
  // TODO(jpp): This could live in the Parser, if we provided a Target-specific
  // method that the Parser could call.
  void findMaxStackOutArgsSize();

  /// Returns true if the given Offset can be represented in a Load/Store Mem
  /// Operand.
  bool isLegalMemOffset(Type Ty, int32_t Offset) const;

  void postLowerLegalization();

  /// Manages the GotPtr variable, which is used for Nonsfi sandboxing.
  /// @{
  void createGotPtr();
  void insertGotPtrInitPlaceholder();
  VariableDeclaration *createGotRelocation(RelocOffset *AddPcReloc);
  void materializeGotAddr(CfgNode *Node);
  Variable *GotPtr = nullptr;
  // TODO(jpp): use CfgLocalAllocator.
  /// @}

  /// Manages the Gotoff relocations created during the function lowering. A
  /// single Gotoff relocation is created for each global variable used by the
  /// function being lowered.
  /// @{
  // TODO(jpp): if the same global G is used in different functions, then this
  // method will emit one G(gotoff) relocation per function.
  GlobalString createGotoffRelocation(const ConstantRelocatable *CR);
  CfgUnorderedSet<GlobalString> KnownGotoffs;
  /// @}

  /// Loads the constant relocatable Name to Register. Then invokes Finish to
  /// finish the relocatable lowering. Finish **must** use PC in its first
  /// emitted instruction, or the relocatable in Register will contain the
  /// wrong value.
  //
  // Lowered sequence:
  //
  // Movw:
  //     movw Register, #:lower16:Name - (End - Movw) - 8 .
  // Movt:
  //     movt Register, #:upper16:Name - (End - Movt) - 8 .
  //     PC = fake-def
  // End:
  //     Finish(PC)
  //
  // The -8 in movw/movt above is to account for the PC value that the first
  // instruction emitted by Finish(PC) will read (in ARM mode, reading PC
  // yields the address of the current instruction plus 8).
  void
  loadNamedConstantRelocatablePIC(GlobalString Name, Variable *Register,
                                  std::function<void(Variable *PC)> Finish);
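  // A minimal usage sketch (hypothetical): materializing a GOT-relative
  // address, where the callback's first emitted instruction reads PC:
  //
  //   loadNamedConstantRelocatablePIC(
  //       GotName, Register,
  //       [this, Register](Variable *PC) { _add(Register, PC, Register); });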

  /// Sandboxer defines methods for ensuring that "dangerous" operations are
  /// masked during sandboxed code emission. For regular, non-sandboxed code
  /// emission, its methods are simple pass-through methods.
  ///
  /// The Sandboxer also emits BundleLock/BundleUnlock pseudo-instructions
  /// in the constructor/destructor during sandboxed code emission. Therefore,
  /// it is a bad idea to create an object of this type and "keep it around."
  /// The recommended usage is:
  ///
  /// Sandboxer(this).<<operation>>(...);
  ///
  /// This usage ensures that no other instructions are inadvertently added to
  /// the bundle.
  class Sandboxer {
    Sandboxer() = delete;
    Sandboxer(const Sandboxer &) = delete;
    Sandboxer &operator=(const Sandboxer &) = delete;

  public:
    explicit Sandboxer(
        TargetARM32 *Target,
        InstBundleLock::Option BundleOption = InstBundleLock::Opt_None);
    ~Sandboxer();

    /// Increments sp:
    ///
    ///   add sp, sp, AddAmount
    ///   bic sp, sp, 0xc0000000
    ///
    /// (for the rationale, see the ARM 32-bit Sandbox Specification.)
    void add_sp(Operand *AddAmount);

    /// Emits code to align sp to the specified alignment:
    ///
    ///   bic/and sp, sp, Alignment
    ///   bic sp, sp, 0xc0000000
    void align_sp(size_t Alignment);

    /// Emits a call instruction. If CallTarget is a Variable, it emits
    ///
    ///   bic CallTarget, CallTarget, 0xc000000f
    ///   bl CallTarget
    ///
    /// Otherwise, it emits
    ///
    ///   bl CallTarget
    ///
    /// Note: in sandboxed code, calls are always emitted at addresses that
    /// are 12 mod 16.
    InstARM32Call *bl(Variable *ReturnReg, Operand *CallTarget);

    /// Emits a load:
    ///
    ///   bic rBase, rBase, 0xc0000000
    ///   ldr rDest, [rBase, #Offset]
    ///
    /// Exception: if rBase is r9 or sp, then the load is emitted as:
    ///
    ///   ldr rDest, [rBase, #Offset]
    ///
    /// because the NaCl ARM 32-bit Sandbox Specification guarantees they are
    /// always valid.
    void ldr(Variable *Dest, OperandARM32Mem *Mem, CondARM32::Cond Pred);

    /// Emits a load exclusive:
    ///
    ///   bic rBase, rBase, 0xc0000000
    ///   ldrex rDest, [rBase]
    ///
    /// Exception: if rBase is r9 or sp, then the load is emitted as:
    ///
    ///   ldrex rDest, [rBase]
    ///
    /// because the NaCl ARM 32-bit Sandbox Specification guarantees they are
    /// always valid.
    void ldrex(Variable *Dest, OperandARM32Mem *Mem, CondARM32::Cond Pred);

    /// Resets sp to Src:
    ///
    ///   mov sp, Src
    ///   bic sp, sp, 0xc0000000
    void reset_sp(Variable *Src);

    /// Emits code to return from a function:
    ///
    ///   bic lr, lr, 0xc000000f
    ///   bx lr
    void ret(Variable *RetAddr, Variable *RetValue);

    /// Emits a store:
    ///
    ///   bic rBase, rBase, 0xc0000000
    ///   str rSrc, [rBase, #Offset]
    ///
    /// Exception: if rBase is r9 or sp, then the store is emitted as:
    ///
    ///   str rSrc, [rBase, #Offset]
    ///
    /// because the NaCl ARM 32-bit Sandbox Specification guarantees they are
    /// always valid.
    void str(Variable *Src, OperandARM32Mem *Mem, CondARM32::Cond Pred);

    /// Emits a store exclusive:
    ///
    ///   bic rBase, rBase, 0xc0000000
    ///   strex rDest, rSrc, [rBase]
    ///
    /// Exception: if rBase is r9 or sp, then the store is emitted as:
    ///
    ///   strex rDest, rSrc, [rBase]
    ///
    /// because the NaCl ARM 32-bit Sandbox Specification guarantees they are
    /// always valid.
    void strex(Variable *Dest, Variable *Src, OperandARM32Mem *Mem,
               CondARM32::Cond Pred);

    /// Decrements sp:
    ///
    ///   sub sp, sp, SubAmount
    ///   bic sp, sp, 0xc0000000
    void sub_sp(Operand *SubAmount);

  private:
    TargetARM32 *const Target;
    const InstBundleLock::Option BundleOption;
    std::unique_ptr<AutoBundle> Bundler;

    void createAutoBundle();
  };

  class PostLoweringLegalizer {
    PostLoweringLegalizer() = delete;
    PostLoweringLegalizer(const PostLoweringLegalizer &) = delete;
    PostLoweringLegalizer &operator=(const PostLoweringLegalizer &) = delete;

  public:
    explicit PostLoweringLegalizer(TargetARM32 *Target)
        : Target(Target), StackOrFrameReg(Target->getPhysicalRegister(
                              Target->getFrameOrStackReg())) {}

    void resetTempBaseIfClobberedBy(const Inst *Instr);

    // Asserts that the TempBase register held by this legalizer (if any) is
    // assigned to IP.
    void assertNoTempOrAssignedToIP() const {
      assert(TempBaseReg == nullptr ||
             TempBaseReg->getRegNum() == Target->getReservedTmpReg());
    }

    // Legalizes Mem. If Mem.Base is a rematerializable variable, Mem.Offset is
    // fixed up.
    OperandARM32Mem *legalizeMemOperand(OperandARM32Mem *Mem,
                                        bool AllowOffsets = true);

    /// Legalizes Mov if its Source (or Destination) is a spilled Variable, or
    /// if its Source is a Rematerializable variable (this form is used in lieu
    /// of lea, which is not available on ARM.)
    ///
    /// Moves to memory become store instructions, and moves from memory, loads.
    void legalizeMov(InstARM32Mov *Mov);

  private:
    /// Creates a new Base register centered around [Base, +/- Offset].
    Variable *newBaseRegister(Variable *Base, int32_t Offset,
                              RegNumT ScratchRegNum);

    /// Creates a new, legal OperandARM32Mem for accessing Base + Offset.
    /// The returned mem operand is a legal operand for accessing memory that is
    /// of type Ty.
    ///
    /// If [Base, #Offset] is encodable, then the method returns a Mem operand
    /// expressing it. Otherwise,
    ///
    /// if [TempBaseReg, #Offset-TempBaseOffset] is a valid memory operand, the
    /// method will return that. Otherwise,
    ///
    /// a new base register ip=Base+Offset is created, and the method returns a
    /// memory operand expressing [ip, #0].
    OperandARM32Mem *createMemOperand(Type Ty, Variable *Base, int32_t Offset,
                                      bool AllowOffsets = true);
    TargetARM32 *const Target;
    Variable *const StackOrFrameReg;
    Variable *TempBaseReg = nullptr;
    int32_t TempBaseOffset = 0;
  };
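  // A minimal usage sketch for PostLoweringLegalizer (a hypothetical loop
  // from a post-lowering legalization pass):
  //
  //   PostLoweringLegalizer Legalizer(this);
  //   for (Inst &I : Node->getInsts()) {
  //     Legalizer.resetTempBaseIfClobberedBy(&I);
  //     if (auto *Mov = llvm::dyn_cast<InstARM32Mov>(&I))
  //       Legalizer.legalizeMov(Mov);
  //   }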

  const bool NeedSandboxing;
  TargetARM32Features CPUFeatures;
  bool UsesFramePointer = false;
  bool NeedsStackAlignment = false;
  bool MaybeLeafFunc = true;
  size_t SpillAreaSizeBytes = 0;
  size_t FixedAllocaSizeBytes = 0;
  size_t FixedAllocaAlignBytes = 0;
  bool PrologEmitsFixedAllocas = false;
  uint32_t MaxOutArgsSizeBytes = 0;
  // TODO(jpp): std::array instead of array.
  static SmallBitVector TypeToRegisterSet[RegARM32::RCARM32_NUM];
  static SmallBitVector TypeToRegisterSetUnfiltered[RegARM32::RCARM32_NUM];
  static SmallBitVector RegisterAliases[RegARM32::Reg_NUM];
  SmallBitVector RegsUsed;
  VarList PhysicalRegisters[IceType_NUM];
  VarList PreservedGPRs;
  VarList PreservedSRegs;

  /// Helper class that understands the Calling Convention and register
  /// assignments. The first few integer type parameters can use r0-r3,
  /// regardless of their position relative to the floating-point/vector
  /// arguments in the argument list. Floating-point and vector arguments
  /// can use q0-q3 (aka d0-d7, s0-s15). For more information on the topic,
  /// see the Procedure Call Standard for the ARM Architecture (AAPCS).
  ///
  /// Technically, arguments that start in registers but extend beyond the
  /// available registers can be split between the registers and the stack.
  /// However, this is typically used for passing GPR structs by value, and
  /// PNaCl transforms expand this out.
  ///
  /// At (public) function entry, the stack must be 8-byte aligned.
  class CallingConv {
    CallingConv(const CallingConv &) = delete;
    CallingConv &operator=(const CallingConv &) = delete;

  public:
    CallingConv();
    ~CallingConv() = default;

    /// argInGPR returns true if there is a GPR available for the requested
    /// type, and false otherwise. If it returns true, Reg is set to the
    /// appropriate register number. Note that, when Ty == IceType_i64, Reg will
    /// be an I64 register pair.
    bool argInGPR(Type Ty, RegNumT *Reg);

    /// argInVFP is to floating-point/vector types what argInGPR is for integer
    /// types.
    bool argInVFP(Type Ty, RegNumT *Reg);

  private:
    void discardUnavailableGPRsAndTheirAliases(CfgVector<RegNumT> *Regs);
    SmallBitVector GPRegsUsed;
    CfgVector<RegNumT> GPRArgs;
    CfgVector<RegNumT> I64Args;

    void discardUnavailableVFPRegs(CfgVector<RegNumT> *Regs);
    SmallBitVector VFPRegsUsed;
    CfgVector<RegNumT> FP32Args;
    CfgVector<RegNumT> FP64Args;
    CfgVector<RegNumT> Vec128Args;
  };
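  // A minimal usage sketch for CallingConv (hypothetical): assign registers
  // to an argument list until the convention runs out of them.
  //
  //   CallingConv CC;
  //   RegNumT Reg;
  //   for (Variable *Arg : Args) {
  //     const Type Ty = Arg->getType();
  //     const bool InReg = (isScalarFloatingType(Ty) || isVectorType(Ty))
  //                            ? CC.argInVFP(Ty, &Reg)
  //                            : CC.argInGPR(Ty, &Reg);
  //     if (!InReg)
  //       continue; // this argument is passed on the stack
  //     // ... record that Arg lives in Reg ...
  //   }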

private:
  ENABLE_MAKE_UNIQUE;

  OperandARM32Mem *formAddressingMode(Type Ty, Cfg *Func, const Inst *LdSt,
                                      Operand *Base);

  void postambleCtpop64(const InstCall *Instr);
  void preambleDivRem(const InstCall *Instr);
  CfgUnorderedMap<Operand *, void (TargetARM32::*)(const InstCall *Instr)>
      ARM32HelpersPreamble;
  CfgUnorderedMap<Operand *, void (TargetARM32::*)(const InstCall *Instr)>
      ARM32HelpersPostamble;

  class ComputationTracker {
  public:
    ComputationTracker() = default;
    ~ComputationTracker() = default;

    void forgetProducers() { KnownComputations.clear(); }
    void recordProducers(CfgNode *Node);

    const Inst *getProducerOf(const Operand *Opnd) const {
      auto *Var = llvm::dyn_cast<Variable>(Opnd);
      if (Var == nullptr) {
        return nullptr;
      }

      auto Iter = KnownComputations.find(Var->getIndex());
      if (Iter == KnownComputations.end()) {
        return nullptr;
      }

      return Iter->second.Instr;
    }
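
    // For example (a hypothetical use during br i1 lowering), one can ask
    // whether the branch condition was produced by a foldable instruction:
    //
    //   if (const Inst *Producer = getProducerOf(Instr->getCondition())) {
    //     // ... fold Producer directly into the branch ...
    //   }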

    void dump(const Cfg *Func) const {
      if (!BuildDefs::dump() || !Func->isVerbose(IceV_Folding))
        return;
      OstreamLocker L(Func->getContext());
      Ostream &Str = Func->getContext()->getStrDump();
      Str << "foldable producer:\n";
      for (const auto &Computation : KnownComputations) {
        Str << "    ";
        Computation.second.Instr->dump(Func);
        Str << "\n";
      }
      Str << "\n";
    }

  private:
    class ComputationEntry {
    public:
      ComputationEntry(Inst *I, Type Ty) : Instr(I), ComputationType(Ty) {}
      Inst *const Instr;
      // Boolean folding is disabled for variables whose live range is multi
      // block. We conservatively initialize IsLiveOut to true, and set it to
      // false once we find the end of the live range for the variable defined
      // by this instruction. If liveness analysis is not performed (e.g., in
      // Om1 mode) IsLiveOut will never be set to false, and folding will be
      // disabled.
      bool IsLiveOut = true;
      int32_t NumUses = 0;
      Type ComputationType;
    };

    // ComputationMap maps a Variable number to a payload identifying which
    // instruction defined it.
    using ComputationMap = CfgUnorderedMap<SizeT, ComputationEntry>;
    ComputationMap KnownComputations;
  };

  ComputationTracker Computations;

  // AllowTemporaryWithNoReg indicates if TargetARM32::makeReg() can be invoked
  // without specifying a physical register. This is needed for creating unbound
  // temporaries during Ice -> ARM lowering, but before register allocation.
  // This is a safeguard ensuring that no unbound temporaries are created during
  // the legalization post-passes.
  bool AllowTemporaryWithNoReg = true;
  // ForbidTemporaryWithoutReg is a RAII class that manages
  // AllowTemporaryWithNoReg.
  class ForbidTemporaryWithoutReg {
    ForbidTemporaryWithoutReg() = delete;
    ForbidTemporaryWithoutReg(const ForbidTemporaryWithoutReg &) = delete;
    ForbidTemporaryWithoutReg &
    operator=(const ForbidTemporaryWithoutReg &) = delete;

  public:
    explicit ForbidTemporaryWithoutReg(TargetARM32 *Target) : Target(Target) {
      Target->AllowTemporaryWithNoReg = false;
    }
    ~ForbidTemporaryWithoutReg() { Target->AllowTemporaryWithNoReg = true; }

  private:
    TargetARM32 *const Target;
  };
};

class TargetDataARM32 final : public TargetDataLowering {
  TargetDataARM32() = delete;
  TargetDataARM32(const TargetDataARM32 &) = delete;
  TargetDataARM32 &operator=(const TargetDataARM32 &) = delete;

public:
  static std::unique_ptr<TargetDataLowering> create(GlobalContext *Ctx) {
    return std::unique_ptr<TargetDataLowering>(new TargetDataARM32(Ctx));
  }

  void lowerGlobals(const VariableDeclarationList &Vars,
                    const std::string &SectionSuffix) override;
  void lowerConstants() override;
  void lowerJumpTables() override;

protected:
  explicit TargetDataARM32(GlobalContext *Ctx);

private:
  ~TargetDataARM32() override = default;
};

class TargetHeaderARM32 final : public TargetHeaderLowering {
  TargetHeaderARM32() = delete;
  TargetHeaderARM32(const TargetHeaderARM32 &) = delete;
  TargetHeaderARM32 &operator=(const TargetHeaderARM32 &) = delete;

public:
  static std::unique_ptr<TargetHeaderLowering> create(GlobalContext *Ctx) {
    return std::unique_ptr<TargetHeaderLowering>(new TargetHeaderARM32(Ctx));
  }

  void lower() override;

protected:
  explicit TargetHeaderARM32(GlobalContext *Ctx);

private:
  ~TargetHeaderARM32() = default;

  TargetARM32Features CPUFeatures;
};

} // end of namespace ARM32
} // end of namespace Ice

#endif // SUBZERO_SRC_ICETARGETLOWERINGARM32_H