• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //===-- AArch64FastISel.cpp - AArch64 FastISel implementation -------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines the AArch64-specific support for the FastISel class. Some
11 // of the target-specific code is generated by tablegen in the file
12 // AArch64GenFastISel.inc, which is #included here.
13 //
14 //===----------------------------------------------------------------------===//
15 
16 #include "AArch64.h"
17 #include "AArch64CallingConvention.h"
18 #include "AArch64Subtarget.h"
19 #include "AArch64TargetMachine.h"
20 #include "MCTargetDesc/AArch64AddressingModes.h"
21 #include "llvm/Analysis/BranchProbabilityInfo.h"
22 #include "llvm/CodeGen/CallingConvLower.h"
23 #include "llvm/CodeGen/FastISel.h"
24 #include "llvm/CodeGen/FunctionLoweringInfo.h"
25 #include "llvm/CodeGen/MachineConstantPool.h"
26 #include "llvm/CodeGen/MachineFrameInfo.h"
27 #include "llvm/CodeGen/MachineInstrBuilder.h"
28 #include "llvm/CodeGen/MachineRegisterInfo.h"
29 #include "llvm/IR/CallingConv.h"
30 #include "llvm/IR/DataLayout.h"
31 #include "llvm/IR/DerivedTypes.h"
32 #include "llvm/IR/Function.h"
33 #include "llvm/IR/GetElementPtrTypeIterator.h"
34 #include "llvm/IR/GlobalAlias.h"
35 #include "llvm/IR/GlobalVariable.h"
36 #include "llvm/IR/Instructions.h"
37 #include "llvm/IR/IntrinsicInst.h"
38 #include "llvm/IR/Operator.h"
39 #include "llvm/MC/MCSymbol.h"
40 using namespace llvm;
41 
42 namespace {
43 
44 class AArch64FastISel final : public FastISel {
45   class Address {
46   public:
47     typedef enum {
48       RegBase,
49       FrameIndexBase
50     } BaseKind;
51 
52   private:
53     BaseKind Kind;
54     AArch64_AM::ShiftExtendType ExtType;
55     union {
56       unsigned Reg;
57       int FI;
58     } Base;
59     unsigned OffsetReg;
60     unsigned Shift;
61     int64_t Offset;
62     const GlobalValue *GV;
63 
64   public:
Address()65     Address() : Kind(RegBase), ExtType(AArch64_AM::InvalidShiftExtend),
66       OffsetReg(0), Shift(0), Offset(0), GV(nullptr) { Base.Reg = 0; }
setKind(BaseKind K)67     void setKind(BaseKind K) { Kind = K; }
getKind() const68     BaseKind getKind() const { return Kind; }
setExtendType(AArch64_AM::ShiftExtendType E)69     void setExtendType(AArch64_AM::ShiftExtendType E) { ExtType = E; }
getExtendType() const70     AArch64_AM::ShiftExtendType getExtendType() const { return ExtType; }
isRegBase() const71     bool isRegBase() const { return Kind == RegBase; }
isFIBase() const72     bool isFIBase() const { return Kind == FrameIndexBase; }
setReg(unsigned Reg)73     void setReg(unsigned Reg) {
74       assert(isRegBase() && "Invalid base register access!");
75       Base.Reg = Reg;
76     }
getReg() const77     unsigned getReg() const {
78       assert(isRegBase() && "Invalid base register access!");
79       return Base.Reg;
80     }
setOffsetReg(unsigned Reg)81     void setOffsetReg(unsigned Reg) {
82       OffsetReg = Reg;
83     }
getOffsetReg() const84     unsigned getOffsetReg() const {
85       return OffsetReg;
86     }
setFI(unsigned FI)87     void setFI(unsigned FI) {
88       assert(isFIBase() && "Invalid base frame index  access!");
89       Base.FI = FI;
90     }
getFI() const91     unsigned getFI() const {
92       assert(isFIBase() && "Invalid base frame index access!");
93       return Base.FI;
94     }
setOffset(int64_t O)95     void setOffset(int64_t O) { Offset = O; }
getOffset()96     int64_t getOffset() { return Offset; }
setShift(unsigned S)97     void setShift(unsigned S) { Shift = S; }
getShift()98     unsigned getShift() { return Shift; }
99 
setGlobalValue(const GlobalValue * G)100     void setGlobalValue(const GlobalValue *G) { GV = G; }
getGlobalValue()101     const GlobalValue *getGlobalValue() { return GV; }
102   };
103 
104   /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
105   /// make the right decision when generating code for different targets.
106   const AArch64Subtarget *Subtarget;
107   LLVMContext *Context;
108 
109   bool fastLowerArguments() override;
110   bool fastLowerCall(CallLoweringInfo &CLI) override;
111   bool fastLowerIntrinsicCall(const IntrinsicInst *II) override;
112 
113 private:
114   // Selection routines.
115   bool selectAddSub(const Instruction *I);
116   bool selectLogicalOp(const Instruction *I);
117   bool selectLoad(const Instruction *I);
118   bool selectStore(const Instruction *I);
119   bool selectBranch(const Instruction *I);
120   bool selectIndirectBr(const Instruction *I);
121   bool selectCmp(const Instruction *I);
122   bool selectSelect(const Instruction *I);
123   bool selectFPExt(const Instruction *I);
124   bool selectFPTrunc(const Instruction *I);
125   bool selectFPToInt(const Instruction *I, bool Signed);
126   bool selectIntToFP(const Instruction *I, bool Signed);
127   bool selectRem(const Instruction *I, unsigned ISDOpcode);
128   bool selectRet(const Instruction *I);
129   bool selectTrunc(const Instruction *I);
130   bool selectIntExt(const Instruction *I);
131   bool selectMul(const Instruction *I);
132   bool selectShift(const Instruction *I);
133   bool selectBitCast(const Instruction *I);
134   bool selectFRem(const Instruction *I);
135   bool selectSDiv(const Instruction *I);
136   bool selectGetElementPtr(const Instruction *I);
137 
138   // Utility helper routines.
139   bool isTypeLegal(Type *Ty, MVT &VT);
140   bool isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed = false);
141   bool isValueAvailable(const Value *V) const;
142   bool computeAddress(const Value *Obj, Address &Addr, Type *Ty = nullptr);
143   bool computeCallAddress(const Value *V, Address &Addr);
144   bool simplifyAddress(Address &Addr, MVT VT);
145   void addLoadStoreOperands(Address &Addr, const MachineInstrBuilder &MIB,
146                             unsigned Flags, unsigned ScaleFactor,
147                             MachineMemOperand *MMO);
148   bool isMemCpySmall(uint64_t Len, unsigned Alignment);
149   bool tryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
150                           unsigned Alignment);
151   bool foldXALUIntrinsic(AArch64CC::CondCode &CC, const Instruction *I,
152                          const Value *Cond);
153   bool optimizeIntExtLoad(const Instruction *I, MVT RetVT, MVT SrcVT);
154   bool optimizeSelect(const SelectInst *SI);
155   std::pair<unsigned, bool> getRegForGEPIndex(const Value *Idx);
156 
157   // Emit helper routines.
158   unsigned emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
159                       const Value *RHS, bool SetFlags = false,
160                       bool WantResult = true,  bool IsZExt = false);
161   unsigned emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
162                          bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
163                          bool SetFlags = false, bool WantResult = true);
164   unsigned emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
165                          bool LHSIsKill, uint64_t Imm, bool SetFlags = false,
166                          bool WantResult = true);
167   unsigned emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
168                          bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
169                          AArch64_AM::ShiftExtendType ShiftType,
170                          uint64_t ShiftImm, bool SetFlags = false,
171                          bool WantResult = true);
172   unsigned emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
173                          bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
174                           AArch64_AM::ShiftExtendType ExtType,
175                           uint64_t ShiftImm, bool SetFlags = false,
176                          bool WantResult = true);
177 
178   // Emit functions.
179   bool emitCompareAndBranch(const BranchInst *BI);
180   bool emitCmp(const Value *LHS, const Value *RHS, bool IsZExt);
181   bool emitICmp(MVT RetVT, const Value *LHS, const Value *RHS, bool IsZExt);
182   bool emitICmp_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, uint64_t Imm);
183   bool emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS);
184   unsigned emitLoad(MVT VT, MVT ResultVT, Address Addr, bool WantZExt = true,
185                     MachineMemOperand *MMO = nullptr);
186   bool emitStore(MVT VT, unsigned SrcReg, Address Addr,
187                  MachineMemOperand *MMO = nullptr);
188   unsigned emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt);
189   unsigned emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt);
190   unsigned emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
191                    bool SetFlags = false, bool WantResult = true,
192                    bool IsZExt = false);
193   unsigned emitAdd_ri_(MVT VT, unsigned Op0, bool Op0IsKill, int64_t Imm);
194   unsigned emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
195                    bool SetFlags = false, bool WantResult = true,
196                    bool IsZExt = false);
197   unsigned emitSubs_rr(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
198                        unsigned RHSReg, bool RHSIsKill, bool WantResult = true);
199   unsigned emitSubs_rs(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
200                        unsigned RHSReg, bool RHSIsKill,
201                        AArch64_AM::ShiftExtendType ShiftType, uint64_t ShiftImm,
202                        bool WantResult = true);
203   unsigned emitLogicalOp(unsigned ISDOpc, MVT RetVT, const Value *LHS,
204                          const Value *RHS);
205   unsigned emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
206                             bool LHSIsKill, uint64_t Imm);
207   unsigned emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
208                             bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
209                             uint64_t ShiftImm);
210   unsigned emitAnd_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, uint64_t Imm);
211   unsigned emitMul_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
212                       unsigned Op1, bool Op1IsKill);
213   unsigned emitSMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
214                         unsigned Op1, bool Op1IsKill);
215   unsigned emitUMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
216                         unsigned Op1, bool Op1IsKill);
217   unsigned emitLSL_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
218                       unsigned Op1Reg, bool Op1IsKill);
219   unsigned emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
220                       uint64_t Imm, bool IsZExt = true);
221   unsigned emitLSR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
222                       unsigned Op1Reg, bool Op1IsKill);
223   unsigned emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
224                       uint64_t Imm, bool IsZExt = true);
225   unsigned emitASR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
226                       unsigned Op1Reg, bool Op1IsKill);
227   unsigned emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
228                       uint64_t Imm, bool IsZExt = false);
229 
230   unsigned materializeInt(const ConstantInt *CI, MVT VT);
231   unsigned materializeFP(const ConstantFP *CFP, MVT VT);
232   unsigned materializeGV(const GlobalValue *GV);
233 
234   // Call handling routines.
235 private:
236   CCAssignFn *CCAssignFnForCall(CallingConv::ID CC) const;
237   bool processCallArgs(CallLoweringInfo &CLI, SmallVectorImpl<MVT> &ArgVTs,
238                        unsigned &NumBytes);
239   bool finishCall(CallLoweringInfo &CLI, MVT RetVT, unsigned NumBytes);
240 
241 public:
242   // Backend specific FastISel code.
243   unsigned fastMaterializeAlloca(const AllocaInst *AI) override;
244   unsigned fastMaterializeConstant(const Constant *C) override;
245   unsigned fastMaterializeFloatZero(const ConstantFP* CF) override;
246 
AArch64FastISel(FunctionLoweringInfo & FuncInfo,const TargetLibraryInfo * LibInfo)247   explicit AArch64FastISel(FunctionLoweringInfo &FuncInfo,
248                            const TargetLibraryInfo *LibInfo)
249       : FastISel(FuncInfo, LibInfo, /*SkipTargetIndependentISel=*/true) {
250     Subtarget =
251         &static_cast<const AArch64Subtarget &>(FuncInfo.MF->getSubtarget());
252     Context = &FuncInfo.Fn->getContext();
253   }
254 
255   bool fastSelectInstruction(const Instruction *I) override;
256 
257 #include "AArch64GenFastISel.inc"
258 };
259 
260 } // end anonymous namespace
261 
262 #include "AArch64GenCallingConv.inc"
263 
264 /// \brief Check if the sign-/zero-extend will be a noop.
isIntExtFree(const Instruction * I)265 static bool isIntExtFree(const Instruction *I) {
266   assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
267          "Unexpected integer extend instruction.");
268   assert(!I->getType()->isVectorTy() && I->getType()->isIntegerTy() &&
269          "Unexpected value type.");
270   bool IsZExt = isa<ZExtInst>(I);
271 
272   if (const auto *LI = dyn_cast<LoadInst>(I->getOperand(0)))
273     if (LI->hasOneUse())
274       return true;
275 
276   if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0)))
277     if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr()))
278       return true;
279 
280   return false;
281 }
282 
283 /// \brief Determine the implicit scale factor that is applied by a memory
284 /// operation for a given value type.
getImplicitScaleFactor(MVT VT)285 static unsigned getImplicitScaleFactor(MVT VT) {
286   switch (VT.SimpleTy) {
287   default:
288     return 0;    // invalid
289   case MVT::i1:  // fall-through
290   case MVT::i8:
291     return 1;
292   case MVT::i16:
293     return 2;
294   case MVT::i32: // fall-through
295   case MVT::f32:
296     return 4;
297   case MVT::i64: // fall-through
298   case MVT::f64:
299     return 8;
300   }
301 }
302 
CCAssignFnForCall(CallingConv::ID CC) const303 CCAssignFn *AArch64FastISel::CCAssignFnForCall(CallingConv::ID CC) const {
304   if (CC == CallingConv::WebKit_JS)
305     return CC_AArch64_WebKit_JS;
306   if (CC == CallingConv::GHC)
307     return CC_AArch64_GHC;
308   return Subtarget->isTargetDarwin() ? CC_AArch64_DarwinPCS : CC_AArch64_AAPCS;
309 }
310 
fastMaterializeAlloca(const AllocaInst * AI)311 unsigned AArch64FastISel::fastMaterializeAlloca(const AllocaInst *AI) {
312   assert(TLI.getValueType(DL, AI->getType(), true) == MVT::i64 &&
313          "Alloca should always return a pointer.");
314 
315   // Don't handle dynamic allocas.
316   if (!FuncInfo.StaticAllocaMap.count(AI))
317     return 0;
318 
319   DenseMap<const AllocaInst *, int>::iterator SI =
320       FuncInfo.StaticAllocaMap.find(AI);
321 
322   if (SI != FuncInfo.StaticAllocaMap.end()) {
323     unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass);
324     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
325             ResultReg)
326         .addFrameIndex(SI->second)
327         .addImm(0)
328         .addImm(0);
329     return ResultReg;
330   }
331 
332   return 0;
333 }
334 
materializeInt(const ConstantInt * CI,MVT VT)335 unsigned AArch64FastISel::materializeInt(const ConstantInt *CI, MVT VT) {
336   if (VT > MVT::i64)
337     return 0;
338 
339   if (!CI->isZero())
340     return fastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue());
341 
342   // Create a copy from the zero register to materialize a "0" value.
343   const TargetRegisterClass *RC = (VT == MVT::i64) ? &AArch64::GPR64RegClass
344                                                    : &AArch64::GPR32RegClass;
345   unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
346   unsigned ResultReg = createResultReg(RC);
347   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY),
348           ResultReg).addReg(ZeroReg, getKillRegState(true));
349   return ResultReg;
350 }
351 
materializeFP(const ConstantFP * CFP,MVT VT)352 unsigned AArch64FastISel::materializeFP(const ConstantFP *CFP, MVT VT) {
353   // Positive zero (+0.0) has to be materialized with a fmov from the zero
354   // register, because the immediate version of fmov cannot encode zero.
355   if (CFP->isNullValue())
356     return fastMaterializeFloatZero(CFP);
357 
358   if (VT != MVT::f32 && VT != MVT::f64)
359     return 0;
360 
361   const APFloat Val = CFP->getValueAPF();
362   bool Is64Bit = (VT == MVT::f64);
363   // This checks to see if we can use FMOV instructions to materialize
364   // a constant, otherwise we have to materialize via the constant pool.
365   if (TLI.isFPImmLegal(Val, VT)) {
366     int Imm =
367         Is64Bit ? AArch64_AM::getFP64Imm(Val) : AArch64_AM::getFP32Imm(Val);
368     assert((Imm != -1) && "Cannot encode floating-point constant.");
369     unsigned Opc = Is64Bit ? AArch64::FMOVDi : AArch64::FMOVSi;
370     return fastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm);
371   }
372 
373   // For the MachO large code model materialize the FP constant in code.
374   if (Subtarget->isTargetMachO() && TM.getCodeModel() == CodeModel::Large) {
375     unsigned Opc1 = Is64Bit ? AArch64::MOVi64imm : AArch64::MOVi32imm;
376     const TargetRegisterClass *RC = Is64Bit ?
377         &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
378 
379     unsigned TmpReg = createResultReg(RC);
380     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc1), TmpReg)
381         .addImm(CFP->getValueAPF().bitcastToAPInt().getZExtValue());
382 
383     unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
384     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
385             TII.get(TargetOpcode::COPY), ResultReg)
386         .addReg(TmpReg, getKillRegState(true));
387 
388     return ResultReg;
389   }
390 
391   // Materialize via constant pool.  MachineConstantPool wants an explicit
392   // alignment.
393   unsigned Align = DL.getPrefTypeAlignment(CFP->getType());
394   if (Align == 0)
395     Align = DL.getTypeAllocSize(CFP->getType());
396 
397   unsigned CPI = MCP.getConstantPoolIndex(cast<Constant>(CFP), Align);
398   unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
399   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
400           ADRPReg).addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGE);
401 
402   unsigned Opc = Is64Bit ? AArch64::LDRDui : AArch64::LDRSui;
403   unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
404   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
405       .addReg(ADRPReg)
406       .addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
407   return ResultReg;
408 }
409 
materializeGV(const GlobalValue * GV)410 unsigned AArch64FastISel::materializeGV(const GlobalValue *GV) {
411   // We can't handle thread-local variables quickly yet.
412   if (GV->isThreadLocal())
413     return 0;
414 
415   // MachO still uses GOT for large code-model accesses, but ELF requires
416   // movz/movk sequences, which FastISel doesn't handle yet.
417   if (TM.getCodeModel() != CodeModel::Small && !Subtarget->isTargetMachO())
418     return 0;
419 
420   unsigned char OpFlags = Subtarget->ClassifyGlobalReference(GV, TM);
421 
422   EVT DestEVT = TLI.getValueType(DL, GV->getType(), true);
423   if (!DestEVT.isSimple())
424     return 0;
425 
426   unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
427   unsigned ResultReg;
428 
429   if (OpFlags & AArch64II::MO_GOT) {
430     // ADRP + LDRX
431     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
432             ADRPReg)
433       .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGE);
434 
435     ResultReg = createResultReg(&AArch64::GPR64RegClass);
436     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::LDRXui),
437             ResultReg)
438       .addReg(ADRPReg)
439       .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGEOFF |
440                         AArch64II::MO_NC);
441   } else {
442     // ADRP + ADDX
443     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
444             ADRPReg)
445       .addGlobalAddress(GV, 0, AArch64II::MO_PAGE);
446 
447     ResultReg = createResultReg(&AArch64::GPR64spRegClass);
448     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
449             ResultReg)
450       .addReg(ADRPReg)
451       .addGlobalAddress(GV, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC)
452       .addImm(0);
453   }
454   return ResultReg;
455 }
456 
fastMaterializeConstant(const Constant * C)457 unsigned AArch64FastISel::fastMaterializeConstant(const Constant *C) {
458   EVT CEVT = TLI.getValueType(DL, C->getType(), true);
459 
460   // Only handle simple types.
461   if (!CEVT.isSimple())
462     return 0;
463   MVT VT = CEVT.getSimpleVT();
464 
465   if (const auto *CI = dyn_cast<ConstantInt>(C))
466     return materializeInt(CI, VT);
467   else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
468     return materializeFP(CFP, VT);
469   else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
470     return materializeGV(GV);
471 
472   return 0;
473 }
474 
fastMaterializeFloatZero(const ConstantFP * CFP)475 unsigned AArch64FastISel::fastMaterializeFloatZero(const ConstantFP* CFP) {
476   assert(CFP->isNullValue() &&
477          "Floating-point constant is not a positive zero.");
478   MVT VT;
479   if (!isTypeLegal(CFP->getType(), VT))
480     return 0;
481 
482   if (VT != MVT::f32 && VT != MVT::f64)
483     return 0;
484 
485   bool Is64Bit = (VT == MVT::f64);
486   unsigned ZReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
487   unsigned Opc = Is64Bit ? AArch64::FMOVXDr : AArch64::FMOVWSr;
488   return fastEmitInst_r(Opc, TLI.getRegClassFor(VT), ZReg, /*IsKill=*/true);
489 }
490 
491 /// \brief Check if the multiply is by a power-of-2 constant.
isMulPowOf2(const Value * I)492 static bool isMulPowOf2(const Value *I) {
493   if (const auto *MI = dyn_cast<MulOperator>(I)) {
494     if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(0)))
495       if (C->getValue().isPowerOf2())
496         return true;
497     if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(1)))
498       if (C->getValue().isPowerOf2())
499         return true;
500   }
501   return false;
502 }
503 
504 // Computes the address to get to an object.
computeAddress(const Value * Obj,Address & Addr,Type * Ty)505 bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty)
506 {
507   const User *U = nullptr;
508   unsigned Opcode = Instruction::UserOp1;
509   if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
510     // Don't walk into other basic blocks unless the object is an alloca from
511     // another block, otherwise it may not have a virtual register assigned.
512     if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
513         FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
514       Opcode = I->getOpcode();
515       U = I;
516     }
517   } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
518     Opcode = C->getOpcode();
519     U = C;
520   }
521 
522   if (auto *Ty = dyn_cast<PointerType>(Obj->getType()))
523     if (Ty->getAddressSpace() > 255)
524       // Fast instruction selection doesn't support the special
525       // address spaces.
526       return false;
527 
528   switch (Opcode) {
529   default:
530     break;
531   case Instruction::BitCast: {
532     // Look through bitcasts.
533     return computeAddress(U->getOperand(0), Addr, Ty);
534   }
535   case Instruction::IntToPtr: {
536     // Look past no-op inttoptrs.
537     if (TLI.getValueType(DL, U->getOperand(0)->getType()) ==
538         TLI.getPointerTy(DL))
539       return computeAddress(U->getOperand(0), Addr, Ty);
540     break;
541   }
542   case Instruction::PtrToInt: {
543     // Look past no-op ptrtoints.
544     if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
545       return computeAddress(U->getOperand(0), Addr, Ty);
546     break;
547   }
548   case Instruction::GetElementPtr: {
549     Address SavedAddr = Addr;
550     uint64_t TmpOffset = Addr.getOffset();
551 
552     // Iterate through the GEP folding the constants into offsets where
553     // we can.
554     for (gep_type_iterator GTI = gep_type_begin(U), E = gep_type_end(U);
555          GTI != E; ++GTI) {
556       const Value *Op = GTI.getOperand();
557       if (StructType *STy = dyn_cast<StructType>(*GTI)) {
558         const StructLayout *SL = DL.getStructLayout(STy);
559         unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
560         TmpOffset += SL->getElementOffset(Idx);
561       } else {
562         uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
563         for (;;) {
564           if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
565             // Constant-offset addressing.
566             TmpOffset += CI->getSExtValue() * S;
567             break;
568           }
569           if (canFoldAddIntoGEP(U, Op)) {
570             // A compatible add with a constant operand. Fold the constant.
571             ConstantInt *CI =
572                 cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
573             TmpOffset += CI->getSExtValue() * S;
574             // Iterate on the other operand.
575             Op = cast<AddOperator>(Op)->getOperand(0);
576             continue;
577           }
578           // Unsupported
579           goto unsupported_gep;
580         }
581       }
582     }
583 
584     // Try to grab the base operand now.
585     Addr.setOffset(TmpOffset);
586     if (computeAddress(U->getOperand(0), Addr, Ty))
587       return true;
588 
589     // We failed, restore everything and try the other options.
590     Addr = SavedAddr;
591 
592   unsupported_gep:
593     break;
594   }
595   case Instruction::Alloca: {
596     const AllocaInst *AI = cast<AllocaInst>(Obj);
597     DenseMap<const AllocaInst *, int>::iterator SI =
598         FuncInfo.StaticAllocaMap.find(AI);
599     if (SI != FuncInfo.StaticAllocaMap.end()) {
600       Addr.setKind(Address::FrameIndexBase);
601       Addr.setFI(SI->second);
602       return true;
603     }
604     break;
605   }
606   case Instruction::Add: {
607     // Adds of constants are common and easy enough.
608     const Value *LHS = U->getOperand(0);
609     const Value *RHS = U->getOperand(1);
610 
611     if (isa<ConstantInt>(LHS))
612       std::swap(LHS, RHS);
613 
614     if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
615       Addr.setOffset(Addr.getOffset() + CI->getSExtValue());
616       return computeAddress(LHS, Addr, Ty);
617     }
618 
619     Address Backup = Addr;
620     if (computeAddress(LHS, Addr, Ty) && computeAddress(RHS, Addr, Ty))
621       return true;
622     Addr = Backup;
623 
624     break;
625   }
626   case Instruction::Sub: {
627     // Subs of constants are common and easy enough.
628     const Value *LHS = U->getOperand(0);
629     const Value *RHS = U->getOperand(1);
630 
631     if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
632       Addr.setOffset(Addr.getOffset() - CI->getSExtValue());
633       return computeAddress(LHS, Addr, Ty);
634     }
635     break;
636   }
637   case Instruction::Shl: {
638     if (Addr.getOffsetReg())
639       break;
640 
641     const auto *CI = dyn_cast<ConstantInt>(U->getOperand(1));
642     if (!CI)
643       break;
644 
645     unsigned Val = CI->getZExtValue();
646     if (Val < 1 || Val > 3)
647       break;
648 
649     uint64_t NumBytes = 0;
650     if (Ty && Ty->isSized()) {
651       uint64_t NumBits = DL.getTypeSizeInBits(Ty);
652       NumBytes = NumBits / 8;
653       if (!isPowerOf2_64(NumBits))
654         NumBytes = 0;
655     }
656 
657     if (NumBytes != (1ULL << Val))
658       break;
659 
660     Addr.setShift(Val);
661     Addr.setExtendType(AArch64_AM::LSL);
662 
663     const Value *Src = U->getOperand(0);
664     if (const auto *I = dyn_cast<Instruction>(Src)) {
665       if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
666         // Fold the zext or sext when it won't become a noop.
667         if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
668           if (!isIntExtFree(ZE) &&
669               ZE->getOperand(0)->getType()->isIntegerTy(32)) {
670             Addr.setExtendType(AArch64_AM::UXTW);
671             Src = ZE->getOperand(0);
672           }
673         } else if (const auto *SE = dyn_cast<SExtInst>(I)) {
674           if (!isIntExtFree(SE) &&
675               SE->getOperand(0)->getType()->isIntegerTy(32)) {
676             Addr.setExtendType(AArch64_AM::SXTW);
677             Src = SE->getOperand(0);
678           }
679         }
680       }
681     }
682 
683     if (const auto *AI = dyn_cast<BinaryOperator>(Src))
684       if (AI->getOpcode() == Instruction::And) {
685         const Value *LHS = AI->getOperand(0);
686         const Value *RHS = AI->getOperand(1);
687 
688         if (const auto *C = dyn_cast<ConstantInt>(LHS))
689           if (C->getValue() == 0xffffffff)
690             std::swap(LHS, RHS);
691 
692         if (const auto *C = dyn_cast<ConstantInt>(RHS))
693           if (C->getValue() == 0xffffffff) {
694             Addr.setExtendType(AArch64_AM::UXTW);
695             unsigned Reg = getRegForValue(LHS);
696             if (!Reg)
697               return false;
698             bool RegIsKill = hasTrivialKill(LHS);
699             Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, RegIsKill,
700                                              AArch64::sub_32);
701             Addr.setOffsetReg(Reg);
702             return true;
703           }
704       }
705 
706     unsigned Reg = getRegForValue(Src);
707     if (!Reg)
708       return false;
709     Addr.setOffsetReg(Reg);
710     return true;
711   }
712   case Instruction::Mul: {
713     if (Addr.getOffsetReg())
714       break;
715 
716     if (!isMulPowOf2(U))
717       break;
718 
719     const Value *LHS = U->getOperand(0);
720     const Value *RHS = U->getOperand(1);
721 
722     // Canonicalize power-of-2 value to the RHS.
723     if (const auto *C = dyn_cast<ConstantInt>(LHS))
724       if (C->getValue().isPowerOf2())
725         std::swap(LHS, RHS);
726 
727     assert(isa<ConstantInt>(RHS) && "Expected an ConstantInt.");
728     const auto *C = cast<ConstantInt>(RHS);
729     unsigned Val = C->getValue().logBase2();
730     if (Val < 1 || Val > 3)
731       break;
732 
733     uint64_t NumBytes = 0;
734     if (Ty && Ty->isSized()) {
735       uint64_t NumBits = DL.getTypeSizeInBits(Ty);
736       NumBytes = NumBits / 8;
737       if (!isPowerOf2_64(NumBits))
738         NumBytes = 0;
739     }
740 
741     if (NumBytes != (1ULL << Val))
742       break;
743 
744     Addr.setShift(Val);
745     Addr.setExtendType(AArch64_AM::LSL);
746 
747     const Value *Src = LHS;
748     if (const auto *I = dyn_cast<Instruction>(Src)) {
749       if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
750         // Fold the zext or sext when it won't become a noop.
751         if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
752           if (!isIntExtFree(ZE) &&
753               ZE->getOperand(0)->getType()->isIntegerTy(32)) {
754             Addr.setExtendType(AArch64_AM::UXTW);
755             Src = ZE->getOperand(0);
756           }
757         } else if (const auto *SE = dyn_cast<SExtInst>(I)) {
758           if (!isIntExtFree(SE) &&
759               SE->getOperand(0)->getType()->isIntegerTy(32)) {
760             Addr.setExtendType(AArch64_AM::SXTW);
761             Src = SE->getOperand(0);
762           }
763         }
764       }
765     }
766 
767     unsigned Reg = getRegForValue(Src);
768     if (!Reg)
769       return false;
770     Addr.setOffsetReg(Reg);
771     return true;
772   }
773   case Instruction::And: {
774     if (Addr.getOffsetReg())
775       break;
776 
777     if (!Ty || DL.getTypeSizeInBits(Ty) != 8)
778       break;
779 
780     const Value *LHS = U->getOperand(0);
781     const Value *RHS = U->getOperand(1);
782 
783     if (const auto *C = dyn_cast<ConstantInt>(LHS))
784       if (C->getValue() == 0xffffffff)
785         std::swap(LHS, RHS);
786 
787     if (const auto *C = dyn_cast<ConstantInt>(RHS))
788       if (C->getValue() == 0xffffffff) {
789         Addr.setShift(0);
790         Addr.setExtendType(AArch64_AM::LSL);
791         Addr.setExtendType(AArch64_AM::UXTW);
792 
793         unsigned Reg = getRegForValue(LHS);
794         if (!Reg)
795           return false;
796         bool RegIsKill = hasTrivialKill(LHS);
797         Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, RegIsKill,
798                                          AArch64::sub_32);
799         Addr.setOffsetReg(Reg);
800         return true;
801       }
802     break;
803   }
804   case Instruction::SExt:
805   case Instruction::ZExt: {
806     if (!Addr.getReg() || Addr.getOffsetReg())
807       break;
808 
809     const Value *Src = nullptr;
810     // Fold the zext or sext when it won't become a noop.
811     if (const auto *ZE = dyn_cast<ZExtInst>(U)) {
812       if (!isIntExtFree(ZE) && ZE->getOperand(0)->getType()->isIntegerTy(32)) {
813         Addr.setExtendType(AArch64_AM::UXTW);
814         Src = ZE->getOperand(0);
815       }
816     } else if (const auto *SE = dyn_cast<SExtInst>(U)) {
817       if (!isIntExtFree(SE) && SE->getOperand(0)->getType()->isIntegerTy(32)) {
818         Addr.setExtendType(AArch64_AM::SXTW);
819         Src = SE->getOperand(0);
820       }
821     }
822 
823     if (!Src)
824       break;
825 
826     Addr.setShift(0);
827     unsigned Reg = getRegForValue(Src);
828     if (!Reg)
829       return false;
830     Addr.setOffsetReg(Reg);
831     return true;
832   }
833   } // end switch
834 
835   if (Addr.isRegBase() && !Addr.getReg()) {
836     unsigned Reg = getRegForValue(Obj);
837     if (!Reg)
838       return false;
839     Addr.setReg(Reg);
840     return true;
841   }
842 
843   if (!Addr.getOffsetReg()) {
844     unsigned Reg = getRegForValue(Obj);
845     if (!Reg)
846       return false;
847     Addr.setOffsetReg(Reg);
848     return true;
849   }
850 
851   return false;
852 }
853 
computeCallAddress(const Value * V,Address & Addr)854 bool AArch64FastISel::computeCallAddress(const Value *V, Address &Addr) {
855   const User *U = nullptr;
856   unsigned Opcode = Instruction::UserOp1;
857   bool InMBB = true;
858 
859   if (const auto *I = dyn_cast<Instruction>(V)) {
860     Opcode = I->getOpcode();
861     U = I;
862     InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock();
863   } else if (const auto *C = dyn_cast<ConstantExpr>(V)) {
864     Opcode = C->getOpcode();
865     U = C;
866   }
867 
868   switch (Opcode) {
869   default: break;
870   case Instruction::BitCast:
871     // Look past bitcasts if its operand is in the same BB.
872     if (InMBB)
873       return computeCallAddress(U->getOperand(0), Addr);
874     break;
875   case Instruction::IntToPtr:
876     // Look past no-op inttoptrs if its operand is in the same BB.
877     if (InMBB &&
878         TLI.getValueType(DL, U->getOperand(0)->getType()) ==
879             TLI.getPointerTy(DL))
880       return computeCallAddress(U->getOperand(0), Addr);
881     break;
882   case Instruction::PtrToInt:
883     // Look past no-op ptrtoints if its operand is in the same BB.
884     if (InMBB && TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
885       return computeCallAddress(U->getOperand(0), Addr);
886     break;
887   }
888 
889   if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
890     Addr.setGlobalValue(GV);
891     return true;
892   }
893 
894   // If all else fails, try to materialize the value in a register.
895   if (!Addr.getGlobalValue()) {
896     Addr.setReg(getRegForValue(V));
897     return Addr.getReg() != 0;
898   }
899 
900   return false;
901 }
902 
903 
isTypeLegal(Type * Ty,MVT & VT)904 bool AArch64FastISel::isTypeLegal(Type *Ty, MVT &VT) {
905   EVT evt = TLI.getValueType(DL, Ty, true);
906 
907   // Only handle simple types.
908   if (evt == MVT::Other || !evt.isSimple())
909     return false;
910   VT = evt.getSimpleVT();
911 
912   // This is a legal type, but it's not something we handle in fast-isel.
913   if (VT == MVT::f128)
914     return false;
915 
916   // Handle all other legal types, i.e. a register that will directly hold this
917   // value.
918   return TLI.isTypeLegal(VT);
919 }
920 
921 /// \brief Determine if the value type is supported by FastISel.
922 ///
923 /// FastISel for AArch64 can handle more value types than are legal. This adds
924 /// simple value type such as i1, i8, and i16.
isTypeSupported(Type * Ty,MVT & VT,bool IsVectorAllowed)925 bool AArch64FastISel::isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed) {
926   if (Ty->isVectorTy() && !IsVectorAllowed)
927     return false;
928 
929   if (isTypeLegal(Ty, VT))
930     return true;
931 
932   // If this is a type than can be sign or zero-extended to a basic operation
933   // go ahead and accept it now.
934   if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
935     return true;
936 
937   return false;
938 }
939 
isValueAvailable(const Value * V) const940 bool AArch64FastISel::isValueAvailable(const Value *V) const {
941   if (!isa<Instruction>(V))
942     return true;
943 
944   const auto *I = cast<Instruction>(V);
945   return FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB;
946 }
947 
simplifyAddress(Address & Addr,MVT VT)948 bool AArch64FastISel::simplifyAddress(Address &Addr, MVT VT) {
949   unsigned ScaleFactor = getImplicitScaleFactor(VT);
950   if (!ScaleFactor)
951     return false;
952 
953   bool ImmediateOffsetNeedsLowering = false;
954   bool RegisterOffsetNeedsLowering = false;
955   int64_t Offset = Addr.getOffset();
956   if (((Offset < 0) || (Offset & (ScaleFactor - 1))) && !isInt<9>(Offset))
957     ImmediateOffsetNeedsLowering = true;
958   else if (Offset > 0 && !(Offset & (ScaleFactor - 1)) &&
959            !isUInt<12>(Offset / ScaleFactor))
960     ImmediateOffsetNeedsLowering = true;
961 
962   // Cannot encode an offset register and an immediate offset in the same
963   // instruction. Fold the immediate offset into the load/store instruction and
964   // emit an additional add to take care of the offset register.
965   if (!ImmediateOffsetNeedsLowering && Addr.getOffset() && Addr.getOffsetReg())
966     RegisterOffsetNeedsLowering = true;
967 
968   // Cannot encode zero register as base.
969   if (Addr.isRegBase() && Addr.getOffsetReg() && !Addr.getReg())
970     RegisterOffsetNeedsLowering = true;
971 
972   // If this is a stack pointer and the offset needs to be simplified then put
973   // the alloca address into a register, set the base type back to register and
974   // continue. This should almost never happen.
975   if ((ImmediateOffsetNeedsLowering || Addr.getOffsetReg()) && Addr.isFIBase())
976   {
977     unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass);
978     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
979             ResultReg)
980       .addFrameIndex(Addr.getFI())
981       .addImm(0)
982       .addImm(0);
983     Addr.setKind(Address::RegBase);
984     Addr.setReg(ResultReg);
985   }
986 
987   if (RegisterOffsetNeedsLowering) {
988     unsigned ResultReg = 0;
989     if (Addr.getReg()) {
990       if (Addr.getExtendType() == AArch64_AM::SXTW ||
991           Addr.getExtendType() == AArch64_AM::UXTW   )
992         ResultReg = emitAddSub_rx(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
993                                   /*TODO:IsKill=*/false, Addr.getOffsetReg(),
994                                   /*TODO:IsKill=*/false, Addr.getExtendType(),
995                                   Addr.getShift());
996       else
997         ResultReg = emitAddSub_rs(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
998                                   /*TODO:IsKill=*/false, Addr.getOffsetReg(),
999                                   /*TODO:IsKill=*/false, AArch64_AM::LSL,
1000                                   Addr.getShift());
1001     } else {
1002       if (Addr.getExtendType() == AArch64_AM::UXTW)
1003         ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
1004                                /*Op0IsKill=*/false, Addr.getShift(),
1005                                /*IsZExt=*/true);
1006       else if (Addr.getExtendType() == AArch64_AM::SXTW)
1007         ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
1008                                /*Op0IsKill=*/false, Addr.getShift(),
1009                                /*IsZExt=*/false);
1010       else
1011         ResultReg = emitLSL_ri(MVT::i64, MVT::i64, Addr.getOffsetReg(),
1012                                /*Op0IsKill=*/false, Addr.getShift());
1013     }
1014     if (!ResultReg)
1015       return false;
1016 
1017     Addr.setReg(ResultReg);
1018     Addr.setOffsetReg(0);
1019     Addr.setShift(0);
1020     Addr.setExtendType(AArch64_AM::InvalidShiftExtend);
1021   }
1022 
1023   // Since the offset is too large for the load/store instruction get the
1024   // reg+offset into a register.
1025   if (ImmediateOffsetNeedsLowering) {
1026     unsigned ResultReg;
1027     if (Addr.getReg())
1028       // Try to fold the immediate into the add instruction.
1029       ResultReg = emitAdd_ri_(MVT::i64, Addr.getReg(), /*IsKill=*/false, Offset);
1030     else
1031       ResultReg = fastEmit_i(MVT::i64, MVT::i64, ISD::Constant, Offset);
1032 
1033     if (!ResultReg)
1034       return false;
1035     Addr.setReg(ResultReg);
1036     Addr.setOffset(0);
1037   }
1038   return true;
1039 }
1040 
addLoadStoreOperands(Address & Addr,const MachineInstrBuilder & MIB,unsigned Flags,unsigned ScaleFactor,MachineMemOperand * MMO)1041 void AArch64FastISel::addLoadStoreOperands(Address &Addr,
1042                                            const MachineInstrBuilder &MIB,
1043                                            unsigned Flags,
1044                                            unsigned ScaleFactor,
1045                                            MachineMemOperand *MMO) {
1046   int64_t Offset = Addr.getOffset() / ScaleFactor;
1047   // Frame base works a bit differently. Handle it separately.
1048   if (Addr.isFIBase()) {
1049     int FI = Addr.getFI();
1050     // FIXME: We shouldn't be using getObjectSize/getObjectAlignment.  The size
1051     // and alignment should be based on the VT.
1052     MMO = FuncInfo.MF->getMachineMemOperand(
1053         MachinePointerInfo::getFixedStack(*FuncInfo.MF, FI, Offset), Flags,
1054         MFI.getObjectSize(FI), MFI.getObjectAlignment(FI));
1055     // Now add the rest of the operands.
1056     MIB.addFrameIndex(FI).addImm(Offset);
1057   } else {
1058     assert(Addr.isRegBase() && "Unexpected address kind.");
1059     const MCInstrDesc &II = MIB->getDesc();
1060     unsigned Idx = (Flags & MachineMemOperand::MOStore) ? 1 : 0;
1061     Addr.setReg(
1062       constrainOperandRegClass(II, Addr.getReg(), II.getNumDefs()+Idx));
1063     Addr.setOffsetReg(
1064       constrainOperandRegClass(II, Addr.getOffsetReg(), II.getNumDefs()+Idx+1));
1065     if (Addr.getOffsetReg()) {
1066       assert(Addr.getOffset() == 0 && "Unexpected offset");
1067       bool IsSigned = Addr.getExtendType() == AArch64_AM::SXTW ||
1068                       Addr.getExtendType() == AArch64_AM::SXTX;
1069       MIB.addReg(Addr.getReg());
1070       MIB.addReg(Addr.getOffsetReg());
1071       MIB.addImm(IsSigned);
1072       MIB.addImm(Addr.getShift() != 0);
1073     } else
1074       MIB.addReg(Addr.getReg()).addImm(Offset);
1075   }
1076 
1077   if (MMO)
1078     MIB.addMemOperand(MMO);
1079 }
1080 
emitAddSub(bool UseAdd,MVT RetVT,const Value * LHS,const Value * RHS,bool SetFlags,bool WantResult,bool IsZExt)1081 unsigned AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
1082                                      const Value *RHS, bool SetFlags,
1083                                      bool WantResult,  bool IsZExt) {
1084   AArch64_AM::ShiftExtendType ExtendType = AArch64_AM::InvalidShiftExtend;
1085   bool NeedExtend = false;
1086   switch (RetVT.SimpleTy) {
1087   default:
1088     return 0;
1089   case MVT::i1:
1090     NeedExtend = true;
1091     break;
1092   case MVT::i8:
1093     NeedExtend = true;
1094     ExtendType = IsZExt ? AArch64_AM::UXTB : AArch64_AM::SXTB;
1095     break;
1096   case MVT::i16:
1097     NeedExtend = true;
1098     ExtendType = IsZExt ? AArch64_AM::UXTH : AArch64_AM::SXTH;
1099     break;
1100   case MVT::i32:  // fall-through
1101   case MVT::i64:
1102     break;
1103   }
1104   MVT SrcVT = RetVT;
1105   RetVT.SimpleTy = std::max(RetVT.SimpleTy, MVT::i32);
1106 
1107   // Canonicalize immediates to the RHS first.
1108   if (UseAdd && isa<Constant>(LHS) && !isa<Constant>(RHS))
1109     std::swap(LHS, RHS);
1110 
1111   // Canonicalize mul by power of 2 to the RHS.
1112   if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
1113     if (isMulPowOf2(LHS))
1114       std::swap(LHS, RHS);
1115 
1116   // Canonicalize shift immediate to the RHS.
1117   if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
1118     if (const auto *SI = dyn_cast<BinaryOperator>(LHS))
1119       if (isa<ConstantInt>(SI->getOperand(1)))
1120         if (SI->getOpcode() == Instruction::Shl  ||
1121             SI->getOpcode() == Instruction::LShr ||
1122             SI->getOpcode() == Instruction::AShr   )
1123           std::swap(LHS, RHS);
1124 
1125   unsigned LHSReg = getRegForValue(LHS);
1126   if (!LHSReg)
1127     return 0;
1128   bool LHSIsKill = hasTrivialKill(LHS);
1129 
1130   if (NeedExtend)
1131     LHSReg = emitIntExt(SrcVT, LHSReg, RetVT, IsZExt);
1132 
1133   unsigned ResultReg = 0;
1134   if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
1135     uint64_t Imm = IsZExt ? C->getZExtValue() : C->getSExtValue();
1136     if (C->isNegative())
1137       ResultReg = emitAddSub_ri(!UseAdd, RetVT, LHSReg, LHSIsKill, -Imm,
1138                                 SetFlags, WantResult);
1139     else
1140       ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, LHSIsKill, Imm, SetFlags,
1141                                 WantResult);
1142   } else if (const auto *C = dyn_cast<Constant>(RHS))
1143     if (C->isNullValue())
1144       ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, LHSIsKill, 0, SetFlags,
1145                                 WantResult);
1146 
1147   if (ResultReg)
1148     return ResultReg;
1149 
1150   // Only extend the RHS within the instruction if there is a valid extend type.
1151   if (ExtendType != AArch64_AM::InvalidShiftExtend && RHS->hasOneUse() &&
1152       isValueAvailable(RHS)) {
1153     if (const auto *SI = dyn_cast<BinaryOperator>(RHS))
1154       if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1)))
1155         if ((SI->getOpcode() == Instruction::Shl) && (C->getZExtValue() < 4)) {
1156           unsigned RHSReg = getRegForValue(SI->getOperand(0));
1157           if (!RHSReg)
1158             return 0;
1159           bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
1160           return emitAddSub_rx(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg,
1161                                RHSIsKill, ExtendType, C->getZExtValue(),
1162                                SetFlags, WantResult);
1163         }
1164     unsigned RHSReg = getRegForValue(RHS);
1165     if (!RHSReg)
1166       return 0;
1167     bool RHSIsKill = hasTrivialKill(RHS);
1168     return emitAddSub_rx(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
1169                          ExtendType, 0, SetFlags, WantResult);
1170   }
1171 
1172   // Check if the mul can be folded into the instruction.
1173   if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1174     if (isMulPowOf2(RHS)) {
1175       const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
1176       const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);
1177 
1178       if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
1179         if (C->getValue().isPowerOf2())
1180           std::swap(MulLHS, MulRHS);
1181 
1182       assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
1183       uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
1184       unsigned RHSReg = getRegForValue(MulLHS);
1185       if (!RHSReg)
1186         return 0;
1187       bool RHSIsKill = hasTrivialKill(MulLHS);
1188       ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg,
1189                                 RHSIsKill, AArch64_AM::LSL, ShiftVal, SetFlags,
1190                                 WantResult);
1191       if (ResultReg)
1192         return ResultReg;
1193     }
1194   }
1195 
1196   // Check if the shift can be folded into the instruction.
1197   if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1198     if (const auto *SI = dyn_cast<BinaryOperator>(RHS)) {
1199       if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
1200         AArch64_AM::ShiftExtendType ShiftType = AArch64_AM::InvalidShiftExtend;
1201         switch (SI->getOpcode()) {
1202         default: break;
1203         case Instruction::Shl:  ShiftType = AArch64_AM::LSL; break;
1204         case Instruction::LShr: ShiftType = AArch64_AM::LSR; break;
1205         case Instruction::AShr: ShiftType = AArch64_AM::ASR; break;
1206         }
1207         uint64_t ShiftVal = C->getZExtValue();
1208         if (ShiftType != AArch64_AM::InvalidShiftExtend) {
1209           unsigned RHSReg = getRegForValue(SI->getOperand(0));
1210           if (!RHSReg)
1211             return 0;
1212           bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
1213           ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg,
1214                                     RHSIsKill, ShiftType, ShiftVal, SetFlags,
1215                                     WantResult);
1216           if (ResultReg)
1217             return ResultReg;
1218         }
1219       }
1220     }
1221   }
1222 
1223   unsigned RHSReg = getRegForValue(RHS);
1224   if (!RHSReg)
1225     return 0;
1226   bool RHSIsKill = hasTrivialKill(RHS);
1227 
1228   if (NeedExtend)
1229     RHSReg = emitIntExt(SrcVT, RHSReg, RetVT, IsZExt);
1230 
1231   return emitAddSub_rr(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
1232                        SetFlags, WantResult);
1233 }
1234 
emitAddSub_rr(bool UseAdd,MVT RetVT,unsigned LHSReg,bool LHSIsKill,unsigned RHSReg,bool RHSIsKill,bool SetFlags,bool WantResult)1235 unsigned AArch64FastISel::emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
1236                                         bool LHSIsKill, unsigned RHSReg,
1237                                         bool RHSIsKill, bool SetFlags,
1238                                         bool WantResult) {
1239   assert(LHSReg && RHSReg && "Invalid register number.");
1240 
1241   if (RetVT != MVT::i32 && RetVT != MVT::i64)
1242     return 0;
1243 
1244   static const unsigned OpcTable[2][2][2] = {
1245     { { AArch64::SUBWrr,  AArch64::SUBXrr  },
1246       { AArch64::ADDWrr,  AArch64::ADDXrr  }  },
1247     { { AArch64::SUBSWrr, AArch64::SUBSXrr },
1248       { AArch64::ADDSWrr, AArch64::ADDSXrr }  }
1249   };
1250   bool Is64Bit = RetVT == MVT::i64;
1251   unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1252   const TargetRegisterClass *RC =
1253       Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1254   unsigned ResultReg;
1255   if (WantResult)
1256     ResultReg = createResultReg(RC);
1257   else
1258     ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1259 
1260   const MCInstrDesc &II = TII.get(Opc);
1261   LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1262   RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1263   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1264       .addReg(LHSReg, getKillRegState(LHSIsKill))
1265       .addReg(RHSReg, getKillRegState(RHSIsKill));
1266   return ResultReg;
1267 }
1268 
emitAddSub_ri(bool UseAdd,MVT RetVT,unsigned LHSReg,bool LHSIsKill,uint64_t Imm,bool SetFlags,bool WantResult)1269 unsigned AArch64FastISel::emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
1270                                         bool LHSIsKill, uint64_t Imm,
1271                                         bool SetFlags, bool WantResult) {
1272   assert(LHSReg && "Invalid register number.");
1273 
1274   if (RetVT != MVT::i32 && RetVT != MVT::i64)
1275     return 0;
1276 
1277   unsigned ShiftImm;
1278   if (isUInt<12>(Imm))
1279     ShiftImm = 0;
1280   else if ((Imm & 0xfff000) == Imm) {
1281     ShiftImm = 12;
1282     Imm >>= 12;
1283   } else
1284     return 0;
1285 
1286   static const unsigned OpcTable[2][2][2] = {
1287     { { AArch64::SUBWri,  AArch64::SUBXri  },
1288       { AArch64::ADDWri,  AArch64::ADDXri  }  },
1289     { { AArch64::SUBSWri, AArch64::SUBSXri },
1290       { AArch64::ADDSWri, AArch64::ADDSXri }  }
1291   };
1292   bool Is64Bit = RetVT == MVT::i64;
1293   unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1294   const TargetRegisterClass *RC;
1295   if (SetFlags)
1296     RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1297   else
1298     RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
1299   unsigned ResultReg;
1300   if (WantResult)
1301     ResultReg = createResultReg(RC);
1302   else
1303     ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1304 
1305   const MCInstrDesc &II = TII.get(Opc);
1306   LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1307   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1308       .addReg(LHSReg, getKillRegState(LHSIsKill))
1309       .addImm(Imm)
1310       .addImm(getShifterImm(AArch64_AM::LSL, ShiftImm));
1311   return ResultReg;
1312 }
1313 
emitAddSub_rs(bool UseAdd,MVT RetVT,unsigned LHSReg,bool LHSIsKill,unsigned RHSReg,bool RHSIsKill,AArch64_AM::ShiftExtendType ShiftType,uint64_t ShiftImm,bool SetFlags,bool WantResult)1314 unsigned AArch64FastISel::emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
1315                                         bool LHSIsKill, unsigned RHSReg,
1316                                         bool RHSIsKill,
1317                                         AArch64_AM::ShiftExtendType ShiftType,
1318                                         uint64_t ShiftImm, bool SetFlags,
1319                                         bool WantResult) {
1320   assert(LHSReg && RHSReg && "Invalid register number.");
1321 
1322   if (RetVT != MVT::i32 && RetVT != MVT::i64)
1323     return 0;
1324 
1325   // Don't deal with undefined shifts.
1326   if (ShiftImm >= RetVT.getSizeInBits())
1327     return 0;
1328 
1329   static const unsigned OpcTable[2][2][2] = {
1330     { { AArch64::SUBWrs,  AArch64::SUBXrs  },
1331       { AArch64::ADDWrs,  AArch64::ADDXrs  }  },
1332     { { AArch64::SUBSWrs, AArch64::SUBSXrs },
1333       { AArch64::ADDSWrs, AArch64::ADDSXrs }  }
1334   };
1335   bool Is64Bit = RetVT == MVT::i64;
1336   unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1337   const TargetRegisterClass *RC =
1338       Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1339   unsigned ResultReg;
1340   if (WantResult)
1341     ResultReg = createResultReg(RC);
1342   else
1343     ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1344 
1345   const MCInstrDesc &II = TII.get(Opc);
1346   LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1347   RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1348   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1349       .addReg(LHSReg, getKillRegState(LHSIsKill))
1350       .addReg(RHSReg, getKillRegState(RHSIsKill))
1351       .addImm(getShifterImm(ShiftType, ShiftImm));
1352   return ResultReg;
1353 }
1354 
emitAddSub_rx(bool UseAdd,MVT RetVT,unsigned LHSReg,bool LHSIsKill,unsigned RHSReg,bool RHSIsKill,AArch64_AM::ShiftExtendType ExtType,uint64_t ShiftImm,bool SetFlags,bool WantResult)1355 unsigned AArch64FastISel::emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
1356                                         bool LHSIsKill, unsigned RHSReg,
1357                                         bool RHSIsKill,
1358                                         AArch64_AM::ShiftExtendType ExtType,
1359                                         uint64_t ShiftImm, bool SetFlags,
1360                                         bool WantResult) {
1361   assert(LHSReg && RHSReg && "Invalid register number.");
1362 
1363   if (RetVT != MVT::i32 && RetVT != MVT::i64)
1364     return 0;
1365 
1366   if (ShiftImm >= 4)
1367     return 0;
1368 
1369   static const unsigned OpcTable[2][2][2] = {
1370     { { AArch64::SUBWrx,  AArch64::SUBXrx  },
1371       { AArch64::ADDWrx,  AArch64::ADDXrx  }  },
1372     { { AArch64::SUBSWrx, AArch64::SUBSXrx },
1373       { AArch64::ADDSWrx, AArch64::ADDSXrx }  }
1374   };
1375   bool Is64Bit = RetVT == MVT::i64;
1376   unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1377   const TargetRegisterClass *RC = nullptr;
1378   if (SetFlags)
1379     RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1380   else
1381     RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
1382   unsigned ResultReg;
1383   if (WantResult)
1384     ResultReg = createResultReg(RC);
1385   else
1386     ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1387 
1388   const MCInstrDesc &II = TII.get(Opc);
1389   LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1390   RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1391   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1392       .addReg(LHSReg, getKillRegState(LHSIsKill))
1393       .addReg(RHSReg, getKillRegState(RHSIsKill))
1394       .addImm(getArithExtendImm(ExtType, ShiftImm));
1395   return ResultReg;
1396 }
1397 
emitCmp(const Value * LHS,const Value * RHS,bool IsZExt)1398 bool AArch64FastISel::emitCmp(const Value *LHS, const Value *RHS, bool IsZExt) {
1399   Type *Ty = LHS->getType();
1400   EVT EVT = TLI.getValueType(DL, Ty, true);
1401   if (!EVT.isSimple())
1402     return false;
1403   MVT VT = EVT.getSimpleVT();
1404 
1405   switch (VT.SimpleTy) {
1406   default:
1407     return false;
1408   case MVT::i1:
1409   case MVT::i8:
1410   case MVT::i16:
1411   case MVT::i32:
1412   case MVT::i64:
1413     return emitICmp(VT, LHS, RHS, IsZExt);
1414   case MVT::f32:
1415   case MVT::f64:
1416     return emitFCmp(VT, LHS, RHS);
1417   }
1418 }
1419 
emitICmp(MVT RetVT,const Value * LHS,const Value * RHS,bool IsZExt)1420 bool AArch64FastISel::emitICmp(MVT RetVT, const Value *LHS, const Value *RHS,
1421                                bool IsZExt) {
1422   return emitSub(RetVT, LHS, RHS, /*SetFlags=*/true, /*WantResult=*/false,
1423                  IsZExt) != 0;
1424 }
1425 
emitICmp_ri(MVT RetVT,unsigned LHSReg,bool LHSIsKill,uint64_t Imm)1426 bool AArch64FastISel::emitICmp_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
1427                                   uint64_t Imm) {
1428   return emitAddSub_ri(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, Imm,
1429                        /*SetFlags=*/true, /*WantResult=*/false) != 0;
1430 }
1431 
emitFCmp(MVT RetVT,const Value * LHS,const Value * RHS)1432 bool AArch64FastISel::emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS) {
1433   if (RetVT != MVT::f32 && RetVT != MVT::f64)
1434     return false;
1435 
1436   // Check to see if the 2nd operand is a constant that we can encode directly
1437   // in the compare.
1438   bool UseImm = false;
1439   if (const auto *CFP = dyn_cast<ConstantFP>(RHS))
1440     if (CFP->isZero() && !CFP->isNegative())
1441       UseImm = true;
1442 
1443   unsigned LHSReg = getRegForValue(LHS);
1444   if (!LHSReg)
1445     return false;
1446   bool LHSIsKill = hasTrivialKill(LHS);
1447 
1448   if (UseImm) {
1449     unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDri : AArch64::FCMPSri;
1450     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
1451         .addReg(LHSReg, getKillRegState(LHSIsKill));
1452     return true;
1453   }
1454 
1455   unsigned RHSReg = getRegForValue(RHS);
1456   if (!RHSReg)
1457     return false;
1458   bool RHSIsKill = hasTrivialKill(RHS);
1459 
1460   unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDrr : AArch64::FCMPSrr;
1461   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
1462       .addReg(LHSReg, getKillRegState(LHSIsKill))
1463       .addReg(RHSReg, getKillRegState(RHSIsKill));
1464   return true;
1465 }
1466 
emitAdd(MVT RetVT,const Value * LHS,const Value * RHS,bool SetFlags,bool WantResult,bool IsZExt)1467 unsigned AArch64FastISel::emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
1468                                   bool SetFlags, bool WantResult, bool IsZExt) {
1469   return emitAddSub(/*UseAdd=*/true, RetVT, LHS, RHS, SetFlags, WantResult,
1470                     IsZExt);
1471 }
1472 
1473 /// \brief This method is a wrapper to simplify add emission.
1474 ///
1475 /// First try to emit an add with an immediate operand using emitAddSub_ri. If
1476 /// that fails, then try to materialize the immediate into a register and use
1477 /// emitAddSub_rr instead.
emitAdd_ri_(MVT VT,unsigned Op0,bool Op0IsKill,int64_t Imm)1478 unsigned AArch64FastISel::emitAdd_ri_(MVT VT, unsigned Op0, bool Op0IsKill,
1479                                       int64_t Imm) {
1480   unsigned ResultReg;
1481   if (Imm < 0)
1482     ResultReg = emitAddSub_ri(false, VT, Op0, Op0IsKill, -Imm);
1483   else
1484     ResultReg = emitAddSub_ri(true, VT, Op0, Op0IsKill, Imm);
1485 
1486   if (ResultReg)
1487     return ResultReg;
1488 
1489   unsigned CReg = fastEmit_i(VT, VT, ISD::Constant, Imm);
1490   if (!CReg)
1491     return 0;
1492 
1493   ResultReg = emitAddSub_rr(true, VT, Op0, Op0IsKill, CReg, true);
1494   return ResultReg;
1495 }
1496 
emitSub(MVT RetVT,const Value * LHS,const Value * RHS,bool SetFlags,bool WantResult,bool IsZExt)1497 unsigned AArch64FastISel::emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
1498                                   bool SetFlags, bool WantResult, bool IsZExt) {
1499   return emitAddSub(/*UseAdd=*/false, RetVT, LHS, RHS, SetFlags, WantResult,
1500                     IsZExt);
1501 }
1502 
emitSubs_rr(MVT RetVT,unsigned LHSReg,bool LHSIsKill,unsigned RHSReg,bool RHSIsKill,bool WantResult)1503 unsigned AArch64FastISel::emitSubs_rr(MVT RetVT, unsigned LHSReg,
1504                                       bool LHSIsKill, unsigned RHSReg,
1505                                       bool RHSIsKill, bool WantResult) {
1506   return emitAddSub_rr(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, RHSReg,
1507                        RHSIsKill, /*SetFlags=*/true, WantResult);
1508 }
1509 
emitSubs_rs(MVT RetVT,unsigned LHSReg,bool LHSIsKill,unsigned RHSReg,bool RHSIsKill,AArch64_AM::ShiftExtendType ShiftType,uint64_t ShiftImm,bool WantResult)1510 unsigned AArch64FastISel::emitSubs_rs(MVT RetVT, unsigned LHSReg,
1511                                       bool LHSIsKill, unsigned RHSReg,
1512                                       bool RHSIsKill,
1513                                       AArch64_AM::ShiftExtendType ShiftType,
1514                                       uint64_t ShiftImm, bool WantResult) {
1515   return emitAddSub_rs(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, RHSReg,
1516                        RHSIsKill, ShiftType, ShiftImm, /*SetFlags=*/true,
1517                        WantResult);
1518 }
1519 
emitLogicalOp(unsigned ISDOpc,MVT RetVT,const Value * LHS,const Value * RHS)1520 unsigned AArch64FastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT,
1521                                         const Value *LHS, const Value *RHS) {
1522   // Canonicalize immediates to the RHS first.
1523   if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS))
1524     std::swap(LHS, RHS);
1525 
1526   // Canonicalize mul by power-of-2 to the RHS.
1527   if (LHS->hasOneUse() && isValueAvailable(LHS))
1528     if (isMulPowOf2(LHS))
1529       std::swap(LHS, RHS);
1530 
1531   // Canonicalize shift immediate to the RHS.
1532   if (LHS->hasOneUse() && isValueAvailable(LHS))
1533     if (const auto *SI = dyn_cast<ShlOperator>(LHS))
1534       if (isa<ConstantInt>(SI->getOperand(1)))
1535         std::swap(LHS, RHS);
1536 
1537   unsigned LHSReg = getRegForValue(LHS);
1538   if (!LHSReg)
1539     return 0;
1540   bool LHSIsKill = hasTrivialKill(LHS);
1541 
1542   unsigned ResultReg = 0;
1543   if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
1544     uint64_t Imm = C->getZExtValue();
1545     ResultReg = emitLogicalOp_ri(ISDOpc, RetVT, LHSReg, LHSIsKill, Imm);
1546   }
1547   if (ResultReg)
1548     return ResultReg;
1549 
1550   // Check if the mul can be folded into the instruction.
1551   if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1552     if (isMulPowOf2(RHS)) {
1553       const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
1554       const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);
1555 
1556       if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
1557         if (C->getValue().isPowerOf2())
1558           std::swap(MulLHS, MulRHS);
1559 
1560       assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
1561       uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
1562 
1563       unsigned RHSReg = getRegForValue(MulLHS);
1564       if (!RHSReg)
1565         return 0;
1566       bool RHSIsKill = hasTrivialKill(MulLHS);
1567       ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg,
1568                                    RHSIsKill, ShiftVal);
1569       if (ResultReg)
1570         return ResultReg;
1571     }
1572   }
1573 
1574   // Check if the shift can be folded into the instruction.
1575   if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1576     if (const auto *SI = dyn_cast<ShlOperator>(RHS))
1577       if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
1578         uint64_t ShiftVal = C->getZExtValue();
1579         unsigned RHSReg = getRegForValue(SI->getOperand(0));
1580         if (!RHSReg)
1581           return 0;
1582         bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
1583         ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg,
1584                                      RHSIsKill, ShiftVal);
1585         if (ResultReg)
1586           return ResultReg;
1587       }
1588   }
1589 
1590   unsigned RHSReg = getRegForValue(RHS);
1591   if (!RHSReg)
1592     return 0;
1593   bool RHSIsKill = hasTrivialKill(RHS);
1594 
1595   MVT VT = std::max(MVT::i32, RetVT.SimpleTy);
1596   ResultReg = fastEmit_rr(VT, VT, ISDOpc, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
1597   if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
1598     uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1599     ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
1600   }
1601   return ResultReg;
1602 }
1603 
emitLogicalOp_ri(unsigned ISDOpc,MVT RetVT,unsigned LHSReg,bool LHSIsKill,uint64_t Imm)1604 unsigned AArch64FastISel::emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT,
1605                                            unsigned LHSReg, bool LHSIsKill,
1606                                            uint64_t Imm) {
1607   static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR),
1608                 "ISD nodes are not consecutive!");
1609   static const unsigned OpcTable[3][2] = {
1610     { AArch64::ANDWri, AArch64::ANDXri },
1611     { AArch64::ORRWri, AArch64::ORRXri },
1612     { AArch64::EORWri, AArch64::EORXri }
1613   };
1614   const TargetRegisterClass *RC;
1615   unsigned Opc;
1616   unsigned RegSize;
1617   switch (RetVT.SimpleTy) {
1618   default:
1619     return 0;
1620   case MVT::i1:
1621   case MVT::i8:
1622   case MVT::i16:
1623   case MVT::i32: {
1624     unsigned Idx = ISDOpc - ISD::AND;
1625     Opc = OpcTable[Idx][0];
1626     RC = &AArch64::GPR32spRegClass;
1627     RegSize = 32;
1628     break;
1629   }
1630   case MVT::i64:
1631     Opc = OpcTable[ISDOpc - ISD::AND][1];
1632     RC = &AArch64::GPR64spRegClass;
1633     RegSize = 64;
1634     break;
1635   }
1636 
1637   if (!AArch64_AM::isLogicalImmediate(Imm, RegSize))
1638     return 0;
1639 
1640   unsigned ResultReg =
1641       fastEmitInst_ri(Opc, RC, LHSReg, LHSIsKill,
1642                       AArch64_AM::encodeLogicalImmediate(Imm, RegSize));
1643   if (RetVT >= MVT::i8 && RetVT <= MVT::i16 && ISDOpc != ISD::AND) {
1644     uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1645     ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
1646   }
1647   return ResultReg;
1648 }
1649 
emitLogicalOp_rs(unsigned ISDOpc,MVT RetVT,unsigned LHSReg,bool LHSIsKill,unsigned RHSReg,bool RHSIsKill,uint64_t ShiftImm)1650 unsigned AArch64FastISel::emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT,
1651                                            unsigned LHSReg, bool LHSIsKill,
1652                                            unsigned RHSReg, bool RHSIsKill,
1653                                            uint64_t ShiftImm) {
1654   static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR),
1655                 "ISD nodes are not consecutive!");
1656   static const unsigned OpcTable[3][2] = {
1657     { AArch64::ANDWrs, AArch64::ANDXrs },
1658     { AArch64::ORRWrs, AArch64::ORRXrs },
1659     { AArch64::EORWrs, AArch64::EORXrs }
1660   };
1661 
1662   // Don't deal with undefined shifts.
1663   if (ShiftImm >= RetVT.getSizeInBits())
1664     return 0;
1665 
1666   const TargetRegisterClass *RC;
1667   unsigned Opc;
1668   switch (RetVT.SimpleTy) {
1669   default:
1670     return 0;
1671   case MVT::i1:
1672   case MVT::i8:
1673   case MVT::i16:
1674   case MVT::i32:
1675     Opc = OpcTable[ISDOpc - ISD::AND][0];
1676     RC = &AArch64::GPR32RegClass;
1677     break;
1678   case MVT::i64:
1679     Opc = OpcTable[ISDOpc - ISD::AND][1];
1680     RC = &AArch64::GPR64RegClass;
1681     break;
1682   }
1683   unsigned ResultReg =
1684       fastEmitInst_rri(Opc, RC, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
1685                        AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftImm));
1686   if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
1687     uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1688     ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
1689   }
1690   return ResultReg;
1691 }
1692 
emitAnd_ri(MVT RetVT,unsigned LHSReg,bool LHSIsKill,uint64_t Imm)1693 unsigned AArch64FastISel::emitAnd_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
1694                                      uint64_t Imm) {
1695   return emitLogicalOp_ri(ISD::AND, RetVT, LHSReg, LHSIsKill, Imm);
1696 }
1697 
emitLoad(MVT VT,MVT RetVT,Address Addr,bool WantZExt,MachineMemOperand * MMO)1698 unsigned AArch64FastISel::emitLoad(MVT VT, MVT RetVT, Address Addr,
1699                                    bool WantZExt, MachineMemOperand *MMO) {
1700   if (!TLI.allowsMisalignedMemoryAccesses(VT))
1701     return 0;
1702 
1703   // Simplify this down to something we can handle.
1704   if (!simplifyAddress(Addr, VT))
1705     return 0;
1706 
1707   unsigned ScaleFactor = getImplicitScaleFactor(VT);
1708   if (!ScaleFactor)
1709     llvm_unreachable("Unexpected value type.");
1710 
1711   // Negative offsets require unscaled, 9-bit, signed immediate offsets.
1712   // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
1713   bool UseScaled = true;
1714   if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
1715     UseScaled = false;
1716     ScaleFactor = 1;
1717   }
1718 
1719   static const unsigned GPOpcTable[2][8][4] = {
1720     // Sign-extend.
1721     { { AArch64::LDURSBWi,  AArch64::LDURSHWi,  AArch64::LDURWi,
1722         AArch64::LDURXi  },
1723       { AArch64::LDURSBXi,  AArch64::LDURSHXi,  AArch64::LDURSWi,
1724         AArch64::LDURXi  },
1725       { AArch64::LDRSBWui,  AArch64::LDRSHWui,  AArch64::LDRWui,
1726         AArch64::LDRXui  },
1727       { AArch64::LDRSBXui,  AArch64::LDRSHXui,  AArch64::LDRSWui,
1728         AArch64::LDRXui  },
1729       { AArch64::LDRSBWroX, AArch64::LDRSHWroX, AArch64::LDRWroX,
1730         AArch64::LDRXroX },
1731       { AArch64::LDRSBXroX, AArch64::LDRSHXroX, AArch64::LDRSWroX,
1732         AArch64::LDRXroX },
1733       { AArch64::LDRSBWroW, AArch64::LDRSHWroW, AArch64::LDRWroW,
1734         AArch64::LDRXroW },
1735       { AArch64::LDRSBXroW, AArch64::LDRSHXroW, AArch64::LDRSWroW,
1736         AArch64::LDRXroW }
1737     },
1738     // Zero-extend.
1739     { { AArch64::LDURBBi,   AArch64::LDURHHi,   AArch64::LDURWi,
1740         AArch64::LDURXi  },
1741       { AArch64::LDURBBi,   AArch64::LDURHHi,   AArch64::LDURWi,
1742         AArch64::LDURXi  },
1743       { AArch64::LDRBBui,   AArch64::LDRHHui,   AArch64::LDRWui,
1744         AArch64::LDRXui  },
1745       { AArch64::LDRBBui,   AArch64::LDRHHui,   AArch64::LDRWui,
1746         AArch64::LDRXui  },
1747       { AArch64::LDRBBroX,  AArch64::LDRHHroX,  AArch64::LDRWroX,
1748         AArch64::LDRXroX },
1749       { AArch64::LDRBBroX,  AArch64::LDRHHroX,  AArch64::LDRWroX,
1750         AArch64::LDRXroX },
1751       { AArch64::LDRBBroW,  AArch64::LDRHHroW,  AArch64::LDRWroW,
1752         AArch64::LDRXroW },
1753       { AArch64::LDRBBroW,  AArch64::LDRHHroW,  AArch64::LDRWroW,
1754         AArch64::LDRXroW }
1755     }
1756   };
1757 
1758   static const unsigned FPOpcTable[4][2] = {
1759     { AArch64::LDURSi,  AArch64::LDURDi  },
1760     { AArch64::LDRSui,  AArch64::LDRDui  },
1761     { AArch64::LDRSroX, AArch64::LDRDroX },
1762     { AArch64::LDRSroW, AArch64::LDRDroW }
1763   };
1764 
1765   unsigned Opc;
1766   const TargetRegisterClass *RC;
1767   bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
1768                       Addr.getOffsetReg();
1769   unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
1770   if (Addr.getExtendType() == AArch64_AM::UXTW ||
1771       Addr.getExtendType() == AArch64_AM::SXTW)
1772     Idx++;
1773 
1774   bool IsRet64Bit = RetVT == MVT::i64;
1775   switch (VT.SimpleTy) {
1776   default:
1777     llvm_unreachable("Unexpected value type.");
1778   case MVT::i1: // Intentional fall-through.
1779   case MVT::i8:
1780     Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][0];
1781     RC = (IsRet64Bit && !WantZExt) ?
1782              &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1783     break;
1784   case MVT::i16:
1785     Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][1];
1786     RC = (IsRet64Bit && !WantZExt) ?
1787              &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1788     break;
1789   case MVT::i32:
1790     Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][2];
1791     RC = (IsRet64Bit && !WantZExt) ?
1792              &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1793     break;
1794   case MVT::i64:
1795     Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][3];
1796     RC = &AArch64::GPR64RegClass;
1797     break;
1798   case MVT::f32:
1799     Opc = FPOpcTable[Idx][0];
1800     RC = &AArch64::FPR32RegClass;
1801     break;
1802   case MVT::f64:
1803     Opc = FPOpcTable[Idx][1];
1804     RC = &AArch64::FPR64RegClass;
1805     break;
1806   }
1807 
1808   // Create the base instruction, then add the operands.
1809   unsigned ResultReg = createResultReg(RC);
1810   MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1811                                     TII.get(Opc), ResultReg);
1812   addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOLoad, ScaleFactor, MMO);
1813 
1814   // Loading an i1 requires special handling.
1815   if (VT == MVT::i1) {
1816     unsigned ANDReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, 1);
1817     assert(ANDReg && "Unexpected AND instruction emission failure.");
1818     ResultReg = ANDReg;
1819   }
1820 
1821   // For zero-extending loads to 64bit we emit a 32bit load and then convert
1822   // the 32bit reg to a 64bit reg.
1823   if (WantZExt && RetVT == MVT::i64 && VT <= MVT::i32) {
1824     unsigned Reg64 = createResultReg(&AArch64::GPR64RegClass);
1825     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1826             TII.get(AArch64::SUBREG_TO_REG), Reg64)
1827         .addImm(0)
1828         .addReg(ResultReg, getKillRegState(true))
1829         .addImm(AArch64::sub_32);
1830     ResultReg = Reg64;
1831   }
1832   return ResultReg;
1833 }
1834 
selectAddSub(const Instruction * I)1835 bool AArch64FastISel::selectAddSub(const Instruction *I) {
1836   MVT VT;
1837   if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
1838     return false;
1839 
1840   if (VT.isVector())
1841     return selectOperator(I, I->getOpcode());
1842 
1843   unsigned ResultReg;
1844   switch (I->getOpcode()) {
1845   default:
1846     llvm_unreachable("Unexpected instruction.");
1847   case Instruction::Add:
1848     ResultReg = emitAdd(VT, I->getOperand(0), I->getOperand(1));
1849     break;
1850   case Instruction::Sub:
1851     ResultReg = emitSub(VT, I->getOperand(0), I->getOperand(1));
1852     break;
1853   }
1854   if (!ResultReg)
1855     return false;
1856 
1857   updateValueMap(I, ResultReg);
1858   return true;
1859 }
1860 
selectLogicalOp(const Instruction * I)1861 bool AArch64FastISel::selectLogicalOp(const Instruction *I) {
1862   MVT VT;
1863   if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
1864     return false;
1865 
1866   if (VT.isVector())
1867     return selectOperator(I, I->getOpcode());
1868 
1869   unsigned ResultReg;
1870   switch (I->getOpcode()) {
1871   default:
1872     llvm_unreachable("Unexpected instruction.");
1873   case Instruction::And:
1874     ResultReg = emitLogicalOp(ISD::AND, VT, I->getOperand(0), I->getOperand(1));
1875     break;
1876   case Instruction::Or:
1877     ResultReg = emitLogicalOp(ISD::OR, VT, I->getOperand(0), I->getOperand(1));
1878     break;
1879   case Instruction::Xor:
1880     ResultReg = emitLogicalOp(ISD::XOR, VT, I->getOperand(0), I->getOperand(1));
1881     break;
1882   }
1883   if (!ResultReg)
1884     return false;
1885 
1886   updateValueMap(I, ResultReg);
1887   return true;
1888 }
1889 
selectLoad(const Instruction * I)1890 bool AArch64FastISel::selectLoad(const Instruction *I) {
1891   MVT VT;
1892   // Verify we have a legal type before going any further.  Currently, we handle
1893   // simple types that will directly fit in a register (i32/f32/i64/f64) or
1894   // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
1895   if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true) ||
1896       cast<LoadInst>(I)->isAtomic())
1897     return false;
1898 
1899   const Value *SV = I->getOperand(0);
1900   if (TLI.supportSwiftError()) {
1901     // Swifterror values can come from either a function parameter with
1902     // swifterror attribute or an alloca with swifterror attribute.
1903     if (const Argument *Arg = dyn_cast<Argument>(SV)) {
1904       if (Arg->hasSwiftErrorAttr())
1905         return false;
1906     }
1907 
1908     if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(SV)) {
1909       if (Alloca->isSwiftError())
1910         return false;
1911     }
1912   }
1913 
1914   // See if we can handle this address.
1915   Address Addr;
1916   if (!computeAddress(I->getOperand(0), Addr, I->getType()))
1917     return false;
1918 
1919   // Fold the following sign-/zero-extend into the load instruction.
1920   bool WantZExt = true;
1921   MVT RetVT = VT;
1922   const Value *IntExtVal = nullptr;
1923   if (I->hasOneUse()) {
1924     if (const auto *ZE = dyn_cast<ZExtInst>(I->use_begin()->getUser())) {
1925       if (isTypeSupported(ZE->getType(), RetVT))
1926         IntExtVal = ZE;
1927       else
1928         RetVT = VT;
1929     } else if (const auto *SE = dyn_cast<SExtInst>(I->use_begin()->getUser())) {
1930       if (isTypeSupported(SE->getType(), RetVT))
1931         IntExtVal = SE;
1932       else
1933         RetVT = VT;
1934       WantZExt = false;
1935     }
1936   }
1937 
1938   unsigned ResultReg =
1939       emitLoad(VT, RetVT, Addr, WantZExt, createMachineMemOperandFor(I));
1940   if (!ResultReg)
1941     return false;
1942 
1943   // There are a few different cases we have to handle, because the load or the
1944   // sign-/zero-extend might not be selected by FastISel if we fall-back to
1945   // SelectionDAG. There is also an ordering issue when both instructions are in
1946   // different basic blocks.
1947   // 1.) The load instruction is selected by FastISel, but the integer extend
1948   //     not. This usually happens when the integer extend is in a different
1949   //     basic block and SelectionDAG took over for that basic block.
1950   // 2.) The load instruction is selected before the integer extend. This only
1951   //     happens when the integer extend is in a different basic block.
1952   // 3.) The load instruction is selected by SelectionDAG and the integer extend
1953   //     by FastISel. This happens if there are instructions between the load
1954   //     and the integer extend that couldn't be selected by FastISel.
1955   if (IntExtVal) {
1956     // The integer extend hasn't been emitted yet. FastISel or SelectionDAG
1957     // could select it. Emit a copy to subreg if necessary. FastISel will remove
1958     // it when it selects the integer extend.
1959     unsigned Reg = lookUpRegForValue(IntExtVal);
1960     auto *MI = MRI.getUniqueVRegDef(Reg);
1961     if (!MI) {
1962       if (RetVT == MVT::i64 && VT <= MVT::i32) {
1963         if (WantZExt) {
1964           // Delete the last emitted instruction from emitLoad (SUBREG_TO_REG).
1965           std::prev(FuncInfo.InsertPt)->eraseFromParent();
1966           ResultReg = std::prev(FuncInfo.InsertPt)->getOperand(0).getReg();
1967         } else
1968           ResultReg = fastEmitInst_extractsubreg(MVT::i32, ResultReg,
1969                                                  /*IsKill=*/true,
1970                                                  AArch64::sub_32);
1971       }
1972       updateValueMap(I, ResultReg);
1973       return true;
1974     }
1975 
1976     // The integer extend has already been emitted - delete all the instructions
1977     // that have been emitted by the integer extend lowering code and use the
1978     // result from the load instruction directly.
1979     while (MI) {
1980       Reg = 0;
1981       for (auto &Opnd : MI->uses()) {
1982         if (Opnd.isReg()) {
1983           Reg = Opnd.getReg();
1984           break;
1985         }
1986       }
1987       MI->eraseFromParent();
1988       MI = nullptr;
1989       if (Reg)
1990         MI = MRI.getUniqueVRegDef(Reg);
1991     }
1992     updateValueMap(IntExtVal, ResultReg);
1993     return true;
1994   }
1995 
1996   updateValueMap(I, ResultReg);
1997   return true;
1998 }
1999 
emitStore(MVT VT,unsigned SrcReg,Address Addr,MachineMemOperand * MMO)2000 bool AArch64FastISel::emitStore(MVT VT, unsigned SrcReg, Address Addr,
2001                                 MachineMemOperand *MMO) {
2002   if (!TLI.allowsMisalignedMemoryAccesses(VT))
2003     return false;
2004 
2005   // Simplify this down to something we can handle.
2006   if (!simplifyAddress(Addr, VT))
2007     return false;
2008 
2009   unsigned ScaleFactor = getImplicitScaleFactor(VT);
2010   if (!ScaleFactor)
2011     llvm_unreachable("Unexpected value type.");
2012 
2013   // Negative offsets require unscaled, 9-bit, signed immediate offsets.
2014   // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
2015   bool UseScaled = true;
2016   if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
2017     UseScaled = false;
2018     ScaleFactor = 1;
2019   }
2020 
2021   static const unsigned OpcTable[4][6] = {
2022     { AArch64::STURBBi,  AArch64::STURHHi,  AArch64::STURWi,  AArch64::STURXi,
2023       AArch64::STURSi,   AArch64::STURDi },
2024     { AArch64::STRBBui,  AArch64::STRHHui,  AArch64::STRWui,  AArch64::STRXui,
2025       AArch64::STRSui,   AArch64::STRDui },
2026     { AArch64::STRBBroX, AArch64::STRHHroX, AArch64::STRWroX, AArch64::STRXroX,
2027       AArch64::STRSroX,  AArch64::STRDroX },
2028     { AArch64::STRBBroW, AArch64::STRHHroW, AArch64::STRWroW, AArch64::STRXroW,
2029       AArch64::STRSroW,  AArch64::STRDroW }
2030   };
2031 
2032   unsigned Opc;
2033   bool VTIsi1 = false;
2034   bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
2035                       Addr.getOffsetReg();
2036   unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
2037   if (Addr.getExtendType() == AArch64_AM::UXTW ||
2038       Addr.getExtendType() == AArch64_AM::SXTW)
2039     Idx++;
2040 
2041   switch (VT.SimpleTy) {
2042   default: llvm_unreachable("Unexpected value type.");
2043   case MVT::i1:  VTIsi1 = true;
2044   case MVT::i8:  Opc = OpcTable[Idx][0]; break;
2045   case MVT::i16: Opc = OpcTable[Idx][1]; break;
2046   case MVT::i32: Opc = OpcTable[Idx][2]; break;
2047   case MVT::i64: Opc = OpcTable[Idx][3]; break;
2048   case MVT::f32: Opc = OpcTable[Idx][4]; break;
2049   case MVT::f64: Opc = OpcTable[Idx][5]; break;
2050   }
2051 
2052   // Storing an i1 requires special handling.
2053   if (VTIsi1 && SrcReg != AArch64::WZR) {
2054     unsigned ANDReg = emitAnd_ri(MVT::i32, SrcReg, /*TODO:IsKill=*/false, 1);
2055     assert(ANDReg && "Unexpected AND instruction emission failure.");
2056     SrcReg = ANDReg;
2057   }
2058   // Create the base instruction, then add the operands.
2059   const MCInstrDesc &II = TII.get(Opc);
2060   SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs());
2061   MachineInstrBuilder MIB =
2062       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(SrcReg);
2063   addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOStore, ScaleFactor, MMO);
2064 
2065   return true;
2066 }
2067 
selectStore(const Instruction * I)2068 bool AArch64FastISel::selectStore(const Instruction *I) {
2069   MVT VT;
2070   const Value *Op0 = I->getOperand(0);
2071   // Verify we have a legal type before going any further.  Currently, we handle
2072   // simple types that will directly fit in a register (i32/f32/i64/f64) or
2073   // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
2074   if (!isTypeSupported(Op0->getType(), VT, /*IsVectorAllowed=*/true) ||
2075       cast<StoreInst>(I)->isAtomic())
2076     return false;
2077 
2078   const Value *PtrV = I->getOperand(1);
2079   if (TLI.supportSwiftError()) {
2080     // Swifterror values can come from either a function parameter with
2081     // swifterror attribute or an alloca with swifterror attribute.
2082     if (const Argument *Arg = dyn_cast<Argument>(PtrV)) {
2083       if (Arg->hasSwiftErrorAttr())
2084         return false;
2085     }
2086 
2087     if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrV)) {
2088       if (Alloca->isSwiftError())
2089         return false;
2090     }
2091   }
2092 
2093   // Get the value to be stored into a register. Use the zero register directly
2094   // when possible to avoid an unnecessary copy and a wasted register.
2095   unsigned SrcReg = 0;
2096   if (const auto *CI = dyn_cast<ConstantInt>(Op0)) {
2097     if (CI->isZero())
2098       SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
2099   } else if (const auto *CF = dyn_cast<ConstantFP>(Op0)) {
2100     if (CF->isZero() && !CF->isNegative()) {
2101       VT = MVT::getIntegerVT(VT.getSizeInBits());
2102       SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
2103     }
2104   }
2105 
2106   if (!SrcReg)
2107     SrcReg = getRegForValue(Op0);
2108 
2109   if (!SrcReg)
2110     return false;
2111 
2112   // See if we can handle this address.
2113   Address Addr;
2114   if (!computeAddress(I->getOperand(1), Addr, I->getOperand(0)->getType()))
2115     return false;
2116 
2117   if (!emitStore(VT, SrcReg, Addr, createMachineMemOperandFor(I)))
2118     return false;
2119   return true;
2120 }
2121 
getCompareCC(CmpInst::Predicate Pred)2122 static AArch64CC::CondCode getCompareCC(CmpInst::Predicate Pred) {
2123   switch (Pred) {
2124   case CmpInst::FCMP_ONE:
2125   case CmpInst::FCMP_UEQ:
2126   default:
2127     // AL is our "false" for now. The other two need more compares.
2128     return AArch64CC::AL;
2129   case CmpInst::ICMP_EQ:
2130   case CmpInst::FCMP_OEQ:
2131     return AArch64CC::EQ;
2132   case CmpInst::ICMP_SGT:
2133   case CmpInst::FCMP_OGT:
2134     return AArch64CC::GT;
2135   case CmpInst::ICMP_SGE:
2136   case CmpInst::FCMP_OGE:
2137     return AArch64CC::GE;
2138   case CmpInst::ICMP_UGT:
2139   case CmpInst::FCMP_UGT:
2140     return AArch64CC::HI;
2141   case CmpInst::FCMP_OLT:
2142     return AArch64CC::MI;
2143   case CmpInst::ICMP_ULE:
2144   case CmpInst::FCMP_OLE:
2145     return AArch64CC::LS;
2146   case CmpInst::FCMP_ORD:
2147     return AArch64CC::VC;
2148   case CmpInst::FCMP_UNO:
2149     return AArch64CC::VS;
2150   case CmpInst::FCMP_UGE:
2151     return AArch64CC::PL;
2152   case CmpInst::ICMP_SLT:
2153   case CmpInst::FCMP_ULT:
2154     return AArch64CC::LT;
2155   case CmpInst::ICMP_SLE:
2156   case CmpInst::FCMP_ULE:
2157     return AArch64CC::LE;
2158   case CmpInst::FCMP_UNE:
2159   case CmpInst::ICMP_NE:
2160     return AArch64CC::NE;
2161   case CmpInst::ICMP_UGE:
2162     return AArch64CC::HS;
2163   case CmpInst::ICMP_ULT:
2164     return AArch64CC::LO;
2165   }
2166 }
2167 
2168 /// \brief Try to emit a combined compare-and-branch instruction.
emitCompareAndBranch(const BranchInst * BI)2169 bool AArch64FastISel::emitCompareAndBranch(const BranchInst *BI) {
2170   assert(isa<CmpInst>(BI->getCondition()) && "Expected cmp instruction");
2171   const CmpInst *CI = cast<CmpInst>(BI->getCondition());
2172   CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2173 
2174   const Value *LHS = CI->getOperand(0);
2175   const Value *RHS = CI->getOperand(1);
2176 
2177   MVT VT;
2178   if (!isTypeSupported(LHS->getType(), VT))
2179     return false;
2180 
2181   unsigned BW = VT.getSizeInBits();
2182   if (BW > 64)
2183     return false;
2184 
2185   MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
2186   MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
2187 
2188   // Try to take advantage of fallthrough opportunities.
2189   if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2190     std::swap(TBB, FBB);
2191     Predicate = CmpInst::getInversePredicate(Predicate);
2192   }
2193 
2194   int TestBit = -1;
2195   bool IsCmpNE;
2196   switch (Predicate) {
2197   default:
2198     return false;
2199   case CmpInst::ICMP_EQ:
2200   case CmpInst::ICMP_NE:
2201     if (isa<Constant>(LHS) && cast<Constant>(LHS)->isNullValue())
2202       std::swap(LHS, RHS);
2203 
2204     if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue())
2205       return false;
2206 
2207     if (const auto *AI = dyn_cast<BinaryOperator>(LHS))
2208       if (AI->getOpcode() == Instruction::And && isValueAvailable(AI)) {
2209         const Value *AndLHS = AI->getOperand(0);
2210         const Value *AndRHS = AI->getOperand(1);
2211 
2212         if (const auto *C = dyn_cast<ConstantInt>(AndLHS))
2213           if (C->getValue().isPowerOf2())
2214             std::swap(AndLHS, AndRHS);
2215 
2216         if (const auto *C = dyn_cast<ConstantInt>(AndRHS))
2217           if (C->getValue().isPowerOf2()) {
2218             TestBit = C->getValue().logBase2();
2219             LHS = AndLHS;
2220           }
2221       }
2222 
2223     if (VT == MVT::i1)
2224       TestBit = 0;
2225 
2226     IsCmpNE = Predicate == CmpInst::ICMP_NE;
2227     break;
2228   case CmpInst::ICMP_SLT:
2229   case CmpInst::ICMP_SGE:
2230     if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue())
2231       return false;
2232 
2233     TestBit = BW - 1;
2234     IsCmpNE = Predicate == CmpInst::ICMP_SLT;
2235     break;
2236   case CmpInst::ICMP_SGT:
2237   case CmpInst::ICMP_SLE:
2238     if (!isa<ConstantInt>(RHS))
2239       return false;
2240 
2241     if (cast<ConstantInt>(RHS)->getValue() != APInt(BW, -1, true))
2242       return false;
2243 
2244     TestBit = BW - 1;
2245     IsCmpNE = Predicate == CmpInst::ICMP_SLE;
2246     break;
2247   } // end switch
2248 
2249   static const unsigned OpcTable[2][2][2] = {
2250     { {AArch64::CBZW,  AArch64::CBZX },
2251       {AArch64::CBNZW, AArch64::CBNZX} },
2252     { {AArch64::TBZW,  AArch64::TBZX },
2253       {AArch64::TBNZW, AArch64::TBNZX} }
2254   };
2255 
2256   bool IsBitTest = TestBit != -1;
2257   bool Is64Bit = BW == 64;
2258   if (TestBit < 32 && TestBit >= 0)
2259     Is64Bit = false;
2260 
2261   unsigned Opc = OpcTable[IsBitTest][IsCmpNE][Is64Bit];
2262   const MCInstrDesc &II = TII.get(Opc);
2263 
2264   unsigned SrcReg = getRegForValue(LHS);
2265   if (!SrcReg)
2266     return false;
2267   bool SrcIsKill = hasTrivialKill(LHS);
2268 
2269   if (BW == 64 && !Is64Bit)
2270     SrcReg = fastEmitInst_extractsubreg(MVT::i32, SrcReg, SrcIsKill,
2271                                         AArch64::sub_32);
2272 
2273   if ((BW < 32) && !IsBitTest)
2274     SrcReg = emitIntExt(VT, SrcReg, MVT::i32, /*IsZExt=*/true);
2275 
2276   // Emit the combined compare and branch instruction.
2277   SrcReg = constrainOperandRegClass(II, SrcReg,  II.getNumDefs());
2278   MachineInstrBuilder MIB =
2279       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
2280           .addReg(SrcReg, getKillRegState(SrcIsKill));
2281   if (IsBitTest)
2282     MIB.addImm(TestBit);
2283   MIB.addMBB(TBB);
2284 
2285   finishCondBranch(BI->getParent(), TBB, FBB);
2286   return true;
2287 }
2288 
selectBranch(const Instruction * I)2289 bool AArch64FastISel::selectBranch(const Instruction *I) {
2290   const BranchInst *BI = cast<BranchInst>(I);
2291   if (BI->isUnconditional()) {
2292     MachineBasicBlock *MSucc = FuncInfo.MBBMap[BI->getSuccessor(0)];
2293     fastEmitBranch(MSucc, BI->getDebugLoc());
2294     return true;
2295   }
2296 
2297   MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
2298   MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
2299 
2300   if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
2301     if (CI->hasOneUse() && isValueAvailable(CI)) {
2302       // Try to optimize or fold the cmp.
2303       CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2304       switch (Predicate) {
2305       default:
2306         break;
2307       case CmpInst::FCMP_FALSE:
2308         fastEmitBranch(FBB, DbgLoc);
2309         return true;
2310       case CmpInst::FCMP_TRUE:
2311         fastEmitBranch(TBB, DbgLoc);
2312         return true;
2313       }
2314 
2315       // Try to emit a combined compare-and-branch first.
2316       if (emitCompareAndBranch(BI))
2317         return true;
2318 
2319       // Try to take advantage of fallthrough opportunities.
2320       if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2321         std::swap(TBB, FBB);
2322         Predicate = CmpInst::getInversePredicate(Predicate);
2323       }
2324 
2325       // Emit the cmp.
2326       if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
2327         return false;
2328 
2329       // FCMP_UEQ and FCMP_ONE cannot be checked with a single branch
2330       // instruction.
2331       AArch64CC::CondCode CC = getCompareCC(Predicate);
2332       AArch64CC::CondCode ExtraCC = AArch64CC::AL;
2333       switch (Predicate) {
2334       default:
2335         break;
2336       case CmpInst::FCMP_UEQ:
2337         ExtraCC = AArch64CC::EQ;
2338         CC = AArch64CC::VS;
2339         break;
2340       case CmpInst::FCMP_ONE:
2341         ExtraCC = AArch64CC::MI;
2342         CC = AArch64CC::GT;
2343         break;
2344       }
2345       assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2346 
2347       // Emit the extra branch for FCMP_UEQ and FCMP_ONE.
2348       if (ExtraCC != AArch64CC::AL) {
2349         BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
2350             .addImm(ExtraCC)
2351             .addMBB(TBB);
2352       }
2353 
2354       // Emit the branch.
2355       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
2356           .addImm(CC)
2357           .addMBB(TBB);
2358 
2359       finishCondBranch(BI->getParent(), TBB, FBB);
2360       return true;
2361     }
2362   } else if (const auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) {
2363     uint64_t Imm = CI->getZExtValue();
2364     MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
2365     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::B))
2366         .addMBB(Target);
2367 
2368     // Obtain the branch probability and add the target to the successor list.
2369     if (FuncInfo.BPI) {
2370       auto BranchProbability = FuncInfo.BPI->getEdgeProbability(
2371           BI->getParent(), Target->getBasicBlock());
2372       FuncInfo.MBB->addSuccessor(Target, BranchProbability);
2373     } else
2374       FuncInfo.MBB->addSuccessorWithoutProb(Target);
2375     return true;
2376   } else {
2377     AArch64CC::CondCode CC = AArch64CC::NE;
2378     if (foldXALUIntrinsic(CC, I, BI->getCondition())) {
2379       // Fake request the condition, otherwise the intrinsic might be completely
2380       // optimized away.
2381       unsigned CondReg = getRegForValue(BI->getCondition());
2382       if (!CondReg)
2383         return false;
2384 
2385       // Emit the branch.
2386       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
2387         .addImm(CC)
2388         .addMBB(TBB);
2389 
2390       finishCondBranch(BI->getParent(), TBB, FBB);
2391       return true;
2392     }
2393   }
2394 
2395   unsigned CondReg = getRegForValue(BI->getCondition());
2396   if (CondReg == 0)
2397     return false;
2398   bool CondRegIsKill = hasTrivialKill(BI->getCondition());
2399 
2400   // i1 conditions come as i32 values, test the lowest bit with tb(n)z.
2401   unsigned Opcode = AArch64::TBNZW;
2402   if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2403     std::swap(TBB, FBB);
2404     Opcode = AArch64::TBZW;
2405   }
2406 
2407   const MCInstrDesc &II = TII.get(Opcode);
2408   unsigned ConstrainedCondReg
2409     = constrainOperandRegClass(II, CondReg, II.getNumDefs());
2410   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
2411       .addReg(ConstrainedCondReg, getKillRegState(CondRegIsKill))
2412       .addImm(0)
2413       .addMBB(TBB);
2414 
2415   finishCondBranch(BI->getParent(), TBB, FBB);
2416   return true;
2417 }
2418 
selectIndirectBr(const Instruction * I)2419 bool AArch64FastISel::selectIndirectBr(const Instruction *I) {
2420   const IndirectBrInst *BI = cast<IndirectBrInst>(I);
2421   unsigned AddrReg = getRegForValue(BI->getOperand(0));
2422   if (AddrReg == 0)
2423     return false;
2424 
2425   // Emit the indirect branch.
2426   const MCInstrDesc &II = TII.get(AArch64::BR);
2427   AddrReg = constrainOperandRegClass(II, AddrReg,  II.getNumDefs());
2428   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(AddrReg);
2429 
2430   // Make sure the CFG is up-to-date.
2431   for (auto *Succ : BI->successors())
2432     FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[Succ]);
2433 
2434   return true;
2435 }
2436 
selectCmp(const Instruction * I)2437 bool AArch64FastISel::selectCmp(const Instruction *I) {
2438   const CmpInst *CI = cast<CmpInst>(I);
2439 
2440   // Vectors of i1 are weird: bail out.
2441   if (CI->getType()->isVectorTy())
2442     return false;
2443 
2444   // Try to optimize or fold the cmp.
2445   CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2446   unsigned ResultReg = 0;
2447   switch (Predicate) {
2448   default:
2449     break;
2450   case CmpInst::FCMP_FALSE:
2451     ResultReg = createResultReg(&AArch64::GPR32RegClass);
2452     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2453             TII.get(TargetOpcode::COPY), ResultReg)
2454         .addReg(AArch64::WZR, getKillRegState(true));
2455     break;
2456   case CmpInst::FCMP_TRUE:
2457     ResultReg = fastEmit_i(MVT::i32, MVT::i32, ISD::Constant, 1);
2458     break;
2459   }
2460 
2461   if (ResultReg) {
2462     updateValueMap(I, ResultReg);
2463     return true;
2464   }
2465 
2466   // Emit the cmp.
2467   if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
2468     return false;
2469 
2470   ResultReg = createResultReg(&AArch64::GPR32RegClass);
2471 
2472   // FCMP_UEQ and FCMP_ONE cannot be checked with a single instruction. These
2473   // condition codes are inverted, because they are used by CSINC.
2474   static unsigned CondCodeTable[2][2] = {
2475     { AArch64CC::NE, AArch64CC::VC },
2476     { AArch64CC::PL, AArch64CC::LE }
2477   };
2478   unsigned *CondCodes = nullptr;
2479   switch (Predicate) {
2480   default:
2481     break;
2482   case CmpInst::FCMP_UEQ:
2483     CondCodes = &CondCodeTable[0][0];
2484     break;
2485   case CmpInst::FCMP_ONE:
2486     CondCodes = &CondCodeTable[1][0];
2487     break;
2488   }
2489 
2490   if (CondCodes) {
2491     unsigned TmpReg1 = createResultReg(&AArch64::GPR32RegClass);
2492     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
2493             TmpReg1)
2494         .addReg(AArch64::WZR, getKillRegState(true))
2495         .addReg(AArch64::WZR, getKillRegState(true))
2496         .addImm(CondCodes[0]);
2497     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
2498             ResultReg)
2499         .addReg(TmpReg1, getKillRegState(true))
2500         .addReg(AArch64::WZR, getKillRegState(true))
2501         .addImm(CondCodes[1]);
2502 
2503     updateValueMap(I, ResultReg);
2504     return true;
2505   }
2506 
2507   // Now set a register based on the comparison.
2508   AArch64CC::CondCode CC = getCompareCC(Predicate);
2509   assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2510   AArch64CC::CondCode invertedCC = getInvertedCondCode(CC);
2511   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
2512           ResultReg)
2513       .addReg(AArch64::WZR, getKillRegState(true))
2514       .addReg(AArch64::WZR, getKillRegState(true))
2515       .addImm(invertedCC);
2516 
2517   updateValueMap(I, ResultReg);
2518   return true;
2519 }
2520 
2521 /// \brief Optimize selects of i1 if one of the operands has a 'true' or 'false'
2522 /// value.
optimizeSelect(const SelectInst * SI)2523 bool AArch64FastISel::optimizeSelect(const SelectInst *SI) {
2524   if (!SI->getType()->isIntegerTy(1))
2525     return false;
2526 
2527   const Value *Src1Val, *Src2Val;
2528   unsigned Opc = 0;
2529   bool NeedExtraOp = false;
2530   if (auto *CI = dyn_cast<ConstantInt>(SI->getTrueValue())) {
2531     if (CI->isOne()) {
2532       Src1Val = SI->getCondition();
2533       Src2Val = SI->getFalseValue();
2534       Opc = AArch64::ORRWrr;
2535     } else {
2536       assert(CI->isZero());
2537       Src1Val = SI->getFalseValue();
2538       Src2Val = SI->getCondition();
2539       Opc = AArch64::BICWrr;
2540     }
2541   } else if (auto *CI = dyn_cast<ConstantInt>(SI->getFalseValue())) {
2542     if (CI->isOne()) {
2543       Src1Val = SI->getCondition();
2544       Src2Val = SI->getTrueValue();
2545       Opc = AArch64::ORRWrr;
2546       NeedExtraOp = true;
2547     } else {
2548       assert(CI->isZero());
2549       Src1Val = SI->getCondition();
2550       Src2Val = SI->getTrueValue();
2551       Opc = AArch64::ANDWrr;
2552     }
2553   }
2554 
2555   if (!Opc)
2556     return false;
2557 
2558   unsigned Src1Reg = getRegForValue(Src1Val);
2559   if (!Src1Reg)
2560     return false;
2561   bool Src1IsKill = hasTrivialKill(Src1Val);
2562 
2563   unsigned Src2Reg = getRegForValue(Src2Val);
2564   if (!Src2Reg)
2565     return false;
2566   bool Src2IsKill = hasTrivialKill(Src2Val);
2567 
2568   if (NeedExtraOp) {
2569     Src1Reg = emitLogicalOp_ri(ISD::XOR, MVT::i32, Src1Reg, Src1IsKill, 1);
2570     Src1IsKill = true;
2571   }
2572   unsigned ResultReg = fastEmitInst_rr(Opc, &AArch64::GPR32RegClass, Src1Reg,
2573                                        Src1IsKill, Src2Reg, Src2IsKill);
2574   updateValueMap(SI, ResultReg);
2575   return true;
2576 }
2577 
selectSelect(const Instruction * I)2578 bool AArch64FastISel::selectSelect(const Instruction *I) {
2579   assert(isa<SelectInst>(I) && "Expected a select instruction.");
2580   MVT VT;
2581   if (!isTypeSupported(I->getType(), VT))
2582     return false;
2583 
2584   unsigned Opc;
2585   const TargetRegisterClass *RC;
2586   switch (VT.SimpleTy) {
2587   default:
2588     return false;
2589   case MVT::i1:
2590   case MVT::i8:
2591   case MVT::i16:
2592   case MVT::i32:
2593     Opc = AArch64::CSELWr;
2594     RC = &AArch64::GPR32RegClass;
2595     break;
2596   case MVT::i64:
2597     Opc = AArch64::CSELXr;
2598     RC = &AArch64::GPR64RegClass;
2599     break;
2600   case MVT::f32:
2601     Opc = AArch64::FCSELSrrr;
2602     RC = &AArch64::FPR32RegClass;
2603     break;
2604   case MVT::f64:
2605     Opc = AArch64::FCSELDrrr;
2606     RC = &AArch64::FPR64RegClass;
2607     break;
2608   }
2609 
2610   const SelectInst *SI = cast<SelectInst>(I);
2611   const Value *Cond = SI->getCondition();
2612   AArch64CC::CondCode CC = AArch64CC::NE;
2613   AArch64CC::CondCode ExtraCC = AArch64CC::AL;
2614 
2615   if (optimizeSelect(SI))
2616     return true;
2617 
2618   // Try to pickup the flags, so we don't have to emit another compare.
2619   if (foldXALUIntrinsic(CC, I, Cond)) {
2620     // Fake request the condition to force emission of the XALU intrinsic.
2621     unsigned CondReg = getRegForValue(Cond);
2622     if (!CondReg)
2623       return false;
2624   } else if (isa<CmpInst>(Cond) && cast<CmpInst>(Cond)->hasOneUse() &&
2625              isValueAvailable(Cond)) {
2626     const auto *Cmp = cast<CmpInst>(Cond);
2627     // Try to optimize or fold the cmp.
2628     CmpInst::Predicate Predicate = optimizeCmpPredicate(Cmp);
2629     const Value *FoldSelect = nullptr;
2630     switch (Predicate) {
2631     default:
2632       break;
2633     case CmpInst::FCMP_FALSE:
2634       FoldSelect = SI->getFalseValue();
2635       break;
2636     case CmpInst::FCMP_TRUE:
2637       FoldSelect = SI->getTrueValue();
2638       break;
2639     }
2640 
2641     if (FoldSelect) {
2642       unsigned SrcReg = getRegForValue(FoldSelect);
2643       if (!SrcReg)
2644         return false;
2645       unsigned UseReg = lookUpRegForValue(SI);
2646       if (UseReg)
2647         MRI.clearKillFlags(UseReg);
2648 
2649       updateValueMap(I, SrcReg);
2650       return true;
2651     }
2652 
2653     // Emit the cmp.
2654     if (!emitCmp(Cmp->getOperand(0), Cmp->getOperand(1), Cmp->isUnsigned()))
2655       return false;
2656 
2657     // FCMP_UEQ and FCMP_ONE cannot be checked with a single select instruction.
2658     CC = getCompareCC(Predicate);
2659     switch (Predicate) {
2660     default:
2661       break;
2662     case CmpInst::FCMP_UEQ:
2663       ExtraCC = AArch64CC::EQ;
2664       CC = AArch64CC::VS;
2665       break;
2666     case CmpInst::FCMP_ONE:
2667       ExtraCC = AArch64CC::MI;
2668       CC = AArch64CC::GT;
2669       break;
2670     }
2671     assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2672   } else {
2673     unsigned CondReg = getRegForValue(Cond);
2674     if (!CondReg)
2675       return false;
2676     bool CondIsKill = hasTrivialKill(Cond);
2677 
2678     const MCInstrDesc &II = TII.get(AArch64::ANDSWri);
2679     CondReg = constrainOperandRegClass(II, CondReg, 1);
2680 
2681     // Emit a TST instruction (ANDS wzr, reg, #imm).
2682     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II,
2683             AArch64::WZR)
2684         .addReg(CondReg, getKillRegState(CondIsKill))
2685         .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
2686   }
2687 
2688   unsigned Src1Reg = getRegForValue(SI->getTrueValue());
2689   bool Src1IsKill = hasTrivialKill(SI->getTrueValue());
2690 
2691   unsigned Src2Reg = getRegForValue(SI->getFalseValue());
2692   bool Src2IsKill = hasTrivialKill(SI->getFalseValue());
2693 
2694   if (!Src1Reg || !Src2Reg)
2695     return false;
2696 
2697   if (ExtraCC != AArch64CC::AL) {
2698     Src2Reg = fastEmitInst_rri(Opc, RC, Src1Reg, Src1IsKill, Src2Reg,
2699                                Src2IsKill, ExtraCC);
2700     Src2IsKill = true;
2701   }
2702   unsigned ResultReg = fastEmitInst_rri(Opc, RC, Src1Reg, Src1IsKill, Src2Reg,
2703                                         Src2IsKill, CC);
2704   updateValueMap(I, ResultReg);
2705   return true;
2706 }
2707 
selectFPExt(const Instruction * I)2708 bool AArch64FastISel::selectFPExt(const Instruction *I) {
2709   Value *V = I->getOperand(0);
2710   if (!I->getType()->isDoubleTy() || !V->getType()->isFloatTy())
2711     return false;
2712 
2713   unsigned Op = getRegForValue(V);
2714   if (Op == 0)
2715     return false;
2716 
2717   unsigned ResultReg = createResultReg(&AArch64::FPR64RegClass);
2718   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTDSr),
2719           ResultReg).addReg(Op);
2720   updateValueMap(I, ResultReg);
2721   return true;
2722 }
2723 
selectFPTrunc(const Instruction * I)2724 bool AArch64FastISel::selectFPTrunc(const Instruction *I) {
2725   Value *V = I->getOperand(0);
2726   if (!I->getType()->isFloatTy() || !V->getType()->isDoubleTy())
2727     return false;
2728 
2729   unsigned Op = getRegForValue(V);
2730   if (Op == 0)
2731     return false;
2732 
2733   unsigned ResultReg = createResultReg(&AArch64::FPR32RegClass);
2734   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTSDr),
2735           ResultReg).addReg(Op);
2736   updateValueMap(I, ResultReg);
2737   return true;
2738 }
2739 
2740 // FPToUI and FPToSI
selectFPToInt(const Instruction * I,bool Signed)2741 bool AArch64FastISel::selectFPToInt(const Instruction *I, bool Signed) {
2742   MVT DestVT;
2743   if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
2744     return false;
2745 
2746   unsigned SrcReg = getRegForValue(I->getOperand(0));
2747   if (SrcReg == 0)
2748     return false;
2749 
2750   EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true);
2751   if (SrcVT == MVT::f128 || SrcVT == MVT::f16)
2752     return false;
2753 
2754   unsigned Opc;
2755   if (SrcVT == MVT::f64) {
2756     if (Signed)
2757       Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWDr : AArch64::FCVTZSUXDr;
2758     else
2759       Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWDr : AArch64::FCVTZUUXDr;
2760   } else {
2761     if (Signed)
2762       Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWSr : AArch64::FCVTZSUXSr;
2763     else
2764       Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWSr : AArch64::FCVTZUUXSr;
2765   }
2766   unsigned ResultReg = createResultReg(
2767       DestVT == MVT::i32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass);
2768   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
2769       .addReg(SrcReg);
2770   updateValueMap(I, ResultReg);
2771   return true;
2772 }
2773 
selectIntToFP(const Instruction * I,bool Signed)2774 bool AArch64FastISel::selectIntToFP(const Instruction *I, bool Signed) {
2775   MVT DestVT;
2776   if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
2777     return false;
2778   // Let regular ISEL handle FP16
2779   if (DestVT == MVT::f16)
2780     return false;
2781 
2782   assert((DestVT == MVT::f32 || DestVT == MVT::f64) &&
2783          "Unexpected value type.");
2784 
2785   unsigned SrcReg = getRegForValue(I->getOperand(0));
2786   if (!SrcReg)
2787     return false;
2788   bool SrcIsKill = hasTrivialKill(I->getOperand(0));
2789 
2790   EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true);
2791 
2792   // Handle sign-extension.
2793   if (SrcVT == MVT::i16 || SrcVT == MVT::i8 || SrcVT == MVT::i1) {
2794     SrcReg =
2795         emitIntExt(SrcVT.getSimpleVT(), SrcReg, MVT::i32, /*isZExt*/ !Signed);
2796     if (!SrcReg)
2797       return false;
2798     SrcIsKill = true;
2799   }
2800 
2801   unsigned Opc;
2802   if (SrcVT == MVT::i64) {
2803     if (Signed)
2804       Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUXSri : AArch64::SCVTFUXDri;
2805     else
2806       Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUXSri : AArch64::UCVTFUXDri;
2807   } else {
2808     if (Signed)
2809       Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUWSri : AArch64::SCVTFUWDri;
2810     else
2811       Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUWSri : AArch64::UCVTFUWDri;
2812   }
2813 
2814   unsigned ResultReg = fastEmitInst_r(Opc, TLI.getRegClassFor(DestVT), SrcReg,
2815                                       SrcIsKill);
2816   updateValueMap(I, ResultReg);
2817   return true;
2818 }
2819 
fastLowerArguments()2820 bool AArch64FastISel::fastLowerArguments() {
2821   if (!FuncInfo.CanLowerReturn)
2822     return false;
2823 
2824   const Function *F = FuncInfo.Fn;
2825   if (F->isVarArg())
2826     return false;
2827 
2828   CallingConv::ID CC = F->getCallingConv();
2829   if (CC != CallingConv::C)
2830     return false;
2831 
2832   // Only handle simple cases of up to 8 GPR and FPR each.
2833   unsigned GPRCnt = 0;
2834   unsigned FPRCnt = 0;
2835   unsigned Idx = 0;
2836   for (auto const &Arg : F->args()) {
2837     // The first argument is at index 1.
2838     ++Idx;
2839     if (F->getAttributes().hasAttribute(Idx, Attribute::ByVal) ||
2840         F->getAttributes().hasAttribute(Idx, Attribute::InReg) ||
2841         F->getAttributes().hasAttribute(Idx, Attribute::StructRet) ||
2842         F->getAttributes().hasAttribute(Idx, Attribute::SwiftSelf) ||
2843         F->getAttributes().hasAttribute(Idx, Attribute::SwiftError) ||
2844         F->getAttributes().hasAttribute(Idx, Attribute::Nest))
2845       return false;
2846 
2847     Type *ArgTy = Arg.getType();
2848     if (ArgTy->isStructTy() || ArgTy->isArrayTy())
2849       return false;
2850 
2851     EVT ArgVT = TLI.getValueType(DL, ArgTy);
2852     if (!ArgVT.isSimple())
2853       return false;
2854 
2855     MVT VT = ArgVT.getSimpleVT().SimpleTy;
2856     if (VT.isFloatingPoint() && !Subtarget->hasFPARMv8())
2857       return false;
2858 
2859     if (VT.isVector() &&
2860         (!Subtarget->hasNEON() || !Subtarget->isLittleEndian()))
2861       return false;
2862 
2863     if (VT >= MVT::i1 && VT <= MVT::i64)
2864       ++GPRCnt;
2865     else if ((VT >= MVT::f16 && VT <= MVT::f64) || VT.is64BitVector() ||
2866              VT.is128BitVector())
2867       ++FPRCnt;
2868     else
2869       return false;
2870 
2871     if (GPRCnt > 8 || FPRCnt > 8)
2872       return false;
2873   }
2874 
2875   static const MCPhysReg Registers[6][8] = {
2876     { AArch64::W0, AArch64::W1, AArch64::W2, AArch64::W3, AArch64::W4,
2877       AArch64::W5, AArch64::W6, AArch64::W7 },
2878     { AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3, AArch64::X4,
2879       AArch64::X5, AArch64::X6, AArch64::X7 },
2880     { AArch64::H0, AArch64::H1, AArch64::H2, AArch64::H3, AArch64::H4,
2881       AArch64::H5, AArch64::H6, AArch64::H7 },
2882     { AArch64::S0, AArch64::S1, AArch64::S2, AArch64::S3, AArch64::S4,
2883       AArch64::S5, AArch64::S6, AArch64::S7 },
2884     { AArch64::D0, AArch64::D1, AArch64::D2, AArch64::D3, AArch64::D4,
2885       AArch64::D5, AArch64::D6, AArch64::D7 },
2886     { AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3, AArch64::Q4,
2887       AArch64::Q5, AArch64::Q6, AArch64::Q7 }
2888   };
2889 
2890   unsigned GPRIdx = 0;
2891   unsigned FPRIdx = 0;
2892   for (auto const &Arg : F->args()) {
2893     MVT VT = TLI.getSimpleValueType(DL, Arg.getType());
2894     unsigned SrcReg;
2895     const TargetRegisterClass *RC;
2896     if (VT >= MVT::i1 && VT <= MVT::i32) {
2897       SrcReg = Registers[0][GPRIdx++];
2898       RC = &AArch64::GPR32RegClass;
2899       VT = MVT::i32;
2900     } else if (VT == MVT::i64) {
2901       SrcReg = Registers[1][GPRIdx++];
2902       RC = &AArch64::GPR64RegClass;
2903     } else if (VT == MVT::f16) {
2904       SrcReg = Registers[2][FPRIdx++];
2905       RC = &AArch64::FPR16RegClass;
2906     } else if (VT ==  MVT::f32) {
2907       SrcReg = Registers[3][FPRIdx++];
2908       RC = &AArch64::FPR32RegClass;
2909     } else if ((VT == MVT::f64) || VT.is64BitVector()) {
2910       SrcReg = Registers[4][FPRIdx++];
2911       RC = &AArch64::FPR64RegClass;
2912     } else if (VT.is128BitVector()) {
2913       SrcReg = Registers[5][FPRIdx++];
2914       RC = &AArch64::FPR128RegClass;
2915     } else
2916       llvm_unreachable("Unexpected value type.");
2917 
2918     unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
2919     // FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
2920     // Without this, EmitLiveInCopies may eliminate the livein if its only
2921     // use is a bitcast (which isn't turned into an instruction).
2922     unsigned ResultReg = createResultReg(RC);
2923     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2924             TII.get(TargetOpcode::COPY), ResultReg)
2925         .addReg(DstReg, getKillRegState(true));
2926     updateValueMap(&Arg, ResultReg);
2927   }
2928   return true;
2929 }
2930 
processCallArgs(CallLoweringInfo & CLI,SmallVectorImpl<MVT> & OutVTs,unsigned & NumBytes)2931 bool AArch64FastISel::processCallArgs(CallLoweringInfo &CLI,
2932                                       SmallVectorImpl<MVT> &OutVTs,
2933                                       unsigned &NumBytes) {
2934   CallingConv::ID CC = CLI.CallConv;
2935   SmallVector<CCValAssign, 16> ArgLocs;
2936   CCState CCInfo(CC, false, *FuncInfo.MF, ArgLocs, *Context);
2937   CCInfo.AnalyzeCallOperands(OutVTs, CLI.OutFlags, CCAssignFnForCall(CC));
2938 
2939   // Get a count of how many bytes are to be pushed on the stack.
2940   NumBytes = CCInfo.getNextStackOffset();
2941 
2942   // Issue CALLSEQ_START
2943   unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
2944   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown))
2945     .addImm(NumBytes);
2946 
2947   // Process the args.
2948   for (CCValAssign &VA : ArgLocs) {
2949     const Value *ArgVal = CLI.OutVals[VA.getValNo()];
2950     MVT ArgVT = OutVTs[VA.getValNo()];
2951 
2952     unsigned ArgReg = getRegForValue(ArgVal);
2953     if (!ArgReg)
2954       return false;
2955 
2956     // Handle arg promotion: SExt, ZExt, AExt.
2957     switch (VA.getLocInfo()) {
2958     case CCValAssign::Full:
2959       break;
2960     case CCValAssign::SExt: {
2961       MVT DestVT = VA.getLocVT();
2962       MVT SrcVT = ArgVT;
2963       ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/false);
2964       if (!ArgReg)
2965         return false;
2966       break;
2967     }
2968     case CCValAssign::AExt:
2969     // Intentional fall-through.
2970     case CCValAssign::ZExt: {
2971       MVT DestVT = VA.getLocVT();
2972       MVT SrcVT = ArgVT;
2973       ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/true);
2974       if (!ArgReg)
2975         return false;
2976       break;
2977     }
2978     default:
2979       llvm_unreachable("Unknown arg promotion!");
2980     }
2981 
2982     // Now copy/store arg to correct locations.
2983     if (VA.isRegLoc() && !VA.needsCustom()) {
2984       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2985               TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(ArgReg);
2986       CLI.OutRegs.push_back(VA.getLocReg());
2987     } else if (VA.needsCustom()) {
2988       // FIXME: Handle custom args.
2989       return false;
2990     } else {
2991       assert(VA.isMemLoc() && "Assuming store on stack.");
2992 
2993       // Don't emit stores for undef values.
2994       if (isa<UndefValue>(ArgVal))
2995         continue;
2996 
2997       // Need to store on the stack.
2998       unsigned ArgSize = (ArgVT.getSizeInBits() + 7) / 8;
2999 
3000       unsigned BEAlign = 0;
3001       if (ArgSize < 8 && !Subtarget->isLittleEndian())
3002         BEAlign = 8 - ArgSize;
3003 
3004       Address Addr;
3005       Addr.setKind(Address::RegBase);
3006       Addr.setReg(AArch64::SP);
3007       Addr.setOffset(VA.getLocMemOffset() + BEAlign);
3008 
3009       unsigned Alignment = DL.getABITypeAlignment(ArgVal->getType());
3010       MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
3011           MachinePointerInfo::getStack(*FuncInfo.MF, Addr.getOffset()),
3012           MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment);
3013 
3014       if (!emitStore(ArgVT, ArgReg, Addr, MMO))
3015         return false;
3016     }
3017   }
3018   return true;
3019 }
3020 
finishCall(CallLoweringInfo & CLI,MVT RetVT,unsigned NumBytes)3021 bool AArch64FastISel::finishCall(CallLoweringInfo &CLI, MVT RetVT,
3022                                  unsigned NumBytes) {
3023   CallingConv::ID CC = CLI.CallConv;
3024 
3025   // Issue CALLSEQ_END
3026   unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
3027   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackUp))
3028     .addImm(NumBytes).addImm(0);
3029 
3030   // Now the return value.
3031   if (RetVT != MVT::isVoid) {
3032     SmallVector<CCValAssign, 16> RVLocs;
3033     CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context);
3034     CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC));
3035 
3036     // Only handle a single return value.
3037     if (RVLocs.size() != 1)
3038       return false;
3039 
3040     // Copy all of the result registers out of their specified physreg.
3041     MVT CopyVT = RVLocs[0].getValVT();
3042 
3043     // TODO: Handle big-endian results
3044     if (CopyVT.isVector() && !Subtarget->isLittleEndian())
3045       return false;
3046 
3047     unsigned ResultReg = createResultReg(TLI.getRegClassFor(CopyVT));
3048     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3049             TII.get(TargetOpcode::COPY), ResultReg)
3050         .addReg(RVLocs[0].getLocReg());
3051     CLI.InRegs.push_back(RVLocs[0].getLocReg());
3052 
3053     CLI.ResultReg = ResultReg;
3054     CLI.NumResultRegs = 1;
3055   }
3056 
3057   return true;
3058 }
3059 
fastLowerCall(CallLoweringInfo & CLI)3060 bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) {
3061   CallingConv::ID CC  = CLI.CallConv;
3062   bool IsTailCall     = CLI.IsTailCall;
3063   bool IsVarArg       = CLI.IsVarArg;
3064   const Value *Callee = CLI.Callee;
3065   MCSymbol *Symbol = CLI.Symbol;
3066 
3067   if (!Callee && !Symbol)
3068     return false;
3069 
3070   // Allow SelectionDAG isel to handle tail calls.
3071   if (IsTailCall)
3072     return false;
3073 
3074   CodeModel::Model CM = TM.getCodeModel();
3075   // Only support the small and large code model.
3076   if (CM != CodeModel::Small && CM != CodeModel::Large)
3077     return false;
3078 
3079   // FIXME: Add large code model support for ELF.
3080   if (CM == CodeModel::Large && !Subtarget->isTargetMachO())
3081     return false;
3082 
3083   // Let SDISel handle vararg functions.
3084   if (IsVarArg)
3085     return false;
3086 
3087   // FIXME: Only handle *simple* calls for now.
3088   MVT RetVT;
3089   if (CLI.RetTy->isVoidTy())
3090     RetVT = MVT::isVoid;
3091   else if (!isTypeLegal(CLI.RetTy, RetVT))
3092     return false;
3093 
3094   for (auto Flag : CLI.OutFlags)
3095     if (Flag.isInReg() || Flag.isSRet() || Flag.isNest() || Flag.isByVal() ||
3096         Flag.isSwiftSelf() || Flag.isSwiftError())
3097       return false;
3098 
3099   // Set up the argument vectors.
3100   SmallVector<MVT, 16> OutVTs;
3101   OutVTs.reserve(CLI.OutVals.size());
3102 
3103   for (auto *Val : CLI.OutVals) {
3104     MVT VT;
3105     if (!isTypeLegal(Val->getType(), VT) &&
3106         !(VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16))
3107       return false;
3108 
3109     // We don't handle vector parameters yet.
3110     if (VT.isVector() || VT.getSizeInBits() > 64)
3111       return false;
3112 
3113     OutVTs.push_back(VT);
3114   }
3115 
3116   Address Addr;
3117   if (Callee && !computeCallAddress(Callee, Addr))
3118     return false;
3119 
3120   // Handle the arguments now that we've gotten them.
3121   unsigned NumBytes;
3122   if (!processCallArgs(CLI, OutVTs, NumBytes))
3123     return false;
3124 
3125   // Issue the call.
3126   MachineInstrBuilder MIB;
3127   if (CM == CodeModel::Small) {
3128     const MCInstrDesc &II = TII.get(Addr.getReg() ? AArch64::BLR : AArch64::BL);
3129     MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II);
3130     if (Symbol)
3131       MIB.addSym(Symbol, 0);
3132     else if (Addr.getGlobalValue())
3133       MIB.addGlobalAddress(Addr.getGlobalValue(), 0, 0);
3134     else if (Addr.getReg()) {
3135       unsigned Reg = constrainOperandRegClass(II, Addr.getReg(), 0);
3136       MIB.addReg(Reg);
3137     } else
3138       return false;
3139   } else {
3140     unsigned CallReg = 0;
3141     if (Symbol) {
3142       unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
3143       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
3144               ADRPReg)
3145           .addSym(Symbol, AArch64II::MO_GOT | AArch64II::MO_PAGE);
3146 
3147       CallReg = createResultReg(&AArch64::GPR64RegClass);
3148       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3149               TII.get(AArch64::LDRXui), CallReg)
3150           .addReg(ADRPReg)
3151           .addSym(Symbol,
3152                   AArch64II::MO_GOT | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
3153     } else if (Addr.getGlobalValue())
3154       CallReg = materializeGV(Addr.getGlobalValue());
3155     else if (Addr.getReg())
3156       CallReg = Addr.getReg();
3157 
3158     if (!CallReg)
3159       return false;
3160 
3161     const MCInstrDesc &II = TII.get(AArch64::BLR);
3162     CallReg = constrainOperandRegClass(II, CallReg, 0);
3163     MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(CallReg);
3164   }
3165 
3166   // Add implicit physical register uses to the call.
3167   for (auto Reg : CLI.OutRegs)
3168     MIB.addReg(Reg, RegState::Implicit);
3169 
3170   // Add a register mask with the call-preserved registers.
3171   // Proper defs for return values will be added by setPhysRegsDeadExcept().
3172   MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));
3173 
3174   CLI.Call = MIB;
3175 
3176   // Finish off the call including any return values.
3177   return finishCall(CLI, RetVT, NumBytes);
3178 }
3179 
isMemCpySmall(uint64_t Len,unsigned Alignment)3180 bool AArch64FastISel::isMemCpySmall(uint64_t Len, unsigned Alignment) {
3181   if (Alignment)
3182     return Len / Alignment <= 4;
3183   else
3184     return Len < 32;
3185 }
3186 
tryEmitSmallMemCpy(Address Dest,Address Src,uint64_t Len,unsigned Alignment)3187 bool AArch64FastISel::tryEmitSmallMemCpy(Address Dest, Address Src,
3188                                          uint64_t Len, unsigned Alignment) {
3189   // Make sure we don't bloat code by inlining very large memcpy's.
3190   if (!isMemCpySmall(Len, Alignment))
3191     return false;
3192 
3193   int64_t UnscaledOffset = 0;
3194   Address OrigDest = Dest;
3195   Address OrigSrc = Src;
3196 
3197   while (Len) {
3198     MVT VT;
3199     if (!Alignment || Alignment >= 8) {
3200       if (Len >= 8)
3201         VT = MVT::i64;
3202       else if (Len >= 4)
3203         VT = MVT::i32;
3204       else if (Len >= 2)
3205         VT = MVT::i16;
3206       else {
3207         VT = MVT::i8;
3208       }
3209     } else {
3210       // Bound based on alignment.
3211       if (Len >= 4 && Alignment == 4)
3212         VT = MVT::i32;
3213       else if (Len >= 2 && Alignment == 2)
3214         VT = MVT::i16;
3215       else {
3216         VT = MVT::i8;
3217       }
3218     }
3219 
3220     unsigned ResultReg = emitLoad(VT, VT, Src);
3221     if (!ResultReg)
3222       return false;
3223 
3224     if (!emitStore(VT, ResultReg, Dest))
3225       return false;
3226 
3227     int64_t Size = VT.getSizeInBits() / 8;
3228     Len -= Size;
3229     UnscaledOffset += Size;
3230 
3231     // We need to recompute the unscaled offset for each iteration.
3232     Dest.setOffset(OrigDest.getOffset() + UnscaledOffset);
3233     Src.setOffset(OrigSrc.getOffset() + UnscaledOffset);
3234   }
3235 
3236   return true;
3237 }
3238 
3239 /// \brief Check if it is possible to fold the condition from the XALU intrinsic
3240 /// into the user. The condition code will only be updated on success.
foldXALUIntrinsic(AArch64CC::CondCode & CC,const Instruction * I,const Value * Cond)3241 bool AArch64FastISel::foldXALUIntrinsic(AArch64CC::CondCode &CC,
3242                                         const Instruction *I,
3243                                         const Value *Cond) {
3244   if (!isa<ExtractValueInst>(Cond))
3245     return false;
3246 
3247   const auto *EV = cast<ExtractValueInst>(Cond);
3248   if (!isa<IntrinsicInst>(EV->getAggregateOperand()))
3249     return false;
3250 
3251   const auto *II = cast<IntrinsicInst>(EV->getAggregateOperand());
3252   MVT RetVT;
3253   const Function *Callee = II->getCalledFunction();
3254   Type *RetTy =
3255   cast<StructType>(Callee->getReturnType())->getTypeAtIndex(0U);
3256   if (!isTypeLegal(RetTy, RetVT))
3257     return false;
3258 
3259   if (RetVT != MVT::i32 && RetVT != MVT::i64)
3260     return false;
3261 
3262   const Value *LHS = II->getArgOperand(0);
3263   const Value *RHS = II->getArgOperand(1);
3264 
3265   // Canonicalize immediate to the RHS.
3266   if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) &&
3267       isCommutativeIntrinsic(II))
3268     std::swap(LHS, RHS);
3269 
3270   // Simplify multiplies.
3271   Intrinsic::ID IID = II->getIntrinsicID();
3272   switch (IID) {
3273   default:
3274     break;
3275   case Intrinsic::smul_with_overflow:
3276     if (const auto *C = dyn_cast<ConstantInt>(RHS))
3277       if (C->getValue() == 2)
3278         IID = Intrinsic::sadd_with_overflow;
3279     break;
3280   case Intrinsic::umul_with_overflow:
3281     if (const auto *C = dyn_cast<ConstantInt>(RHS))
3282       if (C->getValue() == 2)
3283         IID = Intrinsic::uadd_with_overflow;
3284     break;
3285   }
3286 
3287   AArch64CC::CondCode TmpCC;
3288   switch (IID) {
3289   default:
3290     return false;
3291   case Intrinsic::sadd_with_overflow:
3292   case Intrinsic::ssub_with_overflow:
3293     TmpCC = AArch64CC::VS;
3294     break;
3295   case Intrinsic::uadd_with_overflow:
3296     TmpCC = AArch64CC::HS;
3297     break;
3298   case Intrinsic::usub_with_overflow:
3299     TmpCC = AArch64CC::LO;
3300     break;
3301   case Intrinsic::smul_with_overflow:
3302   case Intrinsic::umul_with_overflow:
3303     TmpCC = AArch64CC::NE;
3304     break;
3305   }
3306 
3307   // Check if both instructions are in the same basic block.
3308   if (!isValueAvailable(II))
3309     return false;
3310 
3311   // Make sure nothing is in the way
3312   BasicBlock::const_iterator Start(I);
3313   BasicBlock::const_iterator End(II);
3314   for (auto Itr = std::prev(Start); Itr != End; --Itr) {
3315     // We only expect extractvalue instructions between the intrinsic and the
3316     // instruction to be selected.
3317     if (!isa<ExtractValueInst>(Itr))
3318       return false;
3319 
3320     // Check that the extractvalue operand comes from the intrinsic.
3321     const auto *EVI = cast<ExtractValueInst>(Itr);
3322     if (EVI->getAggregateOperand() != II)
3323       return false;
3324   }
3325 
3326   CC = TmpCC;
3327   return true;
3328 }
3329 
fastLowerIntrinsicCall(const IntrinsicInst * II)3330 bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
3331   // FIXME: Handle more intrinsics.
3332   switch (II->getIntrinsicID()) {
3333   default: return false;
3334   case Intrinsic::frameaddress: {
3335     MachineFrameInfo *MFI = FuncInfo.MF->getFrameInfo();
3336     MFI->setFrameAddressIsTaken(true);
3337 
3338     const AArch64RegisterInfo *RegInfo =
3339         static_cast<const AArch64RegisterInfo *>(Subtarget->getRegisterInfo());
3340     unsigned FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF));
3341     unsigned SrcReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3342     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3343             TII.get(TargetOpcode::COPY), SrcReg).addReg(FramePtr);
3344     // Recursively load frame address
3345     // ldr x0, [fp]
3346     // ldr x0, [x0]
3347     // ldr x0, [x0]
3348     // ...
3349     unsigned DestReg;
3350     unsigned Depth = cast<ConstantInt>(II->getOperand(0))->getZExtValue();
3351     while (Depth--) {
3352       DestReg = fastEmitInst_ri(AArch64::LDRXui, &AArch64::GPR64RegClass,
3353                                 SrcReg, /*IsKill=*/true, 0);
3354       assert(DestReg && "Unexpected LDR instruction emission failure.");
3355       SrcReg = DestReg;
3356     }
3357 
3358     updateValueMap(II, SrcReg);
3359     return true;
3360   }
3361   case Intrinsic::memcpy:
3362   case Intrinsic::memmove: {
3363     const auto *MTI = cast<MemTransferInst>(II);
3364     // Don't handle volatile.
3365     if (MTI->isVolatile())
3366       return false;
3367 
3368     // Disable inlining for memmove before calls to ComputeAddress.  Otherwise,
3369     // we would emit dead code because we don't currently handle memmoves.
3370     bool IsMemCpy = (II->getIntrinsicID() == Intrinsic::memcpy);
3371     if (isa<ConstantInt>(MTI->getLength()) && IsMemCpy) {
3372       // Small memcpy's are common enough that we want to do them without a call
3373       // if possible.
3374       uint64_t Len = cast<ConstantInt>(MTI->getLength())->getZExtValue();
3375       unsigned Alignment = MTI->getAlignment();
3376       if (isMemCpySmall(Len, Alignment)) {
3377         Address Dest, Src;
3378         if (!computeAddress(MTI->getRawDest(), Dest) ||
3379             !computeAddress(MTI->getRawSource(), Src))
3380           return false;
3381         if (tryEmitSmallMemCpy(Dest, Src, Len, Alignment))
3382           return true;
3383       }
3384     }
3385 
3386     if (!MTI->getLength()->getType()->isIntegerTy(64))
3387       return false;
3388 
3389     if (MTI->getSourceAddressSpace() > 255 || MTI->getDestAddressSpace() > 255)
3390       // Fast instruction selection doesn't support the special
3391       // address spaces.
3392       return false;
3393 
3394     const char *IntrMemName = isa<MemCpyInst>(II) ? "memcpy" : "memmove";
3395     return lowerCallTo(II, IntrMemName, II->getNumArgOperands() - 2);
3396   }
3397   case Intrinsic::memset: {
3398     const MemSetInst *MSI = cast<MemSetInst>(II);
3399     // Don't handle volatile.
3400     if (MSI->isVolatile())
3401       return false;
3402 
3403     if (!MSI->getLength()->getType()->isIntegerTy(64))
3404       return false;
3405 
3406     if (MSI->getDestAddressSpace() > 255)
3407       // Fast instruction selection doesn't support the special
3408       // address spaces.
3409       return false;
3410 
3411     return lowerCallTo(II, "memset", II->getNumArgOperands() - 2);
3412   }
3413   case Intrinsic::sin:
3414   case Intrinsic::cos:
3415   case Intrinsic::pow: {
3416     MVT RetVT;
3417     if (!isTypeLegal(II->getType(), RetVT))
3418       return false;
3419 
3420     if (RetVT != MVT::f32 && RetVT != MVT::f64)
3421       return false;
3422 
3423     static const RTLIB::Libcall LibCallTable[3][2] = {
3424       { RTLIB::SIN_F32, RTLIB::SIN_F64 },
3425       { RTLIB::COS_F32, RTLIB::COS_F64 },
3426       { RTLIB::POW_F32, RTLIB::POW_F64 }
3427     };
3428     RTLIB::Libcall LC;
3429     bool Is64Bit = RetVT == MVT::f64;
3430     switch (II->getIntrinsicID()) {
3431     default:
3432       llvm_unreachable("Unexpected intrinsic.");
3433     case Intrinsic::sin:
3434       LC = LibCallTable[0][Is64Bit];
3435       break;
3436     case Intrinsic::cos:
3437       LC = LibCallTable[1][Is64Bit];
3438       break;
3439     case Intrinsic::pow:
3440       LC = LibCallTable[2][Is64Bit];
3441       break;
3442     }
3443 
3444     ArgListTy Args;
3445     Args.reserve(II->getNumArgOperands());
3446 
3447     // Populate the argument list.
3448     for (auto &Arg : II->arg_operands()) {
3449       ArgListEntry Entry;
3450       Entry.Val = Arg;
3451       Entry.Ty = Arg->getType();
3452       Args.push_back(Entry);
3453     }
3454 
3455     CallLoweringInfo CLI;
3456     MCContext &Ctx = MF->getContext();
3457     CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), II->getType(),
3458                   TLI.getLibcallName(LC), std::move(Args));
3459     if (!lowerCallTo(CLI))
3460       return false;
3461     updateValueMap(II, CLI.ResultReg);
3462     return true;
3463   }
3464   case Intrinsic::fabs: {
3465     MVT VT;
3466     if (!isTypeLegal(II->getType(), VT))
3467       return false;
3468 
3469     unsigned Opc;
3470     switch (VT.SimpleTy) {
3471     default:
3472       return false;
3473     case MVT::f32:
3474       Opc = AArch64::FABSSr;
3475       break;
3476     case MVT::f64:
3477       Opc = AArch64::FABSDr;
3478       break;
3479     }
3480     unsigned SrcReg = getRegForValue(II->getOperand(0));
3481     if (!SrcReg)
3482       return false;
3483     bool SrcRegIsKill = hasTrivialKill(II->getOperand(0));
3484     unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
3485     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
3486       .addReg(SrcReg, getKillRegState(SrcRegIsKill));
3487     updateValueMap(II, ResultReg);
3488     return true;
3489   }
3490   case Intrinsic::trap: {
3491     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BRK))
3492         .addImm(1);
3493     return true;
3494   }
3495   case Intrinsic::sqrt: {
3496     Type *RetTy = II->getCalledFunction()->getReturnType();
3497 
3498     MVT VT;
3499     if (!isTypeLegal(RetTy, VT))
3500       return false;
3501 
3502     unsigned Op0Reg = getRegForValue(II->getOperand(0));
3503     if (!Op0Reg)
3504       return false;
3505     bool Op0IsKill = hasTrivialKill(II->getOperand(0));
3506 
3507     unsigned ResultReg = fastEmit_r(VT, VT, ISD::FSQRT, Op0Reg, Op0IsKill);
3508     if (!ResultReg)
3509       return false;
3510 
3511     updateValueMap(II, ResultReg);
3512     return true;
3513   }
3514   case Intrinsic::sadd_with_overflow:
3515   case Intrinsic::uadd_with_overflow:
3516   case Intrinsic::ssub_with_overflow:
3517   case Intrinsic::usub_with_overflow:
3518   case Intrinsic::smul_with_overflow:
3519   case Intrinsic::umul_with_overflow: {
3520     // This implements the basic lowering of the xalu with overflow intrinsics.
3521     const Function *Callee = II->getCalledFunction();
3522     auto *Ty = cast<StructType>(Callee->getReturnType());
3523     Type *RetTy = Ty->getTypeAtIndex(0U);
3524 
3525     MVT VT;
3526     if (!isTypeLegal(RetTy, VT))
3527       return false;
3528 
3529     if (VT != MVT::i32 && VT != MVT::i64)
3530       return false;
3531 
3532     const Value *LHS = II->getArgOperand(0);
3533     const Value *RHS = II->getArgOperand(1);
3534     // Canonicalize immediate to the RHS.
3535     if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) &&
3536         isCommutativeIntrinsic(II))
3537       std::swap(LHS, RHS);
3538 
3539     // Simplify multiplies.
3540     Intrinsic::ID IID = II->getIntrinsicID();
3541     switch (IID) {
3542     default:
3543       break;
3544     case Intrinsic::smul_with_overflow:
3545       if (const auto *C = dyn_cast<ConstantInt>(RHS))
3546         if (C->getValue() == 2) {
3547           IID = Intrinsic::sadd_with_overflow;
3548           RHS = LHS;
3549         }
3550       break;
3551     case Intrinsic::umul_with_overflow:
3552       if (const auto *C = dyn_cast<ConstantInt>(RHS))
3553         if (C->getValue() == 2) {
3554           IID = Intrinsic::uadd_with_overflow;
3555           RHS = LHS;
3556         }
3557       break;
3558     }
3559 
3560     unsigned ResultReg1 = 0, ResultReg2 = 0, MulReg = 0;
3561     AArch64CC::CondCode CC = AArch64CC::Invalid;
3562     switch (IID) {
3563     default: llvm_unreachable("Unexpected intrinsic!");
3564     case Intrinsic::sadd_with_overflow:
3565       ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
3566       CC = AArch64CC::VS;
3567       break;
3568     case Intrinsic::uadd_with_overflow:
3569       ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
3570       CC = AArch64CC::HS;
3571       break;
3572     case Intrinsic::ssub_with_overflow:
3573       ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
3574       CC = AArch64CC::VS;
3575       break;
3576     case Intrinsic::usub_with_overflow:
3577       ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
3578       CC = AArch64CC::LO;
3579       break;
3580     case Intrinsic::smul_with_overflow: {
3581       CC = AArch64CC::NE;
3582       unsigned LHSReg = getRegForValue(LHS);
3583       if (!LHSReg)
3584         return false;
3585       bool LHSIsKill = hasTrivialKill(LHS);
3586 
3587       unsigned RHSReg = getRegForValue(RHS);
3588       if (!RHSReg)
3589         return false;
3590       bool RHSIsKill = hasTrivialKill(RHS);
3591 
3592       if (VT == MVT::i32) {
3593         MulReg = emitSMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
3594         unsigned ShiftReg = emitLSR_ri(MVT::i64, MVT::i64, MulReg,
3595                                        /*IsKill=*/false, 32);
3596         MulReg = fastEmitInst_extractsubreg(VT, MulReg, /*IsKill=*/true,
3597                                             AArch64::sub_32);
3598         ShiftReg = fastEmitInst_extractsubreg(VT, ShiftReg, /*IsKill=*/true,
3599                                               AArch64::sub_32);
3600         emitSubs_rs(VT, ShiftReg, /*IsKill=*/true, MulReg, /*IsKill=*/false,
3601                     AArch64_AM::ASR, 31, /*WantResult=*/false);
3602       } else {
3603         assert(VT == MVT::i64 && "Unexpected value type.");
3604         // LHSReg and RHSReg cannot be killed by this Mul, since they are
3605         // reused in the next instruction.
3606         MulReg = emitMul_rr(VT, LHSReg, /*IsKill=*/false, RHSReg,
3607                             /*IsKill=*/false);
3608         unsigned SMULHReg = fastEmit_rr(VT, VT, ISD::MULHS, LHSReg, LHSIsKill,
3609                                         RHSReg, RHSIsKill);
3610         emitSubs_rs(VT, SMULHReg, /*IsKill=*/true, MulReg, /*IsKill=*/false,
3611                     AArch64_AM::ASR, 63, /*WantResult=*/false);
3612       }
3613       break;
3614     }
3615     case Intrinsic::umul_with_overflow: {
3616       CC = AArch64CC::NE;
3617       unsigned LHSReg = getRegForValue(LHS);
3618       if (!LHSReg)
3619         return false;
3620       bool LHSIsKill = hasTrivialKill(LHS);
3621 
3622       unsigned RHSReg = getRegForValue(RHS);
3623       if (!RHSReg)
3624         return false;
3625       bool RHSIsKill = hasTrivialKill(RHS);
3626 
3627       if (VT == MVT::i32) {
3628         MulReg = emitUMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
3629         emitSubs_rs(MVT::i64, AArch64::XZR, /*IsKill=*/true, MulReg,
3630                     /*IsKill=*/false, AArch64_AM::LSR, 32,
3631                     /*WantResult=*/false);
3632         MulReg = fastEmitInst_extractsubreg(VT, MulReg, /*IsKill=*/true,
3633                                             AArch64::sub_32);
3634       } else {
3635         assert(VT == MVT::i64 && "Unexpected value type.");
3636         // LHSReg and RHSReg cannot be killed by this Mul, since they are
3637         // reused in the next instruction.
3638         MulReg = emitMul_rr(VT, LHSReg, /*IsKill=*/false, RHSReg,
3639                             /*IsKill=*/false);
3640         unsigned UMULHReg = fastEmit_rr(VT, VT, ISD::MULHU, LHSReg, LHSIsKill,
3641                                         RHSReg, RHSIsKill);
3642         emitSubs_rr(VT, AArch64::XZR, /*IsKill=*/true, UMULHReg,
3643                     /*IsKill=*/false, /*WantResult=*/false);
3644       }
3645       break;
3646     }
3647     }
3648 
3649     if (MulReg) {
3650       ResultReg1 = createResultReg(TLI.getRegClassFor(VT));
3651       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3652               TII.get(TargetOpcode::COPY), ResultReg1).addReg(MulReg);
3653     }
3654 
3655     ResultReg2 = fastEmitInst_rri(AArch64::CSINCWr, &AArch64::GPR32RegClass,
3656                                   AArch64::WZR, /*IsKill=*/true, AArch64::WZR,
3657                                   /*IsKill=*/true, getInvertedCondCode(CC));
3658     (void)ResultReg2;
3659     assert((ResultReg1 + 1) == ResultReg2 &&
3660            "Nonconsecutive result registers.");
3661     updateValueMap(II, ResultReg1, 2);
3662     return true;
3663   }
3664   }
3665   return false;
3666 }
3667 
selectRet(const Instruction * I)3668 bool AArch64FastISel::selectRet(const Instruction *I) {
3669   const ReturnInst *Ret = cast<ReturnInst>(I);
3670   const Function &F = *I->getParent()->getParent();
3671 
3672   if (!FuncInfo.CanLowerReturn)
3673     return false;
3674 
3675   if (F.isVarArg())
3676     return false;
3677 
3678   if (TLI.supportSwiftError() &&
3679       F.getAttributes().hasAttrSomewhere(Attribute::SwiftError))
3680     return false;
3681 
3682   if (TLI.supportSplitCSR(FuncInfo.MF))
3683     return false;
3684 
3685   // Build a list of return value registers.
3686   SmallVector<unsigned, 4> RetRegs;
3687 
3688   if (Ret->getNumOperands() > 0) {
3689     CallingConv::ID CC = F.getCallingConv();
3690     SmallVector<ISD::OutputArg, 4> Outs;
3691     GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI, DL);
3692 
3693     // Analyze operands of the call, assigning locations to each operand.
3694     SmallVector<CCValAssign, 16> ValLocs;
3695     CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext());
3696     CCAssignFn *RetCC = CC == CallingConv::WebKit_JS ? RetCC_AArch64_WebKit_JS
3697                                                      : RetCC_AArch64_AAPCS;
3698     CCInfo.AnalyzeReturn(Outs, RetCC);
3699 
3700     // Only handle a single return value for now.
3701     if (ValLocs.size() != 1)
3702       return false;
3703 
3704     CCValAssign &VA = ValLocs[0];
3705     const Value *RV = Ret->getOperand(0);
3706 
3707     // Don't bother handling odd stuff for now.
3708     if ((VA.getLocInfo() != CCValAssign::Full) &&
3709         (VA.getLocInfo() != CCValAssign::BCvt))
3710       return false;
3711 
3712     // Only handle register returns for now.
3713     if (!VA.isRegLoc())
3714       return false;
3715 
3716     unsigned Reg = getRegForValue(RV);
3717     if (Reg == 0)
3718       return false;
3719 
3720     unsigned SrcReg = Reg + VA.getValNo();
3721     unsigned DestReg = VA.getLocReg();
3722     // Avoid a cross-class copy. This is very unlikely.
3723     if (!MRI.getRegClass(SrcReg)->contains(DestReg))
3724       return false;
3725 
3726     EVT RVEVT = TLI.getValueType(DL, RV->getType());
3727     if (!RVEVT.isSimple())
3728       return false;
3729 
3730     // Vectors (of > 1 lane) in big endian need tricky handling.
3731     if (RVEVT.isVector() && RVEVT.getVectorNumElements() > 1 &&
3732         !Subtarget->isLittleEndian())
3733       return false;
3734 
3735     MVT RVVT = RVEVT.getSimpleVT();
3736     if (RVVT == MVT::f128)
3737       return false;
3738 
3739     MVT DestVT = VA.getValVT();
3740     // Special handling for extended integers.
3741     if (RVVT != DestVT) {
3742       if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16)
3743         return false;
3744 
3745       if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt())
3746         return false;
3747 
3748       bool IsZExt = Outs[0].Flags.isZExt();
3749       SrcReg = emitIntExt(RVVT, SrcReg, DestVT, IsZExt);
3750       if (SrcReg == 0)
3751         return false;
3752     }
3753 
3754     // Make the copy.
3755     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3756             TII.get(TargetOpcode::COPY), DestReg).addReg(SrcReg);
3757 
3758     // Add register to return instruction.
3759     RetRegs.push_back(VA.getLocReg());
3760   }
3761 
3762   MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3763                                     TII.get(AArch64::RET_ReallyLR));
3764   for (unsigned RetReg : RetRegs)
3765     MIB.addReg(RetReg, RegState::Implicit);
3766   return true;
3767 }
3768 
selectTrunc(const Instruction * I)3769 bool AArch64FastISel::selectTrunc(const Instruction *I) {
3770   Type *DestTy = I->getType();
3771   Value *Op = I->getOperand(0);
3772   Type *SrcTy = Op->getType();
3773 
3774   EVT SrcEVT = TLI.getValueType(DL, SrcTy, true);
3775   EVT DestEVT = TLI.getValueType(DL, DestTy, true);
3776   if (!SrcEVT.isSimple())
3777     return false;
3778   if (!DestEVT.isSimple())
3779     return false;
3780 
3781   MVT SrcVT = SrcEVT.getSimpleVT();
3782   MVT DestVT = DestEVT.getSimpleVT();
3783 
3784   if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16 &&
3785       SrcVT != MVT::i8)
3786     return false;
3787   if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8 &&
3788       DestVT != MVT::i1)
3789     return false;
3790 
3791   unsigned SrcReg = getRegForValue(Op);
3792   if (!SrcReg)
3793     return false;
3794   bool SrcIsKill = hasTrivialKill(Op);
3795 
3796   // If we're truncating from i64 to a smaller non-legal type then generate an
3797   // AND. Otherwise, we know the high bits are undefined and a truncate only
3798   // generate a COPY. We cannot mark the source register also as result
3799   // register, because this can incorrectly transfer the kill flag onto the
3800   // source register.
3801   unsigned ResultReg;
3802   if (SrcVT == MVT::i64) {
3803     uint64_t Mask = 0;
3804     switch (DestVT.SimpleTy) {
3805     default:
3806       // Trunc i64 to i32 is handled by the target-independent fast-isel.
3807       return false;
3808     case MVT::i1:
3809       Mask = 0x1;
3810       break;
3811     case MVT::i8:
3812       Mask = 0xff;
3813       break;
3814     case MVT::i16:
3815       Mask = 0xffff;
3816       break;
3817     }
3818     // Issue an extract_subreg to get the lower 32-bits.
3819     unsigned Reg32 = fastEmitInst_extractsubreg(MVT::i32, SrcReg, SrcIsKill,
3820                                                 AArch64::sub_32);
3821     // Create the AND instruction which performs the actual truncation.
3822     ResultReg = emitAnd_ri(MVT::i32, Reg32, /*IsKill=*/true, Mask);
3823     assert(ResultReg && "Unexpected AND instruction emission failure.");
3824   } else {
3825     ResultReg = createResultReg(&AArch64::GPR32RegClass);
3826     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3827             TII.get(TargetOpcode::COPY), ResultReg)
3828         .addReg(SrcReg, getKillRegState(SrcIsKill));
3829   }
3830 
3831   updateValueMap(I, ResultReg);
3832   return true;
3833 }
3834 
emiti1Ext(unsigned SrcReg,MVT DestVT,bool IsZExt)3835 unsigned AArch64FastISel::emiti1Ext(unsigned SrcReg, MVT DestVT, bool IsZExt) {
3836   assert((DestVT == MVT::i8 || DestVT == MVT::i16 || DestVT == MVT::i32 ||
3837           DestVT == MVT::i64) &&
3838          "Unexpected value type.");
3839   // Handle i8 and i16 as i32.
3840   if (DestVT == MVT::i8 || DestVT == MVT::i16)
3841     DestVT = MVT::i32;
3842 
3843   if (IsZExt) {
3844     unsigned ResultReg = emitAnd_ri(MVT::i32, SrcReg, /*TODO:IsKill=*/false, 1);
3845     assert(ResultReg && "Unexpected AND instruction emission failure.");
3846     if (DestVT == MVT::i64) {
3847       // We're ZExt i1 to i64.  The ANDWri Wd, Ws, #1 implicitly clears the
3848       // upper 32 bits.  Emit a SUBREG_TO_REG to extend from Wd to Xd.
3849       unsigned Reg64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3850       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3851               TII.get(AArch64::SUBREG_TO_REG), Reg64)
3852           .addImm(0)
3853           .addReg(ResultReg)
3854           .addImm(AArch64::sub_32);
3855       ResultReg = Reg64;
3856     }
3857     return ResultReg;
3858   } else {
3859     if (DestVT == MVT::i64) {
3860       // FIXME: We're SExt i1 to i64.
3861       return 0;
3862     }
3863     return fastEmitInst_rii(AArch64::SBFMWri, &AArch64::GPR32RegClass, SrcReg,
3864                             /*TODO:IsKill=*/false, 0, 0);
3865   }
3866 }
3867 
emitMul_rr(MVT RetVT,unsigned Op0,bool Op0IsKill,unsigned Op1,bool Op1IsKill)3868 unsigned AArch64FastISel::emitMul_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
3869                                       unsigned Op1, bool Op1IsKill) {
3870   unsigned Opc, ZReg;
3871   switch (RetVT.SimpleTy) {
3872   default: return 0;
3873   case MVT::i8:
3874   case MVT::i16:
3875   case MVT::i32:
3876     RetVT = MVT::i32;
3877     Opc = AArch64::MADDWrrr; ZReg = AArch64::WZR; break;
3878   case MVT::i64:
3879     Opc = AArch64::MADDXrrr; ZReg = AArch64::XZR; break;
3880   }
3881 
3882   const TargetRegisterClass *RC =
3883       (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
3884   return fastEmitInst_rrr(Opc, RC, Op0, Op0IsKill, Op1, Op1IsKill,
3885                           /*IsKill=*/ZReg, true);
3886 }
3887 
emitSMULL_rr(MVT RetVT,unsigned Op0,bool Op0IsKill,unsigned Op1,bool Op1IsKill)3888 unsigned AArch64FastISel::emitSMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
3889                                         unsigned Op1, bool Op1IsKill) {
3890   if (RetVT != MVT::i64)
3891     return 0;
3892 
3893   return fastEmitInst_rrr(AArch64::SMADDLrrr, &AArch64::GPR64RegClass,
3894                           Op0, Op0IsKill, Op1, Op1IsKill,
3895                           AArch64::XZR, /*IsKill=*/true);
3896 }
3897 
emitUMULL_rr(MVT RetVT,unsigned Op0,bool Op0IsKill,unsigned Op1,bool Op1IsKill)3898 unsigned AArch64FastISel::emitUMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
3899                                         unsigned Op1, bool Op1IsKill) {
3900   if (RetVT != MVT::i64)
3901     return 0;
3902 
3903   return fastEmitInst_rrr(AArch64::UMADDLrrr, &AArch64::GPR64RegClass,
3904                           Op0, Op0IsKill, Op1, Op1IsKill,
3905                           AArch64::XZR, /*IsKill=*/true);
3906 }
3907 
emitLSL_rr(MVT RetVT,unsigned Op0Reg,bool Op0IsKill,unsigned Op1Reg,bool Op1IsKill)3908 unsigned AArch64FastISel::emitLSL_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
3909                                      unsigned Op1Reg, bool Op1IsKill) {
3910   unsigned Opc = 0;
3911   bool NeedTrunc = false;
3912   uint64_t Mask = 0;
3913   switch (RetVT.SimpleTy) {
3914   default: return 0;
3915   case MVT::i8:  Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xff;   break;
3916   case MVT::i16: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xffff; break;
3917   case MVT::i32: Opc = AArch64::LSLVWr;                                  break;
3918   case MVT::i64: Opc = AArch64::LSLVXr;                                  break;
3919   }
3920 
3921   const TargetRegisterClass *RC =
3922       (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
3923   if (NeedTrunc) {
3924     Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
3925     Op1IsKill = true;
3926   }
3927   unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
3928                                        Op1IsKill);
3929   if (NeedTrunc)
3930     ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
3931   return ResultReg;
3932 }
3933 
emitLSL_ri(MVT RetVT,MVT SrcVT,unsigned Op0,bool Op0IsKill,uint64_t Shift,bool IsZExt)3934 unsigned AArch64FastISel::emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
3935                                      bool Op0IsKill, uint64_t Shift,
3936                                      bool IsZExt) {
3937   assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
3938          "Unexpected source/return type pair.");
3939   assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
3940           SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
3941          "Unexpected source value type.");
3942   assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
3943           RetVT == MVT::i64) && "Unexpected return value type.");
3944 
3945   bool Is64Bit = (RetVT == MVT::i64);
3946   unsigned RegSize = Is64Bit ? 64 : 32;
3947   unsigned DstBits = RetVT.getSizeInBits();
3948   unsigned SrcBits = SrcVT.getSizeInBits();
3949   const TargetRegisterClass *RC =
3950       Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
3951 
3952   // Just emit a copy for "zero" shifts.
3953   if (Shift == 0) {
3954     if (RetVT == SrcVT) {
3955       unsigned ResultReg = createResultReg(RC);
3956       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3957               TII.get(TargetOpcode::COPY), ResultReg)
3958           .addReg(Op0, getKillRegState(Op0IsKill));
3959       return ResultReg;
3960     } else
3961       return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
3962   }
3963 
3964   // Don't deal with undefined shifts.
3965   if (Shift >= DstBits)
3966     return 0;
3967 
3968   // For immediate shifts we can fold the zero-/sign-extension into the shift.
3969   // {S|U}BFM Wd, Wn, #r, #s
3970   // Wd<32+s-r,32-r> = Wn<s:0> when r > s
3971 
3972   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
3973   // %2 = shl i16 %1, 4
3974   // Wd<32+7-28,32-28> = Wn<7:0> <- clamp s to 7
3975   // 0b1111_1111_1111_1111__1111_1010_1010_0000 sext
3976   // 0b0000_0000_0000_0000__0000_0101_0101_0000 sext | zext
3977   // 0b0000_0000_0000_0000__0000_1010_1010_0000 zext
3978 
3979   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
3980   // %2 = shl i16 %1, 8
3981   // Wd<32+7-24,32-24> = Wn<7:0>
3982   // 0b1111_1111_1111_1111__1010_1010_0000_0000 sext
3983   // 0b0000_0000_0000_0000__0101_0101_0000_0000 sext | zext
3984   // 0b0000_0000_0000_0000__1010_1010_0000_0000 zext
3985 
3986   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
3987   // %2 = shl i16 %1, 12
3988   // Wd<32+3-20,32-20> = Wn<3:0>
3989   // 0b1111_1111_1111_1111__1010_0000_0000_0000 sext
3990   // 0b0000_0000_0000_0000__0101_0000_0000_0000 sext | zext
3991   // 0b0000_0000_0000_0000__1010_0000_0000_0000 zext
3992 
3993   unsigned ImmR = RegSize - Shift;
3994   // Limit the width to the length of the source type.
3995   unsigned ImmS = std::min<unsigned>(SrcBits - 1, DstBits - 1 - Shift);
3996   static const unsigned OpcTable[2][2] = {
3997     {AArch64::SBFMWri, AArch64::SBFMXri},
3998     {AArch64::UBFMWri, AArch64::UBFMXri}
3999   };
4000   unsigned Opc = OpcTable[IsZExt][Is64Bit];
4001   if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4002     unsigned TmpReg = MRI.createVirtualRegister(RC);
4003     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4004             TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4005         .addImm(0)
4006         .addReg(Op0, getKillRegState(Op0IsKill))
4007         .addImm(AArch64::sub_32);
4008     Op0 = TmpReg;
4009     Op0IsKill = true;
4010   }
4011   return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS);
4012 }
4013 
emitLSR_rr(MVT RetVT,unsigned Op0Reg,bool Op0IsKill,unsigned Op1Reg,bool Op1IsKill)4014 unsigned AArch64FastISel::emitLSR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
4015                                      unsigned Op1Reg, bool Op1IsKill) {
4016   unsigned Opc = 0;
4017   bool NeedTrunc = false;
4018   uint64_t Mask = 0;
4019   switch (RetVT.SimpleTy) {
4020   default: return 0;
4021   case MVT::i8:  Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xff;   break;
4022   case MVT::i16: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xffff; break;
4023   case MVT::i32: Opc = AArch64::LSRVWr; break;
4024   case MVT::i64: Opc = AArch64::LSRVXr; break;
4025   }
4026 
4027   const TargetRegisterClass *RC =
4028       (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4029   if (NeedTrunc) {
4030     Op0Reg = emitAnd_ri(MVT::i32, Op0Reg, Op0IsKill, Mask);
4031     Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
4032     Op0IsKill = Op1IsKill = true;
4033   }
4034   unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
4035                                        Op1IsKill);
4036   if (NeedTrunc)
4037     ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
4038   return ResultReg;
4039 }
4040 
emitLSR_ri(MVT RetVT,MVT SrcVT,unsigned Op0,bool Op0IsKill,uint64_t Shift,bool IsZExt)4041 unsigned AArch64FastISel::emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
4042                                      bool Op0IsKill, uint64_t Shift,
4043                                      bool IsZExt) {
4044   assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4045          "Unexpected source/return type pair.");
4046   assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4047           SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4048          "Unexpected source value type.");
4049   assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4050           RetVT == MVT::i64) && "Unexpected return value type.");
4051 
4052   bool Is64Bit = (RetVT == MVT::i64);
4053   unsigned RegSize = Is64Bit ? 64 : 32;
4054   unsigned DstBits = RetVT.getSizeInBits();
4055   unsigned SrcBits = SrcVT.getSizeInBits();
4056   const TargetRegisterClass *RC =
4057       Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4058 
4059   // Just emit a copy for "zero" shifts.
4060   if (Shift == 0) {
4061     if (RetVT == SrcVT) {
4062       unsigned ResultReg = createResultReg(RC);
4063       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4064               TII.get(TargetOpcode::COPY), ResultReg)
4065       .addReg(Op0, getKillRegState(Op0IsKill));
4066       return ResultReg;
4067     } else
4068       return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4069   }
4070 
4071   // Don't deal with undefined shifts.
4072   if (Shift >= DstBits)
4073     return 0;
4074 
4075   // For immediate shifts we can fold the zero-/sign-extension into the shift.
4076   // {S|U}BFM Wd, Wn, #r, #s
4077   // Wd<s-r:0> = Wn<s:r> when r <= s
4078 
4079   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4080   // %2 = lshr i16 %1, 4
4081   // Wd<7-4:0> = Wn<7:4>
4082   // 0b0000_0000_0000_0000__0000_1111_1111_1010 sext
4083   // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
4084   // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext
4085 
4086   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4087   // %2 = lshr i16 %1, 8
4088   // Wd<7-7,0> = Wn<7:7>
4089   // 0b0000_0000_0000_0000__0000_0000_1111_1111 sext
4090   // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4091   // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4092 
4093   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4094   // %2 = lshr i16 %1, 12
4095   // Wd<7-7,0> = Wn<7:7> <- clamp r to 7
4096   // 0b0000_0000_0000_0000__0000_0000_0000_1111 sext
4097   // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4098   // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4099 
4100   if (Shift >= SrcBits && IsZExt)
4101     return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);
4102 
4103   // It is not possible to fold a sign-extend into the LShr instruction. In this
4104   // case emit a sign-extend.
4105   if (!IsZExt) {
4106     Op0 = emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4107     if (!Op0)
4108       return 0;
4109     Op0IsKill = true;
4110     SrcVT = RetVT;
4111     SrcBits = SrcVT.getSizeInBits();
4112     IsZExt = true;
4113   }
4114 
4115   unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
4116   unsigned ImmS = SrcBits - 1;
4117   static const unsigned OpcTable[2][2] = {
4118     {AArch64::SBFMWri, AArch64::SBFMXri},
4119     {AArch64::UBFMWri, AArch64::UBFMXri}
4120   };
4121   unsigned Opc = OpcTable[IsZExt][Is64Bit];
4122   if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4123     unsigned TmpReg = MRI.createVirtualRegister(RC);
4124     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4125             TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4126         .addImm(0)
4127         .addReg(Op0, getKillRegState(Op0IsKill))
4128         .addImm(AArch64::sub_32);
4129     Op0 = TmpReg;
4130     Op0IsKill = true;
4131   }
4132   return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS);
4133 }
4134 
emitASR_rr(MVT RetVT,unsigned Op0Reg,bool Op0IsKill,unsigned Op1Reg,bool Op1IsKill)4135 unsigned AArch64FastISel::emitASR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
4136                                      unsigned Op1Reg, bool Op1IsKill) {
4137   unsigned Opc = 0;
4138   bool NeedTrunc = false;
4139   uint64_t Mask = 0;
4140   switch (RetVT.SimpleTy) {
4141   default: return 0;
4142   case MVT::i8:  Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xff;   break;
4143   case MVT::i16: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xffff; break;
4144   case MVT::i32: Opc = AArch64::ASRVWr;                                  break;
4145   case MVT::i64: Opc = AArch64::ASRVXr;                                  break;
4146   }
4147 
4148   const TargetRegisterClass *RC =
4149       (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4150   if (NeedTrunc) {
4151     Op0Reg = emitIntExt(RetVT, Op0Reg, MVT::i32, /*IsZExt=*/false);
4152     Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
4153     Op0IsKill = Op1IsKill = true;
4154   }
4155   unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
4156                                        Op1IsKill);
4157   if (NeedTrunc)
4158     ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
4159   return ResultReg;
4160 }
4161 
emitASR_ri(MVT RetVT,MVT SrcVT,unsigned Op0,bool Op0IsKill,uint64_t Shift,bool IsZExt)4162 unsigned AArch64FastISel::emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
4163                                      bool Op0IsKill, uint64_t Shift,
4164                                      bool IsZExt) {
4165   assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4166          "Unexpected source/return type pair.");
4167   assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4168           SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4169          "Unexpected source value type.");
4170   assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4171           RetVT == MVT::i64) && "Unexpected return value type.");
4172 
4173   bool Is64Bit = (RetVT == MVT::i64);
4174   unsigned RegSize = Is64Bit ? 64 : 32;
4175   unsigned DstBits = RetVT.getSizeInBits();
4176   unsigned SrcBits = SrcVT.getSizeInBits();
4177   const TargetRegisterClass *RC =
4178       Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4179 
4180   // Just emit a copy for "zero" shifts.
4181   if (Shift == 0) {
4182     if (RetVT == SrcVT) {
4183       unsigned ResultReg = createResultReg(RC);
4184       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4185               TII.get(TargetOpcode::COPY), ResultReg)
4186       .addReg(Op0, getKillRegState(Op0IsKill));
4187       return ResultReg;
4188     } else
4189       return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4190   }
4191 
4192   // Don't deal with undefined shifts.
4193   if (Shift >= DstBits)
4194     return 0;
4195 
4196   // For immediate shifts we can fold the zero-/sign-extension into the shift.
4197   // {S|U}BFM Wd, Wn, #r, #s
4198   // Wd<s-r:0> = Wn<s:r> when r <= s
4199 
4200   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4201   // %2 = ashr i16 %1, 4
4202   // Wd<7-4:0> = Wn<7:4>
4203   // 0b1111_1111_1111_1111__1111_1111_1111_1010 sext
4204   // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
4205   // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext
4206 
4207   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4208   // %2 = ashr i16 %1, 8
4209   // Wd<7-7,0> = Wn<7:7>
4210   // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
4211   // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4212   // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4213 
4214   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4215   // %2 = ashr i16 %1, 12
4216   // Wd<7-7,0> = Wn<7:7> <- clamp r to 7
4217   // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
4218   // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4219   // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4220 
4221   if (Shift >= SrcBits && IsZExt)
4222     return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);
4223 
4224   unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
4225   unsigned ImmS = SrcBits - 1;
4226   static const unsigned OpcTable[2][2] = {
4227     {AArch64::SBFMWri, AArch64::SBFMXri},
4228     {AArch64::UBFMWri, AArch64::UBFMXri}
4229   };
4230   unsigned Opc = OpcTable[IsZExt][Is64Bit];
4231   if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4232     unsigned TmpReg = MRI.createVirtualRegister(RC);
4233     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4234             TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4235         .addImm(0)
4236         .addReg(Op0, getKillRegState(Op0IsKill))
4237         .addImm(AArch64::sub_32);
4238     Op0 = TmpReg;
4239     Op0IsKill = true;
4240   }
4241   return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS);
4242 }
4243 
emitIntExt(MVT SrcVT,unsigned SrcReg,MVT DestVT,bool IsZExt)4244 unsigned AArch64FastISel::emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
4245                                      bool IsZExt) {
4246   assert(DestVT != MVT::i1 && "ZeroExt/SignExt an i1?");
4247 
4248   // FastISel does not have plumbing to deal with extensions where the SrcVT or
4249   // DestVT are odd things, so test to make sure that they are both types we can
4250   // handle (i1/i8/i16/i32 for SrcVT and i8/i16/i32/i64 for DestVT), otherwise
4251   // bail out to SelectionDAG.
4252   if (((DestVT != MVT::i8) && (DestVT != MVT::i16) &&
4253        (DestVT != MVT::i32) && (DestVT != MVT::i64)) ||
4254       ((SrcVT !=  MVT::i1) && (SrcVT !=  MVT::i8) &&
4255        (SrcVT !=  MVT::i16) && (SrcVT !=  MVT::i32)))
4256     return 0;
4257 
4258   unsigned Opc;
4259   unsigned Imm = 0;
4260 
4261   switch (SrcVT.SimpleTy) {
4262   default:
4263     return 0;
4264   case MVT::i1:
4265     return emiti1Ext(SrcReg, DestVT, IsZExt);
4266   case MVT::i8:
4267     if (DestVT == MVT::i64)
4268       Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4269     else
4270       Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
4271     Imm = 7;
4272     break;
4273   case MVT::i16:
4274     if (DestVT == MVT::i64)
4275       Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4276     else
4277       Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
4278     Imm = 15;
4279     break;
4280   case MVT::i32:
4281     assert(DestVT == MVT::i64 && "IntExt i32 to i32?!?");
4282     Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4283     Imm = 31;
4284     break;
4285   }
4286 
4287   // Handle i8 and i16 as i32.
4288   if (DestVT == MVT::i8 || DestVT == MVT::i16)
4289     DestVT = MVT::i32;
4290   else if (DestVT == MVT::i64) {
4291     unsigned Src64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
4292     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4293             TII.get(AArch64::SUBREG_TO_REG), Src64)
4294         .addImm(0)
4295         .addReg(SrcReg)
4296         .addImm(AArch64::sub_32);
4297     SrcReg = Src64;
4298   }
4299 
4300   const TargetRegisterClass *RC =
4301       (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4302   return fastEmitInst_rii(Opc, RC, SrcReg, /*TODO:IsKill=*/false, 0, Imm);
4303 }
4304 
isZExtLoad(const MachineInstr * LI)4305 static bool isZExtLoad(const MachineInstr *LI) {
4306   switch (LI->getOpcode()) {
4307   default:
4308     return false;
4309   case AArch64::LDURBBi:
4310   case AArch64::LDURHHi:
4311   case AArch64::LDURWi:
4312   case AArch64::LDRBBui:
4313   case AArch64::LDRHHui:
4314   case AArch64::LDRWui:
4315   case AArch64::LDRBBroX:
4316   case AArch64::LDRHHroX:
4317   case AArch64::LDRWroX:
4318   case AArch64::LDRBBroW:
4319   case AArch64::LDRHHroW:
4320   case AArch64::LDRWroW:
4321     return true;
4322   }
4323 }
4324 
isSExtLoad(const MachineInstr * LI)4325 static bool isSExtLoad(const MachineInstr *LI) {
4326   switch (LI->getOpcode()) {
4327   default:
4328     return false;
4329   case AArch64::LDURSBWi:
4330   case AArch64::LDURSHWi:
4331   case AArch64::LDURSBXi:
4332   case AArch64::LDURSHXi:
4333   case AArch64::LDURSWi:
4334   case AArch64::LDRSBWui:
4335   case AArch64::LDRSHWui:
4336   case AArch64::LDRSBXui:
4337   case AArch64::LDRSHXui:
4338   case AArch64::LDRSWui:
4339   case AArch64::LDRSBWroX:
4340   case AArch64::LDRSHWroX:
4341   case AArch64::LDRSBXroX:
4342   case AArch64::LDRSHXroX:
4343   case AArch64::LDRSWroX:
4344   case AArch64::LDRSBWroW:
4345   case AArch64::LDRSHWroW:
4346   case AArch64::LDRSBXroW:
4347   case AArch64::LDRSHXroW:
4348   case AArch64::LDRSWroW:
4349     return true;
4350   }
4351 }
4352 
optimizeIntExtLoad(const Instruction * I,MVT RetVT,MVT SrcVT)4353 bool AArch64FastISel::optimizeIntExtLoad(const Instruction *I, MVT RetVT,
4354                                          MVT SrcVT) {
4355   const auto *LI = dyn_cast<LoadInst>(I->getOperand(0));
4356   if (!LI || !LI->hasOneUse())
4357     return false;
4358 
4359   // Check if the load instruction has already been selected.
4360   unsigned Reg = lookUpRegForValue(LI);
4361   if (!Reg)
4362     return false;
4363 
4364   MachineInstr *MI = MRI.getUniqueVRegDef(Reg);
4365   if (!MI)
4366     return false;
4367 
4368   // Check if the correct load instruction has been emitted - SelectionDAG might
4369   // have emitted a zero-extending load, but we need a sign-extending load.
4370   bool IsZExt = isa<ZExtInst>(I);
4371   const auto *LoadMI = MI;
4372   if (LoadMI->getOpcode() == TargetOpcode::COPY &&
4373       LoadMI->getOperand(1).getSubReg() == AArch64::sub_32) {
4374     unsigned LoadReg = MI->getOperand(1).getReg();
4375     LoadMI = MRI.getUniqueVRegDef(LoadReg);
4376     assert(LoadMI && "Expected valid instruction");
4377   }
4378   if (!(IsZExt && isZExtLoad(LoadMI)) && !(!IsZExt && isSExtLoad(LoadMI)))
4379     return false;
4380 
4381   // Nothing to be done.
4382   if (RetVT != MVT::i64 || SrcVT > MVT::i32) {
4383     updateValueMap(I, Reg);
4384     return true;
4385   }
4386 
4387   if (IsZExt) {
4388     unsigned Reg64 = createResultReg(&AArch64::GPR64RegClass);
4389     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4390             TII.get(AArch64::SUBREG_TO_REG), Reg64)
4391         .addImm(0)
4392         .addReg(Reg, getKillRegState(true))
4393         .addImm(AArch64::sub_32);
4394     Reg = Reg64;
4395   } else {
4396     assert((MI->getOpcode() == TargetOpcode::COPY &&
4397             MI->getOperand(1).getSubReg() == AArch64::sub_32) &&
4398            "Expected copy instruction");
4399     Reg = MI->getOperand(1).getReg();
4400     MI->eraseFromParent();
4401   }
4402   updateValueMap(I, Reg);
4403   return true;
4404 }
4405 
selectIntExt(const Instruction * I)4406 bool AArch64FastISel::selectIntExt(const Instruction *I) {
4407   assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
4408          "Unexpected integer extend instruction.");
4409   MVT RetVT;
4410   MVT SrcVT;
4411   if (!isTypeSupported(I->getType(), RetVT))
4412     return false;
4413 
4414   if (!isTypeSupported(I->getOperand(0)->getType(), SrcVT))
4415     return false;
4416 
4417   // Try to optimize already sign-/zero-extended values from load instructions.
4418   if (optimizeIntExtLoad(I, RetVT, SrcVT))
4419     return true;
4420 
4421   unsigned SrcReg = getRegForValue(I->getOperand(0));
4422   if (!SrcReg)
4423     return false;
4424   bool SrcIsKill = hasTrivialKill(I->getOperand(0));
4425 
4426   // Try to optimize already sign-/zero-extended values from function arguments.
4427   bool IsZExt = isa<ZExtInst>(I);
4428   if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0))) {
4429     if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr())) {
4430       if (RetVT == MVT::i64 && SrcVT != MVT::i64) {
4431         unsigned ResultReg = createResultReg(&AArch64::GPR64RegClass);
4432         BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4433                 TII.get(AArch64::SUBREG_TO_REG), ResultReg)
4434             .addImm(0)
4435             .addReg(SrcReg, getKillRegState(SrcIsKill))
4436             .addImm(AArch64::sub_32);
4437         SrcReg = ResultReg;
4438       }
4439       // Conservatively clear all kill flags from all uses, because we are
4440       // replacing a sign-/zero-extend instruction at IR level with a nop at MI
4441       // level. The result of the instruction at IR level might have been
4442       // trivially dead, which is now not longer true.
4443       unsigned UseReg = lookUpRegForValue(I);
4444       if (UseReg)
4445         MRI.clearKillFlags(UseReg);
4446 
4447       updateValueMap(I, SrcReg);
4448       return true;
4449     }
4450   }
4451 
4452   unsigned ResultReg = emitIntExt(SrcVT, SrcReg, RetVT, IsZExt);
4453   if (!ResultReg)
4454     return false;
4455 
4456   updateValueMap(I, ResultReg);
4457   return true;
4458 }
4459 
selectRem(const Instruction * I,unsigned ISDOpcode)4460 bool AArch64FastISel::selectRem(const Instruction *I, unsigned ISDOpcode) {
4461   EVT DestEVT = TLI.getValueType(DL, I->getType(), true);
4462   if (!DestEVT.isSimple())
4463     return false;
4464 
4465   MVT DestVT = DestEVT.getSimpleVT();
4466   if (DestVT != MVT::i64 && DestVT != MVT::i32)
4467     return false;
4468 
4469   unsigned DivOpc;
4470   bool Is64bit = (DestVT == MVT::i64);
4471   switch (ISDOpcode) {
4472   default:
4473     return false;
4474   case ISD::SREM:
4475     DivOpc = Is64bit ? AArch64::SDIVXr : AArch64::SDIVWr;
4476     break;
4477   case ISD::UREM:
4478     DivOpc = Is64bit ? AArch64::UDIVXr : AArch64::UDIVWr;
4479     break;
4480   }
4481   unsigned MSubOpc = Is64bit ? AArch64::MSUBXrrr : AArch64::MSUBWrrr;
4482   unsigned Src0Reg = getRegForValue(I->getOperand(0));
4483   if (!Src0Reg)
4484     return false;
4485   bool Src0IsKill = hasTrivialKill(I->getOperand(0));
4486 
4487   unsigned Src1Reg = getRegForValue(I->getOperand(1));
4488   if (!Src1Reg)
4489     return false;
4490   bool Src1IsKill = hasTrivialKill(I->getOperand(1));
4491 
4492   const TargetRegisterClass *RC =
4493       (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4494   unsigned QuotReg = fastEmitInst_rr(DivOpc, RC, Src0Reg, /*IsKill=*/false,
4495                                      Src1Reg, /*IsKill=*/false);
4496   assert(QuotReg && "Unexpected DIV instruction emission failure.");
4497   // The remainder is computed as numerator - (quotient * denominator) using the
4498   // MSUB instruction.
4499   unsigned ResultReg = fastEmitInst_rrr(MSubOpc, RC, QuotReg, /*IsKill=*/true,
4500                                         Src1Reg, Src1IsKill, Src0Reg,
4501                                         Src0IsKill);
4502   updateValueMap(I, ResultReg);
4503   return true;
4504 }
4505 
selectMul(const Instruction * I)4506 bool AArch64FastISel::selectMul(const Instruction *I) {
4507   MVT VT;
4508   if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
4509     return false;
4510 
4511   if (VT.isVector())
4512     return selectBinaryOp(I, ISD::MUL);
4513 
4514   const Value *Src0 = I->getOperand(0);
4515   const Value *Src1 = I->getOperand(1);
4516   if (const auto *C = dyn_cast<ConstantInt>(Src0))
4517     if (C->getValue().isPowerOf2())
4518       std::swap(Src0, Src1);
4519 
4520   // Try to simplify to a shift instruction.
4521   if (const auto *C = dyn_cast<ConstantInt>(Src1))
4522     if (C->getValue().isPowerOf2()) {
4523       uint64_t ShiftVal = C->getValue().logBase2();
4524       MVT SrcVT = VT;
4525       bool IsZExt = true;
4526       if (const auto *ZExt = dyn_cast<ZExtInst>(Src0)) {
4527         if (!isIntExtFree(ZExt)) {
4528           MVT VT;
4529           if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), VT)) {
4530             SrcVT = VT;
4531             IsZExt = true;
4532             Src0 = ZExt->getOperand(0);
4533           }
4534         }
4535       } else if (const auto *SExt = dyn_cast<SExtInst>(Src0)) {
4536         if (!isIntExtFree(SExt)) {
4537           MVT VT;
4538           if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), VT)) {
4539             SrcVT = VT;
4540             IsZExt = false;
4541             Src0 = SExt->getOperand(0);
4542           }
4543         }
4544       }
4545 
4546       unsigned Src0Reg = getRegForValue(Src0);
4547       if (!Src0Reg)
4548         return false;
4549       bool Src0IsKill = hasTrivialKill(Src0);
4550 
4551       unsigned ResultReg =
4552           emitLSL_ri(VT, SrcVT, Src0Reg, Src0IsKill, ShiftVal, IsZExt);
4553 
4554       if (ResultReg) {
4555         updateValueMap(I, ResultReg);
4556         return true;
4557       }
4558     }
4559 
4560   unsigned Src0Reg = getRegForValue(I->getOperand(0));
4561   if (!Src0Reg)
4562     return false;
4563   bool Src0IsKill = hasTrivialKill(I->getOperand(0));
4564 
4565   unsigned Src1Reg = getRegForValue(I->getOperand(1));
4566   if (!Src1Reg)
4567     return false;
4568   bool Src1IsKill = hasTrivialKill(I->getOperand(1));
4569 
4570   unsigned ResultReg = emitMul_rr(VT, Src0Reg, Src0IsKill, Src1Reg, Src1IsKill);
4571 
4572   if (!ResultReg)
4573     return false;
4574 
4575   updateValueMap(I, ResultReg);
4576   return true;
4577 }
4578 
selectShift(const Instruction * I)4579 bool AArch64FastISel::selectShift(const Instruction *I) {
4580   MVT RetVT;
4581   if (!isTypeSupported(I->getType(), RetVT, /*IsVectorAllowed=*/true))
4582     return false;
4583 
4584   if (RetVT.isVector())
4585     return selectOperator(I, I->getOpcode());
4586 
4587   if (const auto *C = dyn_cast<ConstantInt>(I->getOperand(1))) {
4588     unsigned ResultReg = 0;
4589     uint64_t ShiftVal = C->getZExtValue();
4590     MVT SrcVT = RetVT;
4591     bool IsZExt = I->getOpcode() != Instruction::AShr;
4592     const Value *Op0 = I->getOperand(0);
4593     if (const auto *ZExt = dyn_cast<ZExtInst>(Op0)) {
4594       if (!isIntExtFree(ZExt)) {
4595         MVT TmpVT;
4596         if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), TmpVT)) {
4597           SrcVT = TmpVT;
4598           IsZExt = true;
4599           Op0 = ZExt->getOperand(0);
4600         }
4601       }
4602     } else if (const auto *SExt = dyn_cast<SExtInst>(Op0)) {
4603       if (!isIntExtFree(SExt)) {
4604         MVT TmpVT;
4605         if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), TmpVT)) {
4606           SrcVT = TmpVT;
4607           IsZExt = false;
4608           Op0 = SExt->getOperand(0);
4609         }
4610       }
4611     }
4612 
4613     unsigned Op0Reg = getRegForValue(Op0);
4614     if (!Op0Reg)
4615       return false;
4616     bool Op0IsKill = hasTrivialKill(Op0);
4617 
4618     switch (I->getOpcode()) {
4619     default: llvm_unreachable("Unexpected instruction.");
4620     case Instruction::Shl:
4621       ResultReg = emitLSL_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt);
4622       break;
4623     case Instruction::AShr:
4624       ResultReg = emitASR_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt);
4625       break;
4626     case Instruction::LShr:
4627       ResultReg = emitLSR_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt);
4628       break;
4629     }
4630     if (!ResultReg)
4631       return false;
4632 
4633     updateValueMap(I, ResultReg);
4634     return true;
4635   }
4636 
4637   unsigned Op0Reg = getRegForValue(I->getOperand(0));
4638   if (!Op0Reg)
4639     return false;
4640   bool Op0IsKill = hasTrivialKill(I->getOperand(0));
4641 
4642   unsigned Op1Reg = getRegForValue(I->getOperand(1));
4643   if (!Op1Reg)
4644     return false;
4645   bool Op1IsKill = hasTrivialKill(I->getOperand(1));
4646 
4647   unsigned ResultReg = 0;
4648   switch (I->getOpcode()) {
4649   default: llvm_unreachable("Unexpected instruction.");
4650   case Instruction::Shl:
4651     ResultReg = emitLSL_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
4652     break;
4653   case Instruction::AShr:
4654     ResultReg = emitASR_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
4655     break;
4656   case Instruction::LShr:
4657     ResultReg = emitLSR_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
4658     break;
4659   }
4660 
4661   if (!ResultReg)
4662     return false;
4663 
4664   updateValueMap(I, ResultReg);
4665   return true;
4666 }
4667 
selectBitCast(const Instruction * I)4668 bool AArch64FastISel::selectBitCast(const Instruction *I) {
4669   MVT RetVT, SrcVT;
4670 
4671   if (!isTypeLegal(I->getOperand(0)->getType(), SrcVT))
4672     return false;
4673   if (!isTypeLegal(I->getType(), RetVT))
4674     return false;
4675 
4676   unsigned Opc;
4677   if (RetVT == MVT::f32 && SrcVT == MVT::i32)
4678     Opc = AArch64::FMOVWSr;
4679   else if (RetVT == MVT::f64 && SrcVT == MVT::i64)
4680     Opc = AArch64::FMOVXDr;
4681   else if (RetVT == MVT::i32 && SrcVT == MVT::f32)
4682     Opc = AArch64::FMOVSWr;
4683   else if (RetVT == MVT::i64 && SrcVT == MVT::f64)
4684     Opc = AArch64::FMOVDXr;
4685   else
4686     return false;
4687 
4688   const TargetRegisterClass *RC = nullptr;
4689   switch (RetVT.SimpleTy) {
4690   default: llvm_unreachable("Unexpected value type.");
4691   case MVT::i32: RC = &AArch64::GPR32RegClass; break;
4692   case MVT::i64: RC = &AArch64::GPR64RegClass; break;
4693   case MVT::f32: RC = &AArch64::FPR32RegClass; break;
4694   case MVT::f64: RC = &AArch64::FPR64RegClass; break;
4695   }
4696   unsigned Op0Reg = getRegForValue(I->getOperand(0));
4697   if (!Op0Reg)
4698     return false;
4699   bool Op0IsKill = hasTrivialKill(I->getOperand(0));
4700   unsigned ResultReg = fastEmitInst_r(Opc, RC, Op0Reg, Op0IsKill);
4701 
4702   if (!ResultReg)
4703     return false;
4704 
4705   updateValueMap(I, ResultReg);
4706   return true;
4707 }
4708 
selectFRem(const Instruction * I)4709 bool AArch64FastISel::selectFRem(const Instruction *I) {
4710   MVT RetVT;
4711   if (!isTypeLegal(I->getType(), RetVT))
4712     return false;
4713 
4714   RTLIB::Libcall LC;
4715   switch (RetVT.SimpleTy) {
4716   default:
4717     return false;
4718   case MVT::f32:
4719     LC = RTLIB::REM_F32;
4720     break;
4721   case MVT::f64:
4722     LC = RTLIB::REM_F64;
4723     break;
4724   }
4725 
4726   ArgListTy Args;
4727   Args.reserve(I->getNumOperands());
4728 
4729   // Populate the argument list.
4730   for (auto &Arg : I->operands()) {
4731     ArgListEntry Entry;
4732     Entry.Val = Arg;
4733     Entry.Ty = Arg->getType();
4734     Args.push_back(Entry);
4735   }
4736 
4737   CallLoweringInfo CLI;
4738   MCContext &Ctx = MF->getContext();
4739   CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), I->getType(),
4740                 TLI.getLibcallName(LC), std::move(Args));
4741   if (!lowerCallTo(CLI))
4742     return false;
4743   updateValueMap(I, CLI.ResultReg);
4744   return true;
4745 }
4746 
selectSDiv(const Instruction * I)4747 bool AArch64FastISel::selectSDiv(const Instruction *I) {
4748   MVT VT;
4749   if (!isTypeLegal(I->getType(), VT))
4750     return false;
4751 
4752   if (!isa<ConstantInt>(I->getOperand(1)))
4753     return selectBinaryOp(I, ISD::SDIV);
4754 
4755   const APInt &C = cast<ConstantInt>(I->getOperand(1))->getValue();
4756   if ((VT != MVT::i32 && VT != MVT::i64) || !C ||
4757       !(C.isPowerOf2() || (-C).isPowerOf2()))
4758     return selectBinaryOp(I, ISD::SDIV);
4759 
4760   unsigned Lg2 = C.countTrailingZeros();
4761   unsigned Src0Reg = getRegForValue(I->getOperand(0));
4762   if (!Src0Reg)
4763     return false;
4764   bool Src0IsKill = hasTrivialKill(I->getOperand(0));
4765 
4766   if (cast<BinaryOperator>(I)->isExact()) {
4767     unsigned ResultReg = emitASR_ri(VT, VT, Src0Reg, Src0IsKill, Lg2);
4768     if (!ResultReg)
4769       return false;
4770     updateValueMap(I, ResultReg);
4771     return true;
4772   }
4773 
4774   int64_t Pow2MinusOne = (1ULL << Lg2) - 1;
4775   unsigned AddReg = emitAdd_ri_(VT, Src0Reg, /*IsKill=*/false, Pow2MinusOne);
4776   if (!AddReg)
4777     return false;
4778 
4779   // (Src0 < 0) ? Pow2 - 1 : 0;
4780   if (!emitICmp_ri(VT, Src0Reg, /*IsKill=*/false, 0))
4781     return false;
4782 
4783   unsigned SelectOpc;
4784   const TargetRegisterClass *RC;
4785   if (VT == MVT::i64) {
4786     SelectOpc = AArch64::CSELXr;
4787     RC = &AArch64::GPR64RegClass;
4788   } else {
4789     SelectOpc = AArch64::CSELWr;
4790     RC = &AArch64::GPR32RegClass;
4791   }
4792   unsigned SelectReg =
4793       fastEmitInst_rri(SelectOpc, RC, AddReg, /*IsKill=*/true, Src0Reg,
4794                        Src0IsKill, AArch64CC::LT);
4795   if (!SelectReg)
4796     return false;
4797 
4798   // Divide by Pow2 --> ashr. If we're dividing by a negative value we must also
4799   // negate the result.
4800   unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
4801   unsigned ResultReg;
4802   if (C.isNegative())
4803     ResultReg = emitAddSub_rs(/*UseAdd=*/false, VT, ZeroReg, /*IsKill=*/true,
4804                               SelectReg, /*IsKill=*/true, AArch64_AM::ASR, Lg2);
4805   else
4806     ResultReg = emitASR_ri(VT, VT, SelectReg, /*IsKill=*/true, Lg2);
4807 
4808   if (!ResultReg)
4809     return false;
4810 
4811   updateValueMap(I, ResultReg);
4812   return true;
4813 }
4814 
4815 /// This is mostly a copy of the existing FastISel getRegForGEPIndex code. We
4816 /// have to duplicate it for AArch64, because otherwise we would fail during the
4817 /// sign-extend emission.
getRegForGEPIndex(const Value * Idx)4818 std::pair<unsigned, bool> AArch64FastISel::getRegForGEPIndex(const Value *Idx) {
4819   unsigned IdxN = getRegForValue(Idx);
4820   if (IdxN == 0)
4821     // Unhandled operand. Halt "fast" selection and bail.
4822     return std::pair<unsigned, bool>(0, false);
4823 
4824   bool IdxNIsKill = hasTrivialKill(Idx);
4825 
4826   // If the index is smaller or larger than intptr_t, truncate or extend it.
4827   MVT PtrVT = TLI.getPointerTy(DL);
4828   EVT IdxVT = EVT::getEVT(Idx->getType(), /*HandleUnknown=*/false);
4829   if (IdxVT.bitsLT(PtrVT)) {
4830     IdxN = emitIntExt(IdxVT.getSimpleVT(), IdxN, PtrVT, /*IsZExt=*/false);
4831     IdxNIsKill = true;
4832   } else if (IdxVT.bitsGT(PtrVT))
4833     llvm_unreachable("AArch64 FastISel doesn't support types larger than i64");
4834   return std::pair<unsigned, bool>(IdxN, IdxNIsKill);
4835 }
4836 
4837 /// This is mostly a copy of the existing FastISel GEP code, but we have to
4838 /// duplicate it for AArch64, because otherwise we would bail out even for
4839 /// simple cases. This is because the standard fastEmit functions don't cover
4840 /// MUL at all and ADD is lowered very inefficientily.
selectGetElementPtr(const Instruction * I)4841 bool AArch64FastISel::selectGetElementPtr(const Instruction *I) {
4842   unsigned N = getRegForValue(I->getOperand(0));
4843   if (!N)
4844     return false;
4845   bool NIsKill = hasTrivialKill(I->getOperand(0));
4846 
4847   // Keep a running tab of the total offset to coalesce multiple N = N + Offset
4848   // into a single N = N + TotalOffset.
4849   uint64_t TotalOffs = 0;
4850   MVT VT = TLI.getPointerTy(DL);
4851   for (gep_type_iterator GTI = gep_type_begin(I), E = gep_type_end(I);
4852        GTI != E; ++GTI) {
4853     const Value *Idx = GTI.getOperand();
4854     if (auto *StTy = dyn_cast<StructType>(*GTI)) {
4855       unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
4856       // N = N + Offset
4857       if (Field)
4858         TotalOffs += DL.getStructLayout(StTy)->getElementOffset(Field);
4859     } else {
4860       Type *Ty = GTI.getIndexedType();
4861 
4862       // If this is a constant subscript, handle it quickly.
4863       if (const auto *CI = dyn_cast<ConstantInt>(Idx)) {
4864         if (CI->isZero())
4865           continue;
4866         // N = N + Offset
4867         TotalOffs +=
4868             DL.getTypeAllocSize(Ty) * cast<ConstantInt>(CI)->getSExtValue();
4869         continue;
4870       }
4871       if (TotalOffs) {
4872         N = emitAdd_ri_(VT, N, NIsKill, TotalOffs);
4873         if (!N)
4874           return false;
4875         NIsKill = true;
4876         TotalOffs = 0;
4877       }
4878 
4879       // N = N + Idx * ElementSize;
4880       uint64_t ElementSize = DL.getTypeAllocSize(Ty);
4881       std::pair<unsigned, bool> Pair = getRegForGEPIndex(Idx);
4882       unsigned IdxN = Pair.first;
4883       bool IdxNIsKill = Pair.second;
4884       if (!IdxN)
4885         return false;
4886 
4887       if (ElementSize != 1) {
4888         unsigned C = fastEmit_i(VT, VT, ISD::Constant, ElementSize);
4889         if (!C)
4890           return false;
4891         IdxN = emitMul_rr(VT, IdxN, IdxNIsKill, C, true);
4892         if (!IdxN)
4893           return false;
4894         IdxNIsKill = true;
4895       }
4896       N = fastEmit_rr(VT, VT, ISD::ADD, N, NIsKill, IdxN, IdxNIsKill);
4897       if (!N)
4898         return false;
4899     }
4900   }
4901   if (TotalOffs) {
4902     N = emitAdd_ri_(VT, N, NIsKill, TotalOffs);
4903     if (!N)
4904       return false;
4905   }
4906   updateValueMap(I, N);
4907   return true;
4908 }
4909 
fastSelectInstruction(const Instruction * I)4910 bool AArch64FastISel::fastSelectInstruction(const Instruction *I) {
4911   switch (I->getOpcode()) {
4912   default:
4913     break;
4914   case Instruction::Add:
4915   case Instruction::Sub:
4916     return selectAddSub(I);
4917   case Instruction::Mul:
4918     return selectMul(I);
4919   case Instruction::SDiv:
4920     return selectSDiv(I);
4921   case Instruction::SRem:
4922     if (!selectBinaryOp(I, ISD::SREM))
4923       return selectRem(I, ISD::SREM);
4924     return true;
4925   case Instruction::URem:
4926     if (!selectBinaryOp(I, ISD::UREM))
4927       return selectRem(I, ISD::UREM);
4928     return true;
4929   case Instruction::Shl:
4930   case Instruction::LShr:
4931   case Instruction::AShr:
4932     return selectShift(I);
4933   case Instruction::And:
4934   case Instruction::Or:
4935   case Instruction::Xor:
4936     return selectLogicalOp(I);
4937   case Instruction::Br:
4938     return selectBranch(I);
4939   case Instruction::IndirectBr:
4940     return selectIndirectBr(I);
4941   case Instruction::BitCast:
4942     if (!FastISel::selectBitCast(I))
4943       return selectBitCast(I);
4944     return true;
4945   case Instruction::FPToSI:
4946     if (!selectCast(I, ISD::FP_TO_SINT))
4947       return selectFPToInt(I, /*Signed=*/true);
4948     return true;
4949   case Instruction::FPToUI:
4950     return selectFPToInt(I, /*Signed=*/false);
4951   case Instruction::ZExt:
4952   case Instruction::SExt:
4953     return selectIntExt(I);
4954   case Instruction::Trunc:
4955     if (!selectCast(I, ISD::TRUNCATE))
4956       return selectTrunc(I);
4957     return true;
4958   case Instruction::FPExt:
4959     return selectFPExt(I);
4960   case Instruction::FPTrunc:
4961     return selectFPTrunc(I);
4962   case Instruction::SIToFP:
4963     if (!selectCast(I, ISD::SINT_TO_FP))
4964       return selectIntToFP(I, /*Signed=*/true);
4965     return true;
4966   case Instruction::UIToFP:
4967     return selectIntToFP(I, /*Signed=*/false);
4968   case Instruction::Load:
4969     return selectLoad(I);
4970   case Instruction::Store:
4971     return selectStore(I);
4972   case Instruction::FCmp:
4973   case Instruction::ICmp:
4974     return selectCmp(I);
4975   case Instruction::Select:
4976     return selectSelect(I);
4977   case Instruction::Ret:
4978     return selectRet(I);
4979   case Instruction::FRem:
4980     return selectFRem(I);
4981   case Instruction::GetElementPtr:
4982     return selectGetElementPtr(I);
4983   }
4984 
4985   // fall-back to target-independent instruction selection.
4986   return selectOperator(I, I->getOpcode());
4987   // Silence warnings.
4988   (void)&CC_AArch64_DarwinPCS_VarArg;
4989 }
4990 
4991 namespace llvm {
createFastISel(FunctionLoweringInfo & FuncInfo,const TargetLibraryInfo * LibInfo)4992 llvm::FastISel *AArch64::createFastISel(FunctionLoweringInfo &FuncInfo,
4993                                         const TargetLibraryInfo *LibInfo) {
4994   return new AArch64FastISel(FuncInfo, LibInfo);
4995 }
4996 }
4997